import io
import os
import re

import markdown2
import moviepy.editor as mp
import pandas as pd
def time_to_seconds(time_str):
    """Convert a colon-separated timestamp to a whole number of seconds.

    Accepts ``"MM:SS"`` (the original format) as well as ``"HH:MM:SS"`` and a
    bare ``"SS"``. Surrounding whitespace is ignored, and non-string values
    (e.g. a numeric table cell) are converted with ``str()`` first.

    Args:
        time_str: Timestamp such as ``"03:25"`` or ``"1:03:25"``.

    Returns:
        int: Total number of seconds represented by ``time_str``.

    Raises:
        ValueError: If there are more than three fields or a field is not an
            integer.
    """
    fields = str(time_str).strip().split(':')
    if len(fields) > 3:
        raise ValueError(
            f"Invalid time value {time_str!r}; expected MM:SS or HH:MM:SS."
        )
    # Fold the fields left to right in base 60 so the same loop handles
    # seconds-only, minutes:seconds and hours:minutes:seconds.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total
def parse_markdown_for_video_table(markdown_file, video_file_name):
    """Return the first table under the heading named after a video file.

    The markdown document is rendered to HTML with markdown2 (``tables``
    extra). The HTML is then searched for a level-4 heading whose content is
    exactly the video file's base name (directory and extension removed).
    Only the section between that heading and the next heading of level 1-4
    is scanned for tables.

    Args:
        markdown_file: Path of the markdown document to read.
        video_file_name: Path or name of the video file whose base name
            identifies the heading.

    Returns:
        pandas.DataFrame: The first table found in the matched section.

    Raises:
        ValueError: If no matching heading exists, or the matched section
            contains no table.
    """
    with open(markdown_file, 'r', encoding='utf-8') as f:
        markdown_content = f.read()

    # Convert markdown to HTML using markdown2
    html = markdown2.markdown(markdown_content, extras=['tables'])

    # Find the matching heading based on video file name
    video_base_name = os.path.splitext(os.path.basename(video_file_name))[0]
    # NOTE(review): this is an exact match against the rendered HTML, so any
    # inline markup markdown2 inserts into the heading text would prevent a
    # match - confirm against a real document.
    heading_pattern = f"<h4>{video_base_name}</h4>"

    # Keep only what follows the first occurrence of the heading.
    _, heading, section = html.partition(heading_pattern)
    if not heading:
        raise ValueError(f"No heading matching '{video_base_name}' found in the markdown file.")

    # Stop at the next heading of the same or a higher level so a table that
    # belongs to a later section is never returned for this video.
    next_heading = re.search(r"<h[1-4][\s>]", section)
    if next_heading:
        section = section[:next_heading.start()]

    # pd.read_html raises ValueError itself when the markup has no <table>;
    # translate that into the section-specific message below.
    try:
        tables = pd.read_html(io.StringIO(section))
    except ValueError:
        tables = []
    if not tables:
        raise ValueError(f"No tables found under heading '{video_base_name}' in the markdown file.")
    return tables[0]  # Return the first table under the matched heading
def split_video(video_file, markdown_file, output_dir):
    """Cut a video into the parts listed in its markdown table.

    The table is looked up with ``parse_markdown_for_video_table`` and must
    provide the columns ``Id``, ``Part Name``, ``Start`` and ``End``. Each row
    produces one file named ``<Id zero-padded to 2>_<sanitized Part Name>.mp4``
    inside ``output_dir``. Clips are re-encoded with libx264 video and AAC
    audio at the source clip's frame rate.

    Args:
        video_file: Path of the source video.
        markdown_file: Path of the markdown document holding the parts table.
        output_dir: Directory for the generated clips; created if missing.

    Raises:
        ValueError: Propagated from table lookup or from parsing a
            ``Start``/``End`` value.
    """
    # Parse the markdown file to extract the correct table based on the video file name
    table = parse_markdown_for_video_table(markdown_file, video_file)
    video = mp.VideoFileClip(video_file)
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Iterate over the rows of the table and split the video
        for _, row in table.iterrows():
            part_no = str(row['Id']).zfill(2)  # Pad part number with leading zero
            # str() guards against non-string cells, which re.sub would
            # reject with a TypeError.
            part_name = re.sub(r'\W+', '_', str(row['Part Name']))  # Sanitize file name
            start_time = time_to_seconds(row['Start'])
            end_time = time_to_seconds(row['End'])
            clip = video.subclip(start_time, end_time)

            # Create the output filename
            output_filename = f"{part_no}_{part_name}.mp4"
            output_filepath = os.path.join(output_dir, output_filename)

            # Write the clip, re-encoding at the source frame rate
            clip.write_videofile(output_filepath, codec='libx264', preset='slow', fps=video.fps,
                                 audio_codec='aac', temp_audiofile='temp-audio.m4a', remove_temp=True)
            print(f"Created {output_filepath}")
    finally:
        # Always release the source clip's reader, even when a row fails.
        video.close()
# Script driver: inputs for splitting one recording into its listed parts.
# NOTE: these statements sit at module level, so the split runs whenever this
# file is executed or imported.
markdown_file = "ACT_03-WIL_02.md"  # markdown document holding the parts table
video_file = "ACT_03-WIL_02.mp4"  # source video to cut
output_dir = "output_videos"  # destination folder for the generated clips
split_video(
    video_file=video_file,
    markdown_file=markdown_file,
    output_dir=output_dir,
)