edit-video-like-sheet / helpers.py
otmanheddouch's picture
Upload 6 files
02b8bbc
from faster_whisper import WhisperModel # use WhisperModel for transcribe
import moviepy.editor as mp # moviepy for editing the video
import re
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips
## Helper functions
def load_model(model_size="medium"):
"""
Load the model
"""
model = WhisperModel(model_size)
return model
def transribe(video_path, model, audio_path = 'audio.wav'):
"""
Transcribe the video into mapped segments.
Parametres:
------
video_path : the path of the video to be transcribe
model : the model we will use to extract the script from the video
audio_path : path of the audio to be exported to
"""
# Load the video
video = mp.VideoFileClip(video_path)
# Extract the audio from the video
audio_file = video.audio
audio_file.write_audiofile(audio_path)
# Run the transcription
segments, info = model.transcribe(audio_path, word_timestamps=True)
segments = list(segments) # The transcription will actually run here.
return segments
def mapping_segments(segments):
"""
Mapped the subtitles, each word with it correspond start and end time
Parametres:
----
segments: the segements results from runing the model
return dictionairy of each word with it's own start and end time as well as the entire script in single string.
"""
# Empty dictionairy to store the subtitles with it's own start and end time
subtitles_word = {}
# list of all the words
transcript = []
# looping for every segments
for segment in segments:
for word in segment.words:
# clean the word from any space or punctuation.
text_without_punctuation = re.sub(r'[^\w\s]', '', word.word.strip())
# Store the cleaned word in dic
subtitles_word[f"{word.start}-{word.end}"] = text_without_punctuation
# as well as in list
transcript.append(text_without_punctuation)
return subtitles_word, transcript
def find_time_range_cutted(subtitles_word, edited_script_list_word):
"""
Return the time range that correspond to cutted word
Parametres
----
subtitles_word : mapped words with their own time(start and end)
edited_script_list_word : list of words with no punctuation and space comming from user submition.
"""
# assign 0 to tracked_index which track the word index in original script with index in new edited script
tracked_index = 0
# empty list to store the time range to cut
time_range_to_cut = []
# loop through all the original word
for i, (range_, sub) in enumerate(subtitles_word.items()):
# get the correspond word of the new script
compared_value = edited_script_list_word[tracked_index]
print(f"Comparing '{compared_value}' of index {tracked_index} with '{sub}' of index {i}")
# if the index of old script is equal to new script then it hasnt cutted move to the next word.
if sub == compared_value:
tracked_index += 1
# otherwise add its range as it removed by th user and assign tracked_index same number as it is
# This is will not shift the index of the new script until we found its own range from the old one
else :
time_range_to_cut.append(range_)
tracked_index += 0
return time_range_to_cut
def process_video(video_file):
"""
Process video and return text to be edited
"""
print(video_file)
print("Transribe.....")
segments = transribe(video_file, model)
print('Mapping the segments....')
subtitles_word, list_words = mapping_segments(segments)
# Plain string to be edited as sheet
text_to_edited = ' '.join(list_words)
return text_to_edited
def cut_video(input_video, output_video, cut_ranges):
cut_ranges_cleaned = cut_ranges.copy()
# cut_ranges_cleaned = [(i.split('-')[0], i.split('-')[1]) for i in cut_ranges_cleaned]
print(cut_ranges_cleaned)
# Load the video clip
video_clip = VideoFileClip(input_video)
# Cut and concatenate the specified ranges
cut_clips = [video_clip.subclip(start, end) for start, end in cut_ranges_cleaned]
final_clip = concatenate_videoclips(cut_clips)
# Write the result to a new video file
final_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")
def edit_video(script, video_file):
segments = transribe(video_file, model)
subtitles_word_text, list_words = mapping_segments(segments)
print("subtiles word mapped: ", subtitles_word_text)
# Plain string to be edited as sheet
file_content = re.sub(r'[^\w\s]', '', script)
# after text has been edited transform it to list of words
edited_script_list_word = [ i for i in file_content.split(' ') if i != '']
time_range_to_cut = find_time_range_cutted(subtitles_word_text, edited_script_list_word)
# sort and transform it to list and sorted range
sorted_range = []
time_range_to_cut_cleaned = [(i.split('-')[0], i.split('-')[1]) for i in time_range_to_cut]
print("Cleaned range ", time_range_to_cut_cleaned)
for range_time in time_range_to_cut_cleaned:
for r in range_time:
sorted_range.append(r)
if sorted_range!=[]:
started_range = (0, sorted_range[0])
video_clip = VideoFileClip(video_file)
video_duration = video_clip.duration
ended_range = (sorted_range[-1], video_duration)
complete_range = []
complete_range.append(started_range)
print('sorted range ', sorted_range)
if len(sorted_range) > 2:
new_X = sorted_range[1:-1]
print("new x ", new_X)
print('len ', len(new_X))
for i in range(0, len(sorted_range)-2, 2):
print("Before the error ", i)
print(new_X[i:i+2])
pair_of_items = new_X[i:i+2]
complete_range.append((pair_of_items[0], pair_of_items[1]))
complete_range.append(ended_range)
print("Time range : ", complete_range)
output_video_path = "output.mp4"
cut_video(video_file, output_video_path, complete_range)
return output_video_path
return video_file
model = load_model()