from faster_whisper import WhisperModel |
import moviepy.editor as mp |
import re |
from moviepy.video.io.VideoFileClip import VideoFileClip |
from moviepy.video.compositing.concatenate import concatenate_videoclips |
def load_model(model_size="medium"):
    """
    Build the Whisper model used for transcription.

    Parameters:
    ------
    model_size : value handed to ``WhisperModel`` to select the model

    Returns the ``WhisperModel`` instance.
    """
    return WhisperModel(model_size)
def transribe(video_path, model, audio_path='audio.wav'):
    """
    Transcribe the video into segments carrying word-level timestamps.

    The audio track is first exported to ``audio_path``; that file is then
    handed to ``model.transcribe``.

    Parameters:
    ------
    video_path : path of the video to transcribe
    model : object whose ``transcribe(path, word_timestamps=True)`` returns a
        ``(segments, info)`` pair
    audio_path : path the extracted audio is written to

    Returns the segments as a list.

    Raises ValueError if the video has no audio track.
    """
    video = mp.VideoFileClip(video_path)
    try:
        audio_track = video.audio
        if audio_track is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        audio_track.write_audiofile(audio_path)
    finally:
        # Release the readers held by the clip; from here on only the
        # exported audio file is needed.
        video.close()
    # Materialise the (possibly lazy) segments iterable before returning.
    segments, _info = model.transcribe(audio_path, word_timestamps=True)
    return list(segments)
def mapping_segments(segments):
    """
    Map every transcribed word to its start and end time.

    Parameters:
    ----
    segments : iterable of segments resulting from running the model; each
        segment exposes ``words``, whose items expose ``word``, ``start``
        and ``end``

    Returns a tuple ``(subtitles_word, transcript)``:
    subtitles_word : dict keyed by ``"<start>-<end>"`` whose value is the word
        with punctuation and surrounding whitespace removed
    transcript : list of those words in spoken order

    Tokens that are empty once punctuation is removed are skipped: they can
    never appear in a punctuation-free edited script, so keeping them would
    make them look like words the user deleted.

    Note: two words with identical start and end share one key, so the later
    word overwrites the earlier one in ``subtitles_word``.
    """
    subtitles_word = {}
    transcript = []
    for segment in segments:
        # Treat a missing (None) ``words`` attribute as "no words".
        for word in segment.words or ():
            # Strip after removing punctuation so that whitespace exposed by
            # the removal (e.g. "- word") does not survive in the token.
            cleaned_word = re.sub(r'[^\w\s]', '', word.word).strip()
            if not cleaned_word:
                continue
            subtitles_word[f"{word.start}-{word.end}"] = cleaned_word
            transcript.append(cleaned_word)
    return subtitles_word, transcript
def find_time_range_cutted(subtitles_word, edited_script_list_word):
    """
    Return the time ranges that correspond to the words cut from the script.

    The transcribed words are walked in order next to the edited script. A
    transcribed word equal to the current edited word is kept and the edited
    script advances; any other transcribed word is reported as cut. Once the
    edited script is exhausted, every remaining transcribed word is cut.

    Parameters:
    ----
    subtitles_word : dict mapping a time-range key to its word, in spoken order
    edited_script_list_word : list of words (no punctuation, no empty
        strings) coming from the user's edited script

    Returns the list of time-range keys of the cut words, in spoken order.
    """
    edited_length = len(edited_script_list_word)
    tracked_index = 0
    time_range_to_cut = []
    for i, (range_, sub) in enumerate(subtitles_word.items()):
        if tracked_index >= edited_length:
            # Nothing left in the edited script: the tail was deleted.
            time_range_to_cut.append(range_)
            continue
        compared_value = edited_script_list_word[tracked_index]
        print(f"Comparing '{compared_value}' of index {tracked_index} with '{sub}' of index {i}")
        if sub == compared_value:
            tracked_index += 1
        else:
            time_range_to_cut.append(range_)
    return time_range_to_cut
def process_video(video_file):
    """
    Transcribe a video and return its script as text to be edited.

    Uses the module-level ``model`` for the transcription.

    Parameters:
    ----
    video_file : path of the video to process

    Returns the transcribed words joined by single spaces.
    """
    print(video_file)
    print("Transribe.....")
    transcribed_segments = transribe(video_file, model)
    print('Mapping the segments....')
    # Only the word list is needed here; the time mapping is discarded.
    _, words = mapping_segments(transcribed_segments)
    return ' '.join(words)
def cut_video(input_video, output_video, cut_ranges):
    """
    Write a video made only of the given time ranges of ``input_video``.

    Each range is extracted as a subclip and the subclips are concatenated in
    the given order, so ``cut_ranges`` lists the parts that are kept.

    Parameters:
    ----
    input_video : path of the source video
    output_video : path the resulting video is written to
    cut_ranges : iterable of ``(start, end)`` pairs to extract

    Raises ValueError if ``cut_ranges`` is empty.
    """
    ranges_to_extract = list(cut_ranges)
    print(ranges_to_extract)
    if not ranges_to_extract:
        raise ValueError("cut_ranges must contain at least one (start, end) range")
    video_clip = VideoFileClip(input_video)
    try:
        extracted_clips = [
            video_clip.subclip(start, end) for start, end in ranges_to_extract
        ]
        final_clip = concatenate_videoclips(extracted_clips)
        final_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")
    finally:
        # The subclips are derived from the source clip, so closing it
        # releases the underlying file readers once writing is done.
        video_clip.close()
def _parse_time_range(range_key):
    """Split a ``"<start>-<end>"`` key into a ``(start, end)`` pair of floats."""
    # Try each hyphen as the separator so that a value printed in scientific
    # notation (e.g. "1e-05-0.3") is still split at the right place.
    for position, char in enumerate(range_key):
        if char != '-' or position == 0:
            continue
        try:
            return float(range_key[:position]), float(range_key[position + 1:])
        except ValueError:
            continue
    raise ValueError(f"Invalid time range key: {range_key!r}")


def _ranges_to_keep(cut_spans, duration):
    """Return, in order, the parts of ``[0, duration]`` not covered by ``cut_spans``."""
    kept = []
    cursor = 0.0
    for start, end in sorted(cut_spans):
        # Clamp to the clip so a timestamp past the end cannot yield an
        # invalid range.
        start = min(max(start, 0.0), duration)
        end = min(max(end, 0.0), duration)
        # Only emit a non-empty gap; adjacent or overlapping cuts merge here.
        if start > cursor:
            kept.append((cursor, start))
        cursor = max(cursor, end)
    if cursor < duration:
        kept.append((cursor, duration))
    return kept


def edit_video(script, video_file):
    """
    Cut out of the video the words that were removed from its script.

    The video is transcribed with the module-level ``model``, the transcript
    is compared with ``script``, and the time ranges of the missing words are
    removed from the video.

    Parameters:
    ----
    script : the edited script text; punctuation is ignored and words may be
        separated by any whitespace
    video_file : path of the video to edit

    Returns ``"output.mp4"`` after writing the shortened video there, or
    ``video_file`` unchanged when no word was removed.

    Raises ValueError if the edit would remove the entire video.
    """
    segments = transribe(video_file, model)
    subtitles_word_text, _ = mapping_segments(segments)
    print("subtiles word mapped: ", subtitles_word_text)
    file_content = re.sub(r'[^\w\s]', '', script)
    # split() without argument also splits on newlines and tabs and never
    # yields empty strings, so multi-line scripts tokenise correctly.
    edited_script_list_word = file_content.split()
    time_range_to_cut = find_time_range_cutted(subtitles_word_text, edited_script_list_word)
    if not time_range_to_cut:
        return video_file
    cut_spans = [_parse_time_range(range_key) for range_key in time_range_to_cut]
    print("Cleaned range ", cut_spans)
    # The clip is opened only to read its duration, so release it right away.
    video_clip = VideoFileClip(video_file)
    try:
        video_duration = video_clip.duration
    finally:
        video_clip.close()
    complete_range = _ranges_to_keep(cut_spans, video_duration)
    print("Time range : ", complete_range)
    if not complete_range:
        raise ValueError("The edited script removes the entire video")
    output_video_path = "output.mp4"
    cut_video(video_file, output_video_path, complete_range)
    return output_video_path
# Shared model instance read by process_video and edit_video; it is created
# once, when this module is executed, with load_model's default size.
model = load_model()