|
from faster_whisper import WhisperModel |
|
import moviepy.editor as mp |
|
import re |
|
from moviepy.video.io.VideoFileClip import VideoFileClip |
|
from moviepy.video.compositing.concatenate import concatenate_videoclips |
|
|
|
|
|
|
|
|
|
|
|
def load_model(model_size="medium"):
    """
    Create the speech-to-text model used by the rest of this module.

    Parameters:
    ------
    model_size : value forwarded unchanged to ``WhisperModel`` (defaults to "medium")

    Returns the freshly constructed ``WhisperModel`` instance.
    """
    return WhisperModel(model_size)
|
|
|
def transribe(video_path, model, audio_path = 'audio.wav'):
    """
    Transcribe the video into segments carrying word-level timestamps.

    The audio track of the video is first exported to ``audio_path``; that
    file is then passed to ``model.transcribe``.

    Parameters:
    ------
    video_path : the path of the video to transcribe
    model : the model used to extract the script; it must expose
        ``transcribe(audio_path, word_timestamps=True)`` returning a
        ``(segments, info)`` pair
    audio_path : path the extracted audio is written to (overwritten on each call)

    Returns:
    ------
    list of the segments returned by the model

    Raises:
    ------
    ValueError : if the video has no audio track to transcribe
    """
    video = mp.VideoFileClip(video_path)
    try:
        audio_track = video.audio
        if audio_track is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_path}")
        audio_track.write_audiofile(audio_path)
    finally:
        # Always release the readers held by the clip, even if the export fails.
        video.close()

    segments, _info = model.transcribe(audio_path, word_timestamps=True)

    # Materialise the result so callers get a plain, re-iterable list.
    return list(segments)
|
|
|
def mapping_segments(segments):
    """
    Map every transcribed word to its start and end time.

    Punctuation is stripped from each word. Tokens that are empty once the
    punctuation is removed are skipped: they could never match a word of the
    edited script and would otherwise always be reported as "cut".

    Parameters:
    ------
    segments : the segments resulting from running the model; each one is
        expected to expose ``words``, an iterable of objects with ``word``,
        ``start`` and ``end`` attributes. Segments whose ``words`` is None
        or empty are ignored.

    Returns:
    ------
    (subtitles_word, transcript) where ``subtitles_word`` is a dictionary
    ``{"<start>-<end>": word}`` in spoken order and ``transcript`` is the
    list of the same words, in the same order.

    NOTE: two words sharing exactly the same start and end time share one
    dictionary key, so the later one replaces the earlier one in the mapping.
    """
    subtitles_word = {}
    transcript = []

    for segment in segments:
        # ``words`` can be missing when no word-level timestamps are available.
        for word in (segment.words or []):
            text_without_punctuation = re.sub(r'[^\w\s]', '', word.word.strip())
            if not text_without_punctuation:
                continue

            subtitles_word[f"{word.start}-{word.end}"] = text_without_punctuation
            transcript.append(text_without_punctuation)

    return subtitles_word, transcript
|
|
|
|
|
def find_time_range_cutted(subtitles_word, edited_script_list_word):
    """
    Return the time ranges of the words that were removed from the script.

    The original words are walked in order and compared with the next
    not-yet-matched word of the edited script. A match consumes that edited
    word; a mismatch means the original word was removed, so its time range
    is collected. Once the edited script is exhausted, every remaining
    original word is considered removed.

    Parameters:
    ------
    subtitles_word : mapping ``{"<start>-<end>": word}`` in spoken order
    edited_script_list_word : list of words (no punctuation, no blanks)
        coming from the user submission

    Returns:
    ------
    list of the ``"<start>-<end>"`` keys of the removed words, in order
    """
    edited_count = len(edited_script_list_word)
    tracked_index = 0
    time_range_to_cut = []

    for i, (range_, sub) in enumerate(subtitles_word.items()):
        if tracked_index >= edited_count:
            # Nothing left to compare against: trailing words were deleted.
            # (Indexing here used to raise IndexError.)
            time_range_to_cut.append(range_)
            continue

        compared_value = edited_script_list_word[tracked_index]
        print(f"Comparing '{compared_value}' of index {tracked_index} with '{sub}' of index {i}")

        if sub == compared_value:
            tracked_index += 1
        else:
            # Keep pointing at the same edited word; it may match a later subtitle.
            time_range_to_cut.append(range_)

    return time_range_to_cut
|
|
|
|
|
|
|
|
|
|
|
def process_video(video_file):
    """
    Transcribe a video and hand back its script as editable text.

    The video is transcribed with the module-level ``model``, the resulting
    segments are mapped word by word, and the words are joined with single
    spaces.

    Parameters:
    ------
    video_file : path of the video to process

    Returns the transcript as one space-separated string.
    """
    print(video_file)

    print("Transribe.....")
    transcribed_segments = transribe(video_file, model)

    print('Mapping the segments....')
    # Only the word list is needed here; the timing map is discarded.
    _, words = mapping_segments(transcribed_segments)

    return ' '.join(words)
|
|
|
|
|
def cut_video(input_video, output_video, cut_ranges):
    """
    Write a new video made of the given time spans of the input video.

    Despite its name, ``cut_ranges`` lists the spans that end up in the
    output: each ``(start, end)`` pair is extracted with ``subclip`` and the
    pieces are concatenated in the given order.

    Parameters:
    ------
    input_video : path of the source video
    output_video : path the resulting video is written to
    cut_ranges : iterable of ``(start, end)`` pairs to extract from the source

    Raises:
    ------
    ValueError : if ``cut_ranges`` is empty (there would be nothing to write)
    """
    cut_ranges_cleaned = list(cut_ranges)

    print(cut_ranges_cleaned)

    if not cut_ranges_cleaned:
        raise ValueError("cut_ranges is empty: there is nothing to write")

    video_clip = VideoFileClip(input_video)
    try:
        cut_clips = [video_clip.subclip(start, end) for start, end in cut_ranges_cleaned]
        final_clip = concatenate_videoclips(cut_clips)

        final_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")
    finally:
        # The sub-clips read through the source clip, so close it only once
        # writing is done (or has failed).
        video_clip.close()
|
|
|
def _parse_time_range(range_key):
    """Turn a ``"<start>-<end>"`` key into a ``(start, end)`` tuple of floats."""
    start, _, end = range_key.partition('-')
    return float(start), float(end)


def _ranges_to_keep(cut_ranges, video_duration):
    """Return the ``(start, end)`` spans of ``[0, video_duration]`` not covered by ``cut_ranges``."""
    kept = []
    cursor = 0.0
    for cut_start, cut_end in sorted(cut_ranges):
        keep_end = min(cut_start, video_duration)
        # Skip empty spans: adjacent removed words (or a cut starting at 0)
        # would otherwise produce zero-length clips.
        if keep_end > cursor:
            kept.append((cursor, keep_end))
        cursor = max(cursor, cut_end)
    if video_duration > cursor:
        kept.append((cursor, video_duration))
    return kept


def edit_video(script, video_file, output_video_path="output.mp4"):
    """
    Cut out of the video every word that the user removed from the script.

    The video is transcribed again with the module-level ``model``, the
    transcript is compared with ``script`` (punctuation ignored) and the time
    spans of the missing words are removed from the video.

    Parameters:
    ------
    script : the edited script submitted by the user
    video_file : path of the video to edit
    output_video_path : where the edited video is written (defaults to "output.mp4")

    Returns:
    ------
    ``output_video_path`` when something was cut, otherwise ``video_file``
    unchanged (no new file is written in that case).

    Raises:
    ------
    ValueError : if removing the cut spans leaves nothing of the video
    """
    segments = transribe(video_file, model)
    subtitles_word_text, _ = mapping_segments(segments)
    print("subtiles word mapped: ", subtitles_word_text)

    file_content = re.sub(r'[^\w\s]', '', script)

    # split() without argument splits on any whitespace run, so words separated
    # by newlines or tabs are no longer glued together, and it never yields ''.
    edited_script_list_word = file_content.split()

    time_range_to_cut = find_time_range_cutted(subtitles_word_text, edited_script_list_word)
    if not time_range_to_cut:
        return video_file

    cut_ranges = [_parse_time_range(range_key) for range_key in time_range_to_cut]

    # The clip is opened only to read its duration; close it right away.
    video_clip = VideoFileClip(video_file)
    try:
        video_duration = video_clip.duration
    finally:
        video_clip.close()

    complete_range = _ranges_to_keep(cut_ranges, video_duration)
    print("Time range : ", complete_range)

    if not complete_range:
        raise ValueError("The edited script removes the whole video; nothing left to export")

    cut_video(video_file, output_video_path, complete_range)
    return output_video_path
|
|
|
# Shared model instance read as a global by process_video() and edit_video().
# It is created once, when this module is imported, with load_model()'s default size.
model = load_model()
|
|