# Hugging Face Spaces page header (extraction residue): Running on A10G
from moviepy.editor import *
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import os
import pyttsx3
from ms_tts import ms_tts_gen

os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/local/bin/ffmpeg"
# os.environ["IMAGEMAGICK_BINARY"] = "/usr/local/bin/convert"
def gen_audio(subtitles, tmpdir):
    """Add a TTS narration track (and optionally burned-in subtitles) to a video.

    Parameters
    ----------
    subtitles : list
        ``[[start, end], text]`` pairs, e.g. ``[[[0, 3], "這裡請注意右臂"], ...]``.
        Times are in seconds.
    tmpdir : str
        Working directory that already contains the rendered ``output.mp4``.

    Returns
    -------
    str
        Path to the generated ``<tmpdir>/output_with_subtitles.mp4``.
    """
    # Coalesce short/adjacent cues so each spoken line lasts long enough.
    subtitles = merge_subtitles(subtitles)
    print("===> Subtitles:")
    for subtitle in subtitles:
        print(subtitle)

    combined_clip = VideoFileClip(tmpdir + "/output.mp4")
    combined_clip = combined_clip.resize(width=800)
    total_duration = combined_clip.duration

    def add_subtitles(clip, subtitles):
        # Burn caption text onto the clip.  Currently NOT called (see the
        # NOTE below) -- kept so on-screen subtitles can be re-enabled easily.
        txt_clips = []
        for start_end, text in subtitles:
            start_time, end_time = start_end
            duration = end_time - start_time
            txt_clip = (TextClip(text, fontsize=clip.w // 20, color='white',
                                 font='bold.ttf', method='caption',
                                 size=(clip.w * 0.9, None))
                        .set_duration(duration)
                        .set_position(('center', 4 * clip.h // 5))
                        .set_start(start_time))
            txt_clips.append(txt_clip)
        return CompositeVideoClip([clip, *txt_clips])

    if len(subtitles) != 0:
        # NOTE(review): on-screen subtitle rendering is deliberately disabled;
        # only the TTS audio track is composited onto the video.
        video_with_subtitles = combined_clip  # add_subtitles(combined_clip, subtitles)
    else:
        # No subtitles at all: just re-encode the resized video and return.
        combined_clip.write_videofile(tmpdir + '/output_with_subtitles.mp4', fps=24)
        combined_clip.close()  # release the ffmpeg reader
        print("Generated Video with Subtitles to:", tmpdir + '/output_with_subtitles.mp4')
        return tmpdir + '/output_with_subtitles.mp4'

    def generate_audio(subtitles, total_duration):
        # Synthesize one TTS wav per cue and lay them onto a composite audio
        # track at each cue's start time.  ``total_duration`` is currently
        # unused; kept so the internal interface stays stable.
        # The temp wav files must outlive this function: moviepy reads them
        # lazily during write_videofile, so the directory is not removed here.
        temp_audio_path = tempfile.mkdtemp()
        clips = []
        for i, (start_end, text) in enumerate(subtitles):
            start_time, end_time = start_end
            duration = end_time - start_time
            audio_path = os.path.join(temp_audio_path, f'subtitle_{i}.wav')
            ms_tts_gen(text, audio_path)  # Microsoft TTS -> wav on disk
            # pydub is used only to measure the synthesized speech length.
            audio_segment = AudioSegment.from_wav(audio_path)
            # Clamp each clip so speech never spills past its cue window.
            audio_clip = (AudioFileClip(audio_path)
                          .set_start(start_time)
                          .set_duration(min(duration, audio_segment.duration_seconds)))
            clips.append(audio_clip)
        return CompositeAudioClip(clips)

    audio_clip = generate_audio(subtitles, total_duration)
    video_with_audio = video_with_subtitles.set_audio(audio_clip)
    video_with_audio.write_videofile(tmpdir + '/output_with_subtitles.mp4', fps=24)
    video_with_audio.close()  # release video/audio readers now that encoding is done
    print("Generated Video with Subtitles to:", tmpdir + '/output_with_subtitles.mp4')
    return tmpdir + '/output_with_subtitles.mp4'
def merge_subtitles(subtitles):
    """Coalesce subtitle cues so every resulting cue lasts at least 3 seconds.

    Contiguous cues (next start == current end) are merged and their texts
    joined with '、'.  A cue that cannot reach 3 s by merging (no more cues,
    or a gap before the next one) is extended to ``start + 3`` instead.
    The merged text then has every "細節" stripped, is deduplicated, gets the
    prefix "這裡請注意", and collapses left/right hand (arm) pairs into
    "雙手" ("雙臂").

    Parameters
    ----------
    subtitles : list
        Time-ordered ``[[start, end], text]`` pairs (seconds).

    Returns
    -------
    list
        Merged ``[[start, end], text]`` pairs.  Empty input yields ``[]``.
    """
    MIN_DURATION = 3  # seconds each merged cue must cover
    merged_subtitles = []
    i = 0
    n = len(subtitles)
    while i < n:
        current_start, current_end = subtitles[i][0]
        current_content = subtitles[i][1]
        i += 1
        while current_end - current_start < MIN_DURATION:
            if i >= n:
                # Last cue and still too short: pad it to the minimum length.
                current_end = current_start + MIN_DURATION
                break
            next_start, next_end = subtitles[i][0]
            if next_start == current_end:
                # Contiguous cue: absorb its time span and its text.
                current_end = next_end
                current_content += "、" + subtitles[i][1]
                i += 1
            else:
                # Gap before the next cue: pad instead of merging across it.
                current_end = current_start + MIN_DURATION
                break
        # Deduplicate the joined parts with dict.fromkeys, which keeps
        # first-seen order.  The previous set() made the order nondeterministic
        # across runs (hash randomization), so the 雙手/雙臂 collapsing below
        # fired unreliably and the output text varied run to run.
        parts = current_content.replace("細節", "").split('、')
        current_content = '這裡請注意' + '、'.join(dict.fromkeys(parts))
        current_content = current_content.replace("左手、右手", "雙手").replace("右手、左手", "雙手")
        current_content = current_content.replace("左臂、右臂", "雙臂").replace("右臂、左臂", "雙臂")
        merged_subtitles.append([[current_start, current_end], current_content])
    return merged_subtitles