Spaces:
Running
on
A10G
Running
on
A10G
Update audio.py
Browse files
audio.py
CHANGED
@@ -5,11 +5,9 @@ import tempfile
|
|
5 |
import os
|
6 |
|
7 |
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/local/bin/ffmpeg"
|
8 |
-
# 如果需要,设置 ImageMagick 的路径
|
9 |
# os.environ["IMAGEMAGICK_BINARY"] = "/usr/local/bin/convert"
|
10 |
|
11 |
def gen_audio(subtitles, tmpdir):
|
12 |
-
# 字幕列表,表示在指定时间段显示对应的字幕
|
13 |
# subtitles = [
|
14 |
# [[0, 3], "這裡請注意右臂"],
|
15 |
# [[4, 8], "這裡請注意左臂"],
|
@@ -20,45 +18,37 @@ def gen_audio(subtitles, tmpdir):
|
|
20 |
for subtitle in subtitles:
|
21 |
print(subtitle)
|
22 |
combined_clip = VideoFileClip(tmpdir + "/output.mp4")
|
23 |
-
# 获取视频总时长
|
24 |
total_duration = combined_clip.duration
|
25 |
|
26 |
-
# 添加字幕的函数
|
27 |
def add_subtitles(clip, subtitles):
|
28 |
txt_clips = []
|
29 |
for start_end, text in subtitles:
|
30 |
start_time, end_time = start_end
|
31 |
duration = end_time - start_time
|
32 |
-
txt_clip = (TextClip(text, fontsize=clip.w//20, color='white', font='bold.ttf', method='caption', size=(clip.w * 0.9, None))
|
33 |
-
.set_duration(duration)
|
34 |
.set_position(('center', 4 * clip.h // 5))
|
35 |
-
.set_start(start_time))
|
36 |
txt_clips.append(txt_clip)
|
37 |
|
38 |
-
# 将字幕叠加到视频上
|
39 |
return CompositeVideoClip([clip, *txt_clips])
|
40 |
|
41 |
-
# 将字幕添加到视频中
|
42 |
video_with_subtitles = add_subtitles(combined_clip, subtitles)
|
43 |
|
44 |
-
# 生成语音文件并返回音频剪辑的函数
|
45 |
def generate_audio(subtitles, total_duration):
|
46 |
-
temp_audio_path = tempfile.mkdtemp()
|
47 |
clips = []
|
48 |
|
49 |
for i, (start_end, text) in enumerate(subtitles):
|
50 |
start_time, end_time = start_end
|
51 |
duration = end_time - start_time
|
52 |
|
53 |
-
# 使用 gtts 生成语音
|
54 |
tts = gTTS(text=text, lang='zh')
|
55 |
audio_path = os.path.join(temp_audio_path, f'subtitle_{i}.mp3')
|
56 |
tts.save(audio_path)
|
57 |
|
58 |
-
# 使用 pydub 加载音频并将其转换为 wav 格式
|
59 |
audio_segment = AudioSegment.from_mp3(audio_path)
|
60 |
|
61 |
-
# 将 pydub 音频转换为 moviepy 可用的 AudioFileClip
|
62 |
new_audio_path = os.path.join(temp_audio_path, f'subtitle_{i}_adjusted.wav')
|
63 |
audio_segment.export(new_audio_path, format="wav")
|
64 |
|
@@ -66,17 +56,13 @@ def gen_audio(subtitles, tmpdir):
|
|
66 |
|
67 |
clips.append(audio_clip)
|
68 |
|
69 |
-
# 合并所有音频剪辑
|
70 |
final_audio = CompositeAudioClip(clips)
|
71 |
return final_audio
|
72 |
|
73 |
-
# 生成语音
|
74 |
audio_clip = generate_audio(subtitles, total_duration)
|
75 |
|
76 |
-
# 将语音添加到视频
|
77 |
video_with_audio = video_with_subtitles.set_audio(audio_clip)
|
78 |
|
79 |
-
# 导出带语音和字幕的视频
|
80 |
video_with_audio.write_videofile(tmpdir + '/output_with_subtitles.mp4', fps=24)
|
81 |
return tmpdir + '/output_with_subtitles.mp4'
|
82 |
|
@@ -87,35 +73,29 @@ def merge_subtitles(subtitles):
|
|
87 |
n = len(subtitles)
|
88 |
|
89 |
while i < n:
|
90 |
-
# 初始化当前字幕的起始时间、结束时间和内容
|
91 |
current_start, current_end = subtitles[i][0]
|
92 |
current_content = subtitles[i][1]
|
93 |
duration = current_end - current_start
|
94 |
-
i += 1
|
95 |
|
96 |
-
# 尝试合并后续字幕,直到持续时间至少为3秒
|
97 |
while duration < 3:
|
98 |
if i >= n:
|
99 |
-
# 已经处理完所有字幕,延长结束时间
|
100 |
current_end = current_start + 3
|
101 |
duration = current_end - current_start
|
102 |
break
|
103 |
next_start, next_end = subtitles[i][0]
|
104 |
next_content = subtitles[i][1]
|
105 |
if next_start == current_end:
|
106 |
-
# 与下一个字幕相邻,合并
|
107 |
current_end = next_end
|
108 |
current_content += "、" + next_content
|
109 |
duration = current_end - current_start
|
110 |
-
i += 1
|
111 |
else:
|
112 |
-
# 不相邻,无法合并,延长结束时间
|
113 |
if duration < 3:
|
114 |
current_end = current_start + 3
|
115 |
duration = current_end - current_start
|
116 |
-
break
|
117 |
|
118 |
-
# 添加合并后的字幕到新列表
|
119 |
current_content = '這裡請注意' + '、'.join(list(set(current_content.replace("細節", "").split('、'))))
|
120 |
merged_subtitles.append([[current_start, current_end], current_content])
|
121 |
|
|
|
5 |
import os
|
6 |
|
7 |
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/local/bin/ffmpeg"
|
|
|
8 |
# os.environ["IMAGEMAGICK_BINARY"] = "/usr/local/bin/convert"
|
9 |
|
10 |
def gen_audio(subtitles, tmpdir):
|
|
|
11 |
# subtitles = [
|
12 |
# [[0, 3], "這裡請注意右臂"],
|
13 |
# [[4, 8], "這裡請注意左臂"],
|
|
|
18 |
for subtitle in subtitles:
|
19 |
print(subtitle)
|
20 |
combined_clip = VideoFileClip(tmpdir + "/output.mp4")
|
|
|
21 |
total_duration = combined_clip.duration
|
22 |
|
|
|
23 |
def add_subtitles(clip, subtitles):
    """Overlay one caption per subtitle entry on top of ``clip``.

    Args:
        clip: Base video clip. Its ``w`` and ``h`` attributes are read to
            derive the caption font size, wrap width and vertical position.
        subtitles: Iterable of ``[[start, end], text]`` entries. Each caption
            is started at ``start`` and kept for ``end - start``.

    Returns:
        A ``CompositeVideoClip`` whose first layer is ``clip`` followed by
        the generated text clips, in the order of ``subtitles``.
    """

    def build_caption(window, caption):
        # Unpack the [start, end] pair of a single subtitle entry.
        begin, finish = window
        rendered = TextClip(
            caption,
            fontsize=clip.w//20,
            color='white',
            font='bold.ttf',
            method='caption',
            # Width is 90% of the frame; None leaves the height unspecified.
            size=(clip.w * 0.9, None),
        )
        rendered = rendered.set_duration(finish - begin)
        # Horizontally centred, with the top edge at four fifths of the height.
        rendered = rendered.set_position(('center', 4 * clip.h // 5))
        return rendered.set_start(begin)

    overlays = [build_caption(window, caption) for window, caption in subtitles]

    # The base clip goes first so every caption is drawn above it.
    return CompositeVideoClip([clip] + overlays)
|
35 |
|
|
|
36 |
video_with_subtitles = add_subtitles(combined_clip, subtitles)
|
37 |
|
|
|
38 |
def generate_audio(subtitles, total_duration):
|
39 |
+
temp_audio_path = tempfile.mkdtemp()
|
40 |
clips = []
|
41 |
|
42 |
for i, (start_end, text) in enumerate(subtitles):
|
43 |
start_time, end_time = start_end
|
44 |
duration = end_time - start_time
|
45 |
|
|
|
46 |
tts = gTTS(text=text, lang='zh')
|
47 |
audio_path = os.path.join(temp_audio_path, f'subtitle_{i}.mp3')
|
48 |
tts.save(audio_path)
|
49 |
|
|
|
50 |
audio_segment = AudioSegment.from_mp3(audio_path)
|
51 |
|
|
|
52 |
new_audio_path = os.path.join(temp_audio_path, f'subtitle_{i}_adjusted.wav')
|
53 |
audio_segment.export(new_audio_path, format="wav")
|
54 |
|
|
|
56 |
|
57 |
clips.append(audio_clip)
|
58 |
|
|
|
59 |
final_audio = CompositeAudioClip(clips)
|
60 |
return final_audio
|
61 |
|
|
|
62 |
audio_clip = generate_audio(subtitles, total_duration)
|
63 |
|
|
|
64 |
video_with_audio = video_with_subtitles.set_audio(audio_clip)
|
65 |
|
|
|
66 |
video_with_audio.write_videofile(tmpdir + '/output_with_subtitles.mp4', fps=24)
|
67 |
return tmpdir + '/output_with_subtitles.mp4'
|
68 |
|
|
|
73 |
n = len(subtitles)
|
74 |
|
75 |
while i < n:
|
|
|
76 |
current_start, current_end = subtitles[i][0]
|
77 |
current_content = subtitles[i][1]
|
78 |
duration = current_end - current_start
|
79 |
+
i += 1
|
80 |
|
|
|
81 |
while duration < 3:
|
82 |
if i >= n:
|
|
|
83 |
current_end = current_start + 3
|
84 |
duration = current_end - current_start
|
85 |
break
|
86 |
next_start, next_end = subtitles[i][0]
|
87 |
next_content = subtitles[i][1]
|
88 |
if next_start == current_end:
|
|
|
89 |
current_end = next_end
|
90 |
current_content += "、" + next_content
|
91 |
duration = current_end - current_start
|
92 |
+
i += 1
|
93 |
else:
|
|
|
94 |
if duration < 3:
|
95 |
current_end = current_start + 3
|
96 |
duration = current_end - current_start
|
97 |
+
break
|
98 |
|
|
|
99 |
current_content = '這裡請注意' + '、'.join(list(set(current_content.replace("細節", "").split('、'))))
|
100 |
merged_subtitles.append([[current_start, current_end], current_content])
|
101 |
|