File size: 5,257 Bytes
593b9ab
 
 
 
 
f08d9ef
 
593b9ab
 
 
 
79237fa
593b9ab
 
 
 
f08d9ef
593b9ab
 
 
 
79237fa
808c5d2
593b9ab
 
 
 
 
 
 
0b835dc
 
593b9ab
0b835dc
593b9ab
 
 
 
8d20b52
2c531e7
8d20b52
 
 
 
593b9ab
346f11e
 
 
09fcd9c
593b9ab
0b835dc
593b9ab
 
f08d9ef
 
 
 
 
593b9ab
 
 
 
f08d9ef
f470955
f08d9ef
 
593b9ab
f470955
593b9ab
f470955
 
593b9ab
 
 
 
 
 
 
 
 
 
 
 
79237fa
ebc8cf9
 
79237fa
593b9ab
 
 
 
 
 
 
 
 
 
 
0b835dc
593b9ab
 
 
 
 
 
 
 
 
 
 
 
0b835dc
593b9ab
 
 
 
0b835dc
593b9ab
798ca11
 
 
 
 
 
 
 
 
 
 
 
593b9ab
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from moviepy.editor import *
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import os
import pyttsx3
from ms_tts import ms_tts_gen

# Pin the ffmpeg executable to a fixed location via the IMAGEIO_FFMPEG_EXE
# environment variable (presumably consumed by imageio/moviepy — verify).
# NOTE(review): this assignment runs after `moviepy.editor` has already been
# imported above; confirm the variable is still read late enough to take effect.
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/local/bin/ffmpeg"
# ImageMagick binary override, currently disabled.
# os.environ["IMAGEMAGICK_BINARY"] = "/usr/local/bin/convert"

def _build_narration_clip(index, start_end, text, audio_dir):
    """Synthesize the narration for one subtitle and return it as a timed clip.

    Args:
        index: Position of the subtitle; only used to name the audio files.
        start_end: ``[start, end]`` pair giving the subtitle's time window.
        text: Text handed to ``ms_tts_gen`` for speech synthesis.
        audio_dir: Existing directory in which the mp3 files are written.

    Returns:
        An ``AudioFileClip`` that starts at ``start`` and lasts no longer than
        the subtitle's window.  The caller is responsible for closing it.
    """
    start_time, end_time = start_end
    duration = end_time - start_time

    audio_path = os.path.join(audio_dir, f'subtitle_{index}.mp3')
    ms_tts_gen(text, audio_path)

    # Load the synthesized speech with pydub to learn its length, then
    # re-export it; the re-exported file is the one handed to moviepy.
    audio_segment = AudioSegment.from_mp3(audio_path)
    new_audio_path = os.path.join(audio_dir, f'subtitle_{index}_adjusted.mp3')
    # export() returns the open output file; close it right away instead of
    # leaving the handle to the garbage collector.
    audio_segment.export(new_audio_path, format="mp3").close()

    # Cut off speech that would run past the end of the subtitle's window.
    clip_duration = min(duration, audio_segment.duration_seconds)
    return (AudioFileClip(new_audio_path)
            .set_start(start_time)
            .set_duration(clip_duration))


def gen_audio(subtitles, tmpdir):
    """Render ``output_with_subtitles.mp4`` with spoken narration of the subtitles.

    Reads ``<tmpdir>/output.mp4``, resizes it to a width of 800 and writes
    ``<tmpdir>/output_with_subtitles.mp4`` at 24 fps.  When there is at least
    one subtitle, each one is synthesized to speech and the video's audio
    track is replaced by the combined narration.  When there are none, the
    resized video is written unchanged.  No text is drawn onto the frames.

    Args:
        subtitles: Sequence of ``[[start, end], text]`` entries.  They are
            first normalised by ``merge_subtitles``.
        tmpdir: Directory that contains ``output.mp4`` and receives the
            generated file.

    Returns:
        The path of the written video, ``tmpdir + '/output_with_subtitles.mp4'``.
    """
    subtitles = merge_subtitles(subtitles)
    print("===> Subtitles:")
    for subtitle in subtitles:
        print(subtitle)

    output_path = tmpdir + '/output_with_subtitles.mp4'
    source_clip = VideoFileClip(tmpdir + "/output.mp4")
    try:
        resized_clip = source_clip.resize(width=800)

        if not subtitles:
            # Nothing to narrate: just emit the resized video.
            resized_clip.write_videofile(output_path, fps=24)
        else:
            # The narration files are only needed until the video has been
            # written, so keep them in a directory that is removed afterwards.
            with tempfile.TemporaryDirectory() as audio_dir:
                narration_clips = []
                try:
                    for i, (start_end, text) in enumerate(subtitles):
                        narration_clips.append(
                            _build_narration_clip(i, start_end, text, audio_dir)
                        )
                    narration = CompositeAudioClip(narration_clips)
                    video_with_audio = resized_clip.set_audio(narration)
                    video_with_audio.write_videofile(output_path, fps=24)
                finally:
                    # Release the audio readers before the directory holding
                    # their files is deleted.
                    for narration_clip in narration_clips:
                        narration_clip.close()
    finally:
        # Release the video reader even if rendering failed.
        source_clip.close()

    print("Generated Video with Subtitles to:", output_path)
    return output_path


def merge_subtitles(subtitles, min_duration=3):
    """Merge short back-to-back subtitles and rebuild their narration text.

    Subtitles are processed in order.  While the current subtitle is shorter
    than ``min_duration`` and the next one starts exactly where it ends, the
    next one is absorbed (its text is joined with "、").  If the subtitle is
    still too short and cannot absorb a neighbour, its end is pushed out to
    ``start + min_duration``.

    The text of every resulting subtitle is then rebuilt: "細節" is removed,
    duplicate items are dropped while keeping their first-seen order, a
    left/right pair of hands or arms is collapsed into "雙手" / "雙臂"
    (appended at the end), and the result is prefixed with "這裡請注意".

    NOTE: extending a short subtitle does not look at the following entry's
    start time, so the extended window may overlap the next subtitle.

    Args:
        subtitles: Sequence of ``[[start, end], text]`` entries.
        min_duration: Minimum length a subtitle should have; defaults to 3.

    Returns:
        A new list of ``[[start, end], text]`` entries; the input is not
        modified.
    """
    # (left item, right item, replacement used when both are present)
    paired_parts = (
        ("左手", "右手", "雙手"),
        ("左臂", "右臂", "雙臂"),
    )

    merged_subtitles = []
    total = len(subtitles)
    index = 0

    while index < total:
        start, end = subtitles[index][0]
        content = subtitles[index][1]
        index += 1

        while end - start < min_duration:
            if index < total:
                next_start, next_end = subtitles[index][0]
                if next_start == end:
                    # Directly adjacent: absorb the neighbour and re-check.
                    end = next_end
                    content += "、" + subtitles[index][1]
                    index += 1
                    continue
            # No adjacent neighbour left to absorb: pad to the minimum length.
            end = start + min_duration
            break

        # dict.fromkeys dedupes while preserving first-seen order, so the
        # generated text is the same on every run.
        parts = list(dict.fromkeys(content.replace("細節", "").split("、")))
        for left, right, both in paired_parts:
            if left in parts and right in parts:
                parts.remove(left)
                parts.remove(right)
                parts.append(both)

        merged_subtitles.append([[start, end], "這裡請注意" + "、".join(parts)])

    return merged_subtitles