bugbounted BatuhanYilmaz commited on
Commit
6c164b4
·
0 Parent(s):

Duplicate from BatuhanYilmaz/Whisper-Auto-Subtitled-Video-Generator

Browse files

Co-authored-by: Batuhan Yilmaz <[email protected]>

.streamlit/config.toml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor="#F63366"
3
+ backgroundColor="#FFFFFF"
4
+ secondaryBackgroundColor="#F0F2F6"
5
+ textColor="#262730"
6
+ font="sans serif"
7
+ [server]
8
+ maxUploadSize=1028
01_🎥_Input_YouTube_Link.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from pytube import YouTube
3
+ import requests
4
+ import time
5
+ import streamlit as st
6
+ from streamlit_lottie import st_lottie
7
+ import numpy as np
8
+ import os
9
+ from typing import Iterator
10
+ from io import StringIO
11
+ from utils import write_vtt, write_srt
12
+ import ffmpeg
13
+ from languages import LANGUAGES
14
+
15
+ st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
16
+
17
+ # Define a function that we can use to load lottie files from a link.
18
@st.cache()
def load_lottieurl(url: str):
    """Download a Lottie animation description and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON file.

    Returns:
        The decoded JSON payload, or ``None`` when the server does not answer
        with HTTP 200.
    """
    # Without a timeout `requests.get` can block forever on a stalled
    # connection, which would freeze the whole page while it renders.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        return None
    return r.json()
24
+
25
# Page header: animation in the narrow left column, usage notes on the right.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
    # load_lottieurl returns None on a non-200 answer; skip the animation
    # instead of handing None to the Lottie component.
    if lottie is not None:
        st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### Input a YouTube video link and get a video with subtitles.
    ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
37
+
38
+
39
@st.cache(allow_output_mutation=True)
def populate_metadata(link):
    """Collect the descriptive fields of the YouTube video behind ``link``.

    Returns:
        A 6-tuple ``(author, title, description, thumbnail_url, length, views)``
        read from the ``YouTube`` object built for ``link``.
    """
    video_info = YouTube(link)
    return (
        video_info.author,
        video_info.title,
        video_info.description,
        video_info.thumbnail_url,
        video_info.length,
        video_info.views,
    )
49
+
50
+
51
@st.cache(allow_output_mutation=True)
def download_video(link):
    """Download the highest-resolution progressive mp4 stream of a video.

    Args:
        link: URL of the YouTube video.

    Returns:
        Whatever ``Stream.download()`` returns for the chosen stream
        (presumably the path of the saved file -- confirm against pytube).

    Raises:
        ValueError: If the video offers no progressive mp4 stream.
    """
    yt = YouTube(link)
    progressive_mp4 = yt.streams.filter(progressive=True, file_extension='mp4')
    best_stream = progressive_mp4.order_by('resolution').desc().first()
    # `first()` yields None for an empty query; fail with a clear message
    # instead of an AttributeError on `None.download()`.
    if best_stream is None:
        raise ValueError("No progressive mp4 stream is available for this video.")
    return best_stream.download()
56
+
57
+
58
def convert(seconds):
    """Format a duration given in seconds as ``HH:MM:SS``.

    Fractional seconds are truncated. Unlike a clock-time conversion, the
    hour field keeps growing past 23, so a 25-hour duration is rendered as
    ``25:00:00`` rather than wrapping around to ``01:00:00``.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The zero-padded ``HH:MM:SS`` string.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    total = int(seconds)
    if total < 0:
        raise ValueError("seconds must be non-negative")
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
60
+
61
+
62
# Module-level defaults. main() passes `current_size` to change_model();
# because it is the string "None" it never equals a real model size.
# NOTE(review): main() rebinds its own `loaded_model` from change_model(), so
# this eager "base" load looks unused in the visible code -- confirm.
loaded_model = whisper.load_model("base")
current_size = "None"


@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    """Load and return the Whisper model named ``size``.

    Args:
        current_size: Name of the model size considered already active.
        size: Name of the model size to load.

    Raises:
        Exception: If ``size`` equals ``current_size``.
    """
    if current_size == size:
        raise Exception("Model size is the same as the current size.")
    return whisper.load_model(size)
73
+
74
+
75
@st.cache(allow_output_mutation=True)
def inference(link, loaded_model, task):
    """Download a video's audio track and run Whisper on it.

    Args:
        link: URL of the YouTube video.
        loaded_model: Whisper model exposing ``transcribe``.
        task: ``"Transcribe"`` or ``"Translate"`` as selected in the UI.

    Returns:
        A 4-tuple ``(text, vtt, srt, language)`` where ``vtt`` and ``srt`` are
        the subtitle documents produced by ``getSubs`` and ``language`` is the
        ``"language"`` entry of the Whisper result.

    Raises:
        ValueError: If ``task`` is neither ``"Transcribe"`` nor ``"Translate"``.
    """
    # Map the UI label to the Whisper task name; both tasks otherwise share
    # exactly the same pipeline.
    whisper_tasks = {"Transcribe": "transcribe", "Translate": "translate"}
    # Reject unknown tasks before spending time on the download.
    if task not in whisper_tasks:
        raise ValueError("Task not supported")

    yt = YouTube(link)
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
    results = loaded_model.transcribe(path, task=whisper_tasks[task], best_of=5)
    segments = results["segments"]
    vtt = getSubs(segments, "vtt", 80)
    srt = getSubs(segments, "srt", 80)
    return results["text"], vtt, srt, results["language"]
95
+
96
+
97
@st.cache(allow_output_mutation=True)
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    """Render transcription segments as a subtitle document.

    Args:
        segments: Segment dicts forwarded unchanged to the subtitle writer.
        format: ``'vtt'`` or ``'srt'``.
        maxLineWidth: Forwarded to the writer as ``maxLineWidth``.

    Returns:
        Everything the writer emitted, as one string.

    Raises:
        ValueError: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
    """
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        # ValueError is still caught by callers handling `Exception`.
        raise ValueError("Unknown format " + format)

    # getvalue() returns the whole buffer regardless of the stream position.
    return segmentStream.getvalue()
110
+
111
+
112
def get_language_code(language):
    """Translate a language key into the value stored in ``LANGUAGES``.

    Args:
        language: Key to look up in the ``LANGUAGES`` mapping.

    Returns:
        The mapped value.

    Raises:
        ValueError: If ``language`` is not a key of ``LANGUAGES``.
    """
    try:
        return LANGUAGES[language]
    except KeyError:
        raise ValueError("Language not supported") from None
118
+
119
+
120
def generate_subtitled_video(video, audio, transcript):
    """Burn subtitles into a video and return the result as a binary stream.

    ffmpeg applies the ``subtitles`` filter (fed with ``transcript``) to the
    video input, joins it with the audio input and writes ``final.mp4`` to the
    current working directory, overwriting any previous file.

    Args:
        video: Path of the video input.
        audio: Path of the audio input.
        transcript: Path of the subtitle file given to the ``subtitles`` filter.

    Returns:
        A readable, seekable in-memory binary stream with the bytes of
        ``final.mp4``.
    """
    from io import BytesIO

    video_file = ffmpeg.input(video)
    audio_file = ffmpeg.input(audio)
    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
    # The original returned the open file object, which was never closed.
    # Reading it into memory keeps a file-like return value while releasing
    # the OS handle right away.
    with open("final.mp4", "rb") as rendered:
        return BytesIO(rendered.read())
126
+
127
+
128
def _save_transcript(file_name, text):
    """Write ``text`` to ``file_name`` as UTF-8 and return the file's bytes."""
    with open(file_name, "w", encoding='utf8') as f:
        f.write(text)
    with open(os.path.join(os.getcwd(), file_name), "rb") as f:
        return f.read()


def main():
    """Render the YouTube-link page and run the subtitle pipeline on demand.

    Both tasks ("Transcribe" and "Translate") go through the same steps; they
    differ only in the label of the start button and in the task name handed
    to ``inference``.
    """
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)

    # Start-button label for each supported task.
    button_labels = {"Transcribe": "Transcribe", "Translate": "Translate to English"}
    if task not in button_labels:
        st.error("Please select a task.")
        return
    if not st.button(button_labels[task]):
        return
    # Avoid a raw traceback when the button is pressed with an empty box.
    if not link.strip():
        st.error("Please enter a YouTube link.")
        return

    # Only the title is used later (download file name).
    title = populate_metadata(link)[1]
    text, vtt, srt, lang = inference(link, loaded_model, task)
    video = download_video(link)
    # Kept from the original flow: raises ValueError when the detected
    # language has no entry in LANGUAGES. The returned code is not used.
    get_language_code(lang)

    col3, col4 = st.columns(2)
    col5, col6, col7, col8 = st.columns(4)
    col9, col10 = st.columns(2)
    with col3:
        st.video(video)

    # Persist the transcripts; transcript.srt is read by ffmpeg further down.
    datatxt = _save_transcript("transcript.txt", text)
    datavtt = _save_transcript("transcript.vtt", vtt)
    datasrt = _save_transcript("transcript.srt", srt)

    with col5:
        st.download_button(label="Download Transcript (.txt)",
                           data=datatxt,
                           file_name="transcript.txt")
    with col6:
        st.download_button(label="Download Transcript (.vtt)",
                           data=datavtt,
                           file_name="transcript.vtt")
    with col7:
        st.download_button(label="Download Transcript (.srt)",
                           data=datasrt,
                           file_name="transcript.srt")
    with col9:
        st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
    with col10:
        st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

    with col4:
        with st.spinner("Generating Subtitled Video"):
            video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
        st.video(video_with_subs)
        st.balloons()
    with col8:
        st.download_button(label="Download Subtitled Video",
                           data=video_with_subs,
                           file_name=f"{title} with subtitles.mp4")


if __name__ == "__main__":
    main()
    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Batuhan Yılmaz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Whisper-Auto-Subtitled-Video-Generator
3
+ emoji: 🎥
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: 01_🎥_Input_YouTube_Link.py
9
+ pinned: false
10
+ duplicated_from: BatuhanYilmaz/Whisper-Auto-Subtitled-Video-Generator
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
languages.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Maps the two-letter (occasionally three-letter) language codes reported by
# Whisper to three-letter language codes.
LANGUAGES = {
    "en": "eng",
    "zh": "zho",
    "de": "deu",
    "es": "spa",
    "ru": "rus",
    "ko": "kor",
    "fr": "fra",
    "ja": "jpn",
    "pt": "por",
    "tr": "tur",
    "pl": "pol",
    "ca": "cat",
    "nl": "nld",
    "ar": "ara",
    "sv": "swe",
    "it": "ita",
    "id": "ind",
    "hi": "hin",
    "fi": "fin",
    "vi": "vie",
    "iw": "heb",
    # Newer Whisper releases report Hebrew as "he" instead of "iw".
    "he": "heb",
    "uk": "ukr",
    "el": "ell",
    "ms": "msa",
    "cs": "ces",
    "ro": "ron",
    "da": "dan",
    "hu": "hun",
    "ta": "tam",
    "no": "nor",
    "th": "tha",
    "ur": "urd",
    "hr": "hrv",
    "bg": "bul",
    "lt": "lit",
    "la": "lat",
    "mi": "mri",
    "ml": "mal",
    "cy": "cym",
    "sk": "slk",
    "te": "tel",
    "fa": "fas",
    "lv": "lav",
    "bn": "ben",
    "sr": "srp",
    "az": "aze",
    "sl": "slv",
    "kn": "kan",
    "et": "est",
    "mk": "mkd",
    "br": "bre",
    "eu": "eus",
    "is": "isl",
    "hy": "hye",
    "ne": "nep",
    "mn": "mon",
    "bs": "bos",
    "kk": "kaz",
    "sq": "sqi",
    "sw": "swa",
    "gl": "glg",
    "mr": "mar",
    "pa": "pan",
    "si": "sin",
    "km": "khm",
    "sn": "sna",
    "yo": "yor",
    "so": "som",
    "af": "afr",
    "oc": "oci",
    "ka": "kat",
    "be": "bel",
    "tg": "tgk",
    "sd": "snd",
    "gu": "guj",
    "am": "amh",
    "yi": "yid",
    "lo": "lao",
    "uz": "uzb",
    "fo": "fao",
    "ht": "hat",
    "ps": "pus",
    "tk": "tuk",
    "nn": "nno",
    "mt": "mlt",
    "sa": "san",
    "lb": "ltz",
    "my": "mya",
    "bo": "bod",
    "tl": "tgl",
    "mg": "mlg",
    "as": "asm",
    "tt": "tat",
    "haw": "haw",
    "ln": "lin",
    "ha": "hau",
    "ba": "bak",
    "jw": "jav",
    "su": "sun",
    # Cantonese, added to Whisper's language list with the large-v3 model.
    "yue": "yue",
}
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
pages/02_📼_Upload_Video_File.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import streamlit as st
3
+ from streamlit_lottie import st_lottie
4
+ from utils import write_vtt, write_srt
5
+ import ffmpeg
6
+ import requests
7
+ from typing import Iterator
8
+ from io import StringIO
9
+ import numpy as np
10
+ import pathlib
11
+ import os
12
+
13
+ st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
14
+
15
+ # Define a function that we can use to load lottie files from a link.
16
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    """Download a Lottie animation description and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON file.

    Returns:
        The decoded JSON payload, or ``None`` when the server does not answer
        with HTTP 200.
    """
    # Without a timeout `requests.get` can block forever on a stalled
    # connection, which would freeze the whole page while it renders.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        return None
    return r.json()
22
+
23
+
24
# Working directories, created next to this script on first run.
APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


# NOTE(review): main() rebinds its own `loaded_model` from change_model(), so
# this eager "base" load looks unused in the visible code -- confirm.
loaded_model = whisper.load_model("base")
current_size = "None"


# Page header: animation in the narrow left column, usage notes on the right.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_HjK9Ol.json")
    # load_lottieurl returns None on a non-200 answer; skip the animation
    # instead of handing None to the Lottie component.
    if lottie is not None:
        st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### Upload a video file and get a video with subtitles.
    ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
48
+
49
+
50
@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    """Load and return the Whisper model named ``size``.

    Args:
        current_size: Name of the model size considered already active.
        size: Name of the model size to load.

    Raises:
        Exception: If ``size`` equals ``current_size``.
    """
    if current_size == size:
        raise Exception("Model size is the same as the current size.")
    return whisper.load_model(size)
57
+
58
+
59
@st.cache(allow_output_mutation=True)
def inferecence(loaded_model, uploaded_file, task):
    """Store an uploaded video, extract its audio and run Whisper on it.

    The upload is written to ``input.mp4`` inside ``save_dir``; ffmpeg then
    converts it to a mono 16 kHz PCM file ``output.wav`` in the same folder,
    which is what the model transcribes.

    Args:
        loaded_model: Whisper model exposing ``transcribe``.
        uploaded_file: Object with a ``read()`` method returning the upload's
            bytes.
        task: ``"Transcribe"`` or ``"Translate"`` as selected in the UI.

    Returns:
        A 4-tuple ``(text, vtt, srt, language)``.

    Raises:
        ValueError: If ``task`` is neither ``"Transcribe"`` nor ``"Translate"``.
    """
    # Map the UI label to the Whisper task name; both tasks otherwise share
    # exactly the same pipeline.
    whisper_tasks = {"Transcribe": "transcribe", "Translate": "translate"}
    # Reject unknown tasks before touching the disk or running ffmpeg.
    if task not in whisper_tasks:
        raise ValueError("Task not supported")

    input_path = f"{save_dir}/input.mp4"
    wav_path = f"{save_dir}/output.wav"
    with open(input_path, "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(input_path)
    audio = ffmpeg.output(audio, wav_path, acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)

    results = loaded_model.transcribe(wav_path, task=whisper_tasks[task], best_of=5)
    segments = results["segments"]
    vtt = getSubs(segments, "vtt", 80)
    srt = getSubs(segments, "srt", 80)
    return results["text"], vtt, srt, results["language"]
82
+
83
+
84
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    """Render transcription segments as a subtitle document.

    Args:
        segments: Segment dicts forwarded unchanged to the subtitle writer.
        format: ``'vtt'`` or ``'srt'``.
        maxLineWidth: Forwarded to the writer as ``maxLineWidth``.

    Returns:
        Everything the writer emitted, as one string.

    Raises:
        ValueError: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
    """
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        # ValueError is still caught by callers handling `Exception`.
        raise ValueError("Unknown format " + format)

    # getvalue() returns the whole buffer regardless of the stream position.
    return segmentStream.getvalue()
96
+
97
+
98
def generate_subtitled_video(video, audio, transcript):
    """Burn subtitles into a video and return the result as a binary stream.

    ffmpeg applies the ``subtitles`` filter (fed with ``transcript``) to the
    video input, joins it with the audio input and writes ``final.mp4`` to the
    current working directory, overwriting any previous file.

    Args:
        video: Path of the video input.
        audio: Path of the audio input.
        transcript: Path of the subtitle file given to the ``subtitles`` filter.

    Returns:
        A readable, seekable in-memory binary stream with the bytes of
        ``final.mp4``.
    """
    from io import BytesIO

    video_file = ffmpeg.input(video)
    audio_file = ffmpeg.input(audio)
    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
    # The original returned the open file object, which was never closed.
    # Reading it into memory keeps a file-like return value while releasing
    # the OS handle right away.
    with open("final.mp4", "rb") as rendered:
        return BytesIO(rendered.read())
104
+
105
+
106
def _save_transcript(file_name, text):
    """Write ``text`` to ``file_name`` as UTF-8 and return the file's bytes."""
    with open(file_name, "w", encoding='utf8') as f:
        f.write(text)
    with open(os.path.join(os.getcwd(), file_name), "rb") as f:
        return f.read()


def main():
    """Render the video-upload page and run the subtitle pipeline on demand.

    Both tasks ("Transcribe" and "Translate") go through the same steps; they
    differ only in the label of the start button and in the task name handed
    to ``inferecence``.
    """
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    input_file = st.file_uploader("File", type=["mp4", "avi", "mov", "mkv"])
    # Base name of the upload, used for the download file name. splitext
    # gives the same result as the former `[:-4]` slice for the accepted
    # three-letter extensions without relying on their length.
    filename = os.path.splitext(input_file.name)[0] if input_file is not None else None
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)

    # Start-button label for each supported task.
    button_labels = {"Transcribe": "Transcribe", "Translate": "Translate to English"}
    if task not in button_labels:
        st.error("Please select a task.")
        return
    if not st.button(button_labels[task]):
        return
    # Avoid a raw traceback when the button is pressed before uploading.
    if input_file is None:
        st.error("Please upload a video file.")
        return

    # The fourth element (detected language) is not used on this page.
    text, vtt, srt, _ = inferecence(loaded_model, input_file, task)
    col3, col4 = st.columns(2)
    col5, col6, col7, col8 = st.columns(4)
    col9, col10 = st.columns(2)
    with col3:
        st.video(input_file)

    # Persist the transcripts; transcript.srt is read by ffmpeg further down.
    datatxt = _save_transcript("transcript.txt", text)
    datavtt = _save_transcript("transcript.vtt", vtt)
    datasrt = _save_transcript("transcript.srt", srt)

    with col5:
        st.download_button(label="Download Transcript (.txt)",
                           data=datatxt,
                           file_name="transcript.txt")
    with col6:
        st.download_button(label="Download Transcript (.vtt)",
                           data=datavtt,
                           file_name="transcript.vtt")
    with col7:
        st.download_button(label="Download Transcript (.srt)",
                           data=datasrt,
                           file_name="transcript.srt")
    with col9:
        st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
    with col10:
        st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

    with col4:
        with st.spinner("Generating Subtitled Video"):
            video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav", "transcript.srt")
        st.video(video_with_subs)
        st.snow()
    with col8:
        st.download_button(label="Download Video with Subtitles",
                           data=video_with_subs,
                           file_name=f"{filename}_with_subs.mp4")


if __name__ == "__main__":
    main()
    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
pages/03_📝_Upload_Video_File_and_Transcript.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_lottie import st_lottie
3
+ from utils import write_vtt, write_srt
4
+ import ffmpeg
5
+ import requests
6
+ from typing import Iterator
7
+ from io import StringIO
8
+ import numpy as np
9
+ import pathlib
10
+ import os
11
+
12
+
13
+ st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
14
+
15
+ # Define a function that we can use to load lottie files from a link.
16
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    """Download a Lottie animation description and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON file.

    Returns:
        The decoded JSON payload, or ``None`` when the server does not answer
        with HTTP 200.
    """
    # Without a timeout `requests.get` can block forever on a stalled
    # connection, which would freeze the whole page while it renders.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        return None
    return r.json()
22
+
23
+
24
# Working directories, created next to this script on first run.
APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local_transcript"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


# Page header: animation in the narrow left column, usage notes on the right.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets6.lottiefiles.com/packages/lf20_cjnxwrkt.json")
    # load_lottieurl returns None on a non-200 answer; skip the animation
    # instead of handing None to the Lottie component.
    if lottie is not None:
        st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### ➠ Upload a video file and a transcript as .srt or .vtt file and get a video with subtitles.
    ##### ➠ Processing time will increase as the video length increases. """)
42
+
43
+
44
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    """Render transcription segments as a subtitle document.

    Args:
        segments: Segment dicts forwarded unchanged to the subtitle writer.
        format: ``'vtt'`` or ``'srt'``.
        maxLineWidth: Forwarded to the writer as ``maxLineWidth``.

    Returns:
        Everything the writer emitted, as one string.

    Raises:
        ValueError: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
    """
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        # ValueError is still caught by callers handling `Exception`.
        raise ValueError("Unknown format " + format)

    # getvalue() returns the whole buffer regardless of the stream position.
    return segmentStream.getvalue()
56
+
57
+
58
def split_video_audio(uploaded_file):
    """Save the upload as ``input.mp4`` and extract its audio as ``output.wav``.

    Both files are placed in ``save_dir``. The audio is written by ffmpeg as
    mono 16 kHz ``pcm_s16le``; an existing ``output.wav`` is overwritten.

    Args:
        uploaded_file: Object with a ``read()`` method returning the upload's
            bytes.
    """
    source_path = f"{save_dir}/input.mp4"
    with open(source_path, "wb") as target:
        target.write(uploaded_file.read())
    extraction = ffmpeg.output(
        ffmpeg.input(source_path),
        f"{save_dir}/output.wav",
        acodec="pcm_s16le",
        ac=1,
        ar="16k",
    )
    ffmpeg.run(extraction, overwrite_output=True)
64
+
65
+
66
def main():
    """Render the page that burns an uploaded transcript into an uploaded video.

    Once both files are present the transcript is stored as
    ``uploaded_transcript.<ext>`` in the working directory. Pressing the
    button splits the video's audio, lets ffmpeg render the subtitles into
    ``final.mp4`` and offers the result for preview and download.
    """
    uploaded_video = st.file_uploader("Upload Video File", type=["mp4", "avi", "mov", "mkv"])
    # Base name of the upload, used for the download file name.
    filename = os.path.splitext(uploaded_video.name)[0] if uploaded_video is not None else None
    transcript_file = st.file_uploader("Upload Transcript File", type=["srt", "vtt"])
    if uploaded_video is None or transcript_file is None:
        st.info("Please upload a video file and a transcript file")
        return

    # Compare the extension case-insensitively so that e.g. "SUBS.SRT" is
    # accepted as well.
    extension = os.path.splitext(transcript_file.name)[1].lstrip(".").lower()
    if extension not in ("vtt", "srt"):
        st.error("Please upload a .srt or .vtt file")
        return

    transcript_path = f"uploaded_transcript.{extension}"
    with open(transcript_path, "wb") as f:
        f.writelines(transcript_file)

    if not st.button("Generate Video with Subtitles"):
        return

    with st.spinner("Generating Subtitled Video"):
        split_video_audio(uploaded_video)
        video_file = ffmpeg.input(f"{save_dir}/input.mp4")
        audio_file = ffmpeg.input(f"{save_dir}/output.wav")
        render = ffmpeg.concat(video_file.filter("subtitles", transcript_path), audio_file, v=1, a=1).output("final.mp4")
        if extension == "vtt":
            # Kept from the original .vtt branch, which alone passed -report.
            # NOTE(review): looks like a debugging leftover -- confirm.
            render = render.global_args('-report')
        render.run(quiet=True, overwrite_output=True)
        # Read the result inside a `with` block so the handle is closed;
        # st.video and st.download_button below receive the raw bytes.
        with open("final.mp4", "rb") as rendered:
            video_with_subs = rendered.read()

    col3, col4 = st.columns(2)
    with col3:
        st.video(uploaded_video)
    with col4:
        st.video(video_with_subs)
        st.download_button(label="Download Video with Subtitles",
                           data=video_with_subs,
                           file_name=f"{filename}_with_subs.mp4")


if __name__ == "__main__":
    main()
130
+
pages/04_🔊_Upload_Audio_File.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import streamlit as st
3
+ from streamlit_lottie import st_lottie
4
+ from utils import write_vtt, write_srt
5
+ import ffmpeg
6
+ import requests
7
+ from typing import Iterator
8
+ from io import StringIO
9
+ import numpy as np
10
+ import pathlib
11
+ import os
12
+
13
+ st.set_page_config(page_title="Auto Transcriber", page_icon="🔊", layout="wide")
14
+
15
+ # Define a function that we can use to load lottie files from a link.
16
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    """Download a Lottie animation description and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON file.

    Returns:
        The decoded JSON payload, or ``None`` when the server does not answer
        with HTTP 200.
    """
    # Without a timeout `requests.get` can block forever on a stalled
    # connection, which would freeze the whole page while it renders.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        return None
    return r.json()
22
+
23
+
24
# Working directories, created next to this script on first run.
APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local_audio"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


# Page header: animation in the narrow left column, usage notes on the right.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_1xbk4d2v.json")
    # load_lottieurl returns None on a non-200 answer; skip the animation
    # instead of handing None to the Lottie component.
    if lottie is not None:
        st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Transcriber
    ##### Input an audio file and get a transcript.
    ###### ➠ If you want to transcribe the audio in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the transcription to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)

# NOTE(review): main() rebinds its own `loaded_model` from change_model(), so
# this eager "base" load looks unused in the visible code -- confirm.
loaded_model = whisper.load_model("base")
current_size = "None"
47
+
48
+
49
@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    """Load and return the Whisper model named ``size``.

    Args:
        current_size: Name of the model size considered already active.
        size: Name of the model size to load.

    Raises:
        Exception: If ``size`` equals ``current_size``.
    """
    if current_size == size:
        raise Exception("Model size is the same as the current size.")
    return whisper.load_model(size)
56
+
57
@st.cache(allow_output_mutation=True)
def inferecence(loaded_model, uploaded_file, task):
    """Store an uploaded audio file, normalise it and run Whisper on it.

    The upload is written to ``input.mp3`` inside ``save_dir``; ffmpeg then
    converts it to a mono 16 kHz PCM file ``output.wav`` in the same folder,
    which is what the model transcribes.

    Args:
        loaded_model: Whisper model exposing ``transcribe``.
        uploaded_file: Object with a ``read()`` method returning the upload's
            bytes.
        task: ``"Transcribe"`` or ``"Translate"`` as selected in the UI.

    Returns:
        A 4-tuple ``(text, vtt, srt, language)``.

    Raises:
        ValueError: If ``task`` is neither ``"Transcribe"`` nor ``"Translate"``.
    """
    # Map the UI label to the Whisper task name; both tasks otherwise share
    # exactly the same pipeline.
    whisper_tasks = {"Transcribe": "transcribe", "Translate": "translate"}
    # Reject unknown tasks before touching the disk or running ffmpeg.
    if task not in whisper_tasks:
        raise ValueError("Task not supported")

    input_path = f"{save_dir}/input.mp3"
    wav_path = f"{save_dir}/output.wav"
    with open(input_path, "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(input_path)
    audio = ffmpeg.output(audio, wav_path, acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)

    results = loaded_model.transcribe(wav_path, task=whisper_tasks[task], best_of=5)
    segments = results["segments"]
    vtt = getSubs(segments, "vtt", 80)
    srt = getSubs(segments, "srt", 80)
    return results["text"], vtt, srt, results["language"]
80
+
81
+
82
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    """Render transcript segments as subtitle text.

    Args:
        segments: Segment dicts forwarded to the subtitle writer.
        format: ``'vtt'`` or ``'srt'``.
        maxLineWidth: Line width forwarded to the subtitle writer.

    Returns:
        Everything the writer emitted, as a single string.

    Raises:
        Exception: If ``format`` is neither ``'vtt'`` nor ``'srt'``.
    """
    if format == 'vtt':
        writer = write_vtt
    elif format == 'srt':
        writer = write_srt
    else:
        raise Exception("Unknown format " + format)

    # The writers print to a file object; capture their output in memory.
    buffer = StringIO()
    writer(segments, file=buffer, maxLineWidth=maxLineWidth)
    return buffer.getvalue()
94
+
95
+
96
def main():
    """Render the transcription UI and, on request, the transcript downloads.

    The user picks a model size, uploads an audio file and chooses a task.
    When the action button is pressed, the audio is run through
    ``inferecence`` and the transcript is offered as .txt, .vtt and .srt
    downloads next to an audio player.
    """
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    input_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)

    # Both tasks share one code path; only the button caption differs.
    button_labels = {"Transcribe": "Transcribe", "Translate": "Translate to English"}
    if task not in button_labels:
        st.error("Please select a task.")
        return
    if not st.button(button_labels[task]):
        return
    # Pressing the button before uploading used to crash inside inferecence.
    if input_file is None:
        st.error("Please upload an audio file first.")
        return

    text, vtt, srt, _language = inferecence(loaded_model, input_file, task)

    # Second column of the first row is intentionally left empty so the audio
    # player only takes half of the page width.
    col3, _ = st.columns(2)
    col5, col6, col7 = st.columns(3)
    col9, col10 = st.columns(2)

    with col3:
        st.audio(input_file)

    # Encode in memory rather than round-tripping through fixed-name files in
    # the working directory, which every caller of main() would overwrite.
    downloads = (
        (col5, "Download Transcript (.txt)", text, "transcript.txt"),
        (col6, "Download Transcript (.vtt)", vtt, "transcript.vtt"),
        (col7, "Download Transcript (.srt)", srt, "transcript.srt"),
    )
    for column, label, content, file_name in downloads:
        with column:
            st.download_button(label=label,
                               data=content.encode("utf-8"),
                               file_name=file_name)

    with col9:
        st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
    with col10:
        st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
201
+
202
+
203
if __name__ == "__main__":
    main()
    # Footer credit rendered after the main UI.
    st.markdown(
        "###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)"
    )
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/openai/whisper.git
2
+ ffmpeg==1.4
3
+ ffmpeg_python==0.2.0
4
+ numpy==1.23.3
5
+ pytube==12.1.0
6
+ requests==2.28.1
7
+ streamlit==1.13.0
8
+ streamlit_lottie==0.0.3
9
+ whisper
utils.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import textwrap
2
+ import zlib
3
+ from typing import Iterator, TextIO
4
+
5
+
6
def exact_div(x, y):
    """Return ``x // y``, asserting that ``y`` divides ``x`` without remainder."""
    quotient, remainder = divmod(x, y)
    assert remainder == 0
    return quotient
9
+
10
+
11
def str2bool(string):
    """Convert the exact strings ``"True"`` / ``"False"`` to booleans.

    Raises:
        ValueError: For any other input; the message lists the accepted values.
    """
    lookup = {"True": True, "False": False}
    if string not in lookup:
        raise ValueError(f"Expected one of {set(lookup.keys())}, got {string}")
    return lookup[string]
17
+
18
+
19
def optional_int(string):
    """Parse ``string`` with ``int``, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return int(string)
21
+
22
+
23
def optional_float(string):
    """Parse ``string`` with ``float``, mapping the literal ``"None"`` to ``None``."""
    if string == "None":
        return None
    return float(string)
25
+
26
+
27
def compression_ratio(text) -> float:
    """Return how many times smaller ``text`` becomes under zlib compression.

    Both sizes are measured in UTF-8 bytes. Dividing the character count by
    the compressed byte count would mix units and under-report the ratio for
    text containing multi-byte characters. For pure-ASCII text the two
    measures are identical.
    """
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))
29
+
30
+
31
def format_timestamp(seconds: float, always_include_hours: bool = False, fractionalSeperator: str = '.'):
    """Format a duration in seconds as ``[HH:]MM:SS<sep>mmm``.

    Args:
        seconds: Non-negative duration; rounded to the nearest millisecond.
        always_include_hours: Emit the ``HH:`` prefix even when it is zero.
        fractionalSeperator: String placed between seconds and milliseconds.

    Returns:
        The formatted timestamp string.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # Peel off hours, minutes and seconds; what is left is the millisecond part.
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, millis = divmod(remainder, 1_000)

    prefix = ""
    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    return f"{prefix}{minutes:02d}:{whole_seconds:02d}{fractionalSeperator}{millis:03d}"
46
+
47
+
48
def write_txt(transcript: Iterator[dict], file: TextIO):
    """Write each segment's ``'text'``, stripped of surrounding whitespace, on its own line."""
    stripped_lines = (entry['text'].strip() for entry in transcript)
    for line in stripped_lines:
        print(line, file=file, flush=True)
51
+
52
+
53
def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """Write a transcript to ``file`` in WebVTT format.

    Args:
        transcript: Segment dicts with ``'start'``, ``'end'`` and ``'text'`` keys.
        file: Text stream that receives the output.
        maxLineWidth: Passed to ``processText`` to wrap each cue's text.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        # Strip surrounding whitespace exactly as write_srt does, so the two
        # formats carry identical cue text. "-->" is the cue-timing separator
        # and must not appear inside the cue text itself.
        text = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')

        print(
            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
64
+
65
+
66
def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """
    Write a transcript to ``file`` in SRT format.

    Each segment becomes a cue numbered from 1. Timestamps always include
    hours and use a comma before the milliseconds. The segment text is
    stripped and passed through ``processText`` with ``maxLineWidth``.

    Example usage:
        from pathlib import Path
        from whisper.utils import write_srt
        result = transcribe(model, audio_path, temperature=temperature, **args)
        # save SRT
        audio_basename = Path(audio_path).stem
        with open(Path(output_dir) / (audio_basename + ".srt"), "w", encoding="utf-8") as srt:
            write_srt(result["segments"], file=srt)
    """
    for index, segment in enumerate(transcript, start=1):
        # "-->" separates the two timestamps, so it is replaced inside the text.
        body = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')
        begin = format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')
        finish = format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')

        # write srt lines
        print(f"{index}\n{begin} --> {finish}\n{body}\n", file=file, flush=True)
90
+
91
def processText(text: str, maxLineWidth=None):
    """Wrap ``text`` so that no line exceeds ``maxLineWidth`` characters.

    Args:
        text: Text to wrap.
        maxLineWidth: Maximum line width. ``None``, zero or a negative value
            disables wrapping.

    Returns:
        ``text`` unchanged when wrapping is disabled, otherwise the wrapped
        lines joined with newlines.
    """
    # textwrap.wrap raises ValueError for width 0, so treat it as "no wrapping"
    # alongside None and negative values.
    if maxLineWidth is None or maxLineWidth <= 0:
        return text

    lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
    return '\n'.join(lines)