Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -172,11 +172,11 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
172 |
# Read and convert youtube video
|
173 |
_,file_ending = os.path.splitext(f'{video_file_path}')
|
174 |
print(f'file enging is {file_ending}')
|
|
|
175 |
print("starting conversion to wav")
|
176 |
-
os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{
|
177 |
|
178 |
# Get duration
|
179 |
-
audio_file = video_file_path.replace(file_ending, ".wav")
|
180 |
with contextlib.closing(wave.open(audio_file,'r')) as f:
|
181 |
frames = f.getnframes()
|
182 |
rate = f.getframerate()
|
@@ -184,10 +184,9 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
184 |
print(f"conversion to wav ready, duration of audio file: {duration}")
|
185 |
|
186 |
# Transcribe audio
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
result = model.transcribe(audio_file, task="transcribe", language=selected_source_lang)
|
191 |
segments = result["segments"]
|
192 |
print("starting whisper done with whisper")
|
193 |
except Exception as e:
|
@@ -243,6 +242,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
243 |
|
244 |
|
245 |
# ---- Gradio Layout -----
|
|
|
246 |
video_in = gr.Video(label="Video file", mirror_webcam=False)
|
247 |
youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
248 |
video_out = gr.Video(label="Video Out", mirror_webcam=False)
|
@@ -305,8 +305,8 @@ with demo:
|
|
305 |
with gr.Column():
|
306 |
gr.Markdown('''
|
307 |
##### Here you can start the transcription process.
|
308 |
-
##### Please select source language for transcription.
|
309 |
-
#####
|
310 |
''')
|
311 |
selected_source_lang.render()
|
312 |
selected_whisper_model.render()
|
|
|
172 |
# Read and convert youtube video
|
173 |
_,file_ending = os.path.splitext(f'{video_file_path}')
|
174 |
print(f'file enging is {file_ending}')
|
175 |
+
audio_file = video_file_path.replace(file_ending, ".wav")
|
176 |
print("starting conversion to wav")
|
177 |
+
os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
|
178 |
|
179 |
# Get duration
|
|
|
180 |
with contextlib.closing(wave.open(audio_file,'r')) as f:
|
181 |
frames = f.getnframes()
|
182 |
rate = f.getframerate()
|
|
|
184 |
print(f"conversion to wav ready, duration of audio file: {duration}")
|
185 |
|
186 |
# Transcribe audio
|
187 |
+
options = dict(language=selected_source_lang, beam_size=5, best_of=5)
|
188 |
+
transcribe_options = dict(task="transcribe", **options)
|
189 |
+
result = model.transcribe(audio_file, **transcribe_options)
|
|
|
190 |
segments = result["segments"]
|
191 |
print("starting whisper done with whisper")
|
192 |
except Exception as e:
|
|
|
242 |
|
243 |
|
244 |
# ---- Gradio Layout -----
|
245 |
+
# Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
|
246 |
video_in = gr.Video(label="Video file", mirror_webcam=False)
|
247 |
youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
248 |
video_out = gr.Video(label="Video Out", mirror_webcam=False)
|
|
|
305 |
with gr.Column():
|
306 |
gr.Markdown('''
|
307 |
##### Here you can start the transcription process.
|
308 |
+
##### Please select the source language for transcription.
|
309 |
+
##### You should select a number of speakers for getting better results.
|
310 |
''')
|
311 |
selected_source_lang.render()
|
312 |
selected_whisper_model.render()
|