Spaces:
Build error
Build error
Alex Volkov
committed on
Commit
·
a72265c
1
Parent(s):
2e0131e
This seems to be good? hmmm
Browse files
- download.py +25 -17
- utils/apis.py +2 -2
download.py
CHANGED
@@ -173,31 +173,39 @@ def caption_generator(social_media_url,uid, language="Autodetect", model_size=mo
|
|
173 |
print(f"Starting whisper transcribe with {uid}.mp3")
|
174 |
transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
|
175 |
detected_language = LANGUAGES[transcribe_whisper_result["language"]]
|
176 |
-
translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
|
177 |
print(f"Transcribe successful!, writing files")
|
|
|
178 |
vtt_path = tempdir / f"{transcribe_whisper_result['language']}.vtt"
|
179 |
-
en_vtt_path = tempdir / f"en.vtt"
|
180 |
|
181 |
with open(vtt_path.resolve(), "w", encoding="utf-8") as vtt:
|
182 |
write_vtt(transcribe_whisper_result["segments"], file=vtt)
|
183 |
|
184 |
-
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
|
187 |
except Exception as e:
|
188 |
print(f"Could not transcribe file: {e}")
|
189 |
-
|
190 |
-
|
191 |
-
whisper_result_captions = [
|
192 |
-
{
|
193 |
-
"language_tag": transcribe_whisper_result["language"],
|
194 |
-
"vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(), name=f"{uid}.{transcribe_whisper_result['language']}.vtt")
|
195 |
-
},
|
196 |
-
{
|
197 |
-
"language_tag": "en",
|
198 |
-
"vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(), name=f"{uid}.en.vtt")
|
199 |
-
}
|
200 |
-
]
|
201 |
|
202 |
return 'success', whisper_result_captions
|
203 |
|
@@ -315,7 +323,7 @@ def transcribe(audio, translate_action=True, language='Autodetect', override_mod
|
|
315 |
}
|
316 |
|
317 |
if language != 'Autodetect':
|
318 |
-
props["language"] = TO_LANGUAGE_CODE[language.lower()]
|
319 |
|
320 |
output = model.transcribe(audio, verbose=True, **props)
|
321 |
|
|
|
173 |
print(f"Starting whisper transcribe with {uid}.mp3")
|
174 |
transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
|
175 |
detected_language = LANGUAGES[transcribe_whisper_result["language"]]
|
|
|
176 |
print(f"Transcribe successful!, writing files")
|
177 |
+
|
178 |
vtt_path = tempdir / f"{transcribe_whisper_result['language']}.vtt"
|
|
|
179 |
|
180 |
with open(vtt_path.resolve(), "w", encoding="utf-8") as vtt:
|
181 |
write_vtt(transcribe_whisper_result["segments"], file=vtt)
|
182 |
|
183 |
+
whisper_result_captions = [
|
184 |
+
{
|
185 |
+
"language_tag": transcribe_whisper_result["language"],
|
186 |
+
"vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(),
|
187 |
+
name=f"{uid}.{transcribe_whisper_result['language']}.vtt")
|
188 |
+
},
|
189 |
+
]
|
190 |
+
|
191 |
+
if detected_language != "en":
|
192 |
+
print(f"Transcribe successful! Starting translation to English")
|
193 |
+
translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
|
194 |
+
print(f"Trfan!, writing files")
|
195 |
+
en_vtt_path = tempdir / f"en.vtt"
|
196 |
+
with open(en_vtt_path.resolve(), "w", encoding="utf-8") as en_vtt:
|
197 |
+
write_vtt(transcribe_whisper_result["segments"], file=en_vtt)
|
198 |
+
print(f"Finished translation to English, preparing subtitle files")
|
199 |
+
whisper_result_captions.append(
|
200 |
+
{
|
201 |
+
"language_tag": "en",
|
202 |
+
"vtt_file": anvil.BlobMedia(content_type="text/plain", content=vtt_path.read_bytes(), name=f"{uid}.en.vtt")
|
203 |
+
}
|
204 |
+
)
|
205 |
|
206 |
except Exception as e:
|
207 |
print(f"Could not transcribe file: {e}")
|
208 |
+
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
return 'success', whisper_result_captions
|
211 |
|
|
|
323 |
}
|
324 |
|
325 |
if language != 'Autodetect':
|
326 |
+
props["language"] = TO_LANGUAGE_CODE[language.lower()] if len(language) > 2 else language
|
327 |
|
328 |
output = model.transcribe(audio, verbose=True, **props)
|
329 |
|
utils/apis.py
CHANGED
@@ -63,8 +63,8 @@ def test_api(url=''):
|
|
63 |
# TODO: add an anvil server pingback to show we completed the queue operation
|
64 |
return f"I've slept for 15 seconds and now I'm done. "
|
65 |
|
66 |
-
|
67 |
-
def caption(downloadable_url="",uid="", language="Autodetect", override_model_size=""):
|
68 |
"""
|
69 |
:param media_id: The twitter media ID object
|
70 |
:param user_id_str: The twitter user ID string
|
|
|
63 |
# TODO: add an anvil server pingback to show we completed the queue operation
|
64 |
return f"I've slept for 15 seconds and now I'm done. "
|
65 |
|
66 |
+
|
67 |
+
def caption(downloadable_url="", uid="", language="Autodetect", override_model_size=""):
|
68 |
"""
|
69 |
:param media_id: The twitter media ID object
|
70 |
:param user_id_str: The twitter user ID string
|