vitaliy-sharandin
committed on
Commit
•
3b6f758
1
Parent(s):
d9b982f
Update app.py
Browse files
app.py
CHANGED
@@ -16,13 +16,8 @@ os.environ["COQUI_TOS_AGREED"] = "1"
|
|
16 |
HF_TOKEN = os.environ["HF_TOKEN"]
|
17 |
DEEPL_TOKEN = os.environ["DEEPL_TOKEN"]
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
yt = YouTube(url)
|
22 |
-
stream = yt.streams.filter(file_extension='mp4').first()
|
23 |
-
output_path = stream.download()
|
24 |
-
return output_path
|
25 |
-
|
26 |
|
27 |
# Extract audio from video
|
28 |
def extract_audio(video_path):
|
@@ -181,7 +176,7 @@ def voice_cloning_translation(translated_transcription, speakers_voice_clips, ta
|
|
181 |
|
182 |
# Generate speech
|
183 |
print(f"[{speech_item['speaker']}]")
|
184 |
-
|
185 |
sample_rate = None
|
186 |
audio = None
|
187 |
if 'vits' in selected_model:
|
@@ -262,12 +257,21 @@ def video_translation(video_path, target_language, speaker_model, hf_token, deep
|
|
262 |
|
263 |
|
264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
def translation_limit():
|
266 |
translator = deepl.Translator(DEEPL_TOKEN)
|
267 |
usage = translator.get_usage()
|
268 |
if usage.character.valid:
|
269 |
characters_used = usage.character.count
|
270 |
-
minutes_used = characters_used / 750
|
271 |
max_minutes = usage.character.limit / 750
|
272 |
percent_used = (minutes_used / max_minutes) * 100
|
273 |
|
@@ -285,7 +289,7 @@ def translation_limit():
|
|
285 |
|
286 |
progress_bar_html = (
|
287 |
"<div style='width: 100%; background-color: #adb5bd; position: relative; text-align: center; "
|
288 |
-
"line-height: 2em; color: white; font-weight: bold;'>"
|
289 |
"<div style='position: absolute; width: 100%; left: 0; top: 0; z-index: 1;'>"
|
290 |
f"{used_time_str} / {max_time_str}"
|
291 |
"</div>"
|
@@ -304,12 +308,15 @@ def translate_video(video_path, youtube_link, target_language, speaker_model):
|
|
304 |
try:
|
305 |
if not video_path and not youtube_link:
|
306 |
gr.Warning("You should either upload video or input a YouTube link")
|
307 |
-
return None
|
308 |
if youtube_link:
|
309 |
video_path = download_youtube_video(youtube_link)
|
|
|
|
|
|
|
310 |
dubbed_video = video_translation(video_path, target_language, speaker_model, HF_TOKEN, DEEPL_TOKEN)
|
311 |
limit_info = translation_limit()
|
312 |
-
return limit_info, dubbed_video
|
313 |
except Exception as e:
|
314 |
print(f"An error occurred: {e}")
|
315 |
raise e
|
@@ -323,24 +330,27 @@ css = """
|
|
323 |
}
|
324 |
"""
|
325 |
|
|
|
|
|
326 |
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
327 |
|
328 |
gr.Markdown("<h1 style='text-align: center;'>🌐AI Video Translation</h2>")
|
329 |
gr.Markdown("<h3 style='text-align: center;'>Currently supported languages are: English, Polish, Ukrainian, and Russian</h3>")
|
330 |
-
|
331 |
with gr.Row():
|
332 |
with gr.Column(elem_classes=["column-frame"]):
|
333 |
gr.Markdown("<h2 style='text-align: center;'>Inputs</h3>")
|
334 |
-
translation_limit_info = gr.
|
335 |
video = gr.Video(label="Upload a video file")
|
336 |
gr.Markdown("<h3 style='text-align: center;'>OR</h3>")
|
337 |
youtube_link = gr.Textbox(label="Paste YouTube link")
|
|
|
338 |
gr.Markdown("---")
|
339 |
target_language = gr.Dropdown(["en", "pl", "uk", "ru"], value="pl", label="Select translation target language")
|
340 |
speaker_model = gr.Dropdown(["(Recommended) XTTS_V2", "VITs (will be default for Ukrainian)"], value="(Recommended) XTTS_V2", label="Select text-to-speech generation model")
|
341 |
with gr.Row():
|
342 |
clear_btn = gr.Button("Clear inputs")
|
343 |
-
translate_btn = gr.Button("Translate")
|
344 |
|
345 |
with gr.Column():
|
346 |
with gr.Row(elem_classes=["column-frame"]):
|
|
|
16 |
HF_TOKEN = os.environ["HF_TOKEN"]
|
17 |
DEEPL_TOKEN = os.environ["DEEPL_TOKEN"]
|
18 |
|
19 |
+
# Agreeing to terms of coqui-tts model
|
20 |
+
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# Extract audio from video
|
23 |
def extract_audio(video_path):
|
|
|
176 |
|
177 |
# Generate speech
|
178 |
print(f"[{speech_item['speaker']}]")
|
179 |
+
|
180 |
sample_rate = None
|
181 |
audio = None
|
182 |
if 'vits' in selected_model:
|
|
|
257 |
|
258 |
|
259 |
|
260 |
+
def download_youtube_video(url):
|
261 |
+
yt = YouTube(url)
|
262 |
+
if yt.age_restricted:
|
263 |
+
gr.Warning("The Youtube video you are trying to translate is age restricted. Manually download it using the following link(https://en.savefrom.net/) and use file upload, as pytube library doesn't support restricted videos download.")
|
264 |
+
return None
|
265 |
+
stream = yt.streams.filter(file_extension='mp4').first()
|
266 |
+
output_path = stream.download()
|
267 |
+
return output_path
|
268 |
+
|
269 |
def translation_limit():
|
270 |
translator = deepl.Translator(DEEPL_TOKEN)
|
271 |
usage = translator.get_usage()
|
272 |
if usage.character.valid:
|
273 |
characters_used = usage.character.count
|
274 |
+
minutes_used = characters_used / 750
|
275 |
max_minutes = usage.character.limit / 750
|
276 |
percent_used = (minutes_used / max_minutes) * 100
|
277 |
|
|
|
289 |
|
290 |
progress_bar_html = (
|
291 |
"<div style='width: 100%; background-color: #adb5bd; position: relative; text-align: center; "
|
292 |
+
"line-height: 2em; color: white; font-weight: bold;'>"
|
293 |
"<div style='position: absolute; width: 100%; left: 0; top: 0; z-index: 1;'>"
|
294 |
f"{used_time_str} / {max_time_str}"
|
295 |
"</div>"
|
|
|
308 |
try:
|
309 |
if not video_path and not youtube_link:
|
310 |
gr.Warning("You should either upload video or input a YouTube link")
|
311 |
+
return None, None
|
312 |
if youtube_link:
|
313 |
video_path = download_youtube_video(youtube_link)
|
314 |
+
if video_path is None:
|
315 |
+
gr.Warning("Video input did not process well, try again")
|
316 |
+
return None, None
|
317 |
dubbed_video = video_translation(video_path, target_language, speaker_model, HF_TOKEN, DEEPL_TOKEN)
|
318 |
limit_info = translation_limit()
|
319 |
+
return gr.Markdown(limit_info), gr.components.Video(dubbed_video)
|
320 |
except Exception as e:
|
321 |
print(f"An error occurred: {e}")
|
322 |
raise e
|
|
|
330 |
}
|
331 |
"""
|
332 |
|
333 |
+
initial_usage_info = translation_limit()
|
334 |
+
|
335 |
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
336 |
|
337 |
gr.Markdown("<h1 style='text-align: center;'>🌐AI Video Translation</h2>")
|
338 |
gr.Markdown("<h3 style='text-align: center;'>Currently supported languages are: English, Polish, Ukrainian, and Russian</h3>")
|
339 |
+
|
340 |
with gr.Row():
|
341 |
with gr.Column(elem_classes=["column-frame"]):
|
342 |
gr.Markdown("<h2 style='text-align: center;'>Inputs</h3>")
|
343 |
+
translation_limit_info = gr.Markdown(initial_usage_info)
|
344 |
video = gr.Video(label="Upload a video file")
|
345 |
gr.Markdown("<h3 style='text-align: center;'>OR</h3>")
|
346 |
youtube_link = gr.Textbox(label="Paste YouTube link")
|
347 |
+
gr.Markdown("⚠️If you get a warning that the video is age restricted, manually download it using the following [link](https://en.savefrom.net/) and use file upload, as pytube library doesn't support restricted videos download.")
|
348 |
gr.Markdown("---")
|
349 |
target_language = gr.Dropdown(["en", "pl", "uk", "ru"], value="pl", label="Select translation target language")
|
350 |
speaker_model = gr.Dropdown(["(Recommended) XTTS_V2", "VITs (will be default for Ukrainian)"], value="(Recommended) XTTS_V2", label="Select text-to-speech generation model")
|
351 |
with gr.Row():
|
352 |
clear_btn = gr.Button("Clear inputs")
|
353 |
+
translate_btn = gr.Button("Translate")
|
354 |
|
355 |
with gr.Column():
|
356 |
with gr.Row(elem_classes=["column-frame"]):
|