Video_translation_with_speaker_diarization_and_voice_cloning_private

Build error

App Files Files Community

vitaliy-sharandin committed on Nov 20, 2023

Commit

3b6f758

•

1 Parent(s): d9b982f

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -16,13 +16,8 @@ os.environ["COQUI_TOS_AGREED"] = "1"
 HF_TOKEN = os.environ["HF_TOKEN"]
 DEEPL_TOKEN = os.environ["DEEPL_TOKEN"]
-# Download video from Youtube
-def download_youtube_video(url):
- yt = YouTube(url)
- stream = yt.streams.filter(file_extension='mp4').first()
- output_path = stream.download()
- return output_path
 # Extract audio from video
 def extract_audio(video_path):
@@ -181,7 +176,7 @@ def voice_cloning_translation(translated_transcription, speakers_voice_clips, ta
  # Generate speech
  print(f"[{speech_item['speaker']}]")
  sample_rate = None
  audio = None
  if 'vits' in selected_model:
@@ -262,12 +257,21 @@ def video_translation(video_path, target_language, speaker_model, hf_token, deep
 def translation_limit():
  translator = deepl.Translator(DEEPL_TOKEN)
  usage = translator.get_usage()
  if usage.character.valid:
  characters_used = usage.character.count
- minutes_used = characters_used / 750
  max_minutes = usage.character.limit / 750
  percent_used = (minutes_used / max_minutes) * 100
@@ -285,7 +289,7 @@ def translation_limit():
  progress_bar_html = (
  "<div style='width: 100%; background-color: #adb5bd; position: relative; text-align: center; "
- "line-height: 2em; color: white; font-weight: bold;'>"
  "<div style='position: absolute; width: 100%; left: 0; top: 0; z-index: 1;'>"
  f"{used_time_str} / {max_time_str}"
  "</div>"
@@ -304,12 +308,15 @@ def translate_video(video_path, youtube_link, target_language, speaker_model):
  try:
  if not video_path and not youtube_link:
  gr.Warning("You should either upload video or input a YouTube link")
- return None
  if youtube_link:
  video_path = download_youtube_video(youtube_link)
  dubbed_video = video_translation(video_path, target_language, speaker_model, HF_TOKEN, DEEPL_TOKEN)
  limit_info = translation_limit()
- return limit_info, dubbed_video
  except Exception as e:
  print(f"An error occurred: {e}")
  raise e
@@ -323,24 +330,27 @@ css = """
 }
 """
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
  gr.Markdown("<h1 style='text-align: center;'>🌐AI Video Translation</h2>")
  gr.Markdown("<h3 style='text-align: center;'>Currently supported languages are: English, Polish, Ukrainian, and Russian</h3>")
  with gr.Row():
  with gr.Column(elem_classes=["column-frame"]):
  gr.Markdown("<h2 style='text-align: center;'>Inputs</h3>")
- translation_limit_info = gr.HTML(value=translation_limit(), label="Video translation limit", show_label=True)
  video = gr.Video(label="Upload a video file")
  gr.Markdown("<h3 style='text-align: center;'>OR</h3>")
  youtube_link = gr.Textbox(label="Paste YouTube link")
  gr.Markdown("---")
  target_language = gr.Dropdown(["en", "pl", "uk", "ru"], value="pl", label="Select translation target language")
  speaker_model = gr.Dropdown(["(Recommended) XTTS_V2", "VITs (will be default for Ukrainian)"], value="(Recommended) XTTS_V2", label="Select text-to-speech generation model")
  with gr.Row():
  clear_btn = gr.Button("Clear inputs")
- translate_btn = gr.Button("Translate")
  with gr.Column():
  with gr.Row(elem_classes=["column-frame"]):

 HF_TOKEN = os.environ["HF_TOKEN"]
 DEEPL_TOKEN = os.environ["DEEPL_TOKEN"]
+# Agreeing to terms of coqui-tts model
+os.environ["COQUI_TOS_AGREED"] = "1"
 # Extract audio from video
 def extract_audio(video_path):
  # Generate speech
  print(f"[{speech_item['speaker']}]")
  sample_rate = None
  audio = None
  if 'vits' in selected_model:
+def download_youtube_video(url):
+ yt = YouTube(url)
+ if yt.age_restricted:
+ gr.Warning("The Youtube video you are trying to translate is age restricted. Manually download it using the following link(https://en.savefrom.net/) and use file upload, as pytube library doesn't support restricted videos download.")
+ return None
+ stream = yt.streams.filter(file_extension='mp4').first()
+ output_path = stream.download()
+ return output_path
 def translation_limit():
  translator = deepl.Translator(DEEPL_TOKEN)
  usage = translator.get_usage()
  if usage.character.valid:
  characters_used = usage.character.count
+ minutes_used = characters_used / 750
  max_minutes = usage.character.limit / 750
  percent_used = (minutes_used / max_minutes) * 100
  progress_bar_html = (
  "<div style='width: 100%; background-color: #adb5bd; position: relative; text-align: center; "
+ "line-height: 2em; color: white; font-weight: bold;'>"
  "<div style='position: absolute; width: 100%; left: 0; top: 0; z-index: 1;'>"
  f"{used_time_str} / {max_time_str}"
  "</div>"
  try:
  if not video_path and not youtube_link:
  gr.Warning("You should either upload video or input a YouTube link")
+ return None, None
  if youtube_link:
  video_path = download_youtube_video(youtube_link)
+ if video_path is None:
+ gr.Warning("Video input did not process well, try again")
+ return None, None
  dubbed_video = video_translation(video_path, target_language, speaker_model, HF_TOKEN, DEEPL_TOKEN)
  limit_info = translation_limit()
+ return gr.Markdown(limit_info), gr.components.Video(dubbed_video)
  except Exception as e:
  print(f"An error occurred: {e}")
  raise e
 }
 """
+initial_usage_info = translation_limit()
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
  gr.Markdown("<h1 style='text-align: center;'>🌐AI Video Translation</h2>")
  gr.Markdown("<h3 style='text-align: center;'>Currently supported languages are: English, Polish, Ukrainian, and Russian</h3>")
  with gr.Row():
  with gr.Column(elem_classes=["column-frame"]):
  gr.Markdown("<h2 style='text-align: center;'>Inputs</h3>")
+ translation_limit_info = gr.Markdown(initial_usage_info)
  video = gr.Video(label="Upload a video file")
  gr.Markdown("<h3 style='text-align: center;'>OR</h3>")
  youtube_link = gr.Textbox(label="Paste YouTube link")
+ gr.Markdown("⚠️If you get a warning that the video is age restricted, manually download it using the following [link](https://en.savefrom.net/) and use file upload, as pytube library doesn't support restricted videos download.")
  gr.Markdown("---")
  target_language = gr.Dropdown(["en", "pl", "uk", "ru"], value="pl", label="Select translation target language")
  speaker_model = gr.Dropdown(["(Recommended) XTTS_V2", "VITs (will be default for Ukrainian)"], value="(Recommended) XTTS_V2", label="Select text-to-speech generation model")
  with gr.Row():
  clear_btn = gr.Button("Clear inputs")
+ translate_btn = gr.Button("Translate")
  with gr.Column():
  with gr.Row(elem_classes=["column-frame"]):