Ubuntu committed on
Commit
da70d80
1 Parent(s): 893eb12

add speaker selection

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -24,6 +24,14 @@ DEVELOPER_PASSWORD = os.getenv("DEV_PWD")
24
  # Add this constant for the RapidAPI key
25
  RAPID_API_KEY = os.getenv("RAPID_API_KEY")
26
 
 
 
 
 
 
 
 
 
27
  def fetch_youtube_id(youtube_url: str) -> str:
28
  if 'v=' in youtube_url:
29
  return youtube_url.split("v=")[1].split("&")[0]
@@ -108,7 +116,7 @@ def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64):
108
  else:
109
  return "The system got some error during vLLM generation. Please try it again."
110
 
111
- def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
112
  if youtube_url:
113
  audio = download_youtube_audio(youtube_url)
114
  if not audio:
@@ -141,7 +149,7 @@ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
141
  tts_params = {
142
  'language': target_lang,
143
  'speed': 1.1,
144
- 'speaker': 'MS' if target_lang == 'en' else 'msFemale' if target_lang == 'ma' else 'ta_female1' if target_lang == 'ta' else 'childChinese2',
145
  'text': translated_text
146
  }
147
 
@@ -156,8 +164,8 @@ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None):
156
  def check_password(password):
157
  return password == DEVELOPER_PASSWORD
158
 
159
- def run_speech_translation(audio, source_lang, target_lang, youtube_url):
160
- transcription, translated_text, audio_url = transcribe_and_speak(audio, source_lang, target_lang, youtube_url)
161
 
162
  return transcription, translated_text, audio_url
163
 
@@ -172,7 +180,8 @@ with gr.Blocks() as demo:
172
  user_youtube_url = gr.Textbox(label="YouTube URL (optional)")
173
  user_source_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Source Language", value="en")
174
  user_target_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Target Language", value="zh")
175
-
 
176
  with gr.Row():
177
  user_button = gr.Button("Translate and Speak", interactive=False)
178
 
@@ -200,7 +209,7 @@ with gr.Blocks() as demo:
200
 
201
  user_button.click(
202
  fn=run_speech_translation,
203
- inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url],
204
  outputs=[user_transcription_output, user_translation_output, user_audio_output]
205
  )
206
 
@@ -219,4 +228,13 @@ with gr.Blocks() as demo:
219
  outputs=[user_video_output]
220
  )
221
 
 
 
 
 
 
 
 
 
 
222
  demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
 
24
  # Add this constant for the RapidAPI key
25
  RAPID_API_KEY = os.getenv("RAPID_API_KEY")
26
 
27
+ # Add this constant for available speakers
28
+ AVAILABLE_SPEAKERS = {
29
+ "en": ["MS"],
30
+ "ma": ["msFemale"],
31
+ "ta": ["ta_female1"],
32
+ "zh": ["childChinese2"]
33
+ }
34
+
35
  def fetch_youtube_id(youtube_url: str) -> str:
36
  if 'v=' in youtube_url:
37
  return youtube_url.split("v=")[1].split("&")[0]
 
116
  else:
117
  return "The system got some error during vLLM generation. Please try it again."
118
 
119
+ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None):
120
  if youtube_url:
121
  audio = download_youtube_audio(youtube_url)
122
  if not audio:
 
149
  tts_params = {
150
  'language': target_lang,
151
  'speed': 1.1,
152
+ 'speaker': target_speaker or AVAILABLE_SPEAKERS[target_lang][0], # Use the first speaker as default
153
  'text': translated_text
154
  }
155
 
 
164
  def check_password(password):
165
  return password == DEVELOPER_PASSWORD
166
 
167
+ def run_speech_translation(audio, source_lang, target_lang, youtube_url, target_speaker):
168
+ transcription, translated_text, audio_url = transcribe_and_speak(audio, source_lang, target_lang, youtube_url, target_speaker)
169
 
170
  return transcription, translated_text, audio_url
171
 
 
180
  user_youtube_url = gr.Textbox(label="YouTube URL (optional)")
181
  user_source_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Source Language", value="en")
182
  user_target_lang = gr.Dropdown(choices=["en", "ma", "ta", "zh"], label="Target Language", value="zh")
183
+ user_target_speaker = gr.Dropdown(choices=[], label="Target Speaker")
184
+
185
  with gr.Row():
186
  user_button = gr.Button("Translate and Speak", interactive=False)
187
 
 
209
 
210
  user_button.click(
211
  fn=run_speech_translation,
212
+ inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url, user_target_speaker],
213
  outputs=[user_transcription_output, user_translation_output, user_audio_output]
214
  )
215
 
 
228
  outputs=[user_video_output]
229
  )
230
 
231
+ def update_target_speakers(target_lang):
232
+ return gr.Dropdown(choices=AVAILABLE_SPEAKERS[target_lang], value=AVAILABLE_SPEAKERS[target_lang][0])
233
+
234
+ user_target_lang.change(
235
+ fn=update_target_speakers,
236
+ inputs=[user_target_lang],
237
+ outputs=[user_target_speaker]
238
+ )
239
+
240
  demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))