poemsforaphrodite committed on
Commit
8c070ea
1 Parent(s): 61f84ab

Upload openvoice_app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. openvoice_app.py +10 -26
openvoice_app.py CHANGED
@@ -34,21 +34,15 @@ zh_source_se = torch.load(f'{zh_ckpt_base}/zh_default_se.pth').to(device)
34
  # This online demo mainly supports English and Chinese
35
  supported_languages = ['zh', 'en']
36
 
37
- def predict(prompt, style, audio_file_pth, agree):
38
  # initialize a empty info
39
  text_hint = ''
40
- # agree with the terms
41
- if agree == False:
42
- text_hint += '[ERROR] Please accept the Terms & Condition!\n'
43
- gr.Warning("Please accept the Terms & Condition!")
44
- return (
45
- text_hint,
46
- None,
47
- None,
48
- )
49
 
50
  # first detect the input language
51
- language_predicted = langid.classify(prompt)[0].strip()
52
  print(f"Detected language:{language_predicted}")
53
 
54
  if language_predicted not in supported_languages:
@@ -116,6 +110,8 @@ def predict(prompt, style, audio_file_pth, agree):
116
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
117
  try:
118
  target_se, audio_name = se_extractor.get_se(speaker_wav, tone_color_converter, target_dir='processed', vad=True)
 
 
119
  except Exception as e:
120
  text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
121
  gr.Warning(
@@ -159,18 +155,15 @@ examples = [
159
  "今天天气真好,我们一起出去吃饭吧。",
160
  'default',
161
  "resources/demo_speaker1.mp3",
162
- True,
163
  ],[
164
  "This audio is generated by open voice with a half-performance model.",
165
  'whispering',
166
  "resources/demo_speaker2.mp3",
167
- True,
168
  ],
169
  [
170
  "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
171
  'sad',
172
  "resources/demo_speaker0.mp3",
173
- True,
174
  ],
175
  ]
176
 
@@ -185,19 +178,16 @@ with gr.Blocks(analytics_enabled=False) as demo:
185
  style_gr = gr.Dropdown(
186
  label="Style",
187
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
 
188
  max_choices=1,
189
  value="default",
190
  )
191
  ref_gr = gr.Audio(
192
  label="Reference Audio",
 
193
  type="filepath",
194
  value="resources/demo_speaker2.mp3",
195
  )
196
- tos_gr = gr.Checkbox(
197
- label="Agree",
198
- value=False,
199
- info="I agree to the terms of the cc-by-nc-4.0 license-: https://github.com/myshell-ai/OpenVoice/blob/main/LICENSE",
200
- )
201
 
202
  tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
203
 
@@ -207,13 +197,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
207
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
208
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
209
 
210
- gr.Examples(examples,
211
- label="Examples",
212
- inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
213
- outputs=[out_text_gr, audio_gr, ref_audio_gr],
214
- fn=predict,
215
- cache_examples=False,)
216
- tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
217
 
218
  demo.queue()
219
  demo.launch(debug=True, show_api=True, share=True)
 
34
  # This online demo mainly supports English and Chinese
35
  supported_languages = ['zh', 'en']
36
 
37
+ def predict(prompt, style, audio_file_pth):
38
  # initialize a empty info
39
  text_hint = ''
40
+
41
+ # set agree to True by default
42
+ agree = True
 
 
 
 
 
 
43
 
44
  # first detect the input language
45
+ language_predicted = langid.classify(prompt)[0].strip()
46
  print(f"Detected language:{language_predicted}")
47
 
48
  if language_predicted not in supported_languages:
 
110
  # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
111
  try:
112
  target_se, audio_name = se_extractor.get_se(speaker_wav, tone_color_converter, target_dir='processed', vad=True)
113
+ # base_speaker = f"{output_dir}/openai_source_output.mp3"
114
+ # source_se, audio_name = se_extractor.get_se(base_speaker, tone_color_converter, vad=True)
115
  except Exception as e:
116
  text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
117
  gr.Warning(
 
155
  "今天天气真好,我们一起出去吃饭吧。",
156
  'default',
157
  "resources/demo_speaker1.mp3",
 
158
  ],[
159
  "This audio is generated by open voice with a half-performance model.",
160
  'whispering',
161
  "resources/demo_speaker2.mp3",
 
162
  ],
163
  [
164
  "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
165
  'sad',
166
  "resources/demo_speaker0.mp3",
 
167
  ],
168
  ]
169
 
 
178
  style_gr = gr.Dropdown(
179
  label="Style",
180
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
181
+ info="Please upload a reference audio file, it should be 1 minute long and clear.",
182
  max_choices=1,
183
  value="default",
184
  )
185
  ref_gr = gr.Audio(
186
  label="Reference Audio",
187
+
188
  type="filepath",
189
  value="resources/demo_speaker2.mp3",
190
  )
 
 
 
 
 
191
 
192
  tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
193
 
 
197
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
198
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
199
 
200
+ tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
 
 
 
 
 
 
201
 
202
  demo.queue()
203
  demo.launch(debug=True, show_api=True, share=True)