Pendrokar committed on
Commit
76e9cbc
·
1 Parent(s): 015adff

gradio: Interface => Blocks

Browse files
Files changed (2) hide show
  1. app.py +236 -71
  2. gr_client.py +0 -1
app.py CHANGED
@@ -35,34 +35,34 @@ base_speaker_emb = ''
35
 
36
  # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
37
  languages = [
38
- ("🇬🇧 EN", "en"),
39
- ("🇩🇪 DE", "de"),
40
- ("🇪🇸 ES", "es"),
41
- ("🇮🇹 IT", "it"),
42
- ("🇳🇱 NL", "nl"),
43
- ("🇵🇹 PT", "pt"),
44
- ("🇵🇱 PL", "pl"),
45
- ("🇷🇴 RO", "ro"),
46
- ("🇸🇪 SV", "sv"),
47
- ("🇩🇰 DA", "da"),
48
- ("🇫🇮 FI", "fi"),
49
- ("🇭🇺 HU", "hu"),
50
- ("🇬🇷 EL", "el"),
51
- ("🇫🇷 FR", "fr"),
52
- ("🇷🇺 RU", "ru"),
53
- ("🇺🇦 UK", "uk"),
54
- ("🇹🇷 TR", "tr"),
55
- ("🇸🇦 AR", "ar"),
56
- ("🇮🇳 HI", "hi"),
57
- ("🇯🇵 JP", "jp"),
58
- ("🇰🇷 KO", "ko"),
59
- ("🇨🇳 ZH", "zh"),
60
- ("🇻🇳 VI", "vi"),
61
- ("🇻🇦 LA", "la"),
62
- ("HA", "ha"),
63
- ("SW", "sw"),
64
- ("🇳🇬 YO", "yo"),
65
- ("WO", "wo"),
66
  ]
67
 
68
  # Translated from English by DeepMind's Gemini Pro
@@ -218,11 +218,33 @@ def predict(
218
  save_path = ''
219
  response = {text: 'Failed'}
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  print('server.log contents:')
222
  with open('resources/app/server.log', 'r') as f:
223
  print(f.read())
224
 
225
- return [save_path, response.text]
 
 
 
 
 
 
 
226
 
227
  input_textbox = gr.Textbox(
228
  label="Input Text",
@@ -246,14 +268,62 @@ voice_radio = gr.Radio(
246
  info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
247
  )
248
 
249
- def set_default_text(lang):
250
- input_textbox = gr.Textbox(
251
- label="Input Text",
252
- value=default_text[lang],
253
- lines=1,
254
- max_lines=5,
255
- autofocus=True
256
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  language_radio = gr.Radio(
259
  languages,
@@ -261,40 +331,135 @@ language_radio = gr.Radio(
261
  label="Language",
262
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
263
  )
264
- # language_radio.change(set_default_text)
265
- deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values")
266
-
267
- gradio_app = gr.Interface(
268
- predict,
269
- [
270
- input_textbox,
271
- voice_radio,
272
- language_radio,
273
- pacing_slider,
274
- pitch_slider,
275
- energy_slider,
276
- anger_slider,
277
- happy_slider,
278
- sad_slider,
279
- surprise_slider,
280
- deepmoji_checkbox
281
- ],
282
- outputs=[
283
- gr.Audio(label="22kHz audio output", type="filepath"),
284
- gr.Textbox(label="xVASynth Server Response")
285
- ],
286
- title="xVASynth (WIP)",
287
- clear_btn=gr.Button(visible=False)
288
- # examples=[
289
- # ["Once, I headed in much deeper. But I doubt I'll ever do that again.", 1],
290
- # ["You love hurting me, huh?", 1.5],
291
- # ["Ah, I see. Well, I'm afraid I can't help with that.", 1],
292
- # ["Embrace your demise!", 1],
293
- # ["Never come back!", 1]
294
- # ],
295
- # cache_examples=None
296
- )
297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  if __name__ == "__main__":
300
  # Run the web server in a separate thread
@@ -303,7 +468,7 @@ if __name__ == "__main__":
303
  web_server_thread.start()
304
 
305
  print('running Gradio interface')
306
- gradio_app.launch()
307
 
308
  # Wait for the web server thread to finish (shouldn't be reached in normal execution)
309
  web_server_thread.join()
 
35
 
36
  # order ranked by similarity to English due to the xVASynth's use of ARPAbet instead of IPA
37
  languages = [
38
+ ("🇬🇧 EN", "en"),
39
+ ("🇩🇪 DE", "de"),
40
+ ("🇪🇸 ES", "es"),
41
+ ("🇮🇹 IT", "it"),
42
+ ("🇳🇱 NL", "nl"),
43
+ ("🇵🇹 PT", "pt"),
44
+ ("🇵🇱 PL", "pl"),
45
+ ("🇷🇴 RO", "ro"),
46
+ ("🇸🇪 SV", "sv"),
47
+ ("🇩🇰 DA", "da"),
48
+ ("🇫🇮 FI", "fi"),
49
+ ("🇭🇺 HU", "hu"),
50
+ ("🇬🇷 EL", "el"),
51
+ ("🇫🇷 FR", "fr"),
52
+ ("🇷🇺 RU", "ru"),
53
+ ("🇺🇦 UK", "uk"),
54
+ ("🇹🇷 TR", "tr"),
55
+ ("🇸🇦 AR", "ar"),
56
+ ("🇮🇳 HI", "hi"),
57
+ ("🇯🇵 JP", "jp"),
58
+ ("🇰🇷 KO", "ko"),
59
+ ("🇨🇳 ZH", "zh"),
60
+ ("🇻🇳 VI", "vi"),
61
+ ("🇻🇦 LA", "la"),
62
+ ("HA", "ha"),
63
+ ("SW", "sw"),
64
+ ("🇳🇬 YO", "yo"),
65
+ ("WO", "wo"),
66
  ]
67
 
68
  # Translated from English by DeepMind's Gemini Pro
 
218
  save_path = ''
219
  response = {text: 'Failed'}
220
 
221
+
222
+ json_data = json.loads(response)
223
+
224
+ arpabet_html = '<h6>ARPAbet & Durations</h6>'
225
+ arpabet_symbols = json_data['arpabet'].split('|')
226
+ for symb_i in range(len(json_data['durations'])):
227
+ if (arpabet_symbols[symb_i] == '<PAD>'):
228
+ continue
229
+
230
+ arpabet_html += '<strong class="arpabet" style="padding: 0 '\
231
+ + str(round(float(json_data['durations'][symb_i]/2), 1))\
232
+ +'em">'\
233
+ + arpabet_symbols[symb_i]\
234
+ + '</strong> '
235
+
236
  print('server.log contents:')
237
  with open('resources/app/server.log', 'r') as f:
238
  print(f.read())
239
 
240
+ return [
241
+ wav_path,
242
+ arpabet_html,
243
+ round(json_data['em_angry'][0], 2),
244
+ round(json_data['em_happy'][0], 2),
245
+ round(json_data['em_sad'][0], 2),
246
+ round(json_data['em_surprise'][0], 2)
247
+ ]
248
 
249
  input_textbox = gr.Textbox(
250
  label="Input Text",
 
268
  info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
269
  )
270
 
271
+ def set_default_text(lang, deepmoji_checked):
272
+ # DeepMoji only works on English Text
273
+ # checkbox_enabled = True
274
+ # if lang != 'en':
275
+ # checkbox_enabled = False
276
+
277
+ if lang == 'en':
278
+ checkbox_enabled = gr.Checkbox(
279
+ label="Use DeepMoji",
280
+ info="Auto adjust emotional values",
281
+ value=deepmoji_checked,
282
+ interactive=True
283
+ )
284
+ else:
285
+ checkbox_enabled = gr.Checkbox(
286
+ label="Use DeepMoji",
287
+ info="Works only with English!",
288
+ value=False,
289
+ interactive=False
290
+ )
291
+
292
+ return default_text[lang], checkbox_enabled # Return the modified textbox (important for Blocks)
293
+
294
+ def reset_em_sliders(
295
+ deepmoji_enabled,
296
+ anger,
297
+ happy,
298
+ sad,
299
+ surprise
300
+ ):
301
+ if (deepmoji_enabled):
302
+ return (0, 0, 0, 0)
303
+ else:
304
+ return (
305
+ anger,
306
+ happy,
307
+ sad,
308
+ surprise
309
+ )
310
+
311
+ def toggle_deepmoji(
312
+ checked,
313
+ anger,
314
+ happy,
315
+ sad,
316
+ surprise
317
+ ):
318
+ if checked:
319
+ return (0, 0, 0, 0)
320
+ else:
321
+ return (
322
+ anger,
323
+ happy,
324
+ sad,
325
+ surprise
326
+ )
327
 
328
  language_radio = gr.Radio(
329
  languages,
 
331
  label="Language",
332
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
333
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
+ with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; border-radius: 5px; font-size: 120%; margin: 0.1em 0}") as demo:
336
+ gr.Markdown("# xVASynth TTS")
337
+
338
+ with gr.Row(): # Main row for inputs and language selection
339
+ with gr.Column(): # Input column
340
+ input_textbox = gr.Textbox(
341
+ label="Input Text",
342
+ value="This is what my voice sounds like.",
343
+ info="Also accepts ARPAbet symbols placed within {} brackets.",
344
+ lines=1,
345
+ max_lines=5,
346
+ autofocus=True
347
+ )
348
+ language_radio = gr.Radio(
349
+ languages,
350
+ value="en",
351
+ label="Language",
352
+ info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
353
+ )
354
+ pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
355
+ with gr.Column(): # Control column
356
+ voice_radio = gr.Radio(
357
+ voice_models,
358
+ value="ccby_nvidia_hifi_6671_M",
359
+ label="Voice",
360
+ info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
361
+ )
362
+ pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
363
+ energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
364
+ with gr.Row(): # Main row for inputs and language selection
365
+ with gr.Column(): # Input column
366
+ anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
367
+ sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
368
+ with gr.Column(): # Input column
369
+ happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
370
+ surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Can oversaturate Happiness")
371
+ deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
372
+
373
+ # Event handling using click
374
+ btn = gr.Button("Generate")
375
+
376
+ with gr.Row(): # Main row for inputs and language selection
377
+ with gr.Column(): # Input column
378
+ output_wav = gr.Audio(label="22kHz audio output", type="filepath", editable=False)
379
+ with gr.Column(): # Input column
380
+ output_arpabet = gr.HTML(label="ARPAbet")
381
+
382
+ btn.click(
383
+ fn=predict,
384
+ inputs=[
385
+ input_textbox,
386
+ voice_radio,
387
+ language_radio,
388
+ pacing_slider,
389
+ pitch_slider,
390
+ energy_slider,
391
+ anger_slider,
392
+ happy_slider,
393
+ sad_slider,
394
+ surprise_slider,
395
+ deepmoji_checkbox
396
+ ],
397
+ outputs=[
398
+ output_wav,
399
+ output_arpabet,
400
+ anger_slider,
401
+ happy_slider,
402
+ sad_slider,
403
+ surprise_slider
404
+ ]
405
+ )
406
+
407
+ language_radio.change(
408
+ set_default_text,
409
+ inputs=[language_radio, deepmoji_checkbox],
410
+ outputs=[input_textbox, deepmoji_checkbox]
411
+ )
412
+
413
+ deepmoji_checkbox.change(
414
+ toggle_deepmoji,
415
+ inputs=[
416
+ deepmoji_checkbox,
417
+ anger_slider,
418
+ happy_slider,
419
+ sad_slider,
420
+ surprise_slider
421
+ ],
422
+ outputs=[
423
+ anger_slider,
424
+ happy_slider,
425
+ sad_slider,
426
+ surprise_slider
427
+ ]
428
+ )
429
+
430
+ input_textbox.change(
431
+ reset_em_sliders,
432
+ inputs=[
433
+ deepmoji_checkbox,
434
+ anger_slider,
435
+ happy_slider,
436
+ sad_slider,
437
+ surprise_slider
438
+ ],
439
+ outputs=[
440
+ anger_slider,
441
+ happy_slider,
442
+ sad_slider,
443
+ surprise_slider
444
+ ]
445
+ )
446
+
447
+ voice_radio.change(
448
+ reset_em_sliders,
449
+ inputs=[
450
+ deepmoji_checkbox,
451
+ anger_slider,
452
+ happy_slider,
453
+ sad_slider,
454
+ surprise_slider
455
+ ],
456
+ outputs=[
457
+ anger_slider,
458
+ happy_slider,
459
+ sad_slider,
460
+ surprise_slider
461
+ ]
462
+ )
463
 
464
  if __name__ == "__main__":
465
  # Run the web server in a separate thread
 
468
  web_server_thread.start()
469
 
470
  print('running Gradio interface')
471
+ demo.launch()
472
 
473
  # Wait for the web server thread to finish (shouldn't be reached in normal execution)
474
  web_server_thread.join()
gr_client.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import sys
3
- import copy
4
  import time
5
  import requests
6
  import json
 
1
  import os
2
  import sys
 
3
  import time
4
  import requests
5
  import json