Pendrokar committed on
Commit
6109b39
·
1 Parent(s): 116549d

Space description below title

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +10 -0
  3. gr_client.py +52 -27
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: xVASynth (WIP)
3
  emoji: 🗣
4
  colorFrom: gray
5
  colorTo: gray
 
1
  ---
2
+ title: xVASynth TTS
3
  emoji: 🗣
4
  colorFrom: gray
5
  colorTo: gray
app.py CHANGED
@@ -356,9 +356,19 @@ language_radio = gr.Radio(
356
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
357
  )
358
 
 
 
 
 
 
 
 
 
359
  with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; border-radius: 5px; font-size: 120%; margin: 0.1em 0}") as demo:
360
  gr.Markdown("# xVASynth TTS")
361
 
 
 
362
  with gr.Row(): # Main row for inputs and language selection
363
  with gr.Column(): # Input column
364
  input_textbox = gr.Textbox(
 
356
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
357
  )
358
 
359
+ _DESCRIPTION = '''
360
+ <div>
361
+ <a style="display:inline-block;" href="https://github.com/DanRuta/xVA-Synth"><img src='https://img.shields.io/github/stars/DanRuta/xVA-Synth?style=social'/></a>
362
+ <a style="display:inline-block; margin-left: .5em" href="https://discord.gg/nv7c6E2TzV"><img src='https://img.shields.io/discord/794590496202293278.svg?label=&logo=discord&logoColor=ffffff&color=7389D8&labelColor=6A7EC2'/></a>
363
+ <span style="display: inline-block;margin-left: .5em;vertical-align: top;"><a href="https://huggingface.co/spaces/Pendrokar/xVASynth?duplicate=true" style="" target="_blank"><img style="margin-bottom: 0em;display: inline;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for a personal CPU-run</span>
364
+ </div>
365
+ '''
366
+
367
  with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; border-radius: 5px; font-size: 120%; margin: 0.1em 0}") as demo:
368
  gr.Markdown("# xVASynth TTS")
369
 
370
+ gr.HTML(label="description", value=_DESCRIPTION)
371
+
372
  with gr.Row(): # Main row for inputs and language selection
373
  with gr.Column(): # Input column
374
  input_textbox = gr.Textbox(
gr_client.py CHANGED
@@ -99,7 +99,7 @@ def predict(
99
  surprise,
100
  deepmoji_checked
101
  ):
102
- wav_path, arpabet_html, angry, happy, sad, surprise = client.predict(
103
  input_text, # str in 'Input Text' Textbox component
104
  voice, # Literal['ccby_nvidia_hifi_6670_M', 'ccby_nv_hifi_11614_F', 'ccby_nvidia_hifi_11697_F', 'ccby_nvidia_hifi_12787_F', 'ccby_nvidia_hifi_6097_M', 'ccby_nvidia_hifi_6671_M', 'ccby_nvidia_hifi_8051_F', 'ccby_nvidia_hifi_9017_M', 'ccby_nvidia_hifi_9136_F', 'ccby_nvidia_hifi_92_F'] in 'Voice' Radio component
105
  lang, # Literal['en', 'de', 'es', 'it', 'fr', 'ru', 'tr', 'la', 'ro', 'da', 'vi', 'ha', 'nl', 'zh', 'ar', 'uk', 'hi', 'ko', 'pl', 'sw', 'fi', 'hu', 'pt', 'yo', 'sv', 'el', 'wo', 'jp'] in 'Language' Radio component
@@ -114,31 +114,41 @@ def predict(
114
  api_name="/predict"
115
  )
116
 
117
- # json_data = json.loads(response)
118
 
119
- # arpabet_html = '<h6>ARPAbet & Durations</h6>'
120
- # arpabet_symbols = json_data['arpabet'].split('|')
121
- # for symb_i in range(len(json_data['durations'])):
122
- # if (arpabet_symbols[symb_i] == '<PAD>'):
123
- # continue
 
 
 
 
 
 
124
 
125
- # arpabet_html += '<strong class="arpabet" style="padding: 0 '\
126
- # + str(round(float(json_data['durations'][symb_i]/2), 1))\
127
- # +'em">'\
128
- # + arpabet_symbols[symb_i]\
129
- # + '</strong> '
 
 
 
 
 
 
 
 
130
 
131
  return [
132
  wav_path,
133
  arpabet_html,
134
- angry,
135
- happy,
136
- sad,
137
- surprise,
138
- # round(json_data['em_angry'][0], 2),
139
- # round(json_data['em_happy'][0], 2),
140
- # round(json_data['em_sad'][0], 2),
141
- # round(json_data['em_surprise'][0], 2)
142
  ]
143
 
144
  input_textbox = gr.Textbox(
@@ -227,9 +237,19 @@ language_radio = gr.Radio(
227
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
228
  )
229
 
230
- with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; border-radius: 5px; font-size: 120%; margin: 0.1em 0}") as demo:
 
 
 
 
 
 
 
 
231
  gr.Markdown("# xVASynth TTS")
232
 
 
 
233
  with gr.Row(): # Main row for inputs and language selection
234
  with gr.Column(): # Input column
235
  input_textbox = gr.Textbox(
@@ -266,13 +286,18 @@ with gr.Blocks(css=".arpabet {display: inline-block; background-color: gray; bor
266
  deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
267
 
268
  # Event handling using click
269
- btn = gr.Button("Generate")
270
 
271
- with gr.Row(): # Main row for inputs and language selection
272
- with gr.Column(): # Input column
273
- output_wav = gr.Audio(label="22kHz audio output", type="filepath", editable=False)
274
- with gr.Column(): # Input column
275
- output_arpabet = gr.HTML(label="ARPAbet")
 
 
 
 
 
276
 
277
  btn.click(
278
  fn=predict,
 
99
  surprise,
100
  deepmoji_checked
101
  ):
102
+ wav_path, arpabet_html, angry, happy, sad, surprise, response = client.predict(
103
  input_text, # str in 'Input Text' Textbox component
104
  voice, # Literal['ccby_nvidia_hifi_6670_M', 'ccby_nv_hifi_11614_F', 'ccby_nvidia_hifi_11697_F', 'ccby_nvidia_hifi_12787_F', 'ccby_nvidia_hifi_6097_M', 'ccby_nvidia_hifi_6671_M', 'ccby_nvidia_hifi_8051_F', 'ccby_nvidia_hifi_9017_M', 'ccby_nvidia_hifi_9136_F', 'ccby_nvidia_hifi_92_F'] in 'Voice' Radio component
105
  lang, # Literal['en', 'de', 'es', 'it', 'fr', 'ru', 'tr', 'la', 'ro', 'da', 'vi', 'ha', 'nl', 'zh', 'ar', 'uk', 'hi', 'ko', 'pl', 'sw', 'fi', 'hu', 'pt', 'yo', 'sv', 'el', 'wo', 'jp'] in 'Language' Radio component
 
114
  api_name="/predict"
115
  )
116
 
117
+ json_data = json.loads(response)
118
 
119
+ arpabet_html = '<h6>ARPAbet & Durations</h6>'
120
+ arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
121
+ arpabet_nopad = json_data['arpabet'].split('|PAD|')
122
+ arpabet_symbols = json_data['arpabet'].split('|')
123
+ wpad_len = len(arpabet_symbols)
124
+ nopad_len = len(arpabet_nopad)
125
+ total_dur_length = 0
126
+ for symb_i in range(wpad_len):
127
+ if (arpabet_symbols[symb_i] == '<PAD>'):
128
+ continue
129
+ total_dur_length += float(json_data['durations'][symb_i])
130
 
131
+ print(total_dur_length)
132
+ for symb_i in range(wpad_len):
133
+ if (arpabet_symbols[symb_i] == '<PAD>'):
134
+ continue
135
+
136
+ arpabet_length = float(json_data['durations'][symb_i])
137
+ cell_width = round(arpabet_length / total_dur_length * 100, 2)
138
+ arpabet_html += '<td class="arpabet" style="width: '\
139
+ + str(cell_width)\
140
+ +'%">'\
141
+ + arpabet_symbols[symb_i]\
142
+ + '</td> '
143
+ arpabet_html += '<tr></tbody></table>'
144
 
145
  return [
146
  wav_path,
147
  arpabet_html,
148
+ round(json_data['em_angry'][0], 2),
149
+ round(json_data['em_happy'][0], 2),
150
+ round(json_data['em_sad'][0], 2),
151
+ round(json_data['em_surprise'][0], 2)
 
 
 
 
152
  ]
153
 
154
  input_textbox = gr.Textbox(
 
237
  info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
238
  )
239
 
240
+ _DESCRIPTION = '''
241
+ <div>
242
+ <a style="display:inline-block;" href="https://github.com/DanRuta/xVA-Synth"><img src='https://img.shields.io/github/stars/DanRuta/xVA-Synth?style=social'/></a>
243
+ <a style="display:inline-block; margin-left: .5em" href="https://discord.gg/nv7c6E2TzV"><img src='https://img.shields.io/discord/794590496202293278.svg?label=&logo=discord&logoColor=ffffff&color=7389D8&labelColor=6A7EC2'/></a>
244
+ <span style="display: inline-block;margin-left: .5em;vertical-align: top;"><a href="https://huggingface.co/spaces/Pendrokar/xVASynth?duplicate=true" style="" target="_blank"><img style="margin-bottom: 0em;display: inline;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for a personal CPU-run</span>
245
+ </div>
246
+ '''
247
+
248
+ with gr.Blocks(css=".arpabet {background-color: gray; border-radius: 5px; font-size: 120%; padding: 0 0.1em; margin: 0 0.1em; text-align: center}") as demo:
249
  gr.Markdown("# xVASynth TTS")
250
 
251
+ gr.HTML(label="description", value=_DESCRIPTION)
252
+
253
  with gr.Row(): # Main row for inputs and language selection
254
  with gr.Column(): # Input column
255
  input_textbox = gr.Textbox(
 
286
  deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
287
 
288
  # Event handling using click
289
+ btn = gr.Button("Generate", variant="primary")
290
 
291
+ # with gr.Row(): # Main row for inputs and language selection
292
+ # with gr.Column(): # Input column
293
+ output_wav = gr.Audio(
294
+ label="22kHz audio output",
295
+ type="filepath",
296
+ editable=False,
297
+ autoplay=True
298
+ )
299
+ # with gr.Column(): # Input column
300
+ output_arpabet = gr.HTML(label="ARPAbet")
301
 
302
  btn.click(
303
  fn=predict,