lojban committed on
Commit
be81ba7
·
1 Parent(s): 11efcdb

add wav/ogg option

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -152,7 +152,7 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice, f
152
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
153
  audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
154
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
155
- result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
156
  else:
157
  ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
158
  with torch.no_grad():
@@ -161,9 +161,11 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice, f
161
  sid = torch.LongTensor([voice])
162
  audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
163
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
164
- result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
165
  if file_format == 'ogg':
166
  result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
 
 
167
 
168
  return result
169
 
 
152
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
153
  audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
154
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
155
+ result = [ipa_text, (hps_vctk.data.sampling_rate, audio)]
156
  else:
157
  ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
158
  with torch.no_grad():
 
161
  sid = torch.LongTensor([voice])
162
  audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
163
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
164
+ result = [ipa_text, (hps_vctk.data.sampling_rate, audio)]
165
  if file_format == 'ogg':
166
  result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
167
+ else:
168
+ result = [result[0], (result[1][0], float2pcm(result[1][1]))]
169
 
170
  return result
171