Serhiy Stetskovych committed on
Commit
9cb2738
1 Parent(s): 5db46b8

Add list of prompts

Browse files
Files changed (2) hide show
  1. app.py +11 -28
  2. prompt22050.wav +0 -0
app.py CHANGED
@@ -1,4 +1,4 @@
1
-
2
  from pathlib import Path
3
  import torchaudio
4
  import gradio as gr
@@ -34,23 +34,12 @@ VOCODER44_MODEL_PATH = 'patriotyk/vocos-mel-hifigan-compat-44100khz'
34
  HIFIGAN_MODEL_PATH = 'checkpoints/g_00120000'
35
 
36
 
37
- transform = torchaudio.transforms.Vol(gain=-32, gain_type="db")
38
- wav, sr = torchaudio.load('prompt22050.wav')
39
-
40
- prompt = mel_spectrogram(
41
- transform(wav),
42
- 1024,
43
- 80,
44
- 22050,
45
- 256,
46
- 1024,
47
- 0,
48
- 8000,
49
- center=False,
50
- )[:,:,:264]
51
-
52
 
53
 
 
 
 
54
 
55
  def process_text(text: str, device: torch.device):
56
  x = torch.tensor(
@@ -89,16 +78,6 @@ def load_vocos(checkpoint_path, config_path, device):
89
  def to_waveform(mel, vocoder, denoiser=None):
90
  return vocoder.decode(mel).clamp(-1, 1).cpu().squeeze()
91
 
92
- # audio = vocoder(mel).clamp(-1, 1)
93
- # if denoiser is not None:
94
- # audio = denoiser(audio.squeeze(), strength=0.00025).cpu().squeeze()
95
-
96
- # return audio.cpu().squeeze()
97
-
98
-
99
-
100
-
101
-
102
 
103
  def get_device():
104
  if torch.cuda.is_available():
@@ -123,11 +102,14 @@ denoiser = None#Denoiser(vocoder, mode="zeros")
123
 
124
 
125
  @torch.inference_mode()
126
- def synthesise(text, speed):
127
  if len(text) > 1000:
128
  raise gr.Error("Текст повинен бути коротшим за 1000 символів.")
129
 
130
  text_processed = process_text(text.strip(), device)
 
 
 
131
 
132
  output = model.synthesise(
133
  text_processed["x"].to(device),
@@ -165,7 +147,8 @@ if __name__ == "__main__":
165
  description=description,
166
  inputs=[
167
  gr.Text(label='Текст для синтезу:', lines=5, max_lines=10),
168
- gr.Slider(minimum=0.6, maximum=2.0, label="Швидкість", value=1.0)
 
169
  ],
170
  outputs=[
171
  gr.Text(label='Фонемізований текст:', lines=5),
 
1
+ import os
2
  from pathlib import Path
3
  import torchaudio
4
  import gradio as gr
 
34
  HIFIGAN_MODEL_PATH = 'checkpoints/g_00120000'
35
 
36
 
37
+ volnorm = torchaudio.transforms.Vol(gain=-32, gain_type="db")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
+ prompts_dir = 'prompts'
41
+ print(os.listdir(prompts_dir))
42
+ prompts_list = sorted(os.listdir(prompts_dir), key=lambda x: x.split('.')[0])
43
 
44
  def process_text(text: str, device: torch.device):
45
  x = torch.tensor(
 
78
  def to_waveform(mel, vocoder, denoiser=None):
79
  return vocoder.decode(mel).clamp(-1, 1).cpu().squeeze()
80
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def get_device():
83
  if torch.cuda.is_available():
 
102
 
103
 
104
  @torch.inference_mode()
105
+ def synthesise(text, prompt_selection, speed):
106
  if len(text) > 1000:
107
  raise gr.Error("Текст повинен бути коротшим за 1000 символів.")
108
 
109
  text_processed = process_text(text.strip(), device)
110
+ prompt_audio_path = os.path.join(prompts_dir, prompt_selection)
111
+ wav, sr = torchaudio.load(prompt_audio_path)
112
+ prompt = mel_spectrogram(volnorm(wav), 1024, 80, 22050, 256, 1024, 0, 8000, center=False)[:,:,:264]
113
 
114
  output = model.synthesise(
115
  text_processed["x"].to(device),
 
147
  description=description,
148
  inputs=[
149
  gr.Text(label='Текст для синтезу:', lines=5, max_lines=10),
150
+ gr.Dropdown(label="Prompt audio", choices=prompts_list, value=prompts_list[0]),
151
+ gr.Slider(minimum=0.6, maximum=2.0, label="Швидкість", value=1.1)
152
  ],
153
  outputs=[
154
  gr.Text(label='Фонемізований текст:', lines=5),
prompt22050.wav DELETED
Binary file (655 kB)