Spaces:
Running
on
Zero
Running
on
Zero
txya900619
committed on
Commit
•
a523a5e
1
Parent(s):
576392b
feat: update model and use zero
Browse files
- app.py +48 -18
- configs/models.yaml +2 -20
app.py
CHANGED
@@ -6,6 +6,8 @@ from TTS.utils.synthesizer import Synthesizer
|
|
6 |
import numpy as np
|
7 |
from huggingface_hub import snapshot_download
|
8 |
from omegaconf import OmegaConf
|
|
|
|
|
9 |
|
10 |
from ipa.ipa import get_ipa, parse_ipa
|
11 |
from replace.tts import ChangedVitsConfig
|
@@ -31,13 +33,34 @@ def load_model(model_id):
|
|
31 |
with open(temp_config_path, "w") as f:
|
32 |
f.write(content)
|
33 |
f.close()
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
|
37 |
OmegaConf.register_new_resolver("load_model", load_model)
|
38 |
|
39 |
models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
def text_to_speech(
|
43 |
model_id: str,
|
@@ -55,19 +78,23 @@ def text_to_speech(
|
|
55 |
raise gr.Error(
|
56 |
f"句子中的[{','.join(missing_words)}]目前無法轉成 ipa。請嘗試其他句子。"
|
57 |
)
|
|
|
58 |
if use_default_emb_or_custom == "default":
|
59 |
-
wav =
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
62 |
language_name=dialect,
|
63 |
-
|
64 |
)
|
65 |
else:
|
66 |
-
wav =
|
67 |
-
|
|
|
68 |
speaker_wav=speaker_wav,
|
69 |
language_name=dialect,
|
70 |
-
split_sentences=False,
|
71 |
)
|
72 |
|
73 |
return (
|
@@ -83,7 +110,7 @@ def when_model_selected(model_id):
|
|
83 |
speaker_drop_down_choices = [
|
84 |
(k, v) for k, v in model_config["speaker_mapping"].items()
|
85 |
]
|
86 |
-
|
87 |
dialect_drop_down_choices = [
|
88 |
(k, v) for k, v in model_config["dialect_mapping"].items()
|
89 |
]
|
@@ -91,11 +118,13 @@ def when_model_selected(model_id):
|
|
91 |
use_default_emb_or_ref_radio_visible = False
|
92 |
if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
|
93 |
use_default_emb_or_ref_radio_visible = True
|
94 |
-
|
95 |
return (
|
96 |
gr.update(
|
97 |
choices=speaker_drop_down_choices,
|
98 |
-
value=speaker_drop_down_choices[0][1]
|
|
|
|
|
99 |
interactive=len(speaker_drop_down_choices) > 1,
|
100 |
),
|
101 |
gr.update(
|
@@ -138,9 +167,9 @@ with demo:
|
|
138 |
label="use default speaker embedding or custom speaker embedding",
|
139 |
choices=["default", "custom"],
|
140 |
value="default",
|
141 |
-
visible=
|
142 |
)
|
143 |
-
speaker_wav = gr.
|
144 |
label="speaker wav",
|
145 |
visible=False,
|
146 |
editable=False,
|
@@ -185,11 +214,9 @@ with demo:
|
|
185 |
"""
|
186 |
# 臺灣客語語音合成系統
|
187 |
### Taiwanese Hakka Text-to-Speech System
|
188 |
-
### 模型
|
189 |
-
- **sixian-1f-240417**(四縣腔,單一語者)
|
190 |
### 研發
|
191 |
-
- **[李鴻欣 Hung-Shin Lee](mailto:[email protected])
|
192 |
-
- **[陳力瑋 Li-Wei Chen](mailto:[email protected])
|
193 |
"""
|
194 |
)
|
195 |
gr.Interface(
|
@@ -200,7 +227,10 @@ with demo:
|
|
200 |
speaker_wav,
|
201 |
speaker_drop_down,
|
202 |
dialect_drop_down,
|
203 |
-
gr.Textbox(
|
|
|
|
|
|
|
204 |
],
|
205 |
outputs=[
|
206 |
gr.Textbox(interactive=False, label="斷詞"),
|
|
|
6 |
import numpy as np
|
7 |
from huggingface_hub import snapshot_download
|
8 |
from omegaconf import OmegaConf
|
9 |
+
import spaces
|
10 |
+
import torch
|
11 |
|
12 |
from ipa.ipa import get_ipa, parse_ipa
|
13 |
from replace.tts import ChangedVitsConfig
|
|
|
33 |
with open(temp_config_path, "w") as f:
|
34 |
f.write(content)
|
35 |
f.close()
|
36 |
+
|
37 |
+
return Synthesizer(
|
38 |
+
tts_checkpoint=model_ckpt_path,
|
39 |
+
tts_config_path=temp_config_path,
|
40 |
+
use_cuda=torch.cuda.is_available(),
|
41 |
+
)
|
42 |
|
43 |
|
44 |
OmegaConf.register_new_resolver("load_model", load_model)
|
45 |
|
46 |
models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
47 |
|
48 |
+
@spaces.GPU
|
49 |
+
def _do_tts(model, ipa, language_name,speaker_name=None, speaker_wav=None):
|
50 |
+
if speaker_wav is not None:
|
51 |
+
return model.tts(
|
52 |
+
ipa,
|
53 |
+
speaker_wav=speaker_wav,
|
54 |
+
language_name=language_name,
|
55 |
+
split_sentences=False,
|
56 |
+
)
|
57 |
+
return model.tts(
|
58 |
+
ipa,
|
59 |
+
speaker_name=speaker_name,
|
60 |
+
language_name=language_name,
|
61 |
+
split_sentences=False,
|
62 |
+
)
|
63 |
+
|
64 |
|
65 |
def text_to_speech(
|
66 |
model_id: str,
|
|
|
78 |
raise gr.Error(
|
79 |
f"句子中的[{','.join(missing_words)}]目前無法轉成 ipa。請嘗試其他句子。"
|
80 |
)
|
81 |
+
parsed_ipa = parse_ipa(ipa)
|
82 |
if use_default_emb_or_custom == "default":
|
83 |
+
wav = _do_tts(
|
84 |
+
model,
|
85 |
+
parsed_ipa,
|
86 |
+
speaker_name=speaker
|
87 |
+
if len(models_config[model_id]["speaker_mapping"]) > 1
|
88 |
+
else None,
|
89 |
language_name=dialect,
|
90 |
+
speaker_wav=speaker_wav,
|
91 |
)
|
92 |
else:
|
93 |
+
wav = _do_tts(
|
94 |
+
model,
|
95 |
+
parsed_ipa,
|
96 |
speaker_wav=speaker_wav,
|
97 |
language_name=dialect,
|
|
|
98 |
)
|
99 |
|
100 |
return (
|
|
|
110 |
speaker_drop_down_choices = [
|
111 |
(k, v) for k, v in model_config["speaker_mapping"].items()
|
112 |
]
|
113 |
+
|
114 |
dialect_drop_down_choices = [
|
115 |
(k, v) for k, v in model_config["dialect_mapping"].items()
|
116 |
]
|
|
|
118 |
use_default_emb_or_ref_radio_visible = False
|
119 |
if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
|
120 |
use_default_emb_or_ref_radio_visible = True
|
121 |
+
|
122 |
return (
|
123 |
gr.update(
|
124 |
choices=speaker_drop_down_choices,
|
125 |
+
value=speaker_drop_down_choices[0][1]
|
126 |
+
if len(speaker_drop_down_choices) > 0
|
127 |
+
else None,
|
128 |
interactive=len(speaker_drop_down_choices) > 1,
|
129 |
),
|
130 |
gr.update(
|
|
|
167 |
label="use default speaker embedding or custom speaker embedding",
|
168 |
choices=["default", "custom"],
|
169 |
value="default",
|
170 |
+
visible=True,
|
171 |
)
|
172 |
+
speaker_wav = gr.Audio(
|
173 |
label="speaker wav",
|
174 |
visible=False,
|
175 |
editable=False,
|
|
|
214 |
"""
|
215 |
# 臺灣客語語音合成系統
|
216 |
### Taiwanese Hakka Text-to-Speech System
|
|
|
|
|
217 |
### 研發
|
218 |
+
- **[李鴻欣 Hung-Shin Lee](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
|
219 |
+
- **[陳力瑋 Li-Wei Chen](mailto:[email protected])([聯和科創](https://www.104.com.tw/company/1a2x6bmu75))**
|
220 |
"""
|
221 |
)
|
222 |
gr.Interface(
|
|
|
227 |
speaker_wav,
|
228 |
speaker_drop_down,
|
229 |
dialect_drop_down,
|
230 |
+
gr.Textbox(
|
231 |
+
label="輸入文字",
|
232 |
+
value="客家族群个六堆運動會會一直延續下去,為臺灣个體育史寫下特別个一頁。",
|
233 |
+
),
|
234 |
],
|
235 |
outputs=[
|
236 |
gr.Textbox(interactive=False, label="斷詞"),
|
configs/models.yaml
CHANGED
@@ -1,23 +1,5 @@
|
|
1 |
-
|
2 |
-
model: ${load_model:formospeech/
|
3 |
-
dialect_mapping:
|
4 |
-
四縣: sixian
|
5 |
-
speaker_mapping: # display_name: id
|
6 |
-
女(64)/苗栗: XF
|
7 |
-
|
8 |
-
sixian-hailu-mix:
|
9 |
-
model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-hailu-mix}
|
10 |
-
dialect_mapping:
|
11 |
-
四縣: sixian
|
12 |
-
海陸: hailu
|
13 |
-
speaker_mapping: # display_name: id
|
14 |
-
女(64)/苗栗: XF
|
15 |
-
男(53)/苗栗: XM
|
16 |
-
女(54)/新竹: HF
|
17 |
-
男(56)/新竹: HM
|
18 |
-
|
19 |
-
sixian-hailu-mix-se:
|
20 |
-
model: ${load_model:formospeech/taiwanese-hakka-tts-sixian-hailu-mix-se}
|
21 |
dialect_mapping:
|
22 |
四縣: sixian
|
23 |
海陸: hailu
|
|
|
1 |
+
yourtts-htia-240515:
|
2 |
+
model: ${load_model:formospeech/yourtts-htia-240515}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
dialect_mapping:
|
4 |
四縣: sixian
|
5 |
海陸: hailu
|