Spaces:
Running
on
Zero
Running
on
Zero
txya900619
committed on
Commit
•
b128fb7
1
Parent(s):
bde200c
feat: change markdown and labels, and prepare to handle models with different sample rates and single-speaker, single-language models
Browse files
app.py
CHANGED
@@ -72,22 +72,42 @@ def text_to_speech(
|
|
72 |
split_sentences=False,
|
73 |
)
|
74 |
|
75 |
-
return
|
|
|
|
|
|
|
|
|
76 |
|
77 |
|
78 |
def when_model_selected(model_id):
|
79 |
model_config = models_config[model_id]
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
]
|
83 |
-
|
84 |
use_default_emb_or_ref_radio_visible = False
|
85 |
if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
|
86 |
use_default_emb_or_ref_radio_visible = True
|
|
|
87 |
return (
|
88 |
-
gr.update(
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
)
|
92 |
|
93 |
|
@@ -116,6 +136,7 @@ with demo:
|
|
116 |
model_drop_down = gr.Dropdown(
|
117 |
models_config.keys(),
|
118 |
value=default_model_id,
|
|
|
119 |
)
|
120 |
use_default_emb_or_custom_radio = gr.Radio(
|
121 |
label="use default speaker embedding or custom speaker embedding",
|
@@ -139,6 +160,7 @@ with demo:
|
|
139 |
for k, v in models_config[default_model_id]["speaker_mapping"].items()
|
140 |
],
|
141 |
value=list(models_config[default_model_id]["speaker_mapping"].values())[0],
|
|
|
142 |
)
|
143 |
use_default_emb_or_custom_radio.input(
|
144 |
use_default_emb_or_custom_radio_input,
|
@@ -147,8 +169,12 @@ with demo:
|
|
147 |
)
|
148 |
|
149 |
dialect_drop_down = gr.Dropdown(
|
150 |
-
choices=
|
151 |
-
|
|
|
|
|
|
|
|
|
152 |
)
|
153 |
|
154 |
model_drop_down.input(
|
@@ -159,7 +185,13 @@ with demo:
|
|
159 |
|
160 |
gr.Markdown(
|
161 |
"""
|
162 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
"""
|
164 |
)
|
165 |
gr.Interface(
|
@@ -170,14 +202,12 @@ with demo:
|
|
170 |
speaker_wav,
|
171 |
speaker_drop_down,
|
172 |
dialect_drop_down,
|
173 |
-
gr.Textbox(),
|
174 |
],
|
175 |
outputs=[
|
176 |
-
gr.Textbox(interactive=False, label="
|
177 |
-
gr.Textbox(interactive=False, label="
|
178 |
-
gr.Audio(
|
179 |
-
interactive=False, label="generated speech", show_download_button=True
|
180 |
-
),
|
181 |
],
|
182 |
allow_flagging="auto",
|
183 |
)
|
|
|
72 |
split_sentences=False,
|
73 |
)
|
74 |
|
75 |
+
return (
|
76 |
+
words,
|
77 |
+
pinyin,
|
78 |
+
(model.tts_model.config.audio.sample_rate, np.array(wav)),
|
79 |
+
)
|
80 |
|
81 |
|
82 |
def when_model_selected(model_id):
|
83 |
model_config = models_config[model_id]
|
84 |
+
|
85 |
+
speaker_drop_down_choices = []
|
86 |
+
if "speaker_mapping" in model_config:
|
87 |
+
speaker_drop_down_choices = [
|
88 |
+
(k, v) for k, v in model_config["speaker_mapping"].items()
|
89 |
+
]
|
90 |
+
|
91 |
+
dialect_drop_down_choices = [
|
92 |
+
(k, v) for k, v in model_config["dialect_mapping"].items()
|
93 |
]
|
94 |
+
|
95 |
use_default_emb_or_ref_radio_visible = False
|
96 |
if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
|
97 |
use_default_emb_or_ref_radio_visible = True
|
98 |
+
|
99 |
return (
|
100 |
+
gr.update(
|
101 |
+
choices=speaker_drop_down_choices,
|
102 |
+
value=speaker_drop_down_choices[0][1] if len(speaker_drop_down_choices) > 0 else None,
|
103 |
+
visible=len(speaker_drop_down_choices) > 1,
|
104 |
+
),
|
105 |
+
gr.update(
|
106 |
+
choices=dialect_drop_down_choices,
|
107 |
+
value=dialect_drop_down_choices[0][1],
|
108 |
+
visible=len(dialect_drop_down_choices) > 1,
|
109 |
+
),
|
110 |
+
gr.update(visible=use_default_emb_or_ref_radio_visible, value="default"),
|
111 |
)
|
112 |
|
113 |
|
|
|
136 |
model_drop_down = gr.Dropdown(
|
137 |
models_config.keys(),
|
138 |
value=default_model_id,
|
139 |
+
label="模型",
|
140 |
)
|
141 |
use_default_emb_or_custom_radio = gr.Radio(
|
142 |
label="use default speaker embedding or custom speaker embedding",
|
|
|
160 |
for k, v in models_config[default_model_id]["speaker_mapping"].items()
|
161 |
],
|
162 |
value=list(models_config[default_model_id]["speaker_mapping"].values())[0],
|
163 |
+
label="語者",
|
164 |
)
|
165 |
use_default_emb_or_custom_radio.input(
|
166 |
use_default_emb_or_custom_radio_input,
|
|
|
169 |
)
|
170 |
|
171 |
dialect_drop_down = gr.Dropdown(
|
172 |
+
choices=[
|
173 |
+
(k, v)
|
174 |
+
for k, v in models_config[default_model_id]["dialect_mapping"].items()
|
175 |
+
],
|
176 |
+
value=list(models_config[default_model_id]["dialect_mapping"].values())[0],
|
177 |
+
label="腔調",
|
178 |
)
|
179 |
|
180 |
model_drop_down.input(
|
|
|
185 |
|
186 |
gr.Markdown(
|
187 |
"""
|
188 |
+
# 臺灣客語語音合成系統
|
189 |
+
### Taiwanese Hakka Text-to-Speech System
|
190 |
+
### 模型
|
191 |
+
- **sixian-1p-240417**(四縣腔,單一語者)
|
192 |
+
### 研發
|
193 |
+
- **[李鴻欣 Hung-Shin Lee](mailto:[email protected])(諾思資訊 North Co., Ltd.)**
|
194 |
+
- **[陳力瑋 Li-Wei Chen](mailto:[email protected])(諾思資訊 North Co., Ltd.)**
|
195 |
"""
|
196 |
)
|
197 |
gr.Interface(
|
|
|
202 |
speaker_wav,
|
203 |
speaker_drop_down,
|
204 |
dialect_drop_down,
|
205 |
+
gr.Textbox(label="輸入文字"),
|
206 |
],
|
207 |
outputs=[
|
208 |
+
gr.Textbox(interactive=False, label="斷詞"),
|
209 |
+
gr.Textbox(interactive=False, label="客語拼音"),
|
210 |
+
gr.Audio(interactive=False, label="合成語音", show_download_button=True),
|
|
|
|
|
211 |
],
|
212 |
allow_flagging="auto",
|
213 |
)
|