txya900619 committed on
Commit
b128fb7
1 Parent(s): bde200c

feat: change markdown and labels, and prepare to handle models with different sample rates and one-speaker, one-language models

Browse files
Files changed (1) hide show
  1. app.py +46 -16
app.py CHANGED
@@ -72,22 +72,42 @@ def text_to_speech(
72
  split_sentences=False,
73
  )
74
 
75
- return words, pinyin, (16000, np.array(wav))
 
 
 
 
76
 
77
 
78
  def when_model_selected(model_id):
79
  model_config = models_config[model_id]
80
- speaker_drop_down_choices = [
81
- (k, v) for k, v in model_config["speaker_mapping"].items()
 
 
 
 
 
 
 
82
  ]
83
- dialect_drop_down_choices = model_config["avalible_dialect"]
84
  use_default_emb_or_ref_radio_visible = False
85
  if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
86
  use_default_emb_or_ref_radio_visible = True
 
87
  return (
88
- gr.update(choices=speaker_drop_down_choices),
89
- gr.update(choices=dialect_drop_down_choices),
90
- gr.update(visible=use_default_emb_or_ref_radio_visible),
 
 
 
 
 
 
 
 
91
  )
92
 
93
 
@@ -116,6 +136,7 @@ with demo:
116
  model_drop_down = gr.Dropdown(
117
  models_config.keys(),
118
  value=default_model_id,
 
119
  )
120
  use_default_emb_or_custom_radio = gr.Radio(
121
  label="use default speaker embedding or custom speaker embedding",
@@ -139,6 +160,7 @@ with demo:
139
  for k, v in models_config[default_model_id]["speaker_mapping"].items()
140
  ],
141
  value=list(models_config[default_model_id]["speaker_mapping"].values())[0],
 
142
  )
143
  use_default_emb_or_custom_radio.input(
144
  use_default_emb_or_custom_radio_input,
@@ -147,8 +169,12 @@ with demo:
147
  )
148
 
149
  dialect_drop_down = gr.Dropdown(
150
- choices=models_config[default_model_id]["avalible_dialect"],
151
- value=models_config[default_model_id]["avalible_dialect"][0],
 
 
 
 
152
  )
153
 
154
  model_drop_down.input(
@@ -159,7 +185,13 @@ with demo:
159
 
160
  gr.Markdown(
161
  """
162
- # 臺灣客語語音生成系統
 
 
 
 
 
 
163
  """
164
  )
165
  gr.Interface(
@@ -170,14 +202,12 @@ with demo:
170
  speaker_wav,
171
  speaker_drop_down,
172
  dialect_drop_down,
173
- gr.Textbox(),
174
  ],
175
  outputs=[
176
- gr.Textbox(interactive=False, label="word segment"),
177
- gr.Textbox(interactive=False, label="pinyin"),
178
- gr.Audio(
179
- interactive=False, label="generated speech", show_download_button=True
180
- ),
181
  ],
182
  allow_flagging="auto",
183
  )
 
72
  split_sentences=False,
73
  )
74
 
75
+ return (
76
+ words,
77
+ pinyin,
78
+ (model.tts_model.config.audio.sample_rate, np.array(wav)),
79
+ )
80
 
81
 
82
  def when_model_selected(model_id):
83
  model_config = models_config[model_id]
84
+
85
+ speaker_drop_down_choices = []
86
+ if "speaker_mapping" in model_config:
87
+ speaker_drop_down_choices = [
88
+ (k, v) for k, v in model_config["speaker_mapping"].items()
89
+ ]
90
+
91
+ dialect_drop_down_choices = [
92
+ (k, v) for k, v in model_config["dialect_mapping"].items()
93
  ]
94
+
95
  use_default_emb_or_ref_radio_visible = False
96
  if model_config["model"].tts_model.config.model_args.speaker_encoder_model_path:
97
  use_default_emb_or_ref_radio_visible = True
98
+
99
  return (
100
+ gr.update(
101
+ choices=speaker_drop_down_choices,
102
+ value=speaker_drop_down_choices[0][1] if len(speaker_drop_down_choices) > 0 else None,
103
+ visible=len(speaker_drop_down_choices) > 1,
104
+ ),
105
+ gr.update(
106
+ choices=dialect_drop_down_choices,
107
+ value=dialect_drop_down_choices[0][1],
108
+ visible=len(dialect_drop_down_choices) > 1,
109
+ ),
110
+ gr.update(visible=use_default_emb_or_ref_radio_visible, value="default"),
111
  )
112
 
113
 
 
136
  model_drop_down = gr.Dropdown(
137
  models_config.keys(),
138
  value=default_model_id,
139
+ label="模型",
140
  )
141
  use_default_emb_or_custom_radio = gr.Radio(
142
  label="use default speaker embedding or custom speaker embedding",
 
160
  for k, v in models_config[default_model_id]["speaker_mapping"].items()
161
  ],
162
  value=list(models_config[default_model_id]["speaker_mapping"].values())[0],
163
+ label="語者",
164
  )
165
  use_default_emb_or_custom_radio.input(
166
  use_default_emb_or_custom_radio_input,
 
169
  )
170
 
171
  dialect_drop_down = gr.Dropdown(
172
+ choices=[
173
+ (k, v)
174
+ for k, v in models_config[default_model_id]["dialect_mapping"].items()
175
+ ],
176
+ value=list(models_config[default_model_id]["dialect_mapping"].values())[0],
177
+ label="腔調",
178
  )
179
 
180
  model_drop_down.input(
 
185
 
186
  gr.Markdown(
187
  """
188
+ # 臺灣客語語音合成系統
189
+ ### Taiwanese Hakka Text-to-Speech System
190
+ ### 模型
191
+ - **sixian-1p-240417**(四縣腔,單一語者)
192
+ ### 研發
193
+ - **[李鴻欣 Hung-Shin Lee](mailto:[email protected])(諾思資訊 North Co., Ltd.)**
194
+ - **[陳力瑋 Li-Wei Chen](mailto:[email protected])(諾思資訊 North Co., Ltd.)**
195
  """
196
  )
197
  gr.Interface(
 
202
  speaker_wav,
203
  speaker_drop_down,
204
  dialect_drop_down,
205
+ gr.Textbox(label="輸入文字"),
206
  ],
207
  outputs=[
208
+ gr.Textbox(interactive=False, label="斷詞"),
209
+ gr.Textbox(interactive=False, label="客語拼音"),
210
+ gr.Audio(interactive=False, label="合成語音", show_download_button=True),
 
 
211
  ],
212
  allow_flagging="auto",
213
  )