pikto committed
Commit 0cb208e · 1 Parent(s): b1372f9

Update app.py

Files changed (1)
  1. app.py +64 -35
app.py CHANGED
@@ -1,17 +1,29 @@
-"""
-
-TTS interactive demo
-"""
-
+import os
+import glob
 import logging
 from typing import cast
+from threading import Lock

 import gradio as gr
 from balacoon_tts import TTS
 from huggingface_hub import hf_hub_download, list_repo_files

+# locker that disallow access to the tts object from more then one thread
+locker = Lock()
 # global tts module, initialized from a model selected
 tts = None
+# path to the model that is currently used in tts
+cur_model_path = None
+# cache of speakers, maps model name to speaker list
+model_to_speakers = dict()
+model_repo_dir = "/data"
+for name in list_repo_files(repo_id="balacoon/tts"):
+    if not os.path.isfile(os.path.join(model_repo_dir, name)):
+        hf_hub_download(
+            repo_id="balacoon/tts",
+            filename=name,
+            local_dir=model_repo_dir,
+        )


 def main():
@@ -20,23 +32,22 @@ def main():
     with gr.Blocks() as demo:
         gr.Markdown(
             """
-            <h1 align="center">Text-to-Speech</h1>
-
+            <h1 align="center">Balacoon🦝 Text-to-Speech</h1>
             1. Write an utterance to generate,
             2. Select the model to synthesize with
-            3. Select the speaker
+            3. Select speaker
             4. Hit "Generate" and listen to the result!
-
-            When you select a Model for the first time,
-            it will take a little time to download it.
+            You can learn more about models available
+            [here](https://huggingface.co/balacoon/tts).
+            Visit [Balacoon website](https://balacoon.com/) for more info.
             """
         )
         with gr.Row(variant="panel"):
-            text = gr.Textbox(label="Text", placeholder="Insert your article here...")
+            text = gr.Textbox(label="Text", placeholder="Type something here...")

         with gr.Row():
             with gr.Column(variant="panel"):
-                repo_files = list_repo_files(repo_id="balacoon/tts")
+                repo_files = os.listdir(model_repo_dir)
                 model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
                 model_name = gr.Dropdown(
                     label="Model",
@@ -47,16 +58,25 @@ def main():

                 def set_model(model_name_str: str):
                     """
-                    gets value from `model_name`, loads model,
-                    re-initializes tts object, gets list of
-                    speakers that model supports and set them to `speaker`
+                    gets value from `model_name`. either
+                    uses cached list of speakers for the given model name
+                    or loads the addon and checks what are the speakers.
                     """
-                    model_path = hf_hub_download(
-                        repo_id="balacoon/tts", filename=model_name_str
-                    )
-                    global tts
-                    tts = TTS(model_path)
-                    speakers = tts.get_speakers()
+                    global model_to_speakers
+                    if model_name_str in model_to_speakers:
+                        speakers = model_to_speakers[model_name_str]
+                    else:
+                        global tts, cur_model_path, locker
+                        with locker:
+                            # need to load this model to learn the list of speakers
+                            model_path = os.path.join(model_repo_dir, model_name_str)
+                            if tts is not None:
+                                del tts
+                            tts = TTS(model_path)
+                            cur_model_path = model_path
+                            speakers = tts.get_speakers()
+                            model_to_speakers[model_name_str] = speakers
+
                     value = speakers[-1]
                     return gr.Dropdown.update(
                         choices=speakers, value=value, visible=True
@@ -69,26 +89,35 @@ def main():
         with gr.Row(variant="panel"):
             audio = gr.Audio()

-        def synthesize_audio(text_str: str, speaker_str: str = ""):
+        def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """
             gets utterance to synthesize from `text` Textbox
             and speaker name from `speaker` dropdown list.
             speaker name might be empty for single-speaker models.
             Synthesizes the waveform and updates `audio` with it.
             """
-            if not text_str:
-                logging.info("text or speaker are not provided")
+            if not text_str or not model_name_str or not speaker_str:
+                logging.info("text, model name or speaker are not provided")
                 return None
-            global tts
-            if len(text_str) > 1024:
-                text_str = text_str[:1024]
-            samples = cast(TTS, tts).synthesize(text_str, speaker_str)
-            return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
-
-        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
-
-    demo.launch()
+            expected_model_path = os.path.join(model_repo_dir, model_name_str)
+            global tts, cur_model_path, locker
+            with locker:
+                if expected_model_path != cur_model_path:
+                    # reload model
+                    if tts is not None:
+                        del tts
+                    tts = TTS(expected_model_path)
+                    cur_model_path = expected_model_path
+                if len(text_str) > 1024:
+                    # truncate the text
+                    text_str = text_str[:1024]
+                samples = tts.synthesize(text_str, speaker_str)
+                return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
+
+        generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio)
+
+    demo.queue(concurrency_count=1).launch()


 if __name__ == "__main__":
-    main()
+    main()
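
The core pattern this commit introduces is a Lock-guarded, lazily reloaded global TTS object plus a model-to-speakers cache. Below is a minimal, self-contained sketch of that pattern outside Gradio; DummyTTS and the model name "en_us_cpu.addon" are hypothetical stand-ins for illustration and are not part of the commit.

import os
from threading import Lock


class DummyTTS:
    """Hypothetical stand-in for balacoon_tts.TTS (does not touch disk)."""

    def __init__(self, path: str):
        self.path = path

    def get_speakers(self):
        return ["speaker_a", "speaker_b"]


locker = Lock()          # serializes access to the shared tts object
tts = None               # currently loaded model
cur_model_path = None    # path of the currently loaded model
model_to_speakers = {}   # cache: model name -> list of speakers
model_repo_dir = "/data"


def get_speakers(model_name: str):
    """Return speakers for a model, loading the model only on a cache miss."""
    global tts, cur_model_path
    if model_name in model_to_speakers:
        return model_to_speakers[model_name]
    with locker:
        model_path = os.path.join(model_repo_dir, model_name)
        tts = DummyTTS(model_path)
        cur_model_path = model_path
        speakers = tts.get_speakers()
        model_to_speakers[model_name] = speakers
    return speakers


if __name__ == "__main__":
    print(get_speakers("en_us_cpu.addon"))  # first call loads the model
    print(get_speakers("en_us_cpu.addon"))  # second call is served from the cache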