Abdullah-Habib commited on
Commit
14e7fb7
·
1 Parent(s): 5fdc9d8

first commit

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. app.py +98 -0
  3. model.py +61 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ audios/
4
+ Dockerfile
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ doc string
3
+ """
4
+ import logging
5
+ import os
6
+ import time
7
+ import uuid
8
+ import gradio as gr
9
+ import soundfile as sf
10
+ from model import get_pretrained_model
11
+
12
+ title = "# Danish Text To Speech"
13
+ css = """
14
+ .result {display:flex;flex-direction:column}
15
+ .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
16
+ .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
17
+ .result_item_error {background-color:#ff7070;color:white;align-self:start}
18
+ """
19
+
20
def process(text: str, sid: str) -> str:
    """Synthesize Danish speech for ``text`` and return the WAV file path.

    Args:
        text: Text to synthesize.
        sid: Speaker ID as a string (parsed to int; only meaningful for
            multi-speaker models — this model uses speaker 0).

    Returns:
        Path of the generated 16-bit PCM WAV file.

    Raises:
        ValueError: If the TTS engine produced no audio samples.
    """
    repo_id = "csukuangfj/vits-piper-da_DK-talesyntese-medium"
    speed = 1
    tts = get_pretrained_model(repo_id, speed)

    audio = tts.generate(text, sid=int(sid))
    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    # Bug fix: the uuid was previously generated and then immediately
    # overwritten with a hard-coded name, so every request wrote to the same
    # file and concurrent requests clobbered each other's output.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(
        filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    return filename
43
+
44
# Build the Gradio UI: one tab with a text input, a hidden speaker-ID field,
# a submit button, and an audio player wired to process().
demo = gr.Blocks(css=css)
with demo:
    gr.Markdown(title)
    with gr.Tabs():
        with gr.TabItem("Please input your text"):
            input_text = gr.Textbox(
                label="Input text",
                info="Your text",
                lines=3,
                placeholder="Please input your text here",
            )
            # Kept hidden: the Danish piper model is single-speaker, so the
            # ID is fixed at "0" and only forwarded for API compatibility.
            input_sid = gr.Textbox(
                label="Speaker ID",
                info="Speaker ID",
                lines=1,
                max_lines=1,
                value="0",
                # Fix user-facing typo: "mult-speaker" -> "multi-speaker".
                placeholder="Speaker ID. Valid only for multi-speaker model",
                visible=False,
            )
            input_button = gr.Button("Submit")

            output_audio = gr.Audio(label="Output")

            output_info = gr.HTML(label="Info")
            input_button.click(
                process,
                inputs=[input_text, input_sid],
                outputs=[output_audio],
            )
79
+
80
def download_espeak_ng_data():
    """Fetch and unpack espeak-ng-data into /tmp for the piper TTS model.

    Idempotent: skips the download when /tmp/espeak-ng-data already exists
    (e.g. on app restart). Remains best-effort — a failed download is logged
    as a warning instead of raising, matching the original silent behavior.
    """
    if os.path.isdir("/tmp/espeak-ng-data"):
        return
    status = os.system(
        """
cd /tmp
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
tar xf espeak-ng-data.tar.bz2
"""
    )
    # os.system previously discarded the exit status, hiding download failures.
    if status != 0:
        logging.warning("espeak-ng-data download failed (exit status %s)", status)
91
+
92
+ if __name__ == "__main__":
93
+ download_espeak_ng_data()
94
+ demo.launch(share = True)
95
+
96
+
97
+
98
+
model.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ doc string
3
+ """
4
+ import os
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ import sherpa_onnx
8
+ from huggingface_hub import hf_hub_download
9
+
10
def get_file(repo_id: str, filename: str, subfolder: str = ".") -> str:
    """Download ``filename`` from ``repo_id`` on the Hugging Face Hub.

    Returns the local cache path of the downloaded file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
20
+
21
@lru_cache(maxsize=10)
def get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build (and cache) a sherpa-onnx offline TTS engine for a piper model.

    Args:
        repo_id: Hugging Face repo holding the ``.onnx`` model and tokens.txt.
        speed: Speech-rate multiplier, mapped to VITS ``length_scale = 1/speed``.

    Returns:
        A ready-to-use ``sherpa_onnx.OfflineTts`` instance.

    Raises:
        ValueError: If ``speed`` is not positive (previously this surfaced as
            an opaque ZeroDivisionError).
    """
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed}")

    # espeak-ng data is unpacked here by app.download_espeak_ng_data().
    data_dir = "/tmp/espeak-ng-data"
    name = "da_DK-talesyntese-medium"
    model = get_file(
        repo_id=repo_id,
        filename=f"{name}.onnx",
        subfolder=".",
    )
    tokens = get_file(
        repo_id=repo_id,
        filename="tokens.txt",
        subfolder=".",
    )
    # Removed stray debugging leftover: print(model)

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon="",
                data_dir=data_dir,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )
    return sherpa_onnx.OfflineTts(tts_config)
54
+
55
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Return a cached offline TTS engine for ``repo_id`` at the given speed.

    Currently a thin dispatch layer: every supported repo is a vits-piper
    model, so this simply delegates to :func:`get_vits_piper`.
    """
    return get_vits_piper(repo_id, speed)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
sherpa-onnx
soundfile
gradio
huggingface_hub