Spaces:
Sleeping
Sleeping
Commit
·
14e7fb7
1
Parent(s):
5fdc9d8
first commit
Browse files- .gitignore +4 -0
- app.py +98 -0
- model.py +61 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
venv/
|
2 |
+
__pycache__/
|
3 |
+
audios/
|
4 |
+
Dockerfile
|
app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
doc string
|
3 |
+
"""
|
4 |
+
import logging
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
import uuid
|
8 |
+
import gradio as gr
|
9 |
+
import soundfile as sf
|
10 |
+
from model import get_pretrained_model
|
11 |
+
|
12 |
+
title = "# Danish Text To Speech"
|
13 |
+
css = """
|
14 |
+
.result {display:flex;flex-direction:column}
|
15 |
+
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
16 |
+
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
|
17 |
+
.result_item_error {background-color:#ff7070;color:white;align-self:start}
|
18 |
+
"""
|
19 |
+
|
20 |
+
def process(text: str, sid: str):
    """
    Synthesize ``text`` to speech and save the result as a WAV file.

    Args:
        text: The text to synthesize.
        sid: Speaker ID given as a string (it comes from a textbox); it is
            converted with ``int`` before being passed to the model.

    Returns:
        The name of the generated ``<uuid4>.wav`` file, written relative to
        the current working directory as 16-bit PCM.

    Raises:
        ValueError: If ``sid`` is not an integer literal, or if the model
            produced no audio samples.
    """
    repo_id = "csukuangfj/vits-piper-da_DK-talesyntese-medium"
    speed = 1
    sid = int(sid)

    # get_pretrained_model is lru_cache'd in model.py, so the model is only
    # built on the first call for a given (repo_id, speed) pair.
    tts = get_pretrained_model(repo_id, speed)
    audio = tts.generate(text, sid=sid)
    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    # A random UUID keeps outputs of different requests from colliding.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(
        filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    return filename
|
43 |
+
|
44 |
+
# Build the Gradio UI: a heading plus a single tab holding the text input,
# a hidden speaker-ID field, the submit button and the output widgets.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    with gr.Tabs():
        with gr.TabItem("Please input your text"):
            input_text = gr.Textbox(
                label="Input text",
                info="Your text",
                lines=3,
                placeholder="Please input your text here",
            )
            # Hidden field; its default value "0" is what process() receives.
            input_sid = gr.Textbox(
                label="Speaker ID",
                info="Speaker ID",
                lines=1,
                max_lines=1,
                value="0",
                placeholder="Speaker ID. Valid only for mult-speaker model",
                visible=False,
            )
            input_button = gr.Button("Submit")

            output_audio = gr.Audio(label="Output")

            output_info = gr.HTML(label="Info")

        # Clicking the button runs process(text, sid) and feeds the returned
        # file name to the audio player.
        input_button.click(
            process,
            inputs=[input_text, input_sid],
            outputs=[output_audio],
        )
|
79 |
+
|
80 |
+
def download_espeak_ng_data():
    """
    Download the espeak-ng data archive into /tmp and unpack it there.

    The download is skipped when /tmp/espeak-ng-data already exists, so
    restarting the app does not fetch the archive again. The external
    ``wget`` and ``tar`` programs are invoked with argument lists (no shell).

    Failures (missing program, non-zero exit status) are logged and not
    re-raised, so the UI can still start; speech synthesis will then fail
    later when the data directory is needed.
    """
    import subprocess

    work_dir = "/tmp"
    # model.py points sherpa-onnx at /tmp/espeak-ng-data.
    data_dir = os.path.join(work_dir, "espeak-ng-data")
    archive = os.path.join(work_dir, "espeak-ng-data.tar.bz2")
    url = (
        "https://github.com/k2-fsa/sherpa-onnx/releases/download/"
        "tts-models/espeak-ng-data.tar.bz2"
    )

    if os.path.isdir(data_dir):
        return

    try:
        # -O overwrites a stale archive instead of creating "*.tar.bz2.1".
        subprocess.run(["wget", "-q", "-O", archive, url], check=True)
        subprocess.run(["tar", "xf", archive, "-C", work_dir], check=True)
    except (OSError, subprocess.CalledProcessError):
        logging.exception("Failed to download or extract espeak-ng data")


if __name__ == "__main__":
    # Fetch the phonemizer data first, then start the web UI with a public
    # share link.
    download_espeak_ng_data()
    demo.launch(share=True)
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
model.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
doc string
|
3 |
+
"""
|
4 |
+
import os
|
5 |
+
from functools import lru_cache
|
6 |
+
from pathlib import Path
|
7 |
+
import sherpa_onnx
|
8 |
+
from huggingface_hub import hf_hub_download
|
9 |
+
|
10 |
+
def get_file(repo_id: str, filename: str, subfolder: str = ".") -> str:
    """
    Fetch one file from a Hugging Face Hub repository.

    Args:
        repo_id: Repository identifier passed to ``hf_hub_download``.
        filename: Name of the file inside the repository.
        subfolder: Folder inside the repository; defaults to ``"."``.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file
        (annotated here as ``str``).
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
|
20 |
+
|
21 |
+
@lru_cache(maxsize=10)
def get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """
    Build a sherpa-onnx offline TTS engine for the Danish Piper VITS model.

    The ONNX model (``da_DK-talesyntese-medium.onnx``) and ``tokens.txt`` are
    fetched from ``repo_id`` via ``get_file``. The espeak-ng data is read
    from ``/tmp/espeak-ng-data``. Results are memoized per
    ``(repo_id, speed)`` pair.

    Args:
        repo_id: Hugging Face repository that hosts the model files.
        speed: Speaking-rate factor; the model's ``length_scale`` is set to
            ``1.0 / speed``.

    Returns:
        A configured ``sherpa_onnx.OfflineTts`` instance running on CPU.
    """
    espeak_dir = "/tmp/espeak-ng-data"
    model_name = "da_DK-talesyntese-medium"

    model_path = get_file(repo_id=repo_id, filename=f"{model_name}.onnx", subfolder=".")
    tokens_path = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")
    print(model_path)

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon="",
        data_dir=espeak_dir,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(sherpa_onnx.OfflineTtsConfig(model=model_config))
|
54 |
+
|
55 |
+
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """
    Return the TTS engine for ``repo_id`` at the given ``speed``.

    Delegates to ``get_vits_piper``; results are memoized per
    ``(repo_id, speed)`` pair.
    """
    return get_vits_piper(repo_id, speed)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
sherpa-onnx
|
2 |
+
soundfile
|
3 |
+
gradio
|