Abdullah-Habib commited on
Commit
14e7fb7
·
1 Parent(s): 5fdc9d8

first commit

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. app.py +98 -0
  3. model.py +61 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ audios/
4
+ Dockerfile
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ doc string
3
+ """
4
+ import logging
5
+ import os
6
+ import time
7
+ import uuid
8
+ import gradio as gr
9
+ import soundfile as sf
10
+ from model import get_pretrained_model
11
+
12
+ title = "# Danish Text To Speech"
13
+ css = """
14
+ .result {display:flex;flex-direction:column}
15
+ .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
16
+ .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
17
+ .result_item_error {background-color:#ff7070;color:white;align-self:start}
18
+ """
19
+
20
def process(text: str, sid: str) -> str:
    """Synthesize Danish speech for ``text`` and return the WAV file path.

    Args:
        text: Text to synthesize.
        sid: Speaker ID as a string (parsed to int; only meaningful for
            multi-speaker models — this model uses speaker 0).

    Returns:
        Path of the generated 16-bit PCM WAV file.

    Raises:
        ValueError: If the TTS engine produced no audio samples.
    """
    repo_id = "csukuangfj/vits-piper-da_DK-talesyntese-medium"
    speed = 1
    tts = get_pretrained_model(repo_id, speed)

    audio = tts.generate(text, sid=int(sid))
    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    # Bug fix: the uuid was previously generated and then immediately
    # overwritten with a hard-coded name, so every request wrote to the same
    # file and concurrent requests clobbered each other's output.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(
        filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    return filename
43
+
44
# Build the Gradio UI: one tab with a text input, a hidden speaker-ID field,
# a submit button, and an audio player wired to process().
demo = gr.Blocks(css=css)
with demo:
    gr.Markdown(title)
    with gr.Tabs():
        with gr.TabItem("Please input your text"):
            input_text = gr.Textbox(
                label="Input text",
                info="Your text",
                lines=3,
                placeholder="Please input your text here",
            )
            # Kept hidden: the Danish piper model is single-speaker, so the
            # ID is fixed at "0" and only forwarded for API compatibility.
            input_sid = gr.Textbox(
                label="Speaker ID",
                info="Speaker ID",
                lines=1,
                max_lines=1,
                value="0",
                # Fix user-facing typo: "mult-speaker" -> "multi-speaker".
                placeholder="Speaker ID. Valid only for multi-speaker model",
                visible=False,
            )
            input_button = gr.Button("Submit")

            output_audio = gr.Audio(label="Output")

            output_info = gr.HTML(label="Info")
            input_button.click(
                process,
                inputs=[input_text, input_sid],
                outputs=[output_audio],
            )
79
+
80
def download_espeak_ng_data():
    """Fetch and unpack espeak-ng-data into /tmp for the piper TTS model.

    Idempotent: skips the download when /tmp/espeak-ng-data already exists
    (e.g. on app restart). Remains best-effort — a failed download is logged
    as a warning instead of raising, matching the original silent behavior.
    """
    if os.path.isdir("/tmp/espeak-ng-data"):
        return
    status = os.system(
        """
cd /tmp
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
tar xf espeak-ng-data.tar.bz2
"""
    )
    # os.system previously discarded the exit status, hiding download failures.
    if status != 0:
        logging.warning("espeak-ng-data download failed (exit status %s)", status)
91
+
92
+ if __name__ == "__main__":
93
+ download_espeak_ng_data()
94
+ demo.launch(share = True)
95
+
96
+
97
+
98
+
model.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ doc string
3
+ """
4
+ import os
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+ import sherpa_onnx
8
+ from huggingface_hub import hf_hub_download
9
+
10
def get_file(repo_id: str, filename: str, subfolder: str = ".") -> str:
    """Download ``filename`` from ``repo_id`` on the Hugging Face Hub.

    Returns the local cache path of the downloaded file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
20
+
21
@lru_cache(maxsize=10)
def get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build (and cache) a sherpa-onnx offline TTS engine for a piper model.

    Args:
        repo_id: Hugging Face repo holding the ``.onnx`` model and tokens.txt.
        speed: Speech-rate multiplier, mapped to VITS ``length_scale = 1/speed``.

    Returns:
        A ready-to-use ``sherpa_onnx.OfflineTts`` instance.

    Raises:
        ValueError: If ``speed`` is not positive (previously this surfaced as
            an opaque ZeroDivisionError).
    """
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed}")

    # espeak-ng data is unpacked here by app.download_espeak_ng_data().
    data_dir = "/tmp/espeak-ng-data"
    name = "da_DK-talesyntese-medium"
    model = get_file(
        repo_id=repo_id,
        filename=f"{name}.onnx",
        subfolder=".",
    )
    tokens = get_file(
        repo_id=repo_id,
        filename="tokens.txt",
        subfolder=".",
    )
    # Removed stray debugging leftover: print(model)

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon="",
                data_dir=data_dir,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )
    return sherpa_onnx.OfflineTts(tts_config)
54
+
55
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Return a cached offline TTS engine for ``repo_id`` at the given speed.

    Currently a thin dispatch layer: every supported repo is a vits-piper
    model, so this simply delegates to :func:`get_vits_piper`.
    """
    return get_vits_piper(repo_id, speed)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
sherpa-onnx
soundfile
gradio
huggingface_hub