File size: 3,623 Bytes
8d910b7
28a1377
8d910b7
 
e7dae75
8d910b7
 
 
34fce4e
 
 
 
 
 
8d910b7
6cd3aee
 
 
8d910b7
 
34fce4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d910b7
 
 
 
 
6cd3aee
8d910b7
34fce4e
 
 
8d910b7
 
e7dae75
8d910b7
 
 
 
 
 
 
e7dae75
137f174
34fce4e
 
 
 
 
137f174
34fce4e
 
 
e7dae75
137f174
 
8d910b7
 
137f174
34fce4e
 
 
 
 
 
 
137f174
8d910b7
 
 
28a1377
8d910b7
137f174
34fce4e
 
 
 
 
 
8d910b7
28a1377
a5bab0f
34fce4e
8d910b7
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

import tempfile ,os
from TTS.config import load_config
import gradio as gr

from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# Selectable model names: each architecture paired with each voice, in the
# order vits-male, vits-female, glowtts-male, glowtts-female.
MODEL_NAMES = [
    f"{architecture}-{voice}"
    for architecture in ("vits", "glowtts")
    for voice in ("male", "female")
]

# Longest input (in characters) that tts() synthesizes; longer text is cut.
MAX_TXT_LEN = 800

# Checkpoint/config locations inside the current working directory.
# NOTE(review): neither name is referenced by the rest of the visible file.
model_path, config_path = (
    f"{os.getcwd()}/{file_name}"
    for file_name in ("best_model.pth", "config.json")
)
   


from TTS.utils.download import download_url
# One row per voice:
# [local directory, remote checkpoint file, remote config file, base URL].
modelInfo=[
    ["vits-male","best_model_65633.pth","config-0.json","https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
    ["vits-female","checkpoint_48000.pth","config-2.json","https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"],
    ["glowtts-male","best_model_77797.pth","config-1.json","https://huggingface.co/Kamtera/persian-tts-male-glow_tts/resolve/main/"],
    ["glowtts-female","best_model.pth","config.json","https://huggingface.co/Kamtera/persian-tts-female-glow_tts/resolve/main/"]
]

# Fetch every model into its own directory, storing the files under the fixed
# names "best_model.pth" / "config.json" that tts() loads.
for directory, checkpoint_file, config_file, base_url in modelInfo:
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)
    for remote_name, local_name in (
        (checkpoint_file, "best_model.pth"),
        (config_file, "config.json"),
    ):
        # Skip files that are already on disk: a restart in a persistent
        # working directory should neither re-fetch large checkpoints nor
        # ask download_url to write over an existing target.
        # NOTE: an interrupted download leaves a partial file behind; delete
        # it manually to force a fresh fetch.
        if os.path.exists(os.path.join(directory, local_name)):
            continue
        download_url(base_url + remote_name, directory, local_name)
def tts(text: str, model_name: str) -> str:
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped before synthesis.
        model_name: One of ``MODEL_NAMES``. It is also the directory that
            holds the model's ``best_model.pth`` and ``config.json``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        NameError: If ``model_name`` is not a known model or its checkpoint
            or config file is missing on disk.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Validate the name before using it to build paths: it arrives from the
    # UI/API, and a constructor call can never return None, so checking the
    # synthesizer afterwards would never report a missing model.
    if model_name not in MODEL_NAMES:
        raise NameError("model not found")
    checkpoint_path = os.path.join(model_name, "best_model.pth")
    config_file = os.path.join(model_name, "config.json")
    if not (os.path.isfile(checkpoint_path) and os.path.isfile(config_file)):
        raise NameError("model not found")

    # synthesize
    # NOTE(review): the model is loaded from disk on every request; consider
    # caching one Synthesizer per model if latency matters.
    synthesizer = Synthesizer(checkpoint_path, config_file)
    wavs = synthesizer.tts(text)

    # return output
    # delete=False keeps the file on disk after the handle closes so the
    # returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


# Text handed to gr.Interface as `description`: links to the models and to
# the datasets they were trained on.
description="""
This is a demo of persian text to speech model.

Models can be found here: 
https://huggingface.co/Kamtera/persian-tts-female-vits
https://huggingface.co/Kamtera/persian-tts-male-vits
https://huggingface.co/Kamtera/persian-tts-male-glow_tts
https://huggingface.co/Kamtera/persian-tts-female-glow_tts

Models trained on these datasets : 
https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset
https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale



"""
# Handed to gr.Interface as `article`; intentionally empty.
article = ""

# Example rows in the order of tts()'s arguments: [text, model name].
# The first row uses the male VITS voice, all remaining rows the female one.
examples = [
    ["و خداوند شما را با ارسال روح در جسم زندگانی و حیات بخشید", "vits-male"],
    *(
        [sample_text, "vits-female"]
        for sample_text in (
            "تاجر تو چه تجارت می کنی ، تو را چه که چه تجارت می کنم؟",
            "شیش سیخ جیگر سیخی شیش هزار",
            "سه شیشه شیر ، سه سیر سرشیر",
            "دزدی دزدید ز بز دزدی بزی ، عجب دزدی که دزدید ز بز دزدی بزی",
            "مثنوی یکی از قالب های شعری است ک هر بیت قافیه ی جداگانه دارد",
            "در گلو ماند خس او سالها، چیست آن خس مهر جاه و مالها",
        )
    ),
]
# Input widgets, in the order of tts()'s parameters: the text to speak and
# the model to speak it with (female VITS voice preselected).
_text_input = gr.Textbox(
    label="Text",
    value="زندگی فقط یک بار است؛ از آن به خوبی استفاده کن",
)
_model_choice = gr.Radio(
    label="Pick a TTS Model ",
    choices=MODEL_NAMES,
    value="vits-female",
)
# tts() returns the path of a WAV file, hence type='filepath'.
_audio_output = gr.Audio(label="Output", type='filepath')

# Assemble the demo page around tts() and start serving it.
iface = gr.Interface(
    fn=tts,
    inputs=[_text_input, _model_choice],
    outputs=_audio_output,
    examples=examples,
    title="🗣️ Persian tts 🗣️",
    description=description,
    article=article,
    live=False,
)
iface.launch(share=False)