import json
import os
import subprocess

import gradio as gr

# Common prefix of every checkpoint archive in the AI4Bharat Indic-TTS
# "v1-checkpoints-release" GitHub release; kept in one place so the release
# tag only has to be changed once.
_CHECKPOINT_RELEASE_URL = (
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/"
    "v1-checkpoints-release"
)

# Download URLs of the per-language checkpoint archives, one `<code>.zip`
# per language code. The order matches the original hand-written list.
models = [
    f"{_CHECKPOINT_RELEASE_URL}/{code}.zip"
    for code in (
        "hi",
        "bn",
        "as",
        "brx",
        "gu",
        "kn",
        "ml",
        "mni",
        "mr",
        "or",
        "pa",
        "raj",
        "ta",
        "te",
    )
]

# Maps the language name shown in the UI dropdown to the language code that
# names the extracted checkpoint directory (`<code>/fastpitch`, `<code>/hifigan`).
languages = {
    "Assamese": "as",
    "Bengali": "bn",
    "Bodo": "brx",
    "Gujarati": "gu",
    "Hindi": "hi",
    "Kannada": "kn",
    "Manipuri": "mni",
    "Malayalam": "ml",
    "Marathi": "mr",
    "Odia": "or",
    "Punjabi": "pa",
    "Rajasthani": "raj",
    "Tamil": "ta",
    "Telugu": "te",
}

# Fetch and unpack every checkpoint archive, then rewrite the FastPitch config
# so that its speakers-file path is relative to the working directory.
for model in models:
    archive = model.rsplit("/", 1)[-1]  # e.g. "hi.zip"
    lang = archive.split(".")[0]  # e.g. "hi"
    config_path = os.path.join(lang, "fastpitch", "config.json")

    # Skip the download when the checkpoint is already unpacked, so a restart
    # does not re-fetch every archive.
    if not os.path.isfile(config_path):
        # Argument lists (no shell) and check=True: a failed download or
        # extraction stops start-up instead of being silently ignored.
        # `-O` makes wget overwrite a stale partial archive rather than
        # writing "<name>.zip.1"; `-o` keeps unzip from prompting on a
        # partially extracted directory.
        subprocess.run(["wget", "-O", archive, model], check=True)
        subprocess.run(["unzip", "-o", archive], check=True)
        os.remove(archive)

    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    # Keep only the last three path components of the configured speakers
    # file and mirror the result into model_args. Re-applying this to an
    # already shortened path leaves it unchanged.
    speakers_file = "/".join(config["speakers_file"].split("/")[-3:])
    config["speakers_file"] = speakers_file
    config["model_args"]["speakers_file"] = speakers_file

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)

def convert(text, language, out="out.wav"):
    """Synthesize *text* with the male and female voices of *language*.

    Runs ``python3 -m TTS.bin.synthesize`` once per speaker, using the
    FastPitch model and HiFi-GAN vocoder files found under the language's
    checkpoint directory.

    Args:
        text: Text to synthesize.
        language: Key of the module-level ``languages`` mapping
            (e.g. ``"Hindi"``).
        out: File-name suffix; the results are written to ``male_<out>`` and
            ``female_<out>`` in the current working directory.

    Returns:
        A ``(male_path, female_path)`` tuple of output file names.

    Raises:
        KeyError: If *language* is not a key of ``languages``.
        subprocess.CalledProcessError: If a synthesis process exits with a
            non-zero status.
    """
    m = languages[language]

    out_paths = []
    for speaker in ("male", "female"):
        out_path = f"{speaker}_{out}"
        # Pass an argument list instead of a shell string: *text* comes from
        # the web UI, and quoting it into a shell command allowed command
        # injection. `--text=` keeps text that starts with "-" from being
        # parsed as an option.
        subprocess.run(
            [
                "python3", "-m", "TTS.bin.synthesize",
                f"--text={text}",
                "--model_path", f"{m}/fastpitch/best_model.pth",
                "--config_path", f"{m}/fastpitch/config.json",
                "--vocoder_path", f"{m}/hifigan/best_model.pth",
                "--vocoder_config_path", f"{m}/hifigan/config.json",
                "--speaker_idx", speaker,
                "--out_path", out_path,
            ],
            # Fail loudly rather than returning a stale or missing wav file.
            check=True,
        )
        out_paths.append(out_path)

    return tuple(out_paths)

# --- Gradio UI wiring -------------------------------------------------------

# Free-text input, pre-filled with a Hindi sample sentence.
text = gr.Textbox(
    value="यह कल का दिन अद्भुत था क्योंकि हम संगीत कार्यक्रम से वापस आ गए हैं।",
    placeholder="Enter a text to synthesize",
    label="Text",
)

# Language selector; the choices are the keys of `languages`, sorted by name.
language = gr.Dropdown(
    choices=sorted(languages),
    value="Hindi",
    type="value",
    label="Language",
)

inputs = [text, language]

# One audio player per path returned by `convert` (male first, then female).
# Uses the top-level `gr.Audio` component, consistent with `gr.Textbox` and
# `gr.Dropdown` above, instead of the deprecated `gr.outputs` namespace.
outputs = [
    gr.Audio(label="Male Speaker", type="filepath"),
    gr.Audio(label="Female Speaker", type="filepath"),
]

title = "Indic Languages Speech Synthesis"

gr.Interface(convert, inputs, outputs, title=title, enable_queue=True).launch()