"""Gradio demo that synthesizes speech for Indic languages.

On start-up the script downloads one checkpoint archive per language from the
AI4Bharat Indic-TTS release page, unpacks it into the working directory and
patches the FastPitch config. It then serves a small web UI that renders the
entered text with both a male and a female speaker.
"""

import json
import os
import subprocess

import gradio as gr

# Checkpoint archives to fetch; each one unpacks into a directory named after
# its language code (the archive file name without the ".zip" suffix).
models = [
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/hi.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/bn.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/as.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/brx.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/gu.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/kn.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/ml.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/mni.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/mr.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/or.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/pa.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/raj.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/ta.zip",
    "https://github.com/AI4Bharat/Indic-TTS/releases/download/v1-checkpoints-release/te.zip",
]

# Display name shown in the UI -> language code used as the model directory.
languages = {
    "Assamese": "as",
    "Bengali": "bn",
    "Bodo": "brx",
    "Gujarati": "gu",
    "Hindi": "hi",
    "Kannada": "kn",
    "Manipuri": "mni",
    "Malayalam": "ml",
    "Marathi": "mr",
    "Odia": "or",
    "Punjabi": "pa",
    "Rajasthani": "raj",
    "Tamil": "ta",
    "Telugu": "te",
}


def _prepare_model(url):
    """Download and unpack one checkpoint archive, then patch its config.

    Args:
        url: Download URL of a ``<lang>.zip`` checkpoint archive.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``unzip`` exits non-zero.
        OSError: If the archive or the extracted config cannot be accessed.
    """
    archive = url.split("/")[-1]
    lang = archive.split(".")[0]

    # Argument lists (no shell) and check=True: a failed download or extraction
    # stops here instead of surfacing later as a missing config file.
    subprocess.run(["wget", url], check=True)
    subprocess.run(["unzip", archive], check=True)
    os.remove(archive)

    # Read the config and fix the speakers path: only the last three path
    # components are kept (presumably the shipped config records a path from
    # the machine the model was trained on -- confirm against the archive).
    config_path = f"{lang}/fastpitch/config.json"
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    config["speakers_file"] = "/".join(config["speakers_file"].split("/")[-3:])
    config["model_args"]["speakers_file"] = config["speakers_file"]

    # Save the updated config.
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)


for model in models:
    _prepare_model(model)


def _synthesize(text, model_dir, speaker, out_path):
    """Run the TTS command-line synthesizer for a single speaker.

    Args:
        text: Text to synthesize.
        model_dir: Directory holding the ``fastpitch`` and ``hifigan`` models.
        speaker: Value passed as ``--speaker_idx``.
        out_path: Path of the WAV file to write.

    Raises:
        subprocess.CalledProcessError: If the synthesizer exits non-zero.
    """
    subprocess.run(
        [
            "python3", "-m", "TTS.bin.synthesize",
            # "--text=<value>" keeps text that starts with "-" from being
            # parsed as an option; the list form keeps it away from a shell.
            f"--text={text}",
            "--model_path", f"{model_dir}/fastpitch/best_model.pth",
            "--config_path", f"{model_dir}/fastpitch/config.json",
            "--vocoder_path", f"{model_dir}/hifigan/best_model.pth",
            "--vocoder_config_path", f"{model_dir}/hifigan/config.json",
            "--speaker_idx", speaker,
            "--out_path", out_path,
        ],
        check=True,
    )


def convert(text, language, out="out.wav"):
    """Synthesize ``text`` with the male and the female speaker.

    Args:
        text: Text to synthesize; it comes straight from the web UI, so it is
            never interpolated into a shell command.
        language: Display name of the language (a key of ``languages``).
        out: Base file name; outputs are written to ``male_<out>`` and
            ``female_<out>`` in the working directory.

    Returns:
        Tuple ``(male_path, female_path)`` of the generated WAV files.

    Raises:
        KeyError: If ``language`` is not a key of ``languages``.
        subprocess.CalledProcessError: If synthesis fails for either speaker.
    """
    m = languages[language]
    paths = []
    for speaker in ("male", "female"):
        out_path = f"{speaker}_{out}"
        _synthesize(text, m, speaker, out_path)
        paths.append(out_path)
    return tuple(paths)


text = gr.Textbox(
    value="यह कल का दिन अद्भुत था क्योंकि हम संगीत कार्यक्रम से वापस आ गए हैं।",
    placeholder="Enter a text to synthesize",
    label="Text",
)
language = gr.Dropdown(
    choices=sorted(languages.keys()),
    value="Hindi",
    type="value",
    label="Language",
)
inputs = [text, language]
outputs = [
    gr.outputs.Audio(label="Male Speaker", type='filepath'),
    gr.outputs.Audio(label="Female Speaker", type='filepath'),
]

title = "Indic Languages Speech Synthesis"

gr.Interface(convert, inputs, outputs, title=title, enable_queue=True).launch()