Spaces:

shethjenil
/

Indic-Text2Speech

Paused

App Files Files Community

Indic-Text2Speech / app.py

shethjenil

Upload 2 files

3fb5a1e verified about 2 months ago

raw

history blame

2.46 kB

	from os import getenv
	from huggingface_hub import hf_hub_download
	from json import load as json_load , dump as json_dump
	from torch import device as Device
	from torch.cuda import is_available as cuda_is_available
	from TTS.utils.synthesizer import Synthesizer

	# Language code -> display label ("English name - native name").
	# The code is looked up from configuration at startup and the label is
	# shown in the UI title.
	lang_conf = {
	    "as": "Assamese - অসমীয়া",
	    "bn": "Bangla - বাংলা",
	    "brx": "Boro - बड़ो",
	    "en": "English (Indian accent)",
	    "en+hi": "English+Hindi (Hinglish code-mixed)",
	    "gu": "Gujarati - ગુજરાતી",
	    "hi": "Hindi - हिंदी",
	    "kn": "Kannada - ಕನ್ನಡ",
	    "ml": "Malayalam - മലയാളം",
	    "mni": "Manipuri - মিতৈলোন",
	    "mr": "Marathi - मराठी",
	    "or": "Oriya - ଓଡ଼ିଆ",
	    "pa": "Panjabi - ਪੰਜਾਬੀ",
	    "raj": "Rajasthani - राजस्थानी",
	    "ta": "Tamil - தமிழ்",
	    "te": "Telugu - తెలుగు",
	}


	class Indic_TTS:
	    """Text-to-speech wrapper for a single language of ``shethjenil/INDIC_TTS``.

	    Attributes:
	        synthesizer: The ``TTS.utils.synthesizer.Synthesizer`` built from the
	            downloaded FastPitch model and HiFi-GAN vocoder files.
	        speakers: Speaker names reported by the model's speaker manager.
	    """

	    def __init__(self, lang, device):
	        """Download the model files for ``lang`` and build the synthesizer.

	        Args:
	            lang: Language code; used as the filename prefix of every file
	                fetched from the model repository.
	            device: Object exposing a ``type`` attribute (e.g. a
	                ``torch.device``); CUDA is enabled when it equals ``"cuda"``.
	        """
	        model_id = "shethjenil/INDIC_TTS"

	        def fetch(suffix):
	            # Every artefact in the repo is named "<lang><suffix>".
	            return hf_hub_download(model_id, lang + suffix)

	        model_path = fetch("_fastpitch_best_model.pth")
	        vocoder_path = fetch("_hifigan_best_model.pth")
	        vocoder_config_path = fetch("_hifigan_config.json")
	        config_path = fetch("_fastpitch_config.json")
	        speaker_path = fetch("_fastpitch_speakers.pth")

	        # The shipped config does not know where the speakers file landed
	        # locally, so point both entries at the downloaded copy and rewrite
	        # the config in place before the synthesizer reads it.
	        # NOTE(review): this modifies the file returned by hf_hub_download
	        # (presumably inside the local hub cache) - confirm that is intended.
	        with open(config_path, encoding="utf-8") as config_file:
	            conf = json_load(config_file)
	        conf['speakers_file'] = conf['model_args']['speakers_file'] = speaker_path
	        with open(config_path, 'w', encoding="utf-8") as config_file:
	            json_dump(conf, config_file)

	        self.synthesizer = Synthesizer(
	            model_path,
	            config_path,
	            vocoder_checkpoint=vocoder_path,
	            vocoder_config=vocoder_config_path,
	            use_cuda=device.type == "cuda",
	        )
	        self.speakers = self.synthesizer.tts_model.speaker_manager.speaker_names

	    def text2speech(self, text: str, speaker: str) -> str:
	        """Synthesize ``text`` with ``speaker`` and return the WAV file path.

	        Args:
	            text: Text to synthesize.
	            speaker: Speaker name, passed to the synthesizer as given.

	        Returns:
	            Path of the written audio file (always ``"output.wav"``, so each
	            call overwrites the previous result).
	        """
	        output_path = "output.wav"
	        wav = self.synthesizer.tts(text, speaker)
	        self.synthesizer.save_wav(wav, output_path)
	        # The path must be returned so the caller (the Gradio audio output)
	        # receives the file; the original evaluated the string and dropped it.
	        return output_path

	# The language to serve is selected through the `indic_tts_lang` environment
	# variable. Fail early with a readable message when it is unset or unknown,
	# instead of an opaque `KeyError: None` from the lookup below.
	indic_tts_lang = getenv("indic_tts_lang")
	if indic_tts_lang not in lang_conf:
	    raise ValueError(
	        "Environment variable 'indic_tts_lang' must be one of "
	        f"{sorted(lang_conf)}, got {indic_tts_lang!r}"
	    )
	tts_lang_name = lang_conf[indic_tts_lang]
	tts_model = Indic_TTS(indic_tts_lang,Device("cuda" if cuda_is_available() else "cpu"))

	import gradio as gr

	# Two inputs (free text + speaker choice) map onto text2speech(text, speaker);
	# the returned file path feeds the audio player.
	demo = gr.Interface(
	    tts_model.text2speech,
	    [gr.Textbox(label="Enter Text"),gr.Dropdown(tts_model.speakers, label="speaker"),],
	    gr.Audio(type="filepath", label="Speech"),
	    title=f"{tts_lang_name} TTS",
	)
	demo.launch()