# tts / app.py
# MohamedRashad's picture
# Update app.py
# bd630a2 verified
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
import pyarabic.araby as araby
from gradio_client import Client
# Voice catalogue: maps each language name shown in the UI to a mapping of
# speaker display name -> voice identifier handed to edge_tts.Communicate.
# The outer keys populate the language dropdown; the inner keys populate the
# speaker dropdown for the selected language.
language_dict = {
"English": {
"Jenny": "en-US-JennyNeural",
"Guy": "en-US-GuyNeural",
"Ana": "en-US-AnaNeural",
"Aria": "en-US-AriaNeural",
"Christopher": "en-US-ChristopherNeural",
"Eric": "en-US-EricNeural",
"Michelle": "en-US-MichelleNeural",
"Roger": "en-US-RogerNeural",
"Natasha": "en-AU-NatashaNeural",
"William": "en-AU-WilliamNeural",
"Clara": "en-CA-ClaraNeural",
"Liam": "en-CA-LiamNeural",
"Libby": "en-GB-LibbyNeural",
"Maisie": "en-GB-MaisieNeural",
"Ryan": "en-GB-RyanNeural",
"Sonia": "en-GB-SoniaNeural",
"Thomas": "en-GB-ThomasNeural",
"Sam": "en-HK-SamNeural",
"Yan": "en-HK-YanNeural",
"Connor": "en-IE-ConnorNeural",
"Emily": "en-IE-EmilyNeural",
"Neerja": "en-IN-NeerjaNeural",
"Prabhat": "en-IN-PrabhatNeural",
"Asilia": "en-KE-AsiliaNeural",
"Chilemba": "en-KE-ChilembaNeural",
"Abeo": "en-NG-AbeoNeural",
"Ezinne": "en-NG-EzinneNeural",
"Mitchell": "en-NZ-MitchellNeural",
"James": "en-PH-JamesNeural",
"Rosa": "en-PH-RosaNeural",
"Luna": "en-SG-LunaNeural",
"Wayne": "en-SG-WayneNeural",
"Elimu": "en-TZ-ElimuNeural",
"Imani": "en-TZ-ImaniNeural",
"Leah": "en-ZA-LeahNeural",
"Luke": "en-ZA-LukeNeural"
},
"Spanish": {
"Elena": "es-AR-ElenaNeural",
"Tomas": "es-AR-TomasNeural",
"Marcelo": "es-BO-MarceloNeural",
"Sofia": "es-BO-SofiaNeural",
"Gonzalo": "es-CO-GonzaloNeural",
"Salome": "es-CO-SalomeNeural",
"Juan": "es-CR-JuanNeural",
"Maria": "es-CR-MariaNeural",
"Belkys": "es-CU-BelkysNeural",
"Emilio": "es-DO-EmilioNeural",
"Ramona": "es-DO-RamonaNeural",
"Andrea": "es-EC-AndreaNeural",
"Luis": "es-EC-LuisNeural",
"Alvaro": "es-ES-AlvaroNeural",
"Elvira": "es-ES-ElviraNeural",
"Teresa": "es-GQ-TeresaNeural",
"Andres": "es-GT-AndresNeural",
"Marta": "es-GT-MartaNeural",
"Carlos": "es-HN-CarlosNeural",
"Karla": "es-HN-KarlaNeural",
"Federico": "es-NI-FedericoNeural",
"Yolanda": "es-NI-YolandaNeural",
"Margarita": "es-PA-MargaritaNeural",
"Roberto": "es-PA-RobertoNeural",
"Alex": "es-PE-AlexNeural",
"Camila": "es-PE-CamilaNeural",
"Karina": "es-PR-KarinaNeural",
"Victor": "es-PR-VictorNeural",
"Mario": "es-PY-MarioNeural",
"Tania": "es-PY-TaniaNeural",
"Lorena": "es-SV-LorenaNeural",
"Rodrigo": "es-SV-RodrigoNeural",
"Alonso": "es-US-AlonsoNeural",
"Paloma": "es-US-PalomaNeural",
"Mateo": "es-UY-MateoNeural",
"Valentina": "es-UY-ValentinaNeural",
"Paola": "es-VE-PaolaNeural",
"Sebastian": "es-VE-SebastianNeural"
},
"Arabic": {
"Hamed": "ar-SA-HamedNeural",
"Zariyah": "ar-SA-ZariyahNeural",
"Fatima": "ar-AE-FatimaNeural",
"Hamdan": "ar-AE-HamdanNeural",
"Ali": "ar-BH-AliNeural",
"Laila": "ar-BH-LailaNeural",
"Ismael": "ar-DZ-IsmaelNeural",
"Salma": "ar-EG-SalmaNeural",
"Shakir": "ar-EG-ShakirNeural",
"Bassel": "ar-IQ-BasselNeural",
"Rana": "ar-IQ-RanaNeural",
"Sana": "ar-JO-SanaNeural",
"Taim": "ar-JO-TaimNeural",
"Fahed": "ar-KW-FahedNeural",
"Noura": "ar-KW-NouraNeural",
"Layla": "ar-LB-LaylaNeural",
"Rami": "ar-LB-RamiNeural",
"Iman": "ar-LY-ImanNeural",
"Omar": "ar-LY-OmarNeural",
"Jamal": "ar-MA-JamalNeural",
"Mouna": "ar-MA-MounaNeural",
"Abdullah": "ar-OM-AbdullahNeural",
"Aysha": "ar-OM-AyshaNeural",
"Amal": "ar-QA-AmalNeural",
"Moaz": "ar-QA-MoazNeural",
"Amany": "ar-SY-AmanyNeural",
"Laith": "ar-SY-LaithNeural",
"Hedi": "ar-TN-HediNeural",
"Reem": "ar-TN-ReemNeural",
"Maryam": "ar-YE-MaryamNeural",
"Saleh": "ar-YE-SalehNeural"
},
"Korean": {
"Sun-Hi": "ko-KR-SunHiNeural",
"InJoon": "ko-KR-InJoonNeural"
},
"Thai": {
"Premwadee": "th-TH-PremwadeeNeural",
"Niwat": "th-TH-NiwatNeural"
},
"Vietnamese": {
"HoaiMy": "vi-VN-HoaiMyNeural",
"NamMinh": "vi-VN-NamMinhNeural"
},
"Japanese": {
"Nanami": "ja-JP-NanamiNeural",
"Keita": "ja-JP-KeitaNeural"
},
"French": {
"Denise": "fr-FR-DeniseNeural",
"Eloise": "fr-FR-EloiseNeural",
"Henri": "fr-FR-HenriNeural",
"Sylvie": "fr-CA-SylvieNeural",
"Antoine": "fr-CA-AntoineNeural",
"Jean": "fr-CA-JeanNeural",
"Ariane": "fr-CH-ArianeNeural",
"Fabrice": "fr-CH-FabriceNeural",
"Charline": "fr-BE-CharlineNeural",
"Gerard": "fr-BE-GerardNeural"
},
"Portuguese": {
"Francisca": "pt-BR-FranciscaNeural",
"Antonio": "pt-BR-AntonioNeural",
"Duarte": "pt-PT-DuarteNeural",
"Raquel": "pt-PT-RaquelNeural"
},
"Indonesian": {
"Ardi": "id-ID-ArdiNeural",
"Gadis": "id-ID-GadisNeural"
},
"Hebrew": {
"Avri": "he-IL-AvriNeural",
"Hila": "he-IL-HilaNeural"
},
"Italian": {
"Isabella": "it-IT-IsabellaNeural",
"Diego": "it-IT-DiegoNeural",
"Elsa": "it-IT-ElsaNeural"
},
"Dutch": {
"Colette": "nl-NL-ColetteNeural",
"Fenna": "nl-NL-FennaNeural",
"Maarten": "nl-NL-MaartenNeural",
"Arnaud": "nl-BE-ArnaudNeural",
"Dena": "nl-BE-DenaNeural"
},
"Malay": {
"Osman": "ms-MY-OsmanNeural",
"Yasmin": "ms-MY-YasminNeural"
},
"Norwegian": {
"Pernille": "nb-NO-PernilleNeural",
"Finn": "nb-NO-FinnNeural"
},
"Swedish": {
"Sofie": "sv-SE-SofieNeural",
"Mattias": "sv-SE-MattiasNeural"
},
"Greek": {
"Athina": "el-GR-AthinaNeural",
"Nestoras": "el-GR-NestorasNeural"
},
"German": {
"Katja": "de-DE-KatjaNeural",
"Amala": "de-DE-AmalaNeural",
"Conrad": "de-DE-ConradNeural",
"Killian": "de-DE-KillianNeural",
"Ingrid": "de-AT-IngridNeural",
"Jonas": "de-AT-JonasNeural",
"Jan": "de-CH-JanNeural",
"Leni": "de-CH-LeniNeural"
},
"Afrikaans": {
"Adri": "af-ZA-AdriNeural",
"Willem": "af-ZA-WillemNeural"
},
"Amharic": {
"Ameha": "am-ET-AmehaNeural",
"Mekdes": "am-ET-MekdesNeural"
},
"Azerbaijani": {
"Babek": "az-AZ-BabekNeural",
"Banu": "az-AZ-BanuNeural"
},
"Bulgarian": {
"Borislav": "bg-BG-BorislavNeural",
"Kalina": "bg-BG-KalinaNeural"
},
"Bengali": {
"Nabanita": "bn-BD-NabanitaNeural",
"Pradeep": "bn-BD-PradeepNeural",
"Bashkar": "bn-IN-BashkarNeural",
"Tanishaa": "bn-IN-TanishaaNeural"
},
"Bosnian": {
"Goran": "bs-BA-GoranNeural",
"Vesna": "bs-BA-VesnaNeural"
},
"Catalan": {
"Joana": "ca-ES-JoanaNeural",
"Enric": "ca-ES-EnricNeural"
},
"Czech": {
"Antonin": "cs-CZ-AntoninNeural",
"Vlasta": "cs-CZ-VlastaNeural"
},
"Welsh": {
"Aled": "cy-GB-AledNeural",
"Nia": "cy-GB-NiaNeural"
},
"Danish": {
"Christel": "da-DK-ChristelNeural",
"Jeppe": "da-DK-JeppeNeural"
},
"Estonian": {
"Anu": "et-EE-AnuNeural",
"Kert": "et-EE-KertNeural"
},
"Persian": {
"Dilara": "fa-IR-DilaraNeural",
"Farid": "fa-IR-FaridNeural"
},
"Finnish": {
"Harri": "fi-FI-HarriNeural",
"Noora": "fi-FI-NooraNeural"
},
"Irish": {
"Colm": "ga-IE-ColmNeural",
"Orla": "ga-IE-OrlaNeural"
},
"Galician": {
"Roi": "gl-ES-RoiNeural",
"Sabela": "gl-ES-SabelaNeural"
},
"Gujarati": {
"Dhwani": "gu-IN-DhwaniNeural",
"Niranjan": "gu-IN-NiranjanNeural"
},
"Hindi": {
"Madhur": "hi-IN-MadhurNeural",
"Swara": "hi-IN-SwaraNeural"
},
"Croatian": {
"Gabrijela": "hr-HR-GabrijelaNeural",
"Srecko": "hr-HR-SreckoNeural"
},
"Hungarian": {
"Noemi": "hu-HU-NoemiNeural",
"Tamas": "hu-HU-TamasNeural"
},
"Icelandic": {
"Gudrun": "is-IS-GudrunNeural",
"Gunnar": "is-IS-GunnarNeural"
},
"Javanese": {
"Dimas": "jv-ID-DimasNeural",
"Siti": "jv-ID-SitiNeural"
},
"Georgian": {
"Eka": "ka-GE-EkaNeural",
"Giorgi": "ka-GE-GiorgiNeural"
},
"Kazakh": {
"Aigul": "kk-KZ-AigulNeural",
"Daulet": "kk-KZ-DauletNeural"
},
"Khmer": {
"Piseth": "km-KH-PisethNeural",
"Sreymom": "km-KH-SreymomNeural"
},
"Kannada": {
"Gagan": "kn-IN-GaganNeural",
"Sapna": "kn-IN-SapnaNeural"
},
"Lao": {
"Chanthavong": "lo-LA-ChanthavongNeural",
"Keomany": "lo-LA-KeomanyNeural"
},
"Lithuanian": {
"Leonas": "lt-LT-LeonasNeural",
"Ona": "lt-LT-OnaNeural"
},
"Latvian": {
"Everita": "lv-LV-EveritaNeural",
"Nils": "lv-LV-NilsNeural"
},
"Macedonian": {
"Aleksandar": "mk-MK-AleksandarNeural",
"Marija": "mk-MK-MarijaNeural"
},
"Malayalam": {
"Midhun": "ml-IN-MidhunNeural",
"Sobhana": "ml-IN-SobhanaNeural"
},
"Mongolian": {
"Bataa": "mn-MN-BataaNeural",
"Yesui": "mn-MN-YesuiNeural"
},
"Marathi": {
"Aarohi": "mr-IN-AarohiNeural",
"Manohar": "mr-IN-ManoharNeural"
},
"Maltese": {
"Grace": "mt-MT-GraceNeural",
"Joseph": "mt-MT-JosephNeural"
},
"Burmese": {
"Nilar": "my-MM-NilarNeural",
"Thiha": "my-MM-ThihaNeural"
},
"Nepali": {
"Hemkala": "ne-NP-HemkalaNeural",
"Sagar": "ne-NP-SagarNeural"
},
"Polish": {
"Marek": "pl-PL-MarekNeural",
"Zofia": "pl-PL-ZofiaNeural"
},
"Pashto": {
"Gul Nawaz": "ps-AF-GulNawazNeural",
"Latifa": "ps-AF-LatifaNeural"
},
"Romanian": {
"Alina": "ro-RO-AlinaNeural",
"Emil": "ro-RO-EmilNeural"
},
"Russian": {
"Svetlana": "ru-RU-SvetlanaNeural",
"Dmitry": "ru-RU-DmitryNeural"
},
"Sinhala": {
"Sameera": "si-LK-SameeraNeural",
"Thilini": "si-LK-ThiliniNeural"
},
"Slovak": {
"Lukas": "sk-SK-LukasNeural",
"Viktoria": "sk-SK-ViktoriaNeural"
},
"Slovenian": {
"Petra": "sl-SI-PetraNeural",
"Rok": "sl-SI-RokNeural"
},
"Somali": {
"Muuse": "so-SO-MuuseNeural",
"Ubax": "so-SO-UbaxNeural"
},
"Albanian": {
"Anila": "sq-AL-AnilaNeural",
"Ilir": "sq-AL-IlirNeural"
},
"Serbian": {
"Nicholas": "sr-RS-NicholasNeural",
"Sophie": "sr-RS-SophieNeural"
},
"Sundanese": {
"Jajang": "su-ID-JajangNeural",
"Tuti": "su-ID-TutiNeural"
},
"Swahili": {
"Rafiki": "sw-KE-RafikiNeural",
"Zuri": "sw-KE-ZuriNeural",
"Daudi": "sw-TZ-DaudiNeural",
"Rehema": "sw-TZ-RehemaNeural"
},
"Tamil": {
"Pallavi": "ta-IN-PallaviNeural",
"Valluvar": "ta-IN-ValluvarNeural",
"Kumar": "ta-LK-KumarNeural",
"Saranya": "ta-LK-SaranyaNeural",
"Kani": "ta-MY-KaniNeural",
"Surya": "ta-MY-SuryaNeural",
"Anbu": "ta-SG-AnbuNeural"
},
"Telugu": {
"Mohan": "te-IN-MohanNeural",
"Shruti": "te-IN-ShrutiNeural"
},
"Turkish": {
"Ahmet": "tr-TR-AhmetNeural",
"Emel": "tr-TR-EmelNeural"
},
"Ukrainian": {
"Ostap": "uk-UA-OstapNeural",
"Polina": "uk-UA-PolinaNeural"
},
"Urdu": {
"Gul": "ur-IN-GulNeural",
"Salman": "ur-IN-SalmanNeural",
"Asad": "ur-PK-AsadNeural",
"Uzma": "ur-PK-UzmaNeural"
},
"Uzbek": {
"Madina": "uz-UZ-MadinaNeural",
"Sardor": "uz-UZ-SardorNeural"
},
"Mandarin": {
"Xiaoxiao": "zh-CN-XiaoxiaoNeural",
"Yunyang": "zh-CN-YunyangNeural",
"Yunxi": "zh-CN-YunxiNeural",
"Xiaoyi": "zh-CN-XiaoyiNeural",
"Yunjian": "zh-CN-YunjianNeural",
"Yunxia": "zh-CN-YunxiaNeural",
"Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
"Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
"HiuMaan": "zh-HK-HiuMaanNeural",
"HiuGaai": "zh-HK-HiuGaaiNeural",
"WanLung": "zh-HK-WanLungNeural",
"HsiaoChen": "zh-TW-HsiaoChenNeural",
"HsiaoYu": "zh-TW-HsiaoYuNeural",
"YunJhe": "zh-TW-YunJheNeural"
},
"Zulu": {
"Thando": "zu-ZA-ThandoNeural",
"Themba": "zu-ZA-ThembaNeural"
}
}
# gradio_client handle for the "MohamedRashad/arabic-auto-tashkeel" app,
# created once at import time. text_to_speech_edge calls its
# "/infer_shakkala" endpoint to add tashkeel to Arabic input.
client = Client("MohamedRashad/arabic-auto-tashkeel")
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize ``text`` to an MP3 file with Edge TTS.

    Args:
        text: Text to convert to speech.
        language_code: Top-level key of ``language_dict`` (e.g. ``"Arabic"``).
        speaker: Speaker key inside ``language_dict[language_code]``.
        tashkeel_checkbox: When true and the language is ``"Arabic"``, the
            existing diacritics are stripped and the text is sent to the
            remote ``/infer_shakkala`` endpoint to add tashkeel before
            synthesis.

    Returns:
        A ``(text, path)`` tuple: the text that was actually synthesized and
        the path of the generated ``.mp3`` temporary file.

    Raises:
        gr.Error: If ``text`` is empty/blank, or the language/speaker pair is
            missing or not present in ``language_dict``.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Resolve the voice first so that a missing or invalid selection fails
    # fast with a readable message, before any network call is made.
    try:
        voice = language_dict[language_code][speaker]
    except (KeyError, TypeError):
        raise gr.Error("Please select a language and a speaker first.") from None

    # Remove diacritics from Arabic text then add tashkeel
    if language_code == "Arabic" and tashkeel_checkbox:
        stripped_text = araby.strip_diacritics(text)
        # client.predict is a blocking call; run it in the default executor
        # so the event loop keeps serving other requests meanwhile.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None,
            lambda: client.predict(
                input_text=stripped_text,
                api_name="/infer_shakkala",
            ),
        )

    communicate = edge_tts.Communicate(text, voice)

    # Only reserve a unique path here; the handle is closed before edge-tts
    # writes to it, because an open NamedTemporaryFile cannot be reopened by
    # name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nothing else removes the file, so clean up on
        # failure (including task cancellation) and re-raise unchanged.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return text, tmp_path
def get_speakers(language):
    """Build the component updates that follow a change of the language dropdown.

    Args:
        language: Selected key of ``language_dict``. May be ``None`` or an
            unknown value, e.g. when the dropdown is cleared.

    Returns:
        A ``(gr.Dropdown, gr.Checkbox)`` pair. For a known language the
        dropdown lists that language's speakers with the first one selected,
        and the checkbox is visible only when the language is ``"Arabic"``.
        For a missing or unknown language the dropdown is emptied and
        disabled and the checkbox is hidden.
    """
    speakers = list(language_dict.get(language, {}))

    # No usable language: reset both components instead of raising
    # KeyError / IndexError inside the event handler.
    if not speakers:
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )

    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
# Initial values passed as `value=` to the language and speaker dropdowns
# below; None means neither dropdown is given an initial selection.
default_language = None
default_speaker = None
# Build the Gradio interface: inputs (text, language, speaker, tashkeel
# option, run button) in the left column, outputs (final text and generated
# audio) in the right column.
with gr.Blocks(title="Multilingual TTS") as demo:
    gr.HTML("<center><h1>Multilingual TTS (Edge TTS)</h1></center>")
    # Each closing tag matches its opening tag (<h2>...</h2>, <p>...</p>).
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} languages supported</h2>")
    gr.HTML(f"<p> {', '.join(language_dict.keys())} </p>")
    gr.Markdown("**Note:** A special feature is added for Arabic language only.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                label="Input Text",
                placeholder="Enter text to convert to speech",
            )
            language = gr.Dropdown(
                choices=list(language_dict.keys()),
                value=default_language,
                label="Languages",
                interactive=True,
            )
            # The speaker list and the tashkeel checkbox start disabled;
            # get_speakers enables them once a language is chosen.
            speaker = gr.Dropdown(
                choices=[],
                value=default_speaker,
                label="Speakers",
                interactive=False,
            )
            tashkeel_checkbox = gr.Checkbox(
                label="Tashkeel",
                value=False,
                visible=False,
                interactive=False,
            )
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    # Changing the language refreshes the speaker list and toggles the
    # Arabic-only tashkeel checkbox.
    language.change(
        get_speakers,
        inputs=[language],
        outputs=[speaker, tashkeel_checkbox],
    )
    run_btn.click(
        text_to_speech_edge,
        inputs=[input_text, language, speaker, tashkeel_checkbox],
        outputs=[output_text, output_audio],
    )
# Script entry point: enable the request queue, then start the app without a
# public share link.
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=False)