"""Streamlit front end that converts text to speech with edge-tts voices."""

import asyncio
import os
import tempfile
from collections import defaultdict
from typing import Dict

import edge_tts
import streamlit as st


async def text_to_speech(text: str, voice: str) -> str:
    """Synthesize *text* with *voice* and return the path of the MP3 written.

    Args:
        text: The text to speak.
        voice: An edge-tts voice short name.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    # Only the unique path is needed. Close our own handle before edge-tts
    # writes to it: this avoids leaking a descriptor and lets the file be
    # reopened on platforms that forbid opening an already-open temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        output_path = tmp.name

    try:
        await edge_tts.Communicate(text, voice).save(output_path)
    except BaseException:
        # Do not leave an orphaned temp file behind on failure/cancellation.
        os.unlink(output_path)
        raise
    return output_path


async def list_voices() -> Dict[str, Dict]:
    """Fetch the edge-tts voice list, keyed by each voice's short name.

    Returns:
        A mapping ``short_name -> {'name': short_name, 'language': locale}``.
    """
    voices = await edge_tts.list_voices()
    return {
        v['ShortName']: {'name': v['ShortName'], 'language': v['Locale']}
        for v in voices
    }


def process_voices(voices: Dict[str, Dict]) -> Dict[str, Dict[str, str]]:
    """Group voices by language for the two-step (language, speaker) picker.

    Args:
        voices: Mapping of voice short name to a dict with a ``'language'``
            key, as produced by ``list_voices``.

    Returns:
        A mapping ``language -> {speaker_name -> voice short name}``, where
        ``speaker_name`` is the last hyphen-separated segment of the short
        name with ``'Neural'`` removed.
    """
    processed_voices = defaultdict(dict)
    for full_name, details in voices.items():
        language = details['language']
        # The speaker is the final segment. Indexing a fixed position would
        # pick a locale fragment for short names whose locale has more than
        # two parts, and would raise for names with fewer than three parts.
        speaker_name = full_name.rsplit('-', 1)[-1].replace('Neural', '')
        processed_voices[language][speaker_name] = full_name
    return dict(processed_voices)


async def main():
    """Render the Streamlit page: text input, voice picker, and playback."""
    st.title("OpenSource Text-to-Speech App")
    st.write("Convert text to speech using various voices")

    # Get voices and process them
    voices = await list_voices()
    processed_voices = process_voices(voices)

    # With no voices the select boxes return None and the lookups below
    # would fail with KeyError, so stop early with a readable message.
    if not processed_voices:
        st.error("No voices are available.")
        return

    # Text-to-Speech
    st.header("Text-to-Speech")
    text_input = st.text_area("Enter text to convert to speech:")

    # Two-step voice selection
    col1, col2 = st.columns(2)
    with col1:
        selected_language = st.selectbox(
            "Select language:", list(processed_voices)
        )
    with col2:
        selected_speaker = st.selectbox(
            "Select speaker:", list(processed_voices[selected_language])
        )

    selected_voice = processed_voices[selected_language][selected_speaker]

    if st.button("Generate Speech"):
        # Whitespace-only input has nothing to synthesize either.
        if not text_input.strip():
            st.error("Please enter some text.")
        else:
            with st.spinner("Generating speech..."):
                output_file = await text_to_speech(text_input, selected_voice)
                # Load the audio into memory so the temporary file can be
                # deleted unconditionally, even if rendering fails.
                try:
                    with open(output_file, "rb") as audio_fh:
                        audio_bytes = audio_fh.read()
                finally:
                    os.unlink(output_file)  # Delete the temporary file
                st.audio(audio_bytes, format='audio/mp3')

    # List Available Voices
    st.header("Available Voices")
    for language, speakers in processed_voices.items():
        st.subheader(language)
        st.write(", ".join(speakers))


if __name__ == '__main__':
    asyncio.run(main())