# Spaces: Running
import streamlit as st | |
import edge_tts | |
import asyncio | |
import tempfile | |
import os | |
from typing import Dict | |
from collections import defaultdict | |
async def text_to_speech(text: str, voice: str) -> str:
    """Synthesize *text* with the given edge_tts *voice* into a temporary MP3.

    Args:
        text: The text to convert to speech.
        voice: The edge_tts voice identifier passed to ``edge_tts.Communicate``.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; in that case the
        temporary file is removed before the exception propagates.
    """
    # Only the path is needed: close our own handle immediately so the
    # descriptor is not leaked and the file is not held open while edge_tts
    # writes to the same path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as output_file:
        output_path = output_file.name

    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_path)
    except BaseException:
        # Includes task cancellation: never leave an orphaned temp file.
        os.unlink(output_path)
        raise
    return output_path
async def list_voices() -> Dict[str, Dict]:
    """Return the edge_tts voice list keyed by each voice's ``ShortName``.

    Each value is a dict with two entries: ``'name'`` (the short name again)
    and ``'language'`` (the voice's ``Locale`` field).
    """
    catalogue: Dict[str, Dict] = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        catalogue[short_name] = {'name': short_name, 'language': entry['Locale']}
    return catalogue
def process_voices(voices: Dict[str, Dict]) -> Dict[str, Dict[str, str]]:
    """Group voices by language for the two-step language/speaker picker.

    Args:
        voices: Mapping of full voice name to a details dict that contains at
            least a ``'language'`` key.

    Returns:
        ``{language: {speaker_name: full_voice_name}}`` where ``speaker_name``
        is the last dash-separated segment of the full name with ``'Neural'``
        removed.
    """
    processed_voices: Dict[str, Dict[str, str]] = defaultdict(dict)
    for full_name, details in voices.items():
        language = details['language']
        # Take the LAST segment rather than a fixed index: names with extra
        # locale parts (e.g. "zh-CN-liaoning-XiaobeiNeural") would otherwise
        # yield the region instead of the speaker, and names with fewer than
        # three parts would raise IndexError.
        speaker_name = full_name.rsplit('-', 1)[-1].replace('Neural', '')
        processed_voices[language][speaker_name] = full_name
    return dict(processed_voices)
async def main():
    """Render the Streamlit page: pick a voice, enter text, play the result.

    Loads the voice list, offers a language selector followed by a speaker
    selector for that language, and on "Generate Speech" synthesizes the text
    to a temporary MP3, hands it to ``st.audio`` and then deletes the file.
    """
    st.title("OpenSpeech TTS")
    st.write("An OpenAI compatible API to reproduce high fidelity speech fast, in minimal hardware")
    st.write("Official Repo: https://github.com/PantelisDeveloping/openspeech-tts/tree/main")

    # Get voices and process them
    voices = await list_voices()
    processed_voices = process_voices(voices)
    if not processed_voices:
        # With no options the language selectbox yields None, and the speaker
        # lookup below would fail with a KeyError.
        st.error("No voices are available.")
        return

    # Text-to-Speech
    st.header("Text-to-Speech")
    text_input = st.text_area("Enter text to convert to speech:")

    # Two-step voice selection
    col1, col2 = st.columns(2)
    with col1:
        selected_language = st.selectbox("Select language:", list(processed_voices))
    with col2:
        selected_speaker = st.selectbox(
            "Select speaker:", list(processed_voices[selected_language])
        )
    selected_voice = processed_voices[selected_language][selected_speaker]

    if st.button("Generate Speech"):
        # Treat whitespace-only input like empty input instead of sending it
        # to the synthesizer.
        if not (text_input and text_input.strip()):
            st.error("Please enter some text.")
        else:
            with st.spinner("Generating speech..."):
                output_file = await text_to_speech(text_input, selected_voice)
            try:
                st.audio(output_file, format='audio/mp3')
            finally:
                # Delete the temporary file even if rendering the player fails.
                os.unlink(output_file)
# List Available Voices | |
# st.header("Available Voices") | |
# for language, speakers in processed_voices.items(): | |
# st.subheader(language) | |
# st.write(", ".join(speakers.keys())) | |
if __name__ == "__main__":
    # Drive the async page-rendering coroutine to completion.
    asyncio.run(main())