# edwko's picture
# Create app.py
# 0b3a1f3 verified
import gradio as gr
from outetts.v0_1.interface import InterfaceHF
# Build the OuteTTS interface once at import time; generate_tts reuses this
# shared instance for every request.
MODEL_ID = "OuteAI/OuteTTS-0.1-350M"
interface = InterfaceHF(MODEL_ID)
def generate_tts(text, temperature, repetition_penalty, reference_audio, reference_text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to synthesize; must contain a non-whitespace character.
        temperature: Sampling temperature forwarded to ``interface.generate``.
        repetition_penalty: Repetition penalty forwarded to
            ``interface.generate``.
        reference_audio: File path of a reference recording used for voice
            cloning, or a falsy value when none was supplied.
        reference_text: Transcription matching ``reference_audio``, or an
            empty value when none was supplied.

    Returns:
        Path of a freshly created ``.wav`` file holding the generated audio.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    import os
    import tempfile

    if not text or not text.strip():
        # Surface a readable message in the UI instead of sending an empty
        # prompt to the model.
        raise gr.Error("Please enter some text to synthesize.")

    # A whitespace-only transcription is as good as a missing one, so it must
    # not trigger voice cloning.
    transcript = reference_text.strip() if reference_text else ""
    if reference_audio and transcript:
        speaker = interface.create_speaker(reference_audio, transcript)
    else:
        speaker = None

    output = interface.generate(
        text=text,
        speaker=speaker,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )

    # Write to a unique file per call: a fixed "output.wav" would be
    # overwritten when several requests are processed at the same time.
    # The file is created only after generation succeeded, and it is left in
    # place because the caller still has to read it after this returns.
    fd, output_path = tempfile.mkstemp(prefix="outetts_", suffix=".wav")
    os.close(fd)
    output.save(output_path)
    return output_path
# Demo page: synthesis controls in the first column, generated audio in the
# second. The nesting below follows the order in which components are created.
with gr.Blocks() as demo:
    gr.Markdown("# OuteTTS-0.1-350M Text-to-Speech Demo")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Enter text here...",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.1,
                label="Temperature",
            )
            repetition_penalty = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.1,
                label="Repetition Penalty",
            )

            # Same text as a triple-quoted literal, spelled with explicit
            # newlines so the code indentation cannot leak into the string.
            gr.Markdown(
                "\n"
                "**Note**: For voice cloning, both a reference audio file and its corresponding transcription must be provided.\n"
                "If either the audio file or transcription is missing, the model will generate audio with random characteristics."
            )

            reference_audio = gr.Audio(
                label="Reference Audio (for voice cloning)",
                type="filepath",
            )
            reference_text = gr.Textbox(
                label="Reference Transcription Text (matching the audio)",
                placeholder="Enter reference text here if using voice cloning",
            )
            submit_button = gr.Button("Generate Speech")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")

    # Order must match the positional parameters of generate_tts.
    tts_inputs = [
        text_input,
        temperature,
        repetition_penalty,
        reference_audio,
        reference_text,
    ]
    submit_button.click(fn=generate_tts, inputs=tts_inputs, outputs=audio_output)

demo.launch()