# Spaces: Runtime error
# (The "Runtime error" status line appeared twice in the original page header.)
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import whisper
from transformers import AutoModel
# Text-to-speech: the IndicF5 checkpoint, loaded once at import time.
# NOTE(review): trust_remote_code=True allows transformers to execute model
# code shipped in that Hub repository — confirm the repository is trusted.
_TTS_MODEL_ID = "ai4bharat/IndicF5"
tts_model = AutoModel.from_pretrained(_TTS_MODEL_ID, trust_remote_code=True)

# Speech recognition: the Whisper "medium" checkpoint, also loaded once.
_ASR_MODEL_NAME = "medium"
asr_model = whisper.load_model(_ASR_MODEL_NAME)
# Sample rate used when writing the synthesized waveform, and the language
# code handed to Whisper when transcribing it.
_TTS_SAMPLE_RATE = 24000
_ASR_LANGUAGE = "ta"


def _materialize_reference_audio(ref_audio):
    """Return ``(path, is_temporary)`` for the reference audio input.

    A path (``str`` or ``os.PathLike``) is returned unchanged. Any other
    object is treated as a readable binary file: its contents are copied to a
    new ``.wav`` temporary file and ``is_temporary`` is ``True`` so the caller
    can delete that copy afterwards.
    """
    if isinstance(ref_audio, (str, os.PathLike)):
        return os.fspath(ref_audio), False

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(ref_audio.read())
    return tmp.name, True


def generate_tts_and_transcribe(text, ref_audio, ref_text):
    """Synthesize ``text`` with the TTS model and transcribe the result.

    Args:
        text: Text to synthesize.
        ref_audio: Reference audio, either a filesystem path (what the
            interface below passes with ``type="filepath"``) or a readable
            binary file object.
        ref_text: Transcript of the reference audio, forwarded to the TTS
            model unchanged.

    Returns:
        A ``(tts_path, transcript)`` tuple: the path of the generated ``.wav``
        file and the ``"text"`` field of the Whisper transcription result.

    Raises:
        gr.Error: If no reference audio or no text was supplied.
    """
    if ref_audio is None:
        raise gr.Error("Please provide a reference audio clip.")
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")

    ref_audio_path, is_temporary = _materialize_reference_audio(ref_audio)
    try:
        audio = tts_model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)
    finally:
        # Only remove the copy this function created; never the caller's file.
        # NOTE(review): assumes the model has finished reading the reference
        # file once the call returns — confirm.
        if is_temporary:
            try:
                os.remove(ref_audio_path)
            except OSError:
                pass  # Best-effort cleanup; a leftover temp file is harmless.

    # Scale 16-bit PCM samples into the [-1.0, 1.0) float range.
    if audio.dtype == np.int16:
        audio = audio.astype(np.float32) / 32768.0

    # Reserve a path and close the handle right away so it is not left open;
    # soundfile reopens the path itself. The file is kept (delete=False)
    # because its path is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        tts_path = out_file.name
    sf.write(
        tts_path,
        np.array(audio, dtype=np.float32),
        samplerate=_TTS_SAMPLE_RATE,
    )

    asr_result = asr_model.transcribe(tts_path, language=_ASR_LANGUAGE)
    return tts_path, asr_result["text"]


# Gradio Interface
demo = gr.Interface(
    fn=generate_tts_and_transcribe,
    inputs=[
        gr.Textbox(label="Text to Synthesize (Tamil)"),
        # "filepath" hands the function a path string; "file" is not one of
        # the values gr.Audio documents ("numpy" / "filepath").
        gr.Audio(label="Reference Audio (.wav)", type="filepath"),
        gr.Textbox(label="Reference Text (Tamil)"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Textbox(label="ASR Transcription (Whisper)"),
    ],
    title="IndicF5 Tamil TTS + Whisper ASR",
    description="Give a reference audio and text, synthesize Tamil speech using IndicF5, and transcribe it with Whisper.",
)
# Launch the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()