Spaces:
Running
on
Zero
Running
on
Zero
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Hugging Face model ID of the speech-to-text checkpoint; it is used for both
# the model and the tokenizer below.
model_id = "Quantamhash/Quantum_STT"

# Build the speech recognition pipeline once at import time so that every
# call to transcribe() reuses the same loaded pipeline object.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    tokenizer=model_id,
    # Generation options forwarded by the pipeline: transcribe, in English.
    generate_kwargs={"task": "transcribe", "language": "en"},
)
def convert_to_wav(input_path):
    """Convert the audio file at *input_path* to a temporary WAV file.

    The input is loaded with ``AudioSegment.from_file`` and exported with
    ``format="wav"`` to a freshly created temporary file whose name ends in
    ``.wav``.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The path of the temporary WAV file. The file is created with
        ``delete=False``, so it is NOT removed automatically; the caller is
        responsible for deleting it once it is no longer needed.

    Raises:
        Whatever ``AudioSegment.from_file`` or ``AudioSegment.export`` raise.
        If the export fails, the temporary file is removed before the
        exception propagates.
    """
    audio = AudioSegment.from_file(input_path)

    # Reserve a unique path, then close our own handle before exporting so
    # that the exporter is the only writer of the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
        wav_path = temp_wav.name

    try:
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave a stray (empty or partial) file behind on failure.
        try:
            os.remove(wav_path)
        except OSError:
            pass  # best effort; the export error is the one worth reporting
        raise

    return wav_path
# File extensions accepted by transcribe(). The order is significant: it is
# the order in which the formats are listed in the "unsupported" message.
_SUPPORTED_EXTENSIONS = (
    ".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma",
    ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3",
)
_SUPPORTED_EXTENSIONS_TEXT = (
    ", ".join(_SUPPORTED_EXTENSIONS[:-1]) + " or " + _SUPPORTED_EXTENSIONS[-1]
)


# Transcription function with format check
def transcribe(audio):
    """Transcribe an uploaded audio file and return the recognized text.

    Args:
        audio: Path of the audio file, or ``None`` when nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result on success. Otherwise a
        human-readable message: a prompt to upload a file, an
        unsupported-format notice, or an error description. This function
        reports failures through its return value rather than by raising.
    """
    if audio is None:
        return "Please upload an audio file."

    # Validate the file extension (case-insensitively) before doing any work.
    ext = os.path.splitext(audio)[1].lower()
    if ext not in _SUPPORTED_EXTENSIONS:
        return (
            f"❌ Unsupported file format: {ext}. "
            f"Please upload {_SUPPORTED_EXTENSIONS_TEXT} files."
        )

    converted_path = None  # set only when a temporary WAV file was created
    try:
        # Convert to .wav if necessary. This happens inside the try block so
        # that decoding failures are reported like any other error.
        if ext != ".wav":
            converted_path = convert_to_wav(audio)
            audio = converted_path
        result = pipe(audio)
        return result["text"]
    except ValueError as e:
        return f"Error processing audio file: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
    finally:
        # convert_to_wav() leaves its output on disk; remove it so repeated
        # uploads do not accumulate temporary files.
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                pass  # best effort: a cleanup failure must not mask the result
# Gradio interface: a single audio-upload input wired to transcribe(), with
# the returned text shown in a text box.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        type="filepath",     # pass the uploaded audio to fn as a file path
        sources=["upload"],  # restrict to file upload (not mic)
        label="🎵 Upload Audio File",
    ),
    outputs=gr.Textbox(label="📝 Transcription"),
    title="🎙️ Quantum Speech Recognizer",
    description=(
        "Upload an audio file (.caf, .au, .opus, .amr, .alac, .aiff, .wma, "
        ".m4a, .ogg, .aac, .flac, .wav, .mp3)"
        "<br>***to transcribe it using the Quantum_STT model***."
    ),
)

# Launch the interface
interface.launch()