Spaces:

Quantamhash
/

Quantum_STT-V1

Running on Zero

App Files Files Community

Quantum_STT-V1 / app.py

sbapan41

Update app.py

ee04627 verified about 18 hours ago

raw

history blame contribute delete

2.04 kB

	import gradio as gr
	from transformers import pipeline
	import os
	from pydub import AudioSegment
	import tempfile


	# Hugging Face Hub repository used for both the model and its tokenizer.
	model_id = "Quantamhash/Quantum_STT"

	# Options passed to the pipeline as ``generate_kwargs``: English language,
	# "transcribe" task.
	_GENERATE_KWARGS = dict(language="en", task="transcribe")

	# Build the speech-recognition pipeline once at import time; ``transcribe``
	# reuses this single module-level instance for every request.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=model_id,
	    tokenizer=model_id,
	    generate_kwargs=_GENERATE_KWARGS,
	)

	def convert_to_wav(input_path):
	    """Decode an audio file with pydub and re-export it as a temporary WAV.

	    Args:
	        input_path: Path of the audio file to convert; passed to
	            ``AudioSegment.from_file``.

	    Returns:
	        Path of a newly created ``.wav`` temporary file. The file is created
	        with ``delete=False``, so it is NOT removed automatically; the caller
	        is responsible for deleting it when done.

	    Raises:
	        Exception: Whatever ``AudioSegment.from_file`` or ``export`` raises.
	            If the export fails, the temporary file is removed before the
	            error propagates.
	    """
	    audio = AudioSegment.from_file(input_path)

	    # Reserve a unique path, then close our own handle before exporting so
	    # the file is not held open twice while pydub writes to it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
	        wav_path = temp_wav.name

	    try:
	        # export() hands back the open output file; close it explicitly
	        # instead of leaving it to the garbage collector.
	        audio.export(wav_path, format="wav").close()
	    except Exception:
	        # Do not leave an orphaned temp file behind on a failed export.
	        try:
	            os.remove(wav_path)
	        except OSError:
	            pass  # best-effort cleanup; the original error matters more
	        raise

	    return wav_path

	# File extensions accepted for upload. Kept in one place so the validation
	# check and the error message cannot drift apart.
	_SUPPORTED_EXTENSIONS = (
	    ".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma",
	    ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3",
	)


	# Transcription function with format check
	def transcribe(audio):
	    """Transcribe an uploaded audio file and return the text.

	    Args:
	        audio: Filesystem path of the uploaded file, or ``None`` when nothing
	            was uploaded.

	    Returns:
	        The transcription text on success; otherwise a human-readable message
	        (missing upload, unsupported extension, or a processing error).
	        Errors are reported through the return value rather than raised.
	    """
	    if audio is None:
	        return "Please upload an audio file."

	    # Validate by file extension (case-insensitive).
	    ext = os.path.splitext(audio)[1].lower()
	    if ext not in _SUPPORTED_EXTENSIONS:
	        listed = (
	            ", ".join(_SUPPORTED_EXTENSIONS[:-1])
	            + " or "
	            + _SUPPORTED_EXTENSIONS[-1]
	        )
	        return f"❌ Unsupported file format: {ext}. Please upload {listed} files."

	    converted_path = None
	    try:
	        # Convert inside the try block so decoding failures are reported to
	        # the user like any other processing error instead of escaping.
	        if ext != ".wav":
	            converted_path = convert_to_wav(audio)
	            audio = converted_path

	        result = pipe(audio)
	        return result["text"]
	    except ValueError as e:
	        return f"Error processing audio file: {str(e)}"
	    except Exception as e:
	        return f"An unexpected error occurred: {str(e)}"
	    finally:
	        # convert_to_wav creates its file with delete=False, so remove it
	        # here; otherwise every non-WAV upload leaks one temp file.
	        if converted_path is not None:
	            try:
	                os.remove(converted_path)
	            except OSError:
	                pass  # best-effort cleanup; never mask the real result


	# Gradio interface: one audio-upload input wired to ``transcribe``, with the
	# result shown in a text box.
	_audio_input = gr.Audio(
	    label="🎵 Upload Audio File",
	    sources=["upload"],  # file upload only, no microphone
	    type="filepath",  # the callback receives a path on disk
	)
	_transcript_output = gr.Textbox(label="📝 Transcription")

	interface = gr.Interface(
	    fn=transcribe,
	    inputs=_audio_input,
	    outputs=_transcript_output,
	    title="🎙️ Quantum Speech Recognizer",
	    description="Upload an audio file (.caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav, .mp3)<br>*to transcribe it using the Quantum_STT model*.",
	)

	# Start serving the app.
	interface.launch()