"""Gradio demo that uploads an audio file to a remote ASR API and shows the result.

Configuration is read from the environment:

* ``ASR_API_URL`` - endpoint that receives the multipart upload.
* ``AUTH_TOKEN``  - bearer token sent in the ``Authorization`` header.
"""

import os
import time

import gradio as gr
import requests

ASR_API_URL = os.getenv('ASR_API_URL')
AUTH_TOKEN = os.getenv('AUTH_TOKEN')

# Upper bound (seconds) for the API call. Without a timeout ``requests`` waits
# forever, so a stalled backend would block the UI worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 120

# Page styling. Kept as adjacent literals so the resulting string is unchanged.
# NOTE(review): ``#gooya-title`` and ``#gooya-box`` are styled here, but no
# element carrying those ids is visible in this file - confirm that the header
# markup below was not lost.
CUSTOM_CSS = (
    " #gooya-title {color:white;"
    " background: linear-gradient(90deg, #224CA5 0%, #2CD8D5 100%);"
    " border-radius: 12px; padding:20px 10px;margin-bottom:12px;}"
    " .gooya-badge {display:inline-block; background:#224CA5; color:#fff;"
    " border-radius:16px; padding:6px 16px; font-size:0.97rem; margin-top:4px;}"
    " #gooya-box {background:#F7FAFF; border:1px solid #e7e9ef;"
    " border-radius:14px; padding:22px 18px; margin-top:12px;} "
)

# Header shown at the top of the page.
HEADER_HTML = """

Gooya ASR v1.4

High-performance Persian Speech-to-Text

Upload or record a Persian audio file (max 30s) and instantly receive the transcription.

"""

# Help text shown below the buttons.
# NOTE(review): the bullets sit on a single line in this copy; if the original
# had one bullet per line, restore the line breaks so Markdown renders a list.
INSTRUCTIONS_MD = (
    " **Instructions:**"
    " - Maximum audio length: **30 seconds**"
    " - Input audio should be in Persian."
    " - The transcription and processing time will be displayed instantly."
    " For performance benchmarks, visit:"
    " [Persian ASR Leaderboard]"
    "(https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) "
)


def _format_processing_time(reported, measured):
    """Format a duration as ``"<x.xx> seconds"``.

    Uses the API-reported value when it can be read as a number and falls back
    to the locally measured round-trip time otherwise (missing, ``None`` or
    non-numeric), so a malformed ``time`` field cannot crash the handler.
    """
    try:
        seconds = float(reported)
    except (TypeError, ValueError):
        seconds = measured
    return f"{seconds:.2f} seconds"


def _clear_outputs():
    """Return one reset value per cleared component: textbox, label, audio."""
    return "", "", None


def transcribe_audio(file_path):
    """Send the audio file at ``file_path`` to the ASR API.

    Args:
        file_path: Path of the audio file handed over by the ``gr.Audio``
            component; falsy when the user submitted without any audio.

    Returns:
        A ``(transcription, processing_time)`` tuple of strings. On any
        failure the first item is an error message prefixed with
        ``"❌ Error:"`` and the second item is an empty string.
    """
    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
    if not file_path:
        # Nothing was uploaded or recorded; opening ``None`` would raise.
        return "❌ Error: No audio file provided.", ""

    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {AUTH_TOKEN}',
    }

    try:
        # The context manager closes the upload handle even if the request fails.
        with open(file_path, 'rb') as audio_file:
            # NOTE(review): the content type is always sent as 'audio/mpeg',
            # whatever the actual format of the file - confirm the API accepts it.
            files = {
                'file': (file_path, audio_file, 'audio/mpeg'),
            }
            start_time = time.perf_counter()
            response = requests.post(
                ASR_API_URL,
                headers=headers,
                files=files,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
    except Exception as e:
        # UI boundary: report any I/O or network failure instead of raising.
        return f"❌ Error: {str(e)}", ""
    elapsed = time.perf_counter() - start_time

    if response.status_code != 200:
        return f"❌ Error: {response.status_code}, {response.text}", ""

    try:
        res = response.json()
    except ValueError:
        # ``Response.json`` raises a ValueError subclass on an invalid body.
        return "❌ Error: The API returned a response that is not valid JSON.", ""
    if not isinstance(res, dict):
        return "❌ Error: The API returned an unexpected JSON payload.", ""

    transcription = res.get("transcription", "No transcription returned.")
    return transcription, _format_processing_time(res.get('time'), elapsed)


# NOTE(review): the layout nesting below was reconstructed from a copy whose
# indentation was lost - confirm it matches the intended page structure.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        with gr.Column():
            audio = gr.Audio(
                label="Audio Input (Upload or record, up to 30s)",
                type="filepath",
                show_label=True,
                sources=["upload", "microphone"],
            )
        with gr.Column():
            inference_time = gr.Label(
                label="⏱️ Processing Time",
                elem_classes="gooya-badge",
            )
            transcription = gr.Textbox(
                label="📝 Transcription",
                lines=5,
                show_copy_button=True,
                placeholder="The transcription will appear here...",
                elem_id="gooya-textbox",
            )

    with gr.Row():
        submit_btn = gr.Button("Transcribe", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    gr.Markdown(INSTRUCTIONS_MD)

    submit_btn.click(
        transcribe_audio,
        inputs=audio,
        outputs=[transcription, inference_time],
    )
    # Three components are reset, so the callback must return three values.
    clear_btn.click(
        _clear_outputs,
        None,
        [transcription, inference_time, audio],
    )

demo.launch(share=True)