Spaces:

bektim
/

kzs2t

Runtime error

App Files Files Community

bektim committed on Jan 16

Commit

a585ac9

verified ·

1 Parent(s): a921ceb

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -29

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import gradio as gr
 import requests
 import os
-import time
 from tempfile import NamedTemporaryFile
 # Get API token from environment variable
@@ -9,20 +10,25 @@ API_TOKEN = os.environ.get("HF_API_TOKEN")  # Use your token here
 API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large"
 headers = {"Authorization": f"Bearer {API_TOKEN}"}
 def query(audio_input):
     try:
-        # Debug: Print the type and content of audio_input
-        print(f"Audio input type: {type(audio_input)}")
-        print(f"Audio input content: {audio_input}")
         # Check if input is None (no audio provided)
         if audio_input is None:
             return "Please provide an audio file or record from the microphone."
-        # Handle file upload (returns a file path)
-        if isinstance(audio_input, str):
-            audio_path = audio_input
-            print(f"Uploaded file path: {audio_path}")
         else:
             return "Invalid input. Please provide an audio file or record from the microphone."
@@ -31,36 +37,33 @@ def query(audio_input):
             data = f.read()
         # Send the request to the Inference API
-        max_retries = 5
-        retry_delay = 30  # Wait 30 seconds between retries
-        for attempt in range(max_retries):
-            response = requests.post(API_URL, headers=headers, data=data)
-            # Check for errors
-            if response.status_code == 200:
-                # Return the transcription
-                return response.json().get("text", "No transcription found in response.")
-            elif response.status_code == 503:  # Model is loading
-                print(f"Model is loading. Attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-            else:
-                return f"Error: {response.status_code}, {response.text}"
-        return "Model is still loading. Please try again later."
     except Exception as e:
         return f"Error during API request: {str(e)}"
 # Gradio interface
 interface = gr.Interface(
     fn=query,
     inputs=gr.Audio(
-        label="Upload Audio or Record from Microphone",
-        sources=["microphone", "upload"],
-        type="filepath"  # Use "filepath" to ensure compatibility
     ),
     outputs=gr.Textbox(label="Transcription"),
-    title="Whisper Speech-to-Text (Inference API)",
-    description="Upload audio or use microphone to transcribe speech using Hugging Face's Inference API.",
     examples=None,
     cache_examples=False
 )

 import gradio as gr
 import requests
 import os
+import numpy as np
+import soundfile as sf
 from tempfile import NamedTemporaryFile
 # Get API token from environment variable
 API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large"
 headers = {"Authorization": f"Bearer {API_TOKEN}"}
+def save_audio_to_tempfile(audio_data, sample_rate):
+    """Save raw audio data to a temporary WAV file and return its path.
+
+    The file is created with ``delete=False``, so the caller is responsible
+    for removing it once it is no longer needed.
+
+    Args:
+        audio_data: Audio samples, passed straight through to ``sf.write``.
+        sample_rate: Sample rate of ``audio_data``, passed to ``sf.write``.
+
+    Returns:
+        Path of the WAV file that was written.
+
+    Raises:
+        Exception: Whatever ``sf.write`` raises; the temporary file is
+            removed before the error propagates.
+    """
+    # The handle is only used to reserve a unique ".wav" name; the audio is
+    # written by path once the handle has been closed.
+    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        temp_path = temp_file.name
+    try:
+        sf.write(temp_path, audio_data, sample_rate)
+    except Exception:
+        # The caller never receives the path when writing fails, so clean up
+        # here instead of leaking the file on disk.
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+        raise
+    return temp_path
 def query(audio_input):
     try:
         # Check if input is None (no audio provided)
         if audio_input is None:
             return "Please provide an audio file or record from the microphone."
+        # Handle microphone input (returns a tuple: (sample_rate, audio_data))
+        if isinstance(audio_input, tuple):
+            sample_rate, audio_data = audio_input
+            print(f"Sample rate: {sample_rate}")
+            print(f"Audio data shape: {audio_data.shape}")
+            audio_path = save_audio_to_tempfile(audio_data, sample_rate)
+            print(f"Temporary file saved at: {audio_path}")
         else:
             return "Invalid input. Please provide an audio file or record from the microphone."
             data = f.read()
         # Send the request to the Inference API
+        response = requests.post(API_URL, headers=headers, data=data)
+        # Check for errors
+        if response.status_code != 200:
+            return f"Error: {response.status_code}, {response.text}"
+        # Return the transcription
+        return response.json().get("text", "No transcription found in response.")
     except Exception as e:
         return f"Error during API request: {str(e)}"
+    finally:
+        # Clean up the temporary file
+        if "audio_path" in locals() and os.path.exists(audio_path):
+            os.remove(audio_path)
+            print(f"Temporary file deleted: {audio_path}")
 # Gradio interface
 interface = gr.Interface(
     fn=query,
     inputs=gr.Audio(
+        label="Record from Microphone",
+        sources=["microphone"],  # Only microphone input
+        type="numpy"  # Get audio as a NumPy array
     ),
     outputs=gr.Textbox(label="Transcription"),
+    title="Whisper Speech-to-Text (Microphone Only)",
+    description="Record audio from your microphone to transcribe speech using Hugging Face's Inference API.",
     examples=None,
     cache_examples=False
 )