bektim committed on
Commit
a585ac9
·
verified ·
1 Parent(s): a921ceb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -29
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  import requests
3
  import os
4
- import time
 
5
  from tempfile import NamedTemporaryFile
6
 
7
  # Get API token from environment variable
@@ -9,20 +10,25 @@ API_TOKEN = os.environ.get("HF_API_TOKEN") # Use your token here
9
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large"
10
  headers = {"Authorization": f"Bearer {API_TOKEN}"}
11
 
 
 
 
 
 
 
12
  def query(audio_input):
13
  try:
14
- # Debug: Print the type and content of audio_input
15
- print(f"Audio input type: {type(audio_input)}")
16
- print(f"Audio input content: {audio_input}")
17
-
18
  # Check if input is None (no audio provided)
19
  if audio_input is None:
20
  return "Please provide an audio file or record from the microphone."
21
 
22
- # Handle file upload (returns a file path)
23
- if isinstance(audio_input, str):
24
- audio_path = audio_input
25
- print(f"Uploaded file path: {audio_path}")
 
 
 
26
  else:
27
  return "Invalid input. Please provide an audio file or record from the microphone."
28
 
@@ -31,36 +37,33 @@ def query(audio_input):
31
  data = f.read()
32
 
33
  # Send the request to the Inference API
34
- max_retries = 5
35
- retry_delay = 30 # Wait 30 seconds between retries
36
- for attempt in range(max_retries):
37
- response = requests.post(API_URL, headers=headers, data=data)
38
-
39
- # Check for errors
40
- if response.status_code == 200:
41
- # Return the transcription
42
- return response.json().get("text", "No transcription found in response.")
43
- elif response.status_code == 503: # Model is loading
44
- print(f"Model is loading. Attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay} seconds...")
45
- time.sleep(retry_delay)
46
- else:
47
- return f"Error: {response.status_code}, {response.text}"
48
 
49
- return "Model is still loading. Please try again later."
 
50
  except Exception as e:
51
  return f"Error during API request: {str(e)}"
 
 
 
 
 
52
 
53
  # Gradio interface
54
  interface = gr.Interface(
55
  fn=query,
56
  inputs=gr.Audio(
57
- label="Upload Audio or Record from Microphone",
58
- sources=["microphone", "upload"],
59
- type="filepath" # Use "filepath" to ensure compatibility
60
  ),
61
  outputs=gr.Textbox(label="Transcription"),
62
- title="Whisper Speech-to-Text (Inference API)",
63
- description="Upload audio or use microphone to transcribe speech using Hugging Face's Inference API.",
64
  examples=None,
65
  cache_examples=False
66
  )
 
1
  import gradio as gr
2
  import requests
3
  import os
4
+ import numpy as np
5
+ import soundfile as sf
6
  from tempfile import NamedTemporaryFile
7
 
8
  # Get API token from environment variable
 
10
  API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large"
11
  headers = {"Authorization": f"Bearer {API_TOKEN}"}
12
 
def save_audio_to_tempfile(audio_data, sample_rate):
    """Save raw audio data to a temporary WAV file and return its path.

    Args:
        audio_data: Audio samples, passed straight through to ``sf.write``.
        sample_rate: Sample rate of ``audio_data``, passed to ``sf.write``.

    Returns:
        The path of the newly written ``.wav`` file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``sf.write`` raises is propagated unchanged,
            after the partially created temporary file has been removed.
    """
    # Only reserve a unique path here and close the handle right away:
    # writing to the path while the handle is still open fails on platforms
    # that do not allow a named temporary file to be opened a second time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name

    try:
        sf.write(temp_path, audio_data, sample_rate)
    except Exception:
        # delete=False means nothing else will clean this file up, so do not
        # leave an empty or partial file behind when the write fails.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    return temp_path
18
+
19
  def query(audio_input):
20
  try:
 
 
 
 
21
  # Check if input is None (no audio provided)
22
  if audio_input is None:
23
  return "Please provide an audio file or record from the microphone."
24
 
25
+ # Handle microphone input (returns a tuple: (sample_rate, audio_data))
26
+ if isinstance(audio_input, tuple):
27
+ sample_rate, audio_data = audio_input
28
+ print(f"Sample rate: {sample_rate}")
29
+ print(f"Audio data shape: {audio_data.shape}")
30
+ audio_path = save_audio_to_tempfile(audio_data, sample_rate)
31
+ print(f"Temporary file saved at: {audio_path}")
32
  else:
33
  return "Invalid input. Please provide an audio file or record from the microphone."
34
 
 
37
  data = f.read()
38
 
39
  # Send the request to the Inference API
40
+ response = requests.post(API_URL, headers=headers, data=data)
41
+
42
+ # Check for errors
43
+ if response.status_code != 200:
44
+ return f"Error: {response.status_code}, {response.text}"
 
 
 
 
 
 
 
 
 
45
 
46
+ # Return the transcription
47
+ return response.json().get("text", "No transcription found in response.")
48
  except Exception as e:
49
  return f"Error during API request: {str(e)}"
50
+ finally:
51
+ # Clean up the temporary file
52
+ if "audio_path" in locals() and os.path.exists(audio_path):
53
+ os.remove(audio_path)
54
+ print(f"Temporary file deleted: {audio_path}")
55
 
56
  # Gradio interface
# Single-input, single-output UI: a microphone recorder feeding `query`,
# whose returned string is shown in a textbox.
interface = gr.Interface(
    fn=query,
    inputs=gr.Audio(
        label="Record from Microphone",
        sources=["microphone"],  # microphone is the only source offered
        type="numpy",  # deliver the recording as NumPy data, not a file path
    ),
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper Speech-to-Text (Microphone Only)",
    description="Record audio from your microphone to transcribe speech using Hugging Face's Inference API.",
    examples=None,
    cache_examples=False,
)