Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,53 @@
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
|
|
|
|
3 |
|
4 |
-
|
|
|
5 |
|
6 |
def transcribe(audio):
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
iface = gr.Interface(
|
11 |
-
transcribe,
|
12 |
-
|
13 |
-
|
|
|
|
|
14 |
title="Neoform AI: Yoruba Speech Recognition",
|
15 |
-
description="Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
)
|
17 |
|
|
|
18 |
iface.launch()
|
|
|
from transformers import pipeline
import gradio as gr
import numpy as np
import librosa

# Build the ASR pipeline once at import time so every request reuses the
# same loaded model (a fine-tuned Wav2Vec-BERT for Yoruba, Common Voice 17.0).
pipe = pipeline("automatic-speech-recognition", model="oyemade/w2v-bert-2.0-yoruba-CV17.0")
def transcribe(audio):
    """Transcribe Yoruba speech to text with the module-level ASR pipeline.

    Parameters
    ----------
    audio : str | tuple | None
        Either a filesystem path to an uploaded audio file, a
        ``(sample_rate, samples)`` tuple (raw microphone data), or
        ``None`` when no audio was captured.

    Returns
    -------
    str
        The transcription, or a human-readable error message — errors are
        returned (not raised) so they show up in the Gradio output box.
    """
    if audio is None:
        return "No audio detected. Please try again."

    try:
        # Check if the input is a file path (for uploaded files) or numpy array (for microphone input)
        if isinstance(audio, str):
            # Load the audio file, resampling to the 16 kHz rate the model expects.
            audio, sr = librosa.load(audio, sr=16000)
        elif isinstance(audio, tuple):  # Gradio audio components return a tuple (sr, audio)
            sr, audio = audio
            if sr != 16000:
                # BUG FIX: librosa.resample is keyword-only for the rates since
                # librosa 0.10 — the old positional call raised a TypeError.
                audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
        else:
            return "Invalid audio format. Please try again."

        # Reject near-silent clips before running the (expensive) model.
        if np.max(np.abs(audio)) < 0.01:
            return "Audio is too quiet. Please speak louder or choose a different file and try again."

        text = pipe(audio)["text"]
        return text
    except Exception as e:
        # Surface the failure to the UI instead of crashing the worker.
        return f"An error occurred: {str(e)}"
# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # type="filepath" means transcribe() receives a path string for both
        # microphone recordings and uploaded files.
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio Input"),
    ],
    outputs="text",
    title="Neoform AI: Yoruba Speech Recognition",
    description="Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model. "
                "You can either use your microphone or upload an MP3 file. "
                "https://neoformai.com",
    # BUG FIX: the previous examples=[["path/to/example1.mp3"], ...] were
    # nonexistent placeholder paths; combined with cache_examples=True, Gradio
    # runs every example through transcribe() at startup to cache its output,
    # so launch failed on the missing files. Re-add once real clips ship:
    #   examples=[["examples/sample1.mp3"]], cache_examples=True,
)

# Launch the interface
iface.launch()