Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,53 @@
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
|
|
|
|
3 |
|
4 |
-
|
|
|
5 |
|
6 |
def transcribe(audio):
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
iface = gr.Interface(
|
11 |
-
transcribe,
|
12 |
-
|
13 |
-
|
|
|
|
|
14 |
title="Neoform AI: Yoruba Speech Recognition",
|
15 |
-
description="Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
)
|
17 |
|
|
|
18 |
iface.launch()
|
|
|
from transformers import pipeline
import gradio as gr
import numpy as np
import librosa

# Build the ASR pipeline once at import time so every request reuses the
# same loaded model (a fine-tuned Wav2Vec-BERT for Yoruba, Common Voice 17.0).
pipe = pipeline("automatic-speech-recognition", model="oyemade/w2v-bert-2.0-yoruba-CV17.0")
def transcribe(audio):
    """Transcribe Yoruba speech to text with the module-level ASR pipeline.

    Parameters
    ----------
    audio : str | tuple | None
        Either a filesystem path to an uploaded audio file, a
        ``(sample_rate, samples)`` tuple (raw microphone data), or
        ``None`` when no audio was captured.

    Returns
    -------
    str
        The transcription, or a human-readable error message — errors are
        returned (not raised) so they show up in the Gradio output box.
    """
    if audio is None:
        return "No audio detected. Please try again."

    try:
        # Check if the input is a file path (for uploaded files) or numpy array (for microphone input)
        if isinstance(audio, str):
            # Load the audio file, resampling to the 16 kHz rate the model expects.
            audio, sr = librosa.load(audio, sr=16000)
        elif isinstance(audio, tuple):  # Gradio audio components return a tuple (sr, audio)
            sr, audio = audio
            if sr != 16000:
                # BUG FIX: librosa.resample is keyword-only for the rates since
                # librosa 0.10 — the old positional call raised a TypeError.
                audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
        else:
            return "Invalid audio format. Please try again."

        # Reject near-silent clips before running the (expensive) model.
        if np.max(np.abs(audio)) < 0.01:
            return "Audio is too quiet. Please speak louder or choose a different file and try again."

        text = pipe(audio)["text"]
        return text
    except Exception as e:
        # Surface the failure to the UI instead of crashing the worker.
        return f"An error occurred: {str(e)}"
# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # type="filepath" means transcribe() receives a path string for both
        # microphone recordings and uploaded files.
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio Input"),
    ],
    outputs="text",
    title="Neoform AI: Yoruba Speech Recognition",
    description="Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model. "
                "You can either use your microphone or upload an MP3 file. "
                "https://neoformai.com",
    # BUG FIX: the previous examples=[["path/to/example1.mp3"], ...] were
    # nonexistent placeholder paths; combined with cache_examples=True, Gradio
    # runs every example through transcribe() at startup to cache its output,
    # so launch failed on the missing files. Re-add once real clips ship:
    #   examples=[["examples/sample1.mp3"]], cache_examples=True,
)

# Launch the interface
iface.launch()