oyemade committed on
Commit
344e698
·
verified ·
1 Parent(s): cbbfba9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -7
app.py CHANGED
@@ -1,18 +1,53 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
 
3
 
4
- pipe = pipeline("automatic-speech-recognition", model="oyemade/w2v-bert-2.0-yoruba-CV17.0")
 
5
 
6
  def transcribe(audio):
7
- text = pipe(audio)["text"]
8
- return text
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
10
  iface = gr.Interface(
11
- transcribe,
12
- gr.Audio(sources="microphone", type="filepath"),
13
- "text",
 
 
14
  title="Neoform AI: Yoruba Speech Recognition",
15
- description="Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model. https://neoformai.com. NOTE: If you get an error after pressing submit, give the audio some secs to load then try again.",
 
 
 
 
 
 
 
16
  )
17
 
 
18
  iface.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import numpy as np
4
+ import librosa
5
 
# Load the fine-tuned Yoruba speech-recognition model once, at import time,
# so every request reuses the same pipeline instance.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="oyemade/w2v-bert-2.0-yoruba-CV17.0",
)
8
 
# Sampling rate (Hz) that audio is converted to before it is given to the model.
_TARGET_SAMPLE_RATE = 16000
# Peak amplitude (full scale = 1.0) below which a clip is treated as silent.
_SILENCE_THRESHOLD = 0.01


def _as_mono_float(samples):
    """Return *samples* as a 1-D float32 array on a [-1.0, 1.0] scale."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM (Gradio's numpy audio is typically int16) must be scaled
        # to full scale 1.0, otherwise the silence threshold is meaningless.
        full_scale = np.iinfo(samples.dtype).max
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Multi-channel audio is laid out as (samples, channels); down-mix so
        # resampling and the model both see a single 1-D signal.
        samples = samples.mean(axis=1)
    return samples


def transcribe(audio):
    """Transcribe a Yoruba speech clip to text.

    Args:
        audio: Either a path to an audio file (``str``) or a
            ``(sample_rate, samples)`` tuple holding raw samples. ``None``
            means nothing was recorded or uploaded.

    Returns:
        The transcribed text, or a human-readable message explaining why no
        transcription could be produced. Errors are reported as text rather
        than raised so the UI always has something to display.
    """
    if audio is None:
        return "No audio detected. Please try again."

    try:
        if isinstance(audio, str):
            # librosa decodes the file, down-mixes to mono and resamples.
            samples, _ = librosa.load(audio, sr=_TARGET_SAMPLE_RATE)
        elif isinstance(audio, tuple):
            sample_rate, samples = audio
            samples = _as_mono_float(samples)
            if sample_rate != _TARGET_SAMPLE_RATE:
                # The rates are keyword-only in librosa >= 0.10; passing them
                # positionally raises a TypeError there.
                samples = librosa.resample(
                    samples,
                    orig_sr=sample_rate,
                    target_sr=_TARGET_SAMPLE_RATE,
                )
        else:
            return "Invalid audio format. Please try again."

        if samples.size == 0:
            # np.max() raises on an empty array; report it as "no audio".
            return "No audio detected. Please try again."

        if np.max(np.abs(samples)) < _SILENCE_THRESHOLD:
            return "Audio is too quiet. Please speak louder or choose a different file and try again."

        return pipe(samples)["text"]
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user as text.
        return f"An error occurred: {str(e)}"
33
+
# Create the Gradio interface
def _available_examples(paths):
    """Return Gradio example rows for those *paths* that exist on disk."""
    # Function-scope import keeps this section self-contained.
    from pathlib import Path

    return [[path] for path in paths if Path(path).is_file()]


# Example clips are optional. Missing files are dropped because, with example
# caching enabled, Gradio processes every listed example at startup, so a
# placeholder path would break or pollute the launch.
_examples = _available_examples(
    [
        "path/to/example1.mp3",
        "path/to/example2.mp3",
    ]
)

iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio Input"),
    ],
    outputs="text",
    title="Neoform AI: Yoruba Speech Recognition",
    description=(
        "Realtime demo for Yoruba speech recognition using a fine-tuned Wav2Vec-Bert model. "
        "You can either use your microphone or upload an MP3 file. "
        "https://neoformai.com"
    ),
    # Show and cache examples only when at least one example file is present.
    examples=_examples or None,
    cache_examples=bool(_examples),
)

# Launch the interface
iface.launch()