Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import numpy as np
|
4 |
-
from pydub import AudioSegment
|
5 |
|
6 |
# Load the pipeline for speech recognition and translation
|
7 |
pipe = pipeline(
|
@@ -13,15 +12,8 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
|
|
13 |
tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
|
14 |
|
15 |
# Define the function to translate speech
|
16 |
-
def translate_speech(audio_file):
|
17 |
-
print(f"Type of audio: {type(audio_file)}")  # Debug line
|
18 |
-
|
19 |
-
# Load the audio file with pydub
|
20 |
-
audio = AudioSegment.from_mp3(audio_file) # Change this line
|
21 |
-
|
22 |
-
# Convert the audio to mono and get the raw data
|
23 |
-
audio = audio.set_channels(1)
|
24 |
-
audio_data = np.array(audio.get_array_of_samples())
|
25 |
|
26 |
# Use the speech recognition pipeline to transcribe the audio
|
27 |
output = pipe(audio_data)
|
@@ -65,15 +57,13 @@ def translate_speech(audio_file):
|
|
65 |
|
66 |
return 16000, synthesised_speech
|
67 |
|
68 |
-
|
69 |
-
|
70 |
# Define the Gradio interface
|
71 |
iface = gr.Interface(
|
72 |
fn=translate_speech,
|
73 |
-
inputs=gr.inputs.Audio(),
|
74 |
outputs=gr.outputs.Audio(type="numpy"),
|
75 |
title="Hausa to English Translation",
|
76 |
description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
|
77 |
)
|
78 |
|
79 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import numpy as np
|
|
|
4 |
|
5 |
# Load the pipeline for speech recognition and translation
|
6 |
pipe = pipeline(
|
|
|
12 |
tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
|
13 |
|
14 |
# Define the function to translate speech
|
15 |
+
def translate_speech(audio_data):
|
16 |
+
print(f"Type of audio: {type(audio_data)}, Value of audio: {audio_data}") # Debug line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# Use the speech recognition pipeline to transcribe the audio
|
19 |
output = pipe(audio_data)
|
|
|
57 |
|
58 |
return 16000, synthesised_speech
|
59 |
|
|
|
|
|
60 |
# Define the Gradio interface
|
61 |
iface = gr.Interface(
|
62 |
fn=translate_speech,
|
63 |
+
inputs=gr.inputs.Audio(source="microphone"), # Change this line
|
64 |
outputs=gr.outputs.Audio(type="numpy"),
|
65 |
title="Hausa to English Translation",
|
66 |
description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
|
67 |
)
|
68 |
|
69 |
+
iface.launch()
|