Baghdad99 committed on
Commit
ee37b95
1 Parent(s): 8a6097b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -14
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
- from pydub import AudioSegment
5
 
6
  # Load the pipeline for speech recognition and translation
7
  pipe = pipeline(
@@ -13,15 +12,8 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
13
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
14
 
15
  # Define the function to translate speech
16
- def translate_speech(audio_file):
17
- print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}") # Debug line
18
-
19
- # Load the audio file with pydub
20
- audio = AudioSegment.from_mp3(audio_file) # Change this line
21
-
22
- # Convert the audio to mono and get the raw data
23
- audio = audio.set_channels(1)
24
- audio_data = np.array(audio.get_array_of_samples())
25
 
26
  # Use the speech recognition pipeline to transcribe the audio
27
  output = pipe(audio_data)
@@ -65,15 +57,13 @@ def translate_speech(audio_file):
65
 
66
  return 16000, synthesised_speech
67
 
68
-
69
-
70
  # Define the Gradio interface
71
  iface = gr.Interface(
72
  fn=translate_speech,
73
- inputs=gr.inputs.Audio(type="filepath"), # Change this line
74
  outputs=gr.outputs.Audio(type="numpy"),
75
  title="Hausa to English Translation",
76
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
77
  )
78
 
79
- iface.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
 
4
 
5
  # Load the pipeline for speech recognition and translation
6
  pipe = pipeline(
 
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
14
  # Define the function to translate speech
15
+ def translate_speech(audio_data):
16
+ print(f"Type of audio: {type(audio_data)}, Value of audio: {audio_data}") # Debug line
 
 
 
 
 
 
 
17
 
18
  # Use the speech recognition pipeline to transcribe the audio
19
  output = pipe(audio_data)
 
57
 
58
  return 16000, synthesised_speech
59
 
 
 
60
  # Define the Gradio interface
61
  iface = gr.Interface(
62
  fn=translate_speech,
63
+ inputs=gr.inputs.Audio(source="microphone"), # Change this line
64
  outputs=gr.outputs.Audio(type="numpy"),
65
  title="Hausa to English Translation",
66
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
67
  )
68
 
69
+ iface.launch()