Baghdad99 committed on
Commit
ee193bb
1 Parent(s): def416c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -16
app.py CHANGED
@@ -1,14 +1,11 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
 
3
  import numpy as np
4
 
 
 
5
 
6
- # Load the pipeline for speech recognition and translation
7
- pipe = pipeline(
8
- "automatic-speech-recognition",
9
- model="BlueRaccoon/whisper-small-en",
10
- tokenizer="BlueRaccoon/whisper-small-en"
11
- )
12
  translator = pipeline("text2text-generation", model="Baghdad99/saad-english-text-to-hausa-text")
13
  tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
14
 
@@ -19,19 +16,12 @@ def translate_speech(audio_data_tuple):
19
  # Extract the audio data from the tuple
20
  sample_rate, audio_data = audio_data_tuple
21
 
22
- # Use the speech recognition pipeline to transcribe the audio
23
- output = pipe(audio_data)
24
  print(f"Output: {output}") # Print the output to see what it contains
25
 
26
- # Check if the output contains 'text'
27
- if 'text' in output:
28
- transcription = output["text"]
29
- else:
30
- print("The output does not contain 'text'")
31
- return
32
-
33
  # Use the translation pipeline to translate the transcription
34
- translated_text = translator(transcription, return_tensors="pt")
35
  print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
36
 
37
  # Check if the translated text contains 'generated_token_ids'
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
+ from huggingsound import SpeechRecognitionModel
4
  import numpy as np
5
 
6
+ # Load the model for speech recognition
7
+ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
8
 
 
 
 
 
 
 
9
  translator = pipeline("text2text-generation", model="Baghdad99/saad-english-text-to-hausa-text")
10
  tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
11
 
 
16
  # Extract the audio data from the tuple
17
  sample_rate, audio_data = audio_data_tuple
18
 
19
+ # Use the speech recognition model to transcribe the audio
20
+ output = model.transcribe(audio_data)
21
  print(f"Output: {output}") # Print the output to see what it contains
22
 
 
 
 
 
 
 
 
23
  # Use the translation pipeline to translate the transcription
24
+ translated_text = translator(output, return_tensors="pt")
25
  print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
26
 
27
  # Check if the translated text contains 'generated_token_ids'