Baghdad99 committed on
Commit
ee193bb
1 Parent(s): def416c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -16
app.py CHANGED
@@ -1,14 +1,11 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
 
3
  import numpy as np
4
 
 
 
5
 
6
- # Load the pipeline for speech recognition and translation
7
- pipe = pipeline(
8
- "automatic-speech-recognition",
9
- model="BlueRaccoon/whisper-small-en",
10
- tokenizer="BlueRaccoon/whisper-small-en"
11
- )
12
  translator = pipeline("text2text-generation", model="Baghdad99/saad-english-text-to-hausa-text")
13
  tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
14
 
@@ -19,19 +16,12 @@ def translate_speech(audio_data_tuple):
19
  # Extract the audio data from the tuple
20
  sample_rate, audio_data = audio_data_tuple
21
 
22
- # Use the speech recognition pipeline to transcribe the audio
23
- output = pipe(audio_data)
24
  print(f"Output: {output}") # Print the output to see what it contains
25
 
26
- # Check if the output contains 'text'
27
- if 'text' in output:
28
- transcription = output["text"]
29
- else:
30
- print("The output does not contain 'text'")
31
- return
32
-
33
  # Use the translation pipeline to translate the transcription
34
- translated_text = translator(transcription, return_tensors="pt")
35
  print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
36
 
37
  # Check if the translated text contains 'generated_token_ids'
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
+ from huggingsound import SpeechRecognitionModel
4
  import numpy as np
5
 
6
+ # Load the model for speech recognition
7
+ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
8
 
 
 
 
 
 
 
9
  translator = pipeline("text2text-generation", model="Baghdad99/saad-english-text-to-hausa-text")
10
  tts = pipeline("text-to-speech", model="Baghdad99/hausa_voice_tts")
11
 
 
16
  # Extract the audio data from the tuple
17
  sample_rate, audio_data = audio_data_tuple
18
 
19
+ # Use the speech recognition model to transcribe the audio
20
+ output = model.transcribe(audio_data)
21
  print(f"Output: {output}") # Print the output to see what it contains
22
 
 
 
 
 
 
 
 
23
  # Use the translation pipeline to translate the transcription
24
+ translated_text = translator(output, return_tensors="pt")
25
  print(f"Translated text: {translated_text}") # Print the translated text to see what it contains
26
 
27
  # Check if the translated text contains 'generated_token_ids'