eng-to-hau

Sleeping

App Files Files Community

Baghdad99 committed on Dec 9, 2023

Commit

2ad4835

•

1 Parent(s): 868f598

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -32

app.py CHANGED Viewed

@@ -26,51 +26,47 @@ def translate_speech(audio_data_tuple):
         output = model.transcribe([temp_audio_file.name])
         print(f"Output: {output}")  # Print the output to see what it contains
-        # Check if the output contains 'transcription'
-        if 'transcription' in output:
-            transcription = output["transcription"]
-        else:
-            print("The output does not contain 'transcription'")
-            return
-    # Use the translation pipeline to translate the transcription
-    translated_text = translator(transciption, return_tensors="pt")
-    print(f"Translated text: {translated_text}")  # Print the translated text to see what it contains
-    # Check if the translated text contains 'generated_token_ids'
-    if 'generated_token_ids' in translated_text[0]:
-        # Decode the tokens into text
-        translated_text_str = translator.tokenizer.decode(translated_text[0]['generated_token_ids'])
-    else:
-        print("The translated text does not contain 'generated_token_ids'")
-        return
-    # Use the text-to-speech pipeline to synthesize the translated text
-    synthesised_speech = tts(translated_text_str)
-    print(f"Synthesised speech: {synthesised_speech}")  # Print the synthesised speech to see what it contains
-    # Check if the synthesised speech contains 'audio'
-    if 'audio' in synthesised_speech:
-        synthesised_speech_data = synthesised_speech['audio']
-    else:
-        print("The synthesised speech does not contain 'audio'")
-        return
-    # Flatten the audio data
-    synthesised_speech_data = synthesised_speech_data.flatten()
-    # Scale the audio data to the range of int16 format
-    synthesised_speech = (synthesised_speech_data * 32767).astype(np.int16)
-    return 16000, synthesised_speech
 # Define the Gradio interface
 iface = gr.Interface(
     fn=translate_speech,
     inputs=gr.inputs.Audio(source="microphone"),  # Change this line
     outputs=gr.outputs.Audio(type="numpy"),
-    title="Hausa to English Translation",
     description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
 )

         output = model.transcribe([temp_audio_file.name])
         print(f"Output: {output}")  # Print the output to see what it contains
+      # Extract the transcriptions from the outputs
+    transcriptions = [output['transcription'] for output in outputs]
+    for transcription in transcriptions:
+        # Use the translation pipeline to translate the transcription
+        translated_text = translator(transcription, return_tensors="pt")
+        print(f"Translated text: {translated_text}")  # Print the translated text to see what it contains
+        # Check if the translated text contains 'generated_token_ids'
+        if 'generated_token_ids' in translated_text[0]:
+            # Decode the tokens into text
+            translated_text_str = translator.tokenizer.decode(translated_text[0]['generated_token_ids'])
+        else:
+            print("The translated text does not contain 'generated_token_ids'")
+            return
+        # Use the text-to-speech pipeline to synthesize the translated text
+        synthesised_speech = tts(translated_text_str)
+        print(f"Synthesised speech: {synthesised_speech}")  # Print the synthesised speech to see what it contains
+        # Check if the synthesised speech contains 'audio'
+        if 'audio' in synthesised_speech:
+            synthesised_speech_data = synthesised_speech['audio']
+        else:
+            print("The synthesised speech does not contain 'audio'")
+            return
+        # Flatten the audio data
+        synthesised_speech_data = synthesised_speech_data.flatten()
+        # Scale the audio data to the range of int16 format
+        synthesised_speech = (synthesised_speech_data * 32767).astype(np.int16)
+        return 16000, synthesised_speech
 # Define the Gradio interface
 iface = gr.Interface(
     fn=translate_speech,
     inputs=gr.inputs.Audio(source="microphone"),  # Change this line
     outputs=gr.outputs.Audio(type="numpy"),
+    title="English to Hausa Translation",
     description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
 )