"""Gradio app for speech-to-speech translation between Hausa and English.

Pipeline: Whisper transcribes the recorded audio, a MarianMT model translates
the transcript, and gTTS synthesises the translated text as MP3 audio.
"""

import tempfile
from io import BytesIO

import gradio as gr
import whisper
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer

# Labels shown in the direction selector; also used to pick the model pair.
HAUSA_TO_ENGLISH = "Hausa to English"
ENGLISH_TO_HAUSA = "English to Hausa"

# Load Whisper ASR model
whisper_model = whisper.load_model("small")  # Use a smaller model

# Load translation models for Hausa-English and English-Hausa
model_name_he = 'Helsinki-NLP/opus-mt-ha-en'  # Hausa to English
model_name_eh = 'Helsinki-NLP/opus-mt-en-ha'  # English to Hausa

tokenizer_he = MarianTokenizer.from_pretrained(model_name_he)
model_he = MarianMTModel.from_pretrained(model_name_he)

tokenizer_eh = MarianTokenizer.from_pretrained(model_name_eh)
model_eh = MarianMTModel.from_pretrained(model_name_eh)


def punctuate(text):
    """Return *text* with a capitalised first letter and terminal punctuation.

    Surrounding whitespace is stripped. A full stop is appended unless the
    text already ends in ``.``, ``!`` or ``?``. Empty (or whitespace-only)
    input yields an empty string instead of raising ``IndexError``.
    """
    text = text.strip()
    if not text:
        return text
    if text[-1] not in '.!?':
        text += '.'
    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest of the sentence (proper nouns, "I", acronyms).
    return text[0].upper() + text[1:]


def translate_and_punctuate(text, direction):
    """Translate *text* in the given *direction* and punctuate the result.

    Args:
        text: Source-language text.
        direction: ``"Hausa to English"`` selects the Hausa->English model;
            any other value selects the English->Hausa model.

    Returns:
        The punctuated translation, or ``""`` when *text* is blank.
    """
    if not text or not text.strip():
        return ""

    if direction == HAUSA_TO_ENGLISH:
        tokenizer, model = tokenizer_he, model_he
    else:
        tokenizer, model = tokenizer_eh, model_eh

    # truncation=True keeps over-long transcripts within the model's maximum
    # input length instead of failing inside generate().
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated = model.generate(**batch)
    result = tokenizer.decode(generated[0], skip_special_tokens=True)
    return punctuate(result)


def text_to_speech(text, language):
    """Synthesise *text* with gTTS and return the MP3 data as a ``BytesIO``.

    Args:
        text: Non-empty text to speak.
        language: gTTS language code (e.g. ``"en"``).

    Returns:
        An in-memory buffer positioned at the start of the MP3 data.
    """
    tts = gTTS(text=text, lang=language)
    audio_fp = BytesIO()
    # write_to_fp() is the file-object API; save() expects a file *path*,
    # so passing it a BytesIO never filled the buffer.
    tts.write_to_fp(audio_fp)
    audio_fp.seek(0)
    return audio_fp


def real_time_translation(audio, direction):
    """Transcribe *audio*, translate the transcript and synthesise speech.

    Args:
        audio: Path to the recorded audio file, or ``None`` if nothing was
            recorded.
        direction: Translation direction label (see
            ``translate_and_punctuate``).

    Returns:
        ``(translated_text, speech_output)`` where ``speech_output`` is a
        ``BytesIO`` holding MP3 data. ``("", None)`` is returned when there
        is no audio or nothing could be transcribed/translated.
    """
    if not audio:
        return "", None

    # Use Whisper model to transcribe the audio (speech to text)
    result = whisper_model.transcribe(audio)
    spoken_text = result['text'].strip()
    if not spoken_text:
        return "", None

    # Translate and punctuate the transcribed text
    translated_text = translate_and_punctuate(spoken_text, direction)
    if not translated_text:
        # gTTS refuses empty input, so skip synthesis.
        return "", None

    # Generate speech output from the translated text
    # NOTE(review): confirm the installed gTTS release supports "ha".
    tts_language = "en" if direction == HAUSA_TO_ENGLISH else "ha"
    speech_output = text_to_speech(translated_text, tts_language)

    return translated_text, speech_output


# Gradio interface
def translation_app(audio, direction):
    """Gradio callback: return the translated text and a path to its audio.

    The in-memory MP3 is written to a temporary ``.mp3`` file and the path is
    returned, since a ``BytesIO`` is not one of the value types the Audio
    output component handles.
    """
    # Handle real-time translation from audio input
    translated_text, speech_output = real_time_translation(audio, direction)
    if speech_output is None:
        return translated_text, None

    # delete=False: the file must outlive this function so Gradio can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(speech_output.getvalue())
    return translated_text, tmp.name


# Define Gradio inputs and outputs
inputs = [
    gr.Audio(type="filepath", label="Speak Now"),
    gr.Radio(
        choices=[HAUSA_TO_ENGLISH, ENGLISH_TO_HAUSA],
        # Pre-select a direction so an untouched selector is not silently
        # treated as English to Hausa.
        value=HAUSA_TO_ENGLISH,
        label="Translation Direction",
    ),
]

outputs = [
    gr.Textbox(label="Translated and Punctuated Text"),
    gr.Audio(label="Translated Speech"),
]

demo = gr.Interface(
    fn=translation_app,
    inputs=inputs,
    outputs=outputs,
    title="Real-Time Hausa-English Speech Translator with Whisper",
)

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()