import tempfile
from io import BytesIO

import gradio as gr
import whisper
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer

# Speech recognition: load the "small" Whisper checkpoint once at import time.
whisper_model = whisper.load_model("small")

# MarianMT checkpoint names, one per translation direction.
model_name_he = 'Helsinki-NLP/opus-mt-ha-en'  # Hausa -> English
model_name_eh = 'Helsinki-NLP/opus-mt-en-ha'  # English -> Hausa


def _load_marian(checkpoint):
    """Return the ``(tokenizer, model)`` pair for a MarianMT checkpoint name."""
    tokenizer = MarianTokenizer.from_pretrained(checkpoint)
    model = MarianMTModel.from_pretrained(checkpoint)
    return tokenizer, model


tokenizer_he, model_he = _load_marian(model_name_he)
tokenizer_eh, model_eh = _load_marian(model_name_eh)

def punctuate(text):
    """Capitalize the first letter of *text* and make sure it ends a sentence.

    Surrounding whitespace is stripped first. If the remaining text does not
    already end in ``.``, ``!`` or ``?``, a period is appended. Only the first
    character is upper-cased; the rest of the text keeps its original casing
    so proper nouns and acronyms are not lower-cased.

    Args:
        text: The sentence to tidy up. May be empty or whitespace-only.

    Returns:
        The tidied sentence, or ``""`` when *text* contains no visible
        characters.
    """
    text = text.strip()
    # Guard before indexing: there is no first/last character to inspect.
    if not text:
        return ""
    if not text.endswith(('.', '!', '?')):
        text += '.'
    # str.capitalize() would also lower-case everything after the first
    # character, so upper-case the first character explicitly instead.
    return text[0].upper() + text[1:]

def translate_and_punctuate(text, direction):
    """Translate *text* with the MarianMT model for *direction*, then punctuate it.

    Args:
        text: Source-language text to translate.
        direction: ``"Hausa to English"`` selects the Hausa->English model;
            any other value selects the English->Hausa model.

    Returns:
        The first decoded translation after being passed through
        ``punctuate``.
    """
    # Pick the tokenizer/model pair once so the pipeline below is shared.
    if direction == "Hausa to English":
        tokenizer, model = tokenizer_he, model_he
    else:
        tokenizer, model = tokenizer_eh, model_eh

    encoded = tokenizer(text, return_tensors="pt", padding=True)
    generated = model.generate(**encoded)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    return punctuate(decoded)

def text_to_speech(text, language):
    """Synthesize *text* with gTTS and return the audio in an in-memory buffer.

    Args:
        text: The text to speak.
        language: Language code forwarded to gTTS as ``lang`` (the callers in
            this file pass ``"en"`` or ``"ha"``).

    Returns:
        A ``BytesIO`` rewound to offset 0 that holds the audio written by gTTS.
    """
    tts = gTTS(text=text, lang=language)
    audio_fp = BytesIO()
    # gTTS.save() expects a file *path*; write_to_fp() is the method that
    # writes into an already-open file-like object such as BytesIO.
    tts.write_to_fp(audio_fp)
    audio_fp.seek(0)
    return audio_fp

def real_time_translation(audio, direction):
    """Transcribe *audio*, translate the transcript, and synthesize the result.

    Args:
        audio: Audio input handed to ``whisper_model.transcribe`` (the UI in
            this file supplies a file path).
        direction: ``"Hausa to English"`` produces English text and speech;
            any other value produces Hausa text and speech.

    Returns:
        A ``(translated_text, speech_output)`` tuple where ``speech_output``
        is whatever ``text_to_speech`` returns. When the transcript is empty
        or whitespace-only, ``("", None)`` is returned instead.
    """
    # Speech to text.
    transcription = whisper_model.transcribe(audio)
    spoken_text = transcription['text']

    # With no recognizable speech there is nothing to translate or speak, and
    # the downstream steps are not written to cope with empty text.
    if not spoken_text.strip():
        return "", None

    translated_text = translate_and_punctuate(spoken_text, direction)

    # Speak the translation in the *target* language of the chosen direction.
    tts_language = "en" if direction == "Hausa to English" else "ha"
    speech_output = text_to_speech(translated_text, tts_language)

    return translated_text, speech_output

def translation_app(audio, direction):
    """Gradio callback: translate the submitted audio and return text plus speech.

    Args:
        audio: Value of the audio input component (a file path), or a falsy
            value such as ``None`` when nothing was submitted.
        direction: The selected translation direction, forwarded unchanged to
            ``real_time_translation``.

    Returns:
        A ``(translated_text, speech_output)`` tuple for the text and audio
        output components. ``speech_output`` is a path to a temporary ``.mp3``
        file when speech was produced as an in-memory buffer, otherwise it is
        passed through unchanged. ``("", None)`` is returned when no audio
        was supplied.
    """
    # Nothing to transcribe: return empty outputs instead of failing inside
    # the speech-recognition step.
    if not audio:
        return "", None

    translated_text, speech_output = real_time_translation(audio, direction)

    # The audio output component is given a file path rather than a raw
    # BytesIO, so persist the in-memory buffer first. gTTS emits MP3 data,
    # hence the suffix. The file is intentionally not deleted here because
    # it must still exist after this function returns.
    if isinstance(speech_output, BytesIO):
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(speech_output.getvalue())
        speech_output = audio_file.name

    return translated_text, speech_output

# Input components: audio delivered to the callback as a file path, plus the
# translation-direction selector.
inputs = [
    gr.Audio(type="filepath", label="Speak Now"),
    gr.Radio(
        choices=["Hausa to English", "English to Hausa"],
        label="Translation Direction",
    ),
]

# Output components: the translated text and its spoken rendering.
outputs = [
    gr.Textbox(label="Translated and Punctuated Text"),
    gr.Audio(label="Translated Speech"),
]

# Build the interface around the callback, then start the app.
demo = gr.Interface(
    fn=translation_app,
    inputs=inputs,
    outputs=outputs,
    title="Real-Time Hausa-English Speech Translator with Whisper",
)
demo.launch()