import gradio as gr
import numpy as np
import torch
from transformers import AutoProcessor, SeamlessM4Tv2Model

# Sampling rate expected by the SeamlessM4T feature extractor.
TARGET_SR = 16000


class SeamlessM4TApp:
    """Speech-to-text transcription backed by SeamlessM4T-v2-large."""

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # The v2 checkpoint must be loaded with the v2 model class;
        # SeamlessM4TModel targets the v1 architecture and does not match
        # the "facebook/seamless-m4t-v2-large" weights.
        self.processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
        self.model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
        self.model.to(self.device)

    def transcribe_audio(self, audio):
        """Transcribe uploaded/recorded audio to English text.

        Args:
            audio: ``(sample_rate, data)`` tuple from a ``gr.Audio``
                component with ``type="numpy"`` (``data`` may be int PCM
                or float, mono or multi-channel), or ``None`` when the
                user submitted without providing audio.

        Returns:
            The transcription string, or an error message on failure.
        """
        try:
            if audio is None:
                return "No audio provided."
            sample_rate, data = audio

            # Normalize to mono float32 in [-1, 1]; Gradio delivers
            # int16 PCM by default.
            samples = np.asarray(data)
            waveform = torch.as_tensor(samples, dtype=torch.float32)
            if np.issubdtype(samples.dtype, np.integer):
                waveform = waveform / float(np.iinfo(samples.dtype).max)
            if waveform.ndim > 1:
                waveform = waveform.mean(dim=-1)

            # The feature extractor expects 16 kHz input; the processor
            # does not resample mismatched-rate arrays itself. Linear
            # interpolation is a dependency-free approximation of proper
            # band-limited resampling.
            if sample_rate != TARGET_SR:
                new_len = int(round(waveform.shape[0] * TARGET_SR / sample_rate))
                waveform = torch.nn.functional.interpolate(
                    waveform[None, None, :],
                    size=new_len,
                    mode="linear",
                    align_corners=False,
                )[0, 0]

            # The processor requires raw sample arrays — it cannot load
            # a file path (the original code passed one).
            audio_inputs = self.processor(
                audios=waveform.numpy(),
                sampling_rate=TARGET_SR,
                return_tensors="pt",
            ).to(self.device)

            # `task` is not a valid generate() kwarg for this model;
            # generate_speech=False requests text-only output.
            with torch.no_grad():
                output_tokens = self.model.generate(
                    **audio_inputs,
                    tgt_lang="eng",
                    generate_speech=False,
                )

            # With generate_speech=False the token ids are the first
            # element of the returned output.
            return self.processor.decode(
                output_tokens[0].tolist()[0],
                skip_special_tokens=True,
            )
        except Exception as e:
            return f"Error during transcription: {str(e)}"


def create_interface():
    """Build the Gradio interface around a fresh SeamlessM4TApp."""
    app = SeamlessM4TApp()

    interface = gr.Interface(
        fn=app.transcribe_audio,
        # Gradio 4 replaced `source=` with `sources=[...]`; both upload
        # and microphone are allowed, matching the description below.
        # type="numpy" hands us (sample_rate, samples) directly.
        inputs=gr.Audio(
            type="numpy",
            label="Upload Audio",
            sources=["microphone", "upload"],
        ),
        outputs=gr.Textbox(label="Transcription"),
        title="SeamlessM4T Speech-to-Text",
        description="Upload audio or use microphone to transcribe speech to text using SeamlessM4T model.",
    )
    return interface


if __name__ == "__main__":
    interface = create_interface()
    interface.launch()