import gradio as gr
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from huggingface_hub import hf_hub_download


# ----- Model Definition -----
class CustomDialoGPT(nn.Module):
    def __init__(self, vocab_size, n_embd=768, n_head=8, n_layer=8):
        # n_embd, n_head and n_layer are fixed so the architecture matches the checkpoint.
        super().__init__()
        config = AutoConfig.from_pretrained(
            "microsoft/DialoGPT-medium",
            vocab_size=vocab_size,
            n_embd=n_embd,
            n_head=n_head,
            n_layer=n_layer,
            bos_token_id=50256,
            eos_token_id=50256,
            pad_token_id=50256,
        )
        self.transformer = AutoModelForCausalLM.from_config(config)  # causal LM backbone
        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)  # separate LM head kept to match the checkpoint

    def forward(self, input_ids):
        transformer_outputs = self.transformer(input_ids=input_ids, output_hidden_states=True)
        hidden_states = transformer_outputs.hidden_states[-1]  # last hidden state
        logits = self.lm_head(hidden_states)
        return logits


# Model and tokenizer details
model_repo = "elapt1c/ElapticAI-1a"
model_filename = "model.pth"  # check the filename on the HF Hub and update if needed
tokenizer_name = "microsoft/DialoGPT-medium"

# Device configuration
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer (vocab_size must be defined after the tokenizer is loaded)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
vocab_size = len(tokenizer)

# Initialize model with fixed parameters to match the checkpoint
n_embd = 768
n_head = 8
n_layer = 8

model = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer).to(device).eval()

# Download and load model weights
try:
    pth_filepath = hf_hub_download(repo_id=model_repo, filename=model_filename)
    checkpoint = torch.load(pth_filepath, map_location=device)

    # Handle different checkpoint saving formats.
    if 'model_state_dict' in checkpoint:
        model.load_state_dict(checkpoint['model_state_dict'])
    elif 'state_dict' in checkpoint:
        model.load_state_dict(checkpoint['state_dict'])
    else:
        model.load_state_dict(checkpoint)

    print(f"Successfully loaded model weights from {model_repo}/{model_filename}")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Please ensure the model repository and filename are correct and that the "
          "model architecture in app.py matches the checkpoint.")
    raise  # re-raise so the failure is visible in the Space logs
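# --- Optional sanity check (a sketch, not part of the original loading logic) ---
# If loading fails with size or key mismatches, comparing the checkpoint keys against
# the model's expected keys can help diagnose an architecture mismatch. PyTorch's
# load_state_dict(..., strict=False) returns the missing and unexpected key lists.
# The 'model_state_dict' unwrapping below is a hypothetical example for one of the
# checkpoint formats handled above:
#
#     state_dict = checkpoint.get('model_state_dict', checkpoint)
#     result = model.load_state_dict(state_dict, strict=False)
#     print("Missing keys:", result.missing_keys)
#     print("Unexpected keys:", result.unexpected_keys)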
def chat_with_model(user_input):
    """Generate a response from the loaded model for a single user message."""
    input_ids = tokenizer.encode(user_input, return_tensors='pt').to(device)

    with torch.no_grad():
        output = model.transformer.generate(
            inputs=input_ids,
            max_length=100,
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    bot_response = tokenizer.decode(output[0], skip_special_tokens=True)

    # Debug logging, visible in the Space console
    print("--- chat_with_model ---")
    print("user_input:", user_input)
    print("bot_response:", bot_response)

    return bot_response


iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(placeholder="Type your message here..."),
    outputs=gr.Text(),
    title="ElapticAI-1a Chatbot",
    description="Simple chatbot interface for the ElapticAI-1a model",
)

if __name__ == "__main__":
    iface.launch()
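# --- Optional quick test without the UI (a sketch; assumes the weights loaded above) ---
# To sanity-check generation from a terminal instead of launching Gradio, one could
# temporarily replace the iface.launch() call with something like:
#
#     if __name__ == "__main__":
#         print(chat_with_model("Hello, how are you?"))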