import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load Mistral-7B model and tokenizer
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Chatbot function: history is a list of (user, assistant) pairs,
# which is the format gr.Chatbot expects for both display and state.
def chat_with_mistral(prompt, history):
    history = history or []

    # Rebuild the running transcript from the previous turns.
    lines = []
    for user_msg, bot_msg in history:
        lines.append(f"User: {user_msg}")
        lines.append(f"Assistant: {bot_msg}")
    lines.append(f"User: {prompt}")
    input_text = "\n".join(lines) + "\nAssistant:"

    # model.device respects the placement chosen by device_map="auto"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)

    # generate() returns the prompt plus the completion; keep only the new reply.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    response = decoded.split("Assistant:")[-1].strip()

    history.append((prompt, response))
    # Return the updated pairs twice: once for the Chatbot display, once for the State.
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("### 🤖 Mistral-7B Chatbot on Hugging Face Spaces")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Type your message here...")
    clear = gr.Button("Clear Chat")
    history = gr.State([])

    msg.submit(chat_with_mistral, inputs=[msg, history], outputs=[chatbot, history])
    clear.click(lambda: ([], []), inputs=[], outputs=[chatbot, history])

# Launch Gradio app
demo.launch()