import os

import torch
import chainlit as cl
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from petals import AutoDistributedModelForCausalLM
from transformers import AutoTokenizer

# Load environment variables from the .env file
load_dotenv()

# Retrieve the Hugging Face token from environment variables
hugging_face_token = os.getenv("HUGGINGFACE_TOKEN")

DB_FAISS_PATH = 'vectorstore/db_faiss'

# Log in to the Hugging Face Hub
login(token=hugging_face_token)


# Load the FAISS vector store built with sentence-transformer embeddings
def load_vector_store():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    return db


# Load the Llama 2 chat model over the Petals distributed network
def load_llm():
    model_name = "meta-llama/Llama-2-70b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    # Local layers (embeddings/head) run on CPU; the transformer blocks run on the Petals swarm
    model.to('cpu')
    return model, tokenizer


# QA model function
def qa_bot():
    model, tokenizer = load_llm()
    return model, tokenizer


# Initialize conversational history
conversational_history = []


# Chainlit handlers
@cl.on_chat_start
async def start():
    model, tokenizer = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, welcome to HealsMindAI. What is your query?"
    await msg.update()
    cl.user_session.set("model", model)
    cl.user_session.set("tokenizer", tokenizer)
    cl.user_session.set("history", conversational_history)


@cl.on_message
async def main(message: cl.Message):
    model = cl.user_session.get("model")
    tokenizer = cl.user_session.get("tokenizer")
    history = cl.user_session.get("history")

    # The handler receives a cl.Message object; the user's text is in .content
    query = message.content

    # Use the history to provide context for the query
    query_with_history = " ".join(history + [query])
    custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {}
Question: {}

Only return the helpful answer below and nothing else.
Helpful answer:
""".format(query_with_history, query)

    # Generate text with the distributed LLM using the custom prompt
    input_ids = tokenizer.encode(custom_prompt_template, return_tensors="pt")
    max_generated_length = 7000  # Upper bound on newly generated tokens (note: Llama 2's context window is 4096 tokens)
    max_length = input_ids.shape[1] + max_generated_length
    generated_output = model.generate(input_ids, max_length=max_length, num_return_sequences=1)

    # Decode only the newly generated tokens, dropping the echoed prompt
    decoded_output = tokenizer.decode(generated_output[0][input_ids.shape[1]:], skip_special_tokens=True)

    # Update the conversational history
    history.append(decoded_output)
    cl.user_session.set("history", history)

    await cl.Message(content=decoded_output).send()
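# Usage note (a minimal sketch, assuming this script is saved as app.py and that a
# .env file defining HUGGINGFACE_TOKEN sits next to it; adjust the filename if yours differs):
#
#   chainlit run app.py
#
# Chainlit then serves the chat UI on http://localhost:8000 by default.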