import os

import torch
import chainlit as cl
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from petals import AutoDistributedModelForCausalLM
from transformers import AutoTokenizer

# Load environment variables from the .env file
load_dotenv()

# Retrieve the Hugging Face token from environment variables
hugging_face_token = os.getenv("HUGGINGFACE_TOKEN")

DB_FAISS_PATH = 'vectorstore/db_faiss'

# Log in to the Hugging Face Hub
login(token=hugging_face_token)


# Load the FAISS vector store built with sentence-transformer embeddings
def load_vector_store():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    return db


# Load the Llama 2 chat model over the Petals distributed network
def load_llm():
    model_name = "meta-llama/Llama-2-70b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    # Local layers (embeddings/head) run on CPU; the transformer blocks run on the Petals swarm
    model.to('cpu')
    return model, tokenizer


# QA model function
def qa_bot():
    model, tokenizer = load_llm()
    return model, tokenizer


# Initialize conversational history
conversational_history = []


# Chainlit handlers
@cl.on_chat_start
async def start():
    model, tokenizer = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, welcome to HealsMindAI. What is your query?"
    await msg.update()
    cl.user_session.set("model", model)
    cl.user_session.set("tokenizer", tokenizer)
    cl.user_session.set("history", conversational_history)


@cl.on_message
async def main(message: cl.Message):
    model = cl.user_session.get("model")
    tokenizer = cl.user_session.get("tokenizer")
    history = cl.user_session.get("history")

    # The handler receives a cl.Message object; the user's text is in .content
    query = message.content

    # Use the history to provide context for the query
    query_with_history = " ".join(history + [query])
    custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {}
Question: {}

Only return the helpful answer below and nothing else.
Helpful answer:
""".format(query_with_history, query)

    # Generate text with the distributed LLM using the custom prompt
    input_ids = tokenizer.encode(custom_prompt_template, return_tensors="pt")
    max_generated_length = 7000  # Upper bound on newly generated tokens (note: Llama 2's context window is 4096 tokens)
    max_length = input_ids.shape[1] + max_generated_length
    generated_output = model.generate(input_ids, max_length=max_length, num_return_sequences=1)

    # Decode only the newly generated tokens, dropping the echoed prompt
    decoded_output = tokenizer.decode(generated_output[0][input_ids.shape[1]:], skip_special_tokens=True)

    # Update the conversational history
    history.append(decoded_output)
    cl.user_session.set("history", history)

    await cl.Message(content=decoded_output).send()
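# Usage note (a minimal sketch, assuming this script is saved as app.py and that a
# .env file defining HUGGINGFACE_TOKEN sits next to it; adjust the filename if yours differs):
#
#   chainlit run app.py
#
# Chainlit then serves the chat UI on http://localhost:8000 by default.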