from dotenv import load_dotenv
import os
import datetime

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

from llama_index.core import (
    ChatPromptTemplate,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Load environment variables
load_dotenv()

app = FastAPI()

# Serve static files (HTML, CSS, JS)
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure LlamaIndex settings
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

PERSIST_DIR = "db"
PDF_DIRECTORY = "data"

# Ensure directories exist
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)


def data_ingestion_from_directory():
    """Read every document in PDF_DIRECTORY, index it, and persist the index."""
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)


data_ingestion_from_directory()  # Process PDF ingestion at startup

# Store chat history in memory (you could also use a database)
chat_history = []


def handle_query(query):
    # Build a transcript of the conversation so far, so the model has
    # conversational context alongside the retrieved documents. This must be
    # constructed before the prompt below, which interpolates it.
    history_str = "\n".join(
        f"User asked: '{msg['message']}'\nBot answered: '{msg['response']}'"
        for msg in chat_history
    )

    # {context_str} and {query_str} are LlamaIndex template placeholders that
    # the query engine fills with the retrieved context and the user question;
    # only the chat history is interpolated here.
    chat_text_qa_msgs = [
        (
            "user",
            "You are the Clara Redfernstech chatbot. Your goal is to provide "
            "accurate, professional, and helpful answers to user queries based "
            "on the company's data. Always ensure your responses are clear and "
            "concise. Give the response within 10-15 words only.\n"
            "Conversation so far:\n"
            f"{history_str}\n"
            "Context:\n"
            "{context_str}\n"
            "Question: {query_str}\n",
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    # Load the persisted index from storage
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, "response"):
        return answer.response
    if isinstance(answer, dict) and "response" in answer:
        return answer["response"]
    return "Sorry, I couldn't find an answer."


@app.get("/", response_class=HTMLResponse)
async def read_root():
    with open("static/index.html") as f:
        return f.read()


@app.post("/chat/")
async def chat(message: str):
    response = handle_query(message)
    message_data = {
        "sender": "User",
        "message": message,
        "response": response,
        "timestamp": datetime.datetime.now().isoformat(),
    }
    # Store the interaction in chat history
    chat_history.append(message_data)
    return {"response": response}
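
# Local development entry point: a minimal sketch for running the app directly.
# It assumes the `uvicorn` package is installed; it is not used elsewhere in
# this script, so this block is an illustrative addition rather than part of
# the original deployment setup.
#
# Example request once the server is running (the /chat/ endpoint above reads
# `message` as a query parameter):
#   curl -X POST "http://localhost:8000/chat/?message=What%20does%20Redfernstech%20do"
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)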