Spaces: Runtime error
Commit abb6f94 · Parent(s): 66bdce2
application addd
Browse files
- Dockerfile +10 -0
- app/__init__.py +5 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/logging_config.cpython-310.pyc +0 -0
- app/__pycache__/main.cpython-310.pyc +0 -0
- app/__pycache__/settings.cpython-310.pyc +0 -0
- app/api/__init__.py +2 -0
- app/api/__pycache__/__init__.cpython-310.pyc +0 -0
- app/api/__pycache__/answer.cpython-310.pyc +0 -0
- app/api/__pycache__/upload.cpython-310.pyc +0 -0
- app/api/answer.py +63 -0
- app/api/upload.py +173 -0
- app/data_pipeline/__init__.py +2 -0
- app/data_pipeline/__pycache__/__init__.cpython-310.pyc +0 -0
- app/data_pipeline/__pycache__/data_loader.cpython-310.pyc +0 -0
- app/data_pipeline/__pycache__/embedding_manager.cpython-310.pyc +0 -0
- app/data_pipeline/data_loader.py +153 -0
- app/data_pipeline/embedding_manager.py +109 -0
- app/logging_config.py +45 -0
- app/main.py +32 -0
- app/rag_pipeline/__init__.py +3 -0
- app/rag_pipeline/__pycache__/__init__.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/chroma_client.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/model_initializer.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/model_loader.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/prompt_utils.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/retriever_chain.cpython-310.pyc +0 -0
- app/rag_pipeline/__pycache__/retriver_chain.cpython-310.pyc +0 -0
- app/rag_pipeline/chroma_client.py +15 -0
- app/rag_pipeline/model_initializer.py +50 -0
- app/rag_pipeline/model_loader.py +148 -0
- app/rag_pipeline/prompt_utils.py +31 -0
- app/rag_pipeline/retriever_chain.py +136 -0
- app/settings.py +33 -0
- requirements.txt +0 -0
Dockerfile
CHANGED
@@ -0,0 +1,10 @@
FROM python:3.9

WORKDIR /app

COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY app/ app/

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py
ADDED
@@ -0,0 +1,5 @@
# from main import create_app
# from api.answer import answer_router


# app = create_app()
app/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (125 Bytes)

app/__pycache__/logging_config.cpython-310.pyc
ADDED
Binary file (997 Bytes)

app/__pycache__/main.cpython-310.pyc
ADDED
Binary file (852 Bytes)

app/__pycache__/settings.cpython-310.pyc
ADDED
Binary file (1.16 kB)
app/api/__init__.py
ADDED
@@ -0,0 +1,2 @@
from app.api.answer import answer_router
from app.api.upload import upload_router
app/api/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (233 Bytes)

app/api/__pycache__/answer.cpython-310.pyc
ADDED
Binary file (1.97 kB)

app/api/__pycache__/upload.cpython-310.pyc
ADDED
Binary file (3.57 kB)
app/api/answer.py
ADDED
@@ -0,0 +1,63 @@
from fastapi import APIRouter, Request, Body, HTTPException
import logging
from typing import Dict

from app.rag_pipeline.model_initializer import initialize_models
from app.rag_pipeline.retriever_chain import RetrieverChain
from app.settings import Config

answer_router = APIRouter()

logger = logging.getLogger(__name__)
import warnings
warnings.filterwarnings("ignore")
conf = Config()

OPENAI_API_KEY = conf.API_KEY
MODEL_ID = conf.MODEL_ID
MODEL_BASENAME = conf.MODEL_BASENAME
COLLECTION_NAME = conf.COLLECTION_NAME
PERSIST_DIRECTORY = conf.PERSIST_DIRECTORY

# print(OPENAI_API_KEY, MODEL_ID, MODEL_BASENAME, PERSIST_DIRECTORY, COLLECTION_NAME)

embedding_model, llm_model = initialize_models(OPENAI_API_KEY, model_id=MODEL_ID, model_basename=MODEL_BASENAME)

def validate_question(data: Dict) -> str:
    """Extract and validate the 'question' field from the incoming data."""
    question = data.get("question")
    if not question or not isinstance(question, str) or not question.strip():
        logger.warning("Received invalid question input.")
        raise HTTPException(status_code=400, detail="Question must be a non-empty string.")
    return question

@answer_router.post('/answer', response_model=dict)
async def generate_answer(data: Dict = Body(...)) -> Dict:
    try:
        # Validate and extract the question
        question = validate_question(data)

        # Log incoming question
        logger.info(f"Received question: {question}")

        # Generate the answer
        retriever_qa = RetrieverChain(
            collection_name=COLLECTION_NAME, embedding_function=embedding_model, persist_directory=PERSIST_DIRECTORY)
        answer = retriever_qa.get_response(user_input=question, llm=llm_model)

        # answer = f"Generated answer for: {question}"

        # Log generated answer
        logger.info(f"Generated answer: {answer}")

        return {"answer": answer}

    except HTTPException as http_exc:
        logger.error(f"HTTP error: {http_exc.detail}")
        raise http_exc  # Re-raise the HTTPException to return the error response
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        raise HTTPException(status_code=500, detail="An internal server error occurred.")
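A quick way to exercise this route once the Space is up — a minimal sketch, assuming the service is reachable on port 7860 (the port exposed by the Dockerfile) and that documents were already indexed via /upload; the URL and question are placeholders:

# Hypothetical client-side check of the /answer route.
import requests

resp = requests.post(
    "http://localhost:7860/answer",   # assumes the uvicorn command from the Dockerfile
    json={"question": "What does the uploaded document say about payment terms?"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["answer"])          # the endpoint returns {"answer": ...}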
app/api/upload.py
ADDED
@@ -0,0 +1,173 @@
from fastapi import FastAPI, APIRouter, UploadFile, HTTPException
from fastapi import File, UploadFile
from fastapi.staticfiles import StaticFiles
from fastapi.responses import JSONResponse
from pathlib import Path
from typing import List
import os
import shutil
import logging


from app.data_pipeline.data_loader import DocumentLoader
from app.data_pipeline.embedding_manager import split_text, initialize_embedding_model, create_and_store_embeddings
import warnings
warnings.filterwarnings("ignore")


logger = logging.getLogger(__name__)

from app.settings import Config
conf = Config()

upload_router = APIRouter()

UPLOAD_DIR = conf.UPLOAD_DIR

COLLECTION_NAME = conf.COLLECTION_NAME
PERSIST_DIRECTORY = conf.PERSIST_DIRECTORY


# Type of files allowed to be uploaded
def is_allowed_file(filename):
    allowed_extensions = {"pdf", "csv", "doc", "docx", "txt", "xlsx", "xls"}
    return "." in filename and filename.rsplit(".", 1)[1].lower() in allowed_extensions


def empty_folder(folder_path):
    # Check if the folder exists
    if os.path.exists(folder_path):
        # Iterate through all items in the folder
        for item in os.listdir(folder_path):
            item_path = os.path.join(folder_path, item)
            # Remove files and folders
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.remove(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
        print(f"The folder '{folder_path}' has been emptied.")
    else:
        print(f"The folder '{folder_path}' does not exist.")




@upload_router.post("/upload")
async def upload_files(files: List[UploadFile] = File(...)):
    try:
        # Empty the upload directory
        empty_folder(UPLOAD_DIR)
        logger.info(f"{UPLOAD_DIR} is now empty.")

        # Check if UPLOAD_DIR exists
        if not os.path.exists(UPLOAD_DIR):
            logger.error(f"Upload directory '{UPLOAD_DIR}' does not exist.")
            return JSONResponse(content={"error": f"Folder '{UPLOAD_DIR}' does not exist"}, status_code=404)

        # Save uploaded files
        for uploaded_file in files:
            if not is_allowed_file(uploaded_file.filename):
                logger.error(f"File type of '{uploaded_file.filename}' not allowed.")
                return JSONResponse(content={"error": "File type not allowed"}, status_code=400)

            file_path = os.path.join(UPLOAD_DIR, uploaded_file.filename)
            with open(file_path, "wb") as buffer:
                buffer.write(uploaded_file.file.read())
            logger.info(f"File '{uploaded_file.filename}' uploaded successfully.")

        # Load documents from the upload directory
        try:
            document_loader = DocumentLoader(UPLOAD_DIR)
            documents = document_loader.load_all_documents()
            logger.info(f"Loaded {len(documents)} documents.")
        except Exception as e:
            logger.error(f"Error loading documents: {e}")
            return JSONResponse(content={"error": "Failed to load documents"}, status_code=500)

        # Process documents into chunks for embedding
        try:
            chunks = split_text(documents)
            logger.info(f"Processed {len(chunks)} chunks for embedding.")
        except Exception as e:
            logger.error(f"Error processing documents: {e}")
            return JSONResponse(content={"error": "Failed to process documents"}, status_code=500)

        # Initialize the embedding model
        try:
            embedding_function = initialize_embedding_model()
        except Exception as e:
            logger.error(f"Error initializing embedding model: {e}")
            return JSONResponse(content={"error": "Failed to initialize embedding model"}, status_code=500)

        # Create and store embeddings
        try:
            create_and_store_embeddings(chunks, COLLECTION_NAME, embedding_function, PERSIST_DIRECTORY)
            logger.info("Embeddings created and stored successfully.")
        except Exception as e:
            logger.error(f"Error creating or storing embeddings: {e}")
            return JSONResponse(content={"error": "Failed to create and store embeddings"}, status_code=500)

        # Return success message if everything is successful
        return JSONResponse(content={"message": "Documents successfully loaded and processed."})

    except Exception as e:
        logger.error(f"Unexpected error in upload_files endpoint: {e}")
        raise HTTPException(status_code=500, detail="Internal server error.")


# @upload_router.post("/upload")
# async def upload_files(files: List[UploadFile] = File(...)):

#     empty_folder(UPLOAD_DIR)
#     logger.info(f" {UPLOAD_DIR} is empty Now")

#     if not os.path.exists(UPLOAD_DIR):
#         logger.error(f"{UPLOAD_DIR}' does not exist")
#         return JSONResponse(content={"error": f"Folder '{UPLOAD_DIR}' does not exist"}, status_code=404)

#     for uploaded_file in files:
#         if not is_allowed_file(uploaded_file.filename):
#             logger.error(f"File type not allowed")
#             return JSONResponse(content={"error": "File type not allowed"}, status_code=400)

#         file_path = os.path.join(UPLOAD_DIR, uploaded_file.filename)
#         with open(file_path, "wb") as buffer:
#             buffer.write(uploaded_file.file.read())

#     logger.info(f"Files uploaded successfully")

#     try:
#         document_loader = DocumentLoader(UPLOAD_DIR)
#         documents = document_loader.load_all_documents()
#         logger.info(f"Loaded {len(documents)} documents.")
#     except Exception as e:
#         logger.error(f"Error loading documents: {e}")
#         return

#     try:
#         chunks = split_text(documents)
#         logger.info(f"Processed {len(chunks)} chunks for embedding.", )
#     except Exception as e:
#         logger.error(f"Error processing documents: {e}")
#         return

#     try:
#         embedding_function = initialize_embedding_model()
#     except Exception:
#         return  # Stop execution if embedding model fails

#     create_and_store_embeddings(chunks, COLLECTION_NAME, embedding_function, PERSIST_DIRECTORY)
#     logger.info(f'Documents Successfully loades')
#     return JSONResponse(content={"message": "Documents Successfully loades"})
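The matching client call for this route — a minimal sketch using requests; the file names are placeholders, and the multipart field name must be "files" to match the List[UploadFile] parameter:

# Hypothetical client call to the /upload route.
import requests

with open("report.pdf", "rb") as f1, open("notes.txt", "rb") as f2:
    files = [
        ("files", ("report.pdf", f1, "application/pdf")),
        ("files", ("notes.txt", f2, "text/plain")),
    ]
    resp = requests.post("http://localhost:7860/upload", files=files, timeout=300)

print(resp.status_code, resp.json())   # expect {"message": "Documents successfully loaded and processed."}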
app/data_pipeline/__init__.py
ADDED
@@ -0,0 +1,2 @@
from app.data_pipeline.data_loader import DocumentLoader
from app.data_pipeline.embedding_manager import *
app/data_pipeline/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (259 Bytes)

app/data_pipeline/__pycache__/data_loader.cpython-310.pyc
ADDED
Binary file (3.84 kB)

app/data_pipeline/__pycache__/embedding_manager.cpython-310.pyc
ADDED
Binary file (2.71 kB)
app/data_pipeline/data_loader.py
ADDED
@@ -0,0 +1,153 @@
import os
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader
from langchain_community.document_loaders.excel import UnstructuredExcelLoader
from langchain_community.document_loaders import PDFMinerLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import Docx2txtLoader
from langchain.docstore.document import Document

import logging

INGEST_THREADS = os.cpu_count() or 8

DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".pdf": PDFMinerLoader,
    ".csv": CSVLoader,
    ".csv": UnstructuredCSVLoader,
    ".xls": UnstructuredExcelLoader,
    ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader
    # Add additional file types here if necessary
}

logger = logging.getLogger(__name__)

class DocumentLoader():
    def __init__(self, source_dir: str):
        """
        Initializes the loader with the directory path from which to load documents.
        """
        self.source_dir = source_dir
        logger.info(f"DocumentLoader initialized with source directory: {self.source_dir}")

    def load_single_document(self, file_path: str):
        """
        Loads a single document based on its file extension using the appropriate loader.

        Args:
            file_path (str): Path to the document file.

        Returns:
            List[Document]: Loaded document(s) as LangChain Document instances.
        """

        file_extension = os.path.splitext(file_path)[1]
        loader_class = DOCUMENT_MAP.get(file_extension)

        if loader_class:
            loader = loader_class(file_path)
            logger.info(f"Loading document: {file_path}")
            try:
                documents = loader.load()
                logger.info(f"Successfully loaded document: {file_path}")
                return documents
            except Exception as e:
                logger.error(f"Error loading document {file_path}: {e}", exc_info=True)
                raise
        else:
            logger.warning(f"Unsupported document type for file: {file_path}")
            raise ValueError(f"Unsupported document type: {file_extension}")


    def load_all_documents(self) -> list[Document]:
        """
        Loads all documents from the source directory, including documents in subdirectories.

        Returns:
            List[Document]: List of all loaded documents from the source directory.
        """
        paths = self._gather_file_paths()  # Gather file paths of documents to load
        all_docs = []

        logger.info(f"Loading all documents from directory: {self.source_dir}")

        # # Load each document sequentially
        # for file_path in paths:
        #     documents = self.load_single_document(file_path)
        #     all_docs.extend(documents)  # Append loaded documents to the result list

        # # return all_docs


        for file_path in paths:
            try:
                documents = self.load_single_document(file_path)
                all_docs.extend(documents)  # Append loaded documents to the result list
            except ValueError as e:
                logger.error(f"Skipping file {file_path}: {e}")
            except Exception as e:
                logger.error(f"An unexpected error occurred while loading {file_path}: {e}", exc_info=True)

        logger.info(f"Finished loading documents. Total documents loaded: {len(all_docs)}")
        return all_docs

    def _gather_file_paths(self):
        """
        Walks through the source directory and gathers file paths of documents
        that match the supported file types in DOCUMENT_MAP.

        Returns:
            List[str]: List of file paths for documents to load.
        """
        file_paths = []
        logger.debug(f"Scanning for files in directory: {self.source_dir}")
        for root, _, files in os.walk(self.source_dir):
            for file_name in files:
                file_extension = os.path.splitext(file_name)[1]
                if file_extension in DOCUMENT_MAP:
                    full_path = os.path.join(root, file_name)
                    file_paths.append(full_path)
                    logger.debug(f"Found document: {full_path}")

        logger.info(f"Total files found for loading: {len(file_paths)}")
        return file_paths



# if __name__ == "__main__":
#     source_directory = os.path.join(os.path.dirname(__file__), '..', 'Data')
#     document_loader = DocumentLoader(source_directory)

#     documents = document_loader.load_all_documents()


# from langchain_community.embeddings import OpenAIEmbeddings
# from langchain_community.vectorstores import FAISS
# directory_path = os.path.join(os.path.dirname(__file__), '..', 'Data')
# documents = load_documents(directory_path)
# print(documents)

# print(os.path.join(os.path.dirname(__file__), '..', 'Data'))
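DocumentLoader can also be exercised on its own, outside the upload route — a minimal sketch, assuming a local folder containing a few supported files (the path is a placeholder):

# Standalone usage sketch of DocumentLoader.
from app.data_pipeline.data_loader import DocumentLoader

loader = DocumentLoader("uploads")       # any folder with .pdf/.txt/.docx/.csv/.xlsx files
docs = loader.load_all_documents()       # unsupported extensions are skipped and logged
print(len(docs), "documents loaded")
print(docs[0].metadata if docs else "no documents found")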
app/data_pipeline/embedding_manager.py
ADDED
@@ -0,0 +1,109 @@
import os
import logging
from typing import List
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings


from app.settings import Config

conf = Config()

OPENAI_API_KEY = conf.API_KEY
PERSIST_DIRECTORY = conf.PERSIST_DIRECTORY
COLLECTION_NAME = conf.COLLECTION_NAME


# Set up logging
import logging

logger = logging.getLogger(__name__)

def initialize_embedding_model():
    """Initialize the embedding model based on the availability of the OpenAI API key."""
    try:
        if OPENAI_API_KEY:
            logger.info("Using OpenAI embedding model.")
            embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
        else:
            logger.info("Using Hugging Face embedding model.")
            embedding_model = HuggingFaceEmbeddings(
                model_name=conf.MODEL_NAME,
                model_kwargs=conf.MODEL_KWARGS,
                encode_kwargs=conf.ENCODE_KWARGS
            )
        return embedding_model
    except Exception as e:
        logger.error(f"Error initializing embedding model: {e}")
        raise



def split_text(documents: List[str]) -> List[str]:
    """Split documents into smaller chunks."""
    try:
        logger.info("Splitting documents into chunks...")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=conf.CHUNK_SIZE, chunk_overlap=conf.CHUNK_OVERLAP)
        chunks = text_splitter.split_documents(documents)
        logger.info("Document splitting completed.")
        return chunks
    except Exception as e:
        logger.error(f"Error splitting text: {e}")
        raise

def get_chroma_client(collection_name: str, embedding_function, persist_directory: str):
    """Initialize and return a Chroma client for a specific collection."""
    try:
        logger.info(f"Creating Chroma client for collection: {collection_name}")
        return Chroma(
            collection_name=collection_name,
            embedding_function=embedding_function,
            persist_directory=persist_directory
        )
    except Exception as e:
        logger.error(f"Error creating Chroma client: {e}")
        raise

def create_and_store_embeddings(chunks: List[str], collection_name: str, embedding_function, persist_directory: str):
    """Create and store embeddings for document chunks."""
    try:
        vector_db = get_chroma_client(collection_name, embedding_function, persist_directory)
        vector_db.add_documents(chunks)
        logger.info(f"Embeddings created for collection {collection_name} and saved to {persist_directory}.")
    except Exception as e:
        logger.error(f"Error creating and storing embeddings: {e}")
        raise

# def main():
#     source_directory = conf.DATA_DIRECTORY
#     document_loader = DocumentLoader(source_directory)
#     try:
#         documents = document_loader.load_all_documents()
#         logger.info(f"Loaded {len(documents)} documents.")
#     except Exception as e:
#         logger.error(f"Error loading documents: {e}")
#         return

#     # Split documents into chunks
#     try:
#         chunks = split_text(documents)
#         logger.info(f"Processed {len(chunks)} chunks for embedding.", )
#     except Exception as e:
#         logger.error(f"Error processing documents: {e}")
#         return

#     # Initialize embedding model
#     try:
#         embedding_function = initialize_embedding_model()
#     except Exception:
#         return  # Stop execution if embedding model fails

#     # Create and store embeddings
#     create_and_store_embeddings(chunks, COLLECTION_NAME, embedding_function, PERSIST_DIRECTORY)

# if __name__ == "__main__":
#     main()
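Taken together with data_loader.py, these helpers form the indexing pipeline; a minimal offline sketch, assuming the same Config-driven collection and persist directory as above and a placeholder upload folder:

# Offline indexing sketch: load -> split -> embed -> store.
from app.data_pipeline.data_loader import DocumentLoader
from app.data_pipeline.embedding_manager import (
    split_text, initialize_embedding_model, create_and_store_embeddings,
    COLLECTION_NAME, PERSIST_DIRECTORY,
)

docs = DocumentLoader("uploads").load_all_documents()
chunks = split_text(docs)                    # RecursiveCharacterTextSplitter with CHUNK_SIZE/CHUNK_OVERLAP
embedding = initialize_embedding_model()     # OpenAI if an API key is set, otherwise Hugging Face
create_and_store_embeddings(chunks, COLLECTION_NAME, embedding, PERSIST_DIRECTORY)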
app/logging_config.py
ADDED
@@ -0,0 +1,45 @@
import logging
from logging.handlers import RotatingFileHandler
import os
from app.settings import Config

# Define whether logging should be enabled
is_logging = True  # Set to False to disable logging

conf = Config()
# Log file settings
LOG_FILE = f"{conf.LOG_DIR}/app.log"
LOG_LEVEL = logging.INFO

def setup_logging():
    """Configure logging for the entire application."""
    if not is_logging:
        # Disable all logging if is_logging is False
        logging.disable(logging.CRITICAL)
        return

    # Create a logger
    logger = logging.getLogger()
    logger.setLevel(LOG_LEVEL)

    # Create a rotating file handler to store logs in a file
    file_handler = RotatingFileHandler(LOG_FILE, maxBytes=5 * 1024 * 1024, backupCount=2, encoding='utf-8')
    file_handler.setLevel(LOG_LEVEL)

    # Create a stream handler to log to console
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(LOG_LEVEL)

    # Define the format for log messages
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    stream_handler.setFormatter(formatter)

    # Add the handlers to the logger
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)

    logging.info("Logging setup complete")

# Call the function to set up logging when the module is imported
setup_logging()
app/main.py
ADDED
@@ -0,0 +1,32 @@
from fastapi import FastAPI, Body, Request, Form
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
from app.api.answer import answer_router
from app.api.upload import upload_router

from app import logging_config
from app.settings import Config
import warnings
warnings.filterwarnings("ignore")

conf = Config()

def create_app():

    app = FastAPI()
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(answer_router)  # , prefix="/api/v1"
    app.include_router(upload_router)  # , prefix="/api/v1"

    return app


app = create_app()
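To run the service without Docker, a minimal sketch from the repository root (assuming requirements.txt is installed); it mirrors the CMD in the Dockerfile:

# run_local.py (hypothetical helper) -- start the API the same way the Dockerfile does.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860)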
app/rag_pipeline/__init__.py
ADDED
@@ -0,0 +1,3 @@
from app.rag_pipeline.prompt_utils import contex_retriever_prompt, conversion_retriever_prompt
from app.rag_pipeline.retriever_chain import RetrieverChain
from app.rag_pipeline.chroma_client import get_chroma_client
app/rag_pipeline/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (396 Bytes)

app/rag_pipeline/__pycache__/chroma_client.cpython-310.pyc
ADDED
Binary file (617 Bytes)

app/rag_pipeline/__pycache__/model_initializer.cpython-310.pyc
ADDED
Binary file (1.5 kB)

app/rag_pipeline/__pycache__/model_loader.cpython-310.pyc
ADDED
Binary file (3.37 kB)

app/rag_pipeline/__pycache__/prompt_utils.cpython-310.pyc
ADDED
Binary file (941 Bytes)

app/rag_pipeline/__pycache__/retriever_chain.cpython-310.pyc
ADDED
Binary file (2.75 kB)

app/rag_pipeline/__pycache__/retriver_chain.cpython-310.pyc
ADDED
Binary file (2.87 kB)
app/rag_pipeline/chroma_client.py
ADDED
@@ -0,0 +1,15 @@
from langchain_chroma import Chroma
import logging


logger = logging.getLogger(__name__)

def get_chroma_client(collection_name, embedding_function, persist_directory):
    try:
        logging.info("Setting up chroma client")
        return Chroma(collection_name=collection_name,
                      embedding_function=embedding_function,
                      persist_directory=persist_directory)
    except Exception as e:
        logging.error(f"Failed to initialize Chroma client: {e}")
        raise
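The persisted collection can also be queried directly through this client — a minimal sketch, assuming embeddings were already stored by the /upload route; the query string is a placeholder:

# Direct query against the persisted Chroma collection.
from app.rag_pipeline.chroma_client import get_chroma_client
from app.data_pipeline.embedding_manager import initialize_embedding_model
from app.settings import Config

conf = Config()
db = get_chroma_client(conf.COLLECTION_NAME, initialize_embedding_model(), conf.PERSIST_DIRECTORY)
for doc in db.similarity_search("payment terms", k=3):
    print(doc.metadata, doc.page_content[:80])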
app/rag_pipeline/model_initializer.py
ADDED
@@ -0,0 +1,50 @@
import os
import logging
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from app.rag_pipeline.model_loader import load_model
from langchain_huggingface import HuggingFacePipeline
from app.settings import Config
conf = Config()

CACHE_DIR = conf.CACHE_DIR

logger = logging.getLogger(__name__)

os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'

def initialize_models(openai_api_key=None, model_id=None, model_basename=None):
    """
    Initializes embedding and chat model based on the OpenAI API key availability.

    Returns:
        tuple: (embedding_model, llm_model)
    """

    try:
        if openai_api_key:
            embedding_model = OpenAIEmbeddings(api_key=openai_api_key)
            llm_model = ChatOpenAI(api_key=openai_api_key)
            logger.info("Using OpenAI models.")
        else:
            embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",  # all-mpnet-base-v2
                                                    model_kwargs={'device': 'cpu'},
                                                    encode_kwargs={'normalize_embeddings': False},
                                                    cache_folder=CACHE_DIR
                                                    )
            # llm_model = load_model(device_type="cpu", model_id=model_id, model_basename=model_basename, LOGGING=logger)
            llm_model = HuggingFacePipeline.from_model_id(
                model_id="gpt2",  # "google/flan-t5-small",
                task="text-generation",
            )

            # TheBloke/Mistral-7B-v0.1-GGUF
            # HuggingFaceH4/zephyr-7b-beta

            logger.info("Using Hugging Face embeddings and local LLM model.")

        return embedding_model, llm_model

    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        raise
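The two code paths can be checked with a small sketch; the OpenAI key below is a placeholder, and the no-key branch will download MiniLM and gpt2 from the Hub on first use:

# Sketch only: both branches of initialize_models.
from app.rag_pipeline.model_initializer import initialize_models

emb, llm = initialize_models(openai_api_key="sk-...")      # OpenAIEmbeddings + ChatOpenAI
emb_hf, llm_hf = initialize_models(openai_api_key=None)    # MiniLM embeddings + local gpt2 pipeline
print(type(emb_hf).__name__, type(llm_hf).__name__)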
app/rag_pipeline/model_loader.py
ADDED
@@ -0,0 +1,148 @@
import logging
from auto_gptq import AutoGPTQForCausalLM
from huggingface_hub import hf_hub_download
# from langchain.llms import LlamaCpp
from langchain_community.llms import LlamaCpp
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaForCausalLM,
    LlamaTokenizer,
)
from langchain_community.llms import HuggingFacePipeline
from langchain.callbacks.manager import CallbackManager
from transformers import GenerationConfig, pipeline
import torch
import os
from app.settings import Config

conf = Config()


logger = logging.getLogger(__name__)

MODELS_PATH = conf.MODELS_PATH

CONTEXT_WINDOW_SIZE = 2048
MAX_NEW_TOKENS = 2048
N_BATCH = 512
N_GPU_LAYERS = 1

CACHE_DIR = conf.CACHE_DIR  # "./models/"

os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'

def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, logging):

    try:
        logging.info("Using Llamacpp for GGUF/GGML quantized models")
        model_path = hf_hub_download(
            repo_id=model_id,
            filename=model_basename,
            resume_download=True,
            # force_download=True,
            cache_dir=MODELS_PATH,
        )
        kwargs = {
            "model_path": model_path,
            "n_ctx": CONTEXT_WINDOW_SIZE,
            "max_tokens": MAX_NEW_TOKENS,
            "n_batch": N_BATCH,  # set this based on your GPU & CPU RAM
        }
        if device_type.lower() == "mps":
            kwargs["n_gpu_layers"] = 1
        if device_type.lower() == "cuda":
            kwargs["n_gpu_layers"] = N_GPU_LAYERS  # set this based on your GPU

        return LlamaCpp(**kwargs)
    except:
        if "ggml" in model_basename:
            logging.info("If you were using GGML model, LLAMA-CPP Dropped Support, Use GGUF Instead")
        return None


def load_quantized_model_qptq(model_id, model_basename, device_type, logging):
    logging.info("Using AutoGPTQForCausalLM for quantized models")

    if ".safetensors" in model_basename:
        # Remove the ".safetensors" ending if present
        model_basename = model_basename.replace(".safetensors", "")

    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    logging.info("Tokenizer loaded")

    model = AutoGPTQForCausalLM.from_quantized(
        model_id,
        model_basename=model_basename,
        use_safetensors=True,
        trust_remote_code=True,
        device_map="auto",
        use_triton=False,
        quantize_config=None,
    )
    return model, tokenizer

def load_full_model(model_id, model_basename, device_type, logging):

    if device_type.lower() in ["mps", "cpu"]:
        logging.info("Using LlamaTokenizer")
        tokenizer = LlamaTokenizer.from_pretrained(model_id, cache_dir=CACHE_DIR, use_auth_token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
        model = LlamaForCausalLM.from_pretrained(model_id, cache_dir=CACHE_DIR, use_auth_token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
    else:
        logging.info("Using AutoModelForCausalLM for full models")
        tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=CACHE_DIR, use_auth_token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
        logging.info("Tokenizer loaded")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            cache_dir=MODELS_PATH,
            use_auth_token=os.environ["HUGGINGFACEHUB_API_TOKEN"]
            # trust_remote_code=True,  # set these if you are using NVIDIA GPU
            # load_in_4bit=True,
            # bnb_4bit_quant_type="nf4",
            # bnb_4bit_compute_dtype=torch.float16,
            # max_memory={0: "15GB"}  # Uncomment this line if you encounter CUDA out of memory errors
        )
        model.tie_weights()
    return model, tokenizer



def load_model(device_type, model_id, model_basename=None, LOGGING=logger):
    logger.info(f"Loading Model: {model_id}, on: {device_type}")
    logger.info("This action can take a few minutes!")

    if model_basename is not None:
        if ".gguf" in model_basename.lower():
            llm = load_quantized_model_gguf_ggml(
                model_id, model_basename, device_type, LOGGING)
            return llm
        elif ".ggml" in model_basename.lower():
            model, tokenizer = load_quantized_model_gguf_ggml(
                model_id, model_basename, device_type, LOGGING)
        else:
            model, tokenizer = load_quantized_model_qptq(
                model_id, model_basename, device_type, LOGGING)
    else:
        model, tokenizer = load_full_model(
            model_id, model_basename, device_type, LOGGING)

    # Load configuration from the model to avoid warnings
    generation_config = GenerationConfig.from_pretrained(model_id)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=MAX_NEW_TOKENS,
        temperature=0.1,
        # top_p=0.95,
        repetition_penalty=1.15,
        generation_config=generation_config,
    )

    local_llm = HuggingFacePipeline(pipeline=pipe)
    logger.info("Local LLM Loaded")
    return local_llm
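A hedged sketch of calling load_model for a GGUF model on CPU; the repo and file names are the ones mentioned in comments in retriever_chain.py (not verified here), and the call may return None if the llama.cpp load fails:

# Sketch only: load a GGUF-quantized Mistral through llama.cpp on CPU.
from app.rag_pipeline.model_loader import load_model

llm = load_model(
    device_type="cpu",
    model_id="TheBloke/Mistral-7B-v0.1-GGUF",
    model_basename="mistral-7b-v0.1.Q4_0.gguf",
)
if llm is not None:
    print(llm.invoke("Summarize what a vector store is in one sentence."))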
app/rag_pipeline/prompt_utils.py
ADDED
@@ -0,0 +1,31 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


contex_retriever_prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])


conversion_retriever_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])


system_prompt = (
    "You are an assistant specializing in answering questions accurately based on provided context. "
    "Use the context to answer the question concisely. If the answer is not found in the context, respond with 'I'm not sure'."
    "\n\n"
    "Context:\n{context}\n\n"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{input}"),
    ]
)
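How qa_prompt resolves its placeholders can be seen with a short sketch; the context and question below are made up:

# Sketch: render qa_prompt with placeholder values.
from app.rag_pipeline.prompt_utils import qa_prompt

messages = qa_prompt.format_messages(
    context="Invoices are payable within 30 days of receipt.",
    input="What is the payment deadline?",
)
for m in messages:
    print(m.type, ":", m.content)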
app/rag_pipeline/retriever_chain.py
ADDED
@@ -0,0 +1,136 @@
import logging
# import create_history_aware_retriever,
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from app.rag_pipeline.prompt_utils import qa_prompt
from app.rag_pipeline.chroma_client import get_chroma_client
from app.settings import Config

# from prompt_utils import qa_prompt
# from chroma_client import get_chroma_client



# import sys
# import os

# parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# sys.path.insert(0, parent_dir)
# from settings import Config


conf = Config()


MODELS_PATH = conf.MODELS_PATH  # '/models'

CONTEXT_WINDOW_SIZE = 2048
MAX_NEW_TOKENS = 2048
N_BATCH = 512
N_GPU_LAYERS = 1

MODEL_ID = conf.MODEL_ID  # "TheBloke/Mistral-7B-v0.1-GGUF"
MODEL_BASENAME = conf.MODEL_BASENAME  # "mistral-7b-v0.1.Q4_0.gguf"
device_type = 'cpu'

logger = logging.getLogger(__name__)

class RetrieverChain:
    def __init__(self, collection_name, embedding_function, persist_directory):
        try:
            self.vector_db = get_chroma_client(collection_name, embedding_function, persist_directory)
        except Exception as e:
            logger.error(f"Error creating RetrieverChain: {e}")
            raise

    def get_retriever(self):
        try:
            retriever = self.vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 2})

            return retriever
        except Exception as e:
            logger.error(f"Failed to get retriever: {e}")
            raise

    def get_conversational_rag_chain(self, llm):
        try:

            if self.get_retriever is None:
                logger.error("Retriever must not be None")
                raise ValueError("Retriever must not be None")
            if llm is None:
                logger.error("Model must not be None")
                raise ValueError("Model must not be None")


            question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
            return create_retrieval_chain(self.get_retriever(), question_answer_chain)
        except Exception as e:
            logger.error(f"Error creating RAG chain: {e}")
            raise


    def get_relevent_docs(self, user_input):

        try:
            docs = self.vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 6, "fetch_k": 3}).get_relevant_documents(user_input)
            logger.info(f"Relevent documents for {user_input}: {docs}")
            # Access the retrieved documents

            # print("Relevent Docs")
            # for doc in docs:
            #     print(doc.page_content)  # Access the original text
            #     print(doc.metadata)  # Access any metadata associated with the document
            # print("Relevent Docs end")
            return docs

        except Exception as e:
            logger.error(f"Error getting response: {e}")
            raise

    def get_response(self, user_input, llm):
        try:
            qa_rag_chain = self.get_conversational_rag_chain(llm)
            response = qa_rag_chain.invoke({"input": user_input})
            return response['answer']
        except Exception as e:
            logger.error(f"Error getting response: {e}")
            raise




# if __name__ == "__main__":
#     import os
#     from model_initializer import initialize_models


#     parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

#     openai_api_key = conf.API_KEY

#     embedding_model, llm_model = initialize_models(openai_api_key, model_id=MODEL_ID, model_basename=MODEL_BASENAME)

#     print(f"embeddi_modelng: {embedding_model}")
#     print(f"llm_model: {llm_model}")

#     collection_name = 'AI_assignment'

#     persist_directory = f'D:/AI Assignment/vector_store'
#     print(f"persist_directory: {persist_directory}")
#     while True:
#         print("Enter query: ")
#         user_query = input()
#         if user_query.lower() == 'exit':
#             break

#         retriever_qa = RetrieverChain(
#             collection_name=collection_name, embedding_function=embedding_model, persist_directory=persist_directory)
#         response = retriever_qa.get_response(user_input=user_query, llm=llm_model)
#         print(f"Response: {response}")
app/settings.py
ADDED
@@ -0,0 +1,33 @@
import os
from dotenv import load_dotenv

from pathlib import Path
env_path = Path(__file__).resolve().parent.parent / '.env'
load_dotenv(dotenv_path=env_path, override=True)

class Config:
    API_KEY = os.getenv('OPENAI_API_KEY')
    MODEL_ID = os.getenv('MODEL_ID')
    MODEL_BASENAME = os.getenv('MODEL_BASENAME')
    COLLECTION_NAME = os.getenv('COLLECTION_NAME')

    PERSIST_DIRECTORY = os.path.join(os.path.dirname(__file__), '..', 'vector_store')
    os.makedirs(PERSIST_DIRECTORY, exist_ok=True)

    UPLOAD_DIR = os.path.join(os.path.dirname(__file__), '..', 'uploads')
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    LOG_DIR = os.path.join(os.path.dirname(__file__), '..', 'log_dir')
    os.makedirs(LOG_DIR, exist_ok=True)

    MODELS_PATH = os.path.join(os.path.dirname(__file__), '..', 'models')

    CACHE_DIR = os.path.join(os.path.dirname(__file__), '..', 'models')
    os.makedirs(CACHE_DIR, exist_ok=True)
    # MODELS_PATH = '/models'

    MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
    MODEL_KWARGS = {'device': 'cpu'}
    ENCODE_KWARGS = {'normalize_embeddings': False}
    CHUNK_SIZE = 1024
    CHUNK_OVERLAP = 200
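Config expects its secrets in a .env file at the repository root (one level above the app package); a minimal sketch that writes placeholder values and reads them back — the model and collection names are only examples taken from comments elsewhere in this commit:

# Hypothetical .env written to the repository root, then read back through Config.
from pathlib import Path

Path(".env").write_text(
    "OPENAI_API_KEY=\n"                          # empty -> Hugging Face fallback models
    "MODEL_ID=TheBloke/Mistral-7B-v0.1-GGUF\n"
    "MODEL_BASENAME=mistral-7b-v0.1.Q4_0.gguf\n"
    "COLLECTION_NAME=AI_assignment\n"
)

from app.settings import Config

conf = Config()
print(conf.MODEL_ID, conf.COLLECTION_NAME, conf.PERSIST_DIRECTORY)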
requirements.txt
ADDED
Binary file (918 Bytes)