Spaces:

AI-trainer1
/

PDF_QA_Assistant

Running

App Files Files Community

AI-trainer1 committed 11 days ago

Commit

9a05fa5

verified ·

1 Parent(s): de303ae

Create app.py

Browse files

Files changed (1) hide show

app.py +129 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import gradio as gr
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_chroma import Chroma
+from langchain_groq import ChatGroq
+from langchain.chains import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+import os
+from dotenv import load_dotenv
+from helper import SYSTEM_PROMPT
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+# from langchain.embeddings import HuggingFaceEmbeddings # open source free embedding
+load_dotenv()
+class PDFQAProcessor:
+    SYSTEM_PROMPT = SYSTEM_PROMPT
+    llm = ChatGroq(
+        # model_name="deepseek-r1-distill-llama-70b",
+        model_name="llama3-70b-8192",
+        temperature=0.1,
+        max_tokens=3000,
+        api_key = os.getenv('GROQ_API_KEY')
+    )
+    # Setup RAG chain
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", SYSTEM_PROMPT),
+        ("human", "{input}"),
+    ])
+    question_answer_chain = create_stuff_documents_chain(llm, prompt)
+    # EMBEDDING_MODEL = "intfloat/e5-large-v2"
+    # embeddings = HuggingFaceEmbeddings(
+    #     model_name=EMBEDDING_MODEL,
+    #     model_kwargs={'device': 'cpu'},
+    #     encode_kwargs={'normalize_embeddings': True}
+    # )
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    CHUNK_SIZE = 700
+    CHUNK_OVERLAP = 150
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE,chunk_overlap = CHUNK_OVERLAP)
+    # persist_directory="./chroma_db"
+    def __init__(self):
+        self.vectorstore = None
+        self.retriever = None
+    def process_pdfs(self, pdf_files):
+        """Processing PDF files and creating vector store"""
+        if not pdf_files:
+            return "Please upload PDF files first!"
+        try:
+            # Load and split documents
+            docs = []
+            for pdf_file in pdf_files:
+                loader = PyPDFLoader(pdf_file.name)
+                docs.extend(loader.load())
+            splits = self.text_splitter.split_documents(docs)
+            # Create vector store
+            self.vectorstore = Chroma.from_documents(
+                documents=splits,
+                embedding=self.embeddings,
+                # persist_directory = self.persist_directory
+            )
+            self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": 10})
+            return "PDFs processed successfully! Ask your questions now."
+        except Exception as e:
+            return f"Error processing PDFs: {str(e)}"
+    def answer_question(self, question):
+        """Handling question answering"""
+        if not self.retriever:
+            return "Please process PDFs first!", None
+        try:
+            # Initialize LLM
+            rag_chain = create_retrieval_chain(self.retriever, self.question_answer_chain)
+            response = rag_chain.invoke({"input": question})
+            final_response = response["answer"] + "\n\nSources\n\n"
+            for info in response["context"]:
+                final_response += info.page_content + "\nSource of Info: " + info.metadata['source'] + "\nAt Page No: " + info.metadata['page_label']+"\n\n"
+            return final_response
+        except Exception as e:
+            return f"Error answering question: {str(e)}", None
+processor = PDFQAProcessor()
+with gr.Blocks(title="PDF QA Assistant") as demo:
+    with gr.Tab("Upload PDFs"):
+        file_input = gr.Files(label="Upload PDFs", file_types=[".pdf"])
+        process_btn = gr.Button("Process PDFs")
+        status_output = gr.Textbox(label="Processing Status")
+    with gr.Tab("Ask Questions"):
+        question_input = gr.Textbox(label="Your Question")
+        answer_output = gr.Textbox(label="Answer", interactive=False)
+        ask_btn = gr.Button("Ask Question")
+    process_btn.click(
+        processor.process_pdfs,
+        inputs=file_input,
+        outputs=status_output
+    )
+    # QA workflow
+    ask_btn.click(
+        processor.answer_question,
+        inputs=question_input,
+        outputs=[answer_output]
+    )
+if __name__ == "__main__":
+    demo.launch()