AI-trainer1 committed
Commit 9a05fa5 · verified · 1 Parent(s): de303ae

Create app.py

Files changed (1)
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
import os
from dotenv import load_dotenv
from helper import SYSTEM_PROMPT
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain.embeddings import HuggingFaceEmbeddings  # open-source, free embedding alternative
load_dotenv()
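
# Note: load_dotenv() reads a local .env file into the environment. A minimal
# sketch of that file, assuming the key names used by the two clients below
# (GROQ_API_KEY is read explicitly in this script; GOOGLE_API_KEY is the
# variable langchain-google-genai conventionally picks up):
#
#   GROQ_API_KEY=...
#   GOOGLE_API_KEY=...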


class PDFQAProcessor:

    SYSTEM_PROMPT = SYSTEM_PROMPT

    # LLM served via the Groq API
    llm = ChatGroq(
        # model_name="deepseek-r1-distill-llama-70b",
        model_name="llama3-70b-8192",
        temperature=0.1,
        max_tokens=3000,
        api_key=os.getenv('GROQ_API_KEY')
    )

    # Set up the RAG prompt and the chain that stuffs retrieved documents into it
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT),
        ("human", "{input}"),
    ])

    question_answer_chain = create_stuff_documents_chain(llm, prompt)

    # EMBEDDING_MODEL = "intfloat/e5-large-v2"
    # embeddings = HuggingFaceEmbeddings(
    #     model_name=EMBEDDING_MODEL,
    #     model_kwargs={'device': 'cpu'},
    #     encode_kwargs={'normalize_embeddings': True}
    # )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    CHUNK_SIZE = 700
    CHUNK_OVERLAP = 150
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    # persist_directory = "./chroma_db"

    def __init__(self):
        self.vectorstore = None
        self.retriever = None

    def process_pdfs(self, pdf_files):
        """Process the uploaded PDF files and build the vector store."""
        if not pdf_files:
            return "Please upload PDF files first!"

        try:
            # Load and split documents
            docs = []
            for pdf_file in pdf_files:
                loader = PyPDFLoader(pdf_file.name)
                docs.extend(loader.load())

            splits = self.text_splitter.split_documents(docs)

            # Create the vector store and a top-10 retriever over it
            self.vectorstore = Chroma.from_documents(
                documents=splits,
                embedding=self.embeddings,
                # persist_directory=self.persist_directory
            )
            self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": 10})
            return "PDFs processed successfully! Ask your questions now."

        except Exception as e:
            return f"Error processing PDFs: {str(e)}"

    def answer_question(self, question):
        """Answer a question against the processed PDFs."""
        if not self.retriever:
            return "Please process PDFs first!"

        try:
            # Build the retrieval chain and run the query
            rag_chain = create_retrieval_chain(self.retriever, self.question_answer_chain)

            response = rag_chain.invoke({"input": question})

            final_response = response["answer"] + "\n\nSources\n\n"

            # Append each retrieved chunk with the file and page it came from
            for info in response["context"]:
                final_response += (
                    info.page_content
                    + "\nSource of Info: " + str(info.metadata.get('source', 'unknown'))
                    + "\nAt Page No: " + str(info.metadata.get('page_label', info.metadata.get('page', '?')))
                    + "\n\n"
                )

            return final_response
        except Exception as e:
            return f"Error answering question: {str(e)}"
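
# The processor can also be exercised without the Gradio UI. A minimal sketch
# (SimpleNamespace stands in here for Gradio's upload object, which exposes the
# file path via a .name attribute; "paper.pdf" is a placeholder path):
#
#   from types import SimpleNamespace
#   proc = PDFQAProcessor()
#   print(proc.process_pdfs([SimpleNamespace(name="paper.pdf")]))
#   print(proc.answer_question("What is this document about?"))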

processor = PDFQAProcessor()

with gr.Blocks(title="PDF QA Assistant") as demo:
    with gr.Tab("Upload PDFs"):
        file_input = gr.Files(label="Upload PDFs", file_types=[".pdf"])
        process_btn = gr.Button("Process PDFs")
        status_output = gr.Textbox(label="Processing Status")

    with gr.Tab("Ask Questions"):
        question_input = gr.Textbox(label="Your Question")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        ask_btn = gr.Button("Ask Question")

    # PDF processing workflow
    process_btn.click(
        processor.process_pdfs,
        inputs=file_input,
        outputs=status_output
    )

    # QA workflow
    ask_btn.click(
        processor.answer_question,
        inputs=question_input,
        outputs=[answer_output]
    )

if __name__ == "__main__":
    demo.launch()
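
The imports above span several LangChain distribution packages. A requirements sketch, with package names inferred from the import paths used in app.py (versions left unpinned as an assumption, since the commit includes no requirements file):

    gradio
    python-dotenv
    pypdf
    langchain
    langchain-community
    langchain-chroma
    langchain-groq
    langchain-google-genai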