ayush7 committed on
Commit 0b22a5d · verified · 1 Parent(s): 35aa019

Upload 4 files

Files changed (4)
  1. app.py +17 -0
  2. gradio_app.py +34 -0
  3. rag.py +53 -0
  4. retrive.py +18 -0
app.py ADDED
@@ -0,0 +1,17 @@
+ import gradio as gr
+ from gradio_app import process_question
+
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=process_question,
+     inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
+     outputs=[
+         gr.Textbox(label="Answer"),
+         gr.Textbox(label="Sources"),
+     ],
+     title="NASA Report Q&A System",
+     description="Ask questions about the NASA reports and get answers with sources.",
+ )
+
+ if __name__ == "__main__":
+     iface.launch(share=True)  # share=True also serves a temporary public link
gradio_app.py ADDED
@@ -0,0 +1,34 @@
+ import os
+
+ import gradio as gr
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ from retrive import create_qa_chain_openai
+
+ # The QA chain wraps ChatOpenAI, so an OpenAI API key is required.
+ # Read it from the environment (set OPENAI_API_KEY before launching).
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+ # Initialize embeddings and load the existing vectorstore
+ embeddings = HuggingFaceEmbeddings(
+     model_name="sentence-transformers/all-mpnet-base-v2"
+ )
+ vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
+
+ # Initialize the QA chain
+ qa_chain = create_qa_chain_openai(vectorstore, OPENAI_API_KEY)
+
+
+ def process_question(question):
+     """Answer a question and return the answer plus a formatted source list."""
+     result = qa_chain.invoke({"query": question})
+
+     # Extract the answer and the documents it was drawn from
+     answer = result["result"]
+     sources = [
+         f"- {doc.metadata['source']}, page {doc.metadata['page']}"
+         for doc in result["source_documents"]
+     ]
+
+     return answer, "Sources:\n" + "\n\n".join(sources)
rag.py ADDED
@@ -0,0 +1,53 @@
+ """Build a persistent Chroma vector store from a directory of PDF reports."""
+ import os
+
+ from dotenv import load_dotenv
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ # Load environment variables (e.g. the OpenAI API key used by retrive.py)
+ load_dotenv()
+
+
+ def process_pdfs(pdf_directory):
+     """Process all PDFs in the specified directory and create a vector store."""
+     print("Processing PDFs...")
+     documents = []
+
+     # Load every PDF in the directory; PyPDFLoader yields one Document per
+     # page and records the file path ('source') and page number ('page')
+     # in the metadata, which gradio_app.py later shows as sources.
+     for file in os.listdir(pdf_directory):
+         if file.endswith(".pdf"):
+             print(f"Processing {file}...")
+             pdf_path = os.path.join(pdf_directory, file)
+             loader = PyPDFLoader(pdf_path)
+             documents.extend(loader.load())
+
+     # Split the documents into overlapping chunks
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=2000,
+         chunk_overlap=200,
+         length_function=len,
+     )
+     splits = text_splitter.split_documents(documents)
+
+     # Create embeddings with a local sentence-transformers model
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/all-mpnet-base-v2"
+     )
+
+     # Create the vector store and persist it to ./chroma_db so that
+     # gradio_app.py can reload it without re-indexing the PDFs
+     vectorstore = Chroma.from_documents(
+         documents=splits,
+         embedding=embeddings,
+         persist_directory="./chroma_db",
+     )
+
+     return vectorstore
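Nothing in this commit actually calls `process_pdfs`, yet gradio_app.py expects a persisted index at `./chroma_db`. A minimal one-off indexing run could look like the sketch below; the `./pdfs` directory name is an assumption, not part of the repository.

# One-off indexing run (sketch): build and persist the Chroma store
# that gradio_app.py loads at startup.
from rag import process_pdfs

process_pdfs("./pdfs")  # "./pdfs" is a placeholder for the directory holding the NASA report PDFs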
retrive.py ADDED
@@ -0,0 +1,18 @@
+ from langchain_openai import ChatOpenAI
+ from langchain.chains import RetrievalQA
+
+
+ def create_qa_chain_openai(vectorstore, key):
+     """Create a question-answering chain over the given vector store."""
+     # Initialize the language model (deterministic output with temperature=0)
+     llm = ChatOpenAI(api_key=key, temperature=0)
+
+     # Create the retrieval chain: "stuff" all of the top-k retrieved chunks
+     # into one prompt and return the source documents alongside the answer
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=vectorstore.as_retriever(search_kwargs={"k": 10}),
+         return_source_documents=True,
+     )
+
+     return qa_chain
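As a rough usage sketch (mirroring what gradio_app.py does at startup), the chain returned by `create_qa_chain_openai` can be queried directly; with `return_source_documents=True` the result dict carries both the answer and the retrieved chunks. The query string here is only an example.

# Sketch: load the persisted store, build the chain, and run one query.
# Assumes ./chroma_db was built with rag.process_pdfs and OPENAI_API_KEY is set.
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from retrive import create_qa_chain_openai

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
store = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
chain = create_qa_chain_openai(store, os.environ["OPENAI_API_KEY"])

result = chain.invoke({"query": "Summarise the main findings of the reports."})  # example query
print(result["result"])                 # generated answer
print(len(result["source_documents"]))  # up to k=10 retrieved chunks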