Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +17 -0
- gradio_app.py +34 -0
- rag.py +53 -0
- retrive.py +18 -0
app.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Entry point for the NASA Report Q&A Gradio app.
import gradio as gr

# Import the handler by name rather than `from gradio_app import *`, so it is
# obvious where `gr` and `process_question` come from. Importing gradio_app
# still runs its module-level setup (embeddings, vector store, QA chain).
from gradio_app import process_question

# Create Gradio interface: one question box in, answer and sources boxes out.
iface = gr.Interface(
    fn=process_question,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Sources"),
    ],
    title="NASA Report Q&A System",
    description="Ask questions about the NASA reports and get answers with sources.",
)

if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link; this is likely
    # unnecessary when the app is hosted on Hugging Face Spaces - confirm
    # against the Gradio docs before removing.
    iface.launch(share=True)
|
gradio_app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from retrive import create_qa_chain_openai
|
4 |
+
from rag import process_pdfs
|
5 |
+
from langchain_community.vectorstores import Chroma
|
6 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
+
import os
|
8 |
+
# import key
|
9 |
+
# OPENAI_API_KEY = key.api_key
|
10 |
+
# from dotenv import load_dotenv
|
11 |
+
# Credentials come from environment variables (never hard-coded in the repo).
api_key = os.environ.get("HUGGINGFACE_API_KEY")

# OPENAI_API_KEY is passed to create_qa_chain_openai() below but was never
# defined (its only assignment was commented out), so importing this module
# raised NameError. Prefer a dedicated OPENAI_API_KEY variable and fall back
# to the value read above.
# NOTE(review): confirm which environment variable actually holds the OpenAI
# key in the deployment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or api_key

# Initialize embeddings and load the existing vectorstore.
# The model name is kept identical to the one used in rag.process_pdfs so the
# query embeddings match the ones stored in ./chroma_db.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

# Initialize the QA chain once at import time; process_question() reuses it.
qa_chain = create_qa_chain_openai(vectorstore, OPENAI_API_KEY)
|
21 |
+
|
22 |
+
def process_question(question):
    """Answer a user's question with the module-level QA chain.

    Args:
        question: The question text entered in the Gradio textbox.

    Returns:
        A ``(answer, sources)`` tuple of strings. ``sources`` lists one
        ``- <source>, Page <page>...`` entry per retrieved document under a
        ``Sources:`` heading. For a blank question a short prompt is returned
        and the chain is not called.
    """
    # Guard against empty/whitespace-only input so no LLM call is spent on it.
    if not question or not question.strip():
        return "Please enter a question.", ""

    # `invoke` is the supported replacement for calling the chain directly.
    result = qa_chain.invoke({"query": question})

    # Extract answer and sources
    answer = result['result']
    sources = []
    for doc in result['source_documents']:
        # Tolerate documents whose metadata lacks a source or page entry
        # instead of failing the whole request with a KeyError.
        source = doc.metadata.get('source', 'unknown')
        page = doc.metadata.get('page', '?')
        sources.append(f"- {source}, Page {page}...")

    return answer, "\n\nSources:\n" + "\n\n".join(sources)
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
# f"- {doc.metadata['source']}, Page {doc.metadata['page']}"
|
rag.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain_community.document_loaders import PyPDFLoader
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
5 |
+
from langchain_community.vectorstores import Chroma
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
from langchain.chains import RetrievalQA
|
8 |
+
from langchain_community.llms import Ollama
|
9 |
+
from langchain.prompts import PromptTemplate # Added this import
|
10 |
+
|
11 |
+
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
## Load environment variables (for OpenAI API key)
|
17 |
+
load_dotenv()
|
18 |
+
|
19 |
+
def process_pdfs(pdf_directory):
    """Process all PDFs in the specified directory and create a vector store.

    Every ``.pdf`` file directly inside ``pdf_directory`` is loaded with
    ``PyPDFLoader``, split into overlapping chunks, embedded with the
    ``sentence-transformers/all-mpnet-base-v2`` model and stored in a Chroma
    collection persisted under ``./chroma_db``.

    Args:
        pdf_directory: Path of the directory containing the PDF files.

    Returns:
        The Chroma vector store built from the document chunks.
    """
    # The docstring must be the first statement; it previously followed this
    # print and was therefore never attached to the function.
    print("Processing PDFs...")
    documents = []

    # Load all PDFs from the directory. Sorting makes the ingestion order
    # reproducible (os.listdir order is arbitrary); the extension check is
    # case-insensitive so files named "*.PDF" are not silently skipped.
    for file in sorted(os.listdir(pdf_directory)):
        if file.lower().endswith('.pdf'):
            print(f"Processing {file}...")
            pdf_path = os.path.join(pdf_directory, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())

    # Split documents into chunks; sizes are measured in characters because
    # length_function is len.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
    )
    splits = text_splitter.split_documents(documents)

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )

    # Create and persist vector store
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory="./chroma_db",
    )

    return vectorstore
|
53 |
+
|
retrive.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from rag import *
|
2 |
+
|
3 |
+
|
4 |
+
def create_qa_chain_openai(vectorstore, key):
    """Build a RetrievalQA chain over ``vectorstore`` using an OpenAI chat model.

    Args:
        vectorstore: Vector store whose ``as_retriever`` supplies the context
            documents.
        key: API key handed to ``ChatOpenAI``.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type``, configured to
        return its source documents alongside the answer.
    """
    # Chat model created with the caller's key and temperature=0.
    chat_model = ChatOpenAI(api_key=key, temperature=0)
    print("Querying the vector store...")

    # Retriever configured with k=10 in its search kwargs.
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    # Assemble the chain arguments, then build and return the chain directly.
    chain_options = {
        "llm": chat_model,
        "chain_type": "stuff",
        "retriever": top_k_retriever,
        "return_source_documents": True,
    }
    return RetrievalQA.from_chain_type(**chain_options)
|