ReAG / app.py
DrishtiSharma's picture
Update app.py
9fdd9f4 verified
import json
import os
import tempfile

import streamlit as st
from langchain.schema import Document
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama
from pydantic import BaseModel, Field

from prompts import REAG_SYSTEM_PROMPT, rag_prompt
# Page configuration for the Streamlit app.
st.set_page_config(page_title="ReAG", layout="centered")

# Copy the Groq API key from the app's secrets (Hugging Face Spaces secrets)
# into the process environment so the Groq client created below can find it.
os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]

# Relevancy judge: temperature 0 keeps the per-page verdicts as stable as
# possible between runs.
llm_relevancy = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

# Answer generator. ChatOllama limits the generated length through
# `num_predict`; `max_tokens` is not one of its parameters, so the 3000-token
# cap has to be passed under that name to take effect.
llm = ChatOllama(model="deepseek-r1:14b", temperature=0.6, num_predict=3000)
# Define schema for extracted content
class ResponseSchema(BaseModel):
    # Verdict for a single source document: the extracted passage, the
    # justification for picking it, and a flag marking the source as unusable.
    # All three fields are required.

    content: str = Field(
        default=...,
        description="Relevant content from the document",
    )
    reasoning: str = Field(
        default=...,
        description="Why this content was selected",
    )
    is_irrelevant: bool = Field(
        default=...,
        description="True if the content is irrelevant",
    )
class RelevancySchemaMessage(BaseModel):
    # Envelope that carries one ResponseSchema verdict under the "source" key.

    source: ResponseSchema
# Parser applied to the relevancy model's raw reply in extract_relevant_context.
# NOTE(review): the schema given here nests the verdict under "source", while
# the user prompt built in extract_relevant_context asks for a flat object with
# "content" / "reasoning" / "is_irrelevant" and the results are read with those
# flat keys — confirm which shape REAG_SYSTEM_PROMPT actually requests.
relevancy_parser = JsonOutputParser(pydantic_object=RelevancySchemaMessage)
# Function to format document
def format_doc(doc: Document) -> str:
    """Render one loaded page as the text block shown to the relevancy model.

    Args:
        doc: A loaded page. Its ``metadata`` mapping is read for ``title`` and
            ``page``; its ``page_content`` becomes the body.

    Returns:
        A string with three labelled sections: ``Document_Title``, ``Page``
        and ``Content``, separated by newlines.
    """
    metadata = doc.metadata
    # A title key can be present but empty or None; `or` lets the fallback
    # cover those cases as well as a missing key.
    title = metadata.get("title") or "Unknown"
    # Page numbers may legitimately be 0, so only a missing key falls back.
    page = metadata.get("page", "Unknown")
    return f"Document_Title: {title}\nPage: {page}\nContent: {doc.page_content}"
# Extract relevant context function
def _is_relevant(verdict) -> bool:
    """Return True when a parsed verdict marks its source as usable."""
    # A verdict without the flag is treated as irrelevant (fail closed).
    flag = verdict.get("is_irrelevant", True)
    if isinstance(flag, str):
        # The prompt asks for 'True'/'False', so the model may answer with a
        # string; accept any casing and surrounding whitespace.
        return flag.strip().lower() == "false"
    if isinstance(flag, (bool, int, float)):
        return not flag
    return False


def extract_relevant_context(question, documents):
    """Ask the relevancy model, page by page, which content answers the question.

    Each document is formatted with ``format_doc``, appended to
    ``REAG_SYSTEM_PROMPT`` as the available source, and sent to
    ``llm_relevancy`` together with a user prompt requesting a JSON verdict.
    The raw reply is printed and then parsed with ``relevancy_parser``.

    Args:
        question: The user's question.
        documents: Iterable of loaded pages accepted by ``format_doc``.

    Returns:
        A list with the ``content`` value of every verdict that was judged
        relevant, in document order. Pages whose reply cannot be parsed, lacks
        the expected keys, or has empty content are skipped rather than
        aborting the whole run.
    """
    from langchain_core.exceptions import OutputParserException

    # The user prompt depends only on the question, so build it once.
    # "Available source" is spelled to match the header used in the system
    # message below, which is the section this sentence refers to.
    prompt = (
        "Determine if the 'Available source' content supplied is sufficient and relevant to ANSWER the QUESTION asked.\n"
        f"QUESTION: {question}\n"
        "#INSTRUCTIONS TO FOLLOW\n"
        "1. Analyze the context provided thoroughly to check its relevancy to help formulizing a response for the QUESTION asked.\n"
        "2. STRICTLY PROVIDE THE RESPONSE IN A JSON STRUCTURE AS DESCRIBED BELOW:\n"
        "```json\n"
        '{"content":<<The page content of the document that is relevant or sufficient to answer the question asked>>,\n'
        '"reasoning":<<The reasoning for selecting The page content with respect to the question asked>>,\n'
        "\"is_irrelevant\":<<Specify 'True' if the content in the document is not sufficient or relevant.Specify 'False' if the page content is sufficient to answer the QUESTION>>\n"
        "}\n"
        "```\n"
    )

    final_context = []
    for doc in documents:
        system = f"{REAG_SYSTEM_PROMPT}\n\n# Available source\n\n{format_doc(doc)}"
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        response = llm_relevancy.invoke(messages)
        print(response.content)

        try:
            verdict = relevancy_parser.parse(response.content)
        except OutputParserException as exc:
            # One malformed reply should not discard the other pages.
            print(f"Skipping unparseable relevancy response: {exc}")
            continue

        # Accept the verdict both flat and wrapped under "source" (the shape
        # described by RelevancySchemaMessage).
        if isinstance(verdict, dict) and isinstance(verdict.get("source"), dict):
            verdict = verdict["source"]
        if not isinstance(verdict, dict) or not _is_relevant(verdict):
            continue

        content = verdict.get("content")
        if content:
            final_context.append(content)
    return final_context
# Generate response using RAG Prompt
def generate_response(question, final_context):
    """Generate the final answer from the selected context.

    Fills ``rag_prompt`` with the question and context, runs it through
    ``llm`` while a Streamlit spinner is shown, and returns the answer text.

    Args:
        question: The user's question.
        final_context: Relevant passages, as returned by
            ``extract_relevant_context``.

    Returns:
        The model's reply with any leading ``<think>...</think>`` reasoning
        section removed and surrounding whitespace stripped. If the reply
        contains no closing ``</think>`` tag, the whole reply is returned.
    """
    with st.spinner("📝 Generating Response..."):
        prompt = PromptTemplate(
            template=rag_prompt,
            input_variables=["question", "context"],
        )
        chain = prompt | llm
        response = chain.invoke({"question": question, "context": final_context})

    # Keep everything after the reasoning preamble. Taking only the last
    # blank-line-separated paragraph would truncate multi-paragraph answers.
    # rpartition yields the full text when the tag is absent.
    return response.content.rpartition("</think>")[2].strip()
# Streamlit UI
def _load_uploaded_pdf(uploaded_file):
    """Write the upload to a private temp file, parse it, then delete the file.

    A generated temp name is used instead of the client-supplied file name, so
    uploads cannot collide with each other or steer the path being written.
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        tmp_path = tmp.name
    try:
        # The file is closed before the loader opens it by path.
        return PyMuPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)


st.title("📚 ReAG")
st.markdown("Upload a PDF and ask questions based on its content.")

uploaded_file = st.file_uploader("📂 Upload PDF", type=["pdf"])

if uploaded_file:
    with st.spinner("📥 Uploading and processing PDF..."):
        docs = _load_uploaded_pdf(uploaded_file)
    st.success("✅ PDF uploaded and processed successfully!")

    question = st.text_input("❓ Ask a question about the document:")

    if st.button("🚀 Get Answer"):
        if not question.strip():
            # Give feedback instead of silently ignoring the click.
            st.warning("⚠️ Please enter a question first.")
        else:
            final_context = extract_relevant_context(question, docs)
            if final_context:
                answer = generate_response(question, final_context)
                st.success(f"🧠 **Response:**\n\n{answer}")
            else:
                st.warning("⚠️ No relevant information found in the document.")