Spaces:
Build error
Build error
import streamlit as st | |
from langchain_groq import ChatGroq | |
from langchain_ollama import ChatOllama | |
from langchain_community.document_loaders import PyMuPDFLoader | |
from langchain.schema import Document | |
from langchain_core.prompts import PromptTemplate | |
from langchain_core.output_parsers import JsonOutputParser | |
from pydantic import BaseModel, Field | |
import os | |
import json | |
from prompts import REAG_SYSTEM_PROMPT, rag_prompt | |
# Hugging Face Spaces API Key Handling
# NOTE: st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="ReAG", layout="centered")

# Set API Key using Hugging Face Secrets so the Groq client can authenticate.
# st.secrets raises if GROQ_API_KEY is not configured for this Space.
os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]

# Initialize LLM models:
# - llm_relevancy: Groq-hosted llama-3.3-70b used to screen document chunks
#   for relevancy (temperature=0 for deterministic judgments).
# - llm: Ollama-served deepseek-r1:14b used to generate the final answer.
llm_relevancy = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
llm = ChatOllama(model="deepseek-r1:14b", temperature=0.6, max_tokens=3000)
# Define schema for extracted content
class ResponseSchema(BaseModel):
    """Expected JSON reply from the relevancy LLM for a single document chunk."""
    content: str = Field(..., description="Relevant content from the document")
    reasoning: str = Field(..., description="Why this content was selected")
    is_irrelevant: bool = Field(..., description="True if the content is irrelevant")
class RelevancySchemaMessage(BaseModel):
    """Wrapper schema handed to the JSON parser; nests the reply under `source`.

    NOTE(review): downstream code indexes the parsed dict flat
    (items['is_irrelevant'], items['content']), not via 'source' — the
    wrapper level appears unused at parse time; confirm against the
    actual LLM output shape.
    """
    source: ResponseSchema

# Parser that converts the relevancy LLM's raw JSON text into a Python dict.
relevancy_parser = JsonOutputParser(pydantic_object=RelevancySchemaMessage)
# Render one document as a labeled plain-text block for the LLM prompt.
def format_doc(doc: Document) -> str:
    """Return a 'Document_Title / Page / Content' text rendering of *doc*.

    Missing metadata fields fall back to the string 'Unknown'.
    """
    title = doc.metadata.get('title', 'Unknown')
    page = doc.metadata.get('page', 'Unknown')
    return f"Document_Title: {title}\nPage: {page}\nContent: {doc.page_content}"
# Extract relevant context function
def extract_relevant_context(question, documents):
    """Screen each document chunk for relevancy to *question* via the Groq LLM.

    Args:
        question: The user's natural-language question.
        documents: Iterable of langchain Documents (one per PDF page).

    Returns:
        list[str]: the `content` of every chunk whose parsed
        ``is_irrelevant`` flag normalizes to 'false'. Chunks with a
        missing or malformed flag are excluded (fail closed).
    """
    screened = []
    for doc in documents:
        formatted_document = format_doc(doc)
        # The system prompt exposes this chunk as the model's only source.
        system = f"{REAG_SYSTEM_PROMPT}\n\n# Available source\n\n{formatted_document}"
        # 'Available source' below must match the header injected above
        # (original said 'Avaiable source', which the model cannot match).
        prompt = f"""Determine if the 'Available source' content supplied is sufficient and relevant to ANSWER the QUESTION asked.
QUESTION: {question}
#INSTRUCTIONS TO FOLLOW
1. Analyze the context provided thoroughly to check its relevancy to help formulating a response for the QUESTION asked.
2. STRICTLY PROVIDE THE RESPONSE IN A JSON STRUCTURE AS DESCRIBED BELOW:
```json
{{"content":<<The page content of the document that is relevant or sufficient to answer the question asked>>,
"reasoning":<<The reasoning for selecting The page content with respect to the question asked>>,
"is_irrelevant":<<Specify 'True' if the content in the document is not sufficient or relevant.Specify 'False' if the page content is sufficient to answer the QUESTION>>
}}
```
"""
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        response = llm_relevancy.invoke(messages)
        print(response.content)  # debug: surface raw LLM reply in Space logs
        screened.append(relevancy_parser.parse(response.content))

    # Keep only chunks the model marked relevant. The model may return the
    # flag as a real bool or as a string ('False', 'false', 'FALSE'), so
    # normalize before comparing; a missing key defaults to irrelevant.
    final_context = [
        item['content']
        for item in screened
        if str(item.get('is_irrelevant', True)).strip().lower() == 'false'
    ]
    return final_context
# Generate response using RAG Prompt
def generate_response(question, final_context):
    """Run the RAG prompt through the answer LLM and return its final paragraph."""
    with st.spinner("π Generating Response..."):
        rag_template = PromptTemplate(
            template=rag_prompt,
            input_variables=["question", "context"],
        )
        answer_chain = rag_template | llm
        llm_output = answer_chain.invoke(
            {"question": question, "context": final_context}
        )
    # Keep only the text after the last blank line — presumably to drop the
    # deepseek-r1 reasoning preamble; TODO confirm against model output format.
    return llm_output.content.split("\n\n")[-1]
# Streamlit UI
st.title("π ReAG")
st.markdown("Upload a PDF and ask questions based on its content.")

uploaded_file = st.file_uploader("π Upload PDF", type=["pdf"])

if uploaded_file:
    with st.spinner("π₯ Uploading and processing PDF..."):
        # Persist the upload to /tmp so PyMuPDFLoader can read it from disk
        # (Spaces containers allow writes there).
        file_path = f"/tmp/{uploaded_file.name}"
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        loader = PyMuPDFLoader(file_path)
        docs = loader.load()  # presumably one Document per PDF page — TODO confirm
    st.success("β PDF uploaded and processed successfully!")

    question = st.text_input("β Ask a question about the document:")
    if st.button("π Get Answer"):
        if question:
            # Screen every chunk for relevancy, then answer from the survivors.
            final_context = extract_relevant_context(question, docs)
            if final_context:
                answer = generate_response(question, final_context)
                st.success(f"π§ **Response:**\n\n{answer}")
            else:
                st.warning("β οΈ No relevant information found in the document.")