ReAG / utils.py
DrishtiSharma's picture
Create utils.py
07de8c7 verified
raw
history blame contribute delete
626 Bytes
import tempfile
from pathlib import Path

import streamlit as st
from langchain.schema import Document
from langchain_community.document_loaders import PyMuPDFLoader
# Function to format document content
def format_doc(doc: Document) -> str:
    """Render a document as a three-line text block: title, page, content.

    The title and page are looked up in ``doc.metadata``; each falls back to
    ``'Unknown'`` when its key is absent. The content line carries
    ``doc.page_content`` verbatim.
    """
    meta = doc.metadata
    title = meta.get('title', 'Unknown')
    page = meta.get('page', 'Unknown')
    lines = (
        f"Document_Title: {title}",
        f"Page: {page}",
        f"Content: {doc.page_content}",
    )
    return "\n".join(lines)
# Function to load and process document
def load_document(uploaded_file):
    """Write an uploaded PDF to a private temporary directory and parse it.

    Args:
        uploaded_file: An upload object exposing ``.name`` (the client-supplied
            file name) and ``.getbuffer()`` (the raw bytes), e.g. the value
            returned by Streamlit's file uploader.

    Returns:
        Whatever ``PyMuPDFLoader(path).load()`` returns for the saved file
        (a list of ``Document`` objects).

    Notes:
        The file is stored in a fresh directory created for this call and the
        directory is removed once parsing finishes. Path-valued metadata on
        the returned documents therefore points into that (now deleted)
        directory; only the final file-name component is stable.
    """
    # The upload name is client-controlled. Keep only its last path component
    # so values such as "../../etc/x" or "C:\\dir\\x.pdf" cannot steer the
    # write outside the temporary directory.
    raw_name = str(uploaded_file.name).replace("\\", "/")
    safe_name = Path(raw_name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # A per-call directory prevents two uploads with the same name from
    # overwriting each other, and guarantees cleanup even if parsing raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = Path(tmp_dir) / safe_name
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Parse while the file still exists. NOTE(review): relies on load()
        # reading the whole PDF before returning -- confirm against the
        # installed langchain_community version.
        loader = PyMuPDFLoader(str(file_path))
        return loader.load()