# MOSPI2 / upload_pdf_page.py
# Uploaded by akshansh36 ("Upload 8 files", commit 70124e2, verified)
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from helper.upload_file_to_s3 import upload_file
from helper.process_pdf import process_pdf
import time
# Populate os.environ from a .env file (when one exists) before reading settings.
load_dotenv()

# --- AWS settings (read here; not referenced again in the visible part of this file) ---
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME")

# --- MongoDB settings ---
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
COLLECTION_NAME2 = os.environ.get("COMPANY_COLLECTION_NAME")

# One client and database handle shared by the whole module.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]

# `collection` receives the per-file tag/category/status updates;
# `collection2` is queried for the documents holding the tag and category option lists.
collection = db[COLLECTION_NAME]
collection2 = db[COLLECTION_NAME2]
def _options_from(doc, key):
    """Return the list stored under *key* in *doc*, or ``[]`` when unavailable.

    Handles a missing document, a missing key, and a key whose stored value is
    ``None``/empty, so the result can always be passed to ``st.multiselect``.
    """
    return (doc or {}).get(key) or []


def _record_pdf_status(object_url, tags, categories, status):
    """Set tags, categories and status on the document matching *object_url*.

    ``update_one`` is called without ``upsert``, so this only has an effect if
    a document with this ``object_url`` already exists.
    NOTE(review): presumably that document is created by ``upload_file`` or
    ``process_pdf`` — confirm against those helpers.
    """
    collection.update_one(
        {"object_url": object_url},
        {"$set": {
            "tags": tags,
            "categories": categories,
            "status": status,
        }},
    )


def upload_pdf():
    """Render the PDF upload page.

    Shows a "Back" button, a single-file PDF uploader and tag/category
    multi-selects whose options are read from ``collection2``. Once a file,
    at least one tag and at least one category are chosen, a "Submit" button
    appears. Submitting uploads the file via ``upload_file``, runs
    ``process_pdf`` on the resulting object URL, records the outcome
    ("processed" or "failed") in ``collection`` and, on success, switches
    ``st.session_state.page`` to ``"view_pdf"``.

    Returns:
        None. All results are communicated through Streamlit widgets,
        ``st.session_state`` and MongoDB updates.
    """
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()

    # File uploader (PDF files only, one at a time).
    uploaded_pdf = st.file_uploader(
        "Choose a PDF file to upload",
        type=["pdf"],
        accept_multiple_files=False,
    )

    # Fetch the available tags and categories from MongoDB.
    tags_options = _options_from(
        collection2.find_one({"type": "tags"}), "tags"
    )
    categories_options = _options_from(
        collection2.find_one({"type": "categories"}), "categories"
    )

    # Multi-select dropdowns for tags and categories.
    selected_tags = st.multiselect("Select Tags", options=tags_options)
    selected_categories = st.multiselect(
        "Select Categories", options=categories_options
    )

    # The Submit button is only offered once every input has been provided.
    if not (uploaded_pdf and selected_tags and selected_categories):
        return
    if not st.button("Submit"):
        return

    processed = False
    with st.spinner(text="Uploading and Processing PDF"):
        # Upload the file to storage.
        metadata = upload_file(uploaded_pdf, "PDF")
        object_url = metadata.get("object_url") if metadata else None

        if not object_url:
            # Previously a failed upload was silently ignored. Also, a None
            # object_url must never reach update_one: a {"object_url": None}
            # filter matches documents where the field is null or missing.
            st.error("Could not upload the PDF. Please try again.")
        elif process_pdf(object_url, selected_tags, selected_categories):
            _record_pdf_status(
                object_url, selected_tags, selected_categories, "processed"
            )
            st.success("PDF has been successfully uploaded and processed.")
            processed = True
        else:
            st.error("Could Not Process the PDF. Please try again.")
            _record_pdf_status(
                object_url, selected_tags, selected_categories, "failed"
            )

    if processed:
        st.write("Redirecting to View Page to view all uploaded pdfs")
        # Brief pause so the success message is visible before the page changes.
        time.sleep(2)
        st.session_state.page = "view_pdf"
        st.rerun()