"""Streamlit page for uploading a PDF, tagging it and triggering processing."""

import os
import time

import streamlit as st
from dotenv import load_dotenv
from pymongo import MongoClient

from helper.upload_file_to_s3 import upload_file
from helper.process_pdf import process_pdf

# Load environment variables
load_dotenv()

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
COLLECTION_NAME2 = os.getenv("COMPANY_COLLECTION_NAME")

# Module-level MongoDB handles shared by every rerun of the page.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
collection2 = db[COLLECTION_NAME2]


def _record_status(object_url, tags, categories, status):
    """Set tags, categories and status on the document matching object_url."""
    collection.update_one(
        {"object_url": object_url},
        {
            "$set": {
                "tags": tags,
                "categories": categories,
                "status": status,
            }
        },
    )


def upload_pdf() -> None:
    """Render the PDF upload page.

    Shows a "Back" button, a PDF-only file uploader and tag/category
    multi-selects whose options are read from ``collection2``. Once a file,
    at least one tag and at least one category are chosen, a "Submit" button
    appears. Submitting uploads the file via ``upload_file``, runs
    ``process_pdf`` on the resulting object URL, records the outcome in
    ``collection`` and, on success, switches the session to the "view_pdf"
    page.
    """
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()

    # File uploader (PDF files only)
    uploaded_pdf = st.file_uploader(
        "Choose a PDF file to upload",
        type=["pdf"],
        accept_multiple_files=False,
    )

    # Fetch tag and category options from MongoDB; a missing document or
    # missing key yields an empty option list.
    tags_doc = collection2.find_one({"type": "tags"})
    categories_doc = collection2.find_one({"type": "categories"})
    tags_options = (tags_doc or {}).get("tags", [])
    categories_options = (categories_doc or {}).get("categories", [])

    # Multi-select dropdowns for tags and categories
    selected_tags = st.multiselect("Select Tags", options=tags_options)
    selected_categories = st.multiselect(
        "Select Categories", options=categories_options
    )

    # The Submit button is only offered once every input has been provided.
    if not (uploaded_pdf and selected_tags and selected_categories):
        return

    processed_ok = False
    if st.button("Submit"):
        with st.spinner(text="Uploading and Processing PDF"):
            # Upload file to S3
            # NOTE(review): upload_file is assumed to return a dict-like
            # object (it is read with .get) or a falsy value on failure.
            metadata = upload_file(uploaded_pdf, "PDF")
            if not metadata:
                # Previously this case produced no feedback at all.
                st.error("Could Not Upload the PDF. Please try again.")
            else:
                object_url = metadata.get("object_url")
                # A truthy return from process_pdf is treated as success.
                if process_pdf(object_url, selected_tags, selected_categories):
                    _record_status(
                        object_url, selected_tags, selected_categories, "processed"
                    )
                    st.success("PDF has been successfully uploaded and processed.")
                    processed_ok = True
                else:
                    st.error("Could Not Process the PDF. Please try again.")
                    _record_status(
                        object_url, selected_tags, selected_categories, "failed"
                    )

    if processed_ok:
        st.write("Redirecting to View Page to view all uploaded pdfs")
        # Short pause so the success message is visible before navigating.
        time.sleep(2)
        st.session_state.page = "view_pdf"
        st.rerun()