Spaces:
Sleeping
Sleeping
import streamlit as st | |
from pymongo import MongoClient | |
import os | |
from dotenv import load_dotenv | |
from helper.upload_file_to_s3 import upload_file | |
from helper.process_pdf import process_pdf | |
import time | |
# Read settings from a .env file (if any) into the process environment.
load_dotenv()

# AWS settings read from the environment; they are not referenced again in this part of the file.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME")

# MongoDB connection settings.
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
COLLECTION_NAME2 = os.environ.get("COMPANY_COLLECTION_NAME")

# One shared client for the module.
# `collection` receives the per-PDF status updates written by upload_pdf();
# `collection2` is queried for the selectable tag and category lists.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
collection2 = db[COLLECTION_NAME2]
def _record_pdf_status(object_url, tags, categories, status):
    """Store the chosen tags, categories and processing status on the PDF's record."""
    collection.update_one(
        {"object_url": object_url},
        {"$set": {
            "tags": tags,
            "categories": categories,
            "status": status,
        }},
    )


def upload_pdf() -> None:
    """Render the PDF upload page and handle a submitted upload.

    The page shows a "Back" button, a single-file PDF uploader, and two
    multi-selects whose options are read from the "tags" and "categories"
    documents of ``collection2``. The "Submit" button is only rendered once
    a file, at least one tag and at least one category have been chosen.

    On submit the file is passed to ``upload_file``; when that returns
    metadata, its ``object_url`` is passed to ``process_pdf`` and the record
    in ``collection`` matching that URL is updated with the selections and a
    status of "processed" or "failed". After a successful run the session
    page is switched to "view_pdf" and the script is rerun.

    Returns:
        None. All effects go through Streamlit widgets, session state and
        MongoDB.
    """
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()

    # File uploader (PDF files only, one file at a time)
    uploaded_pdf = st.file_uploader(
        "Choose a PDF file to upload",
        type=["pdf"],
        accept_multiple_files=False,
    )

    # Fetch tags and categories from MongoDB; a missing document or a
    # missing key yields an empty option list.
    tags_doc = collection2.find_one({"type": "tags"})
    categories_doc = collection2.find_one({"type": "categories"})
    tags_options = (tags_doc or {}).get("tags", [])
    categories_options = (categories_doc or {}).get("categories", [])

    # Multi-select dropdowns for tags and categories
    selected_tags = st.multiselect("Select Tags", options=tags_options)
    selected_categories = st.multiselect("Select Categories", options=categories_options)

    # Nothing more to render until the form is complete.
    if not (uploaded_pdf and selected_tags and selected_categories):
        return
    if not st.button("Submit"):
        return

    processed = False
    with st.spinner(text="Uploading and Processing PDF"):
        # Upload file to S3
        metadata = upload_file(uploaded_pdf, "PDF")
        if not metadata:
            # Previously this case ended silently, leaving the user with no
            # indication that nothing was uploaded.
            # NOTE(review): if upload_file already reports its own errors in
            # the UI, this message may be redundant -- confirm.
            st.error("Could not upload the PDF. Please try again.")
        else:
            object_url = metadata.get("object_url")
            # Process the uploaded PDF together with the chosen tags/categories
            if process_pdf(object_url, selected_tags, selected_categories):
                _record_pdf_status(object_url, selected_tags, selected_categories, "processed")
                st.success("PDF has been successfully uploaded and processed.")
                processed = True
            else:
                st.error("Could Not Process the PDF. Please try again.")
                _record_pdf_status(object_url, selected_tags, selected_categories, "failed")

    if processed:
        st.write("Redirecting to View Page to view all uploaded pdfs")
        # Pause briefly before rerunning, presumably so the notice above can be read.
        time.sleep(2)
        st.session_state.page = "view_pdf"
        st.rerun()