akshansh36 commited on
Commit
4ab6643
·
verified ·
1 Parent(s): 38aad92

Upload 9 files

Browse files
Files changed (9) hide show
  1. .env +9 -0
  2. app.py +67 -0
  3. requirements.txt +0 -0
  4. search_page.py +534 -0
  5. upload_image_page.py +88 -0
  6. upload_main.py +180 -0
  7. upload_pdf_page.py +85 -0
  8. view_images.py +84 -0
  9. view_pdf.py +70 -0
.env ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # SECURITY: real credentials were previously committed on these lines (AWS key
+ # pair, MongoDB password, Google API key). Anything that was pushed here is
+ # compromised and MUST be rotated. Keep .env out of version control
+ # (.gitignore) and supply fresh values locally / via the deployment secrets.
+ AWS_ACCESS_KEY_ID="<your-aws-access-key-id>"
+ AWS_SECRET_ACCESS_KEY="<your-aws-secret-access-key>"
+ AWS_BUCKET_NAME="youdata-akshansh"
+ MONGO_URI="mongodb+srv://<user>:<password>@<cluster-host>/"
+ DB_NAME="MoSPI"
+ COLLECTION_NAME="files"
+ COMPANY_COLLECTION_NAME="about_company"
+ FLASH_API="<your-google-generative-ai-key>"
9
+
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from upload_main import upload_main_page
from upload_image_page import upload
from view_images import view_images
from search_page import search
from upload_pdf_page import upload_pdf
from view_pdf import view_pdfs

st.set_page_config(layout='wide',page_title="MoSPI", page_icon="📄")

# Single source of truth for navigation: every page stores its successor in
# st.session_state.page and calls st.rerun().
if "page" not in st.session_state:
    st.session_state.page = "home"


def _render_home():
    """Landing page: intro copy plus the two top-level navigation buttons."""
    st.title("Welcome to AI Assistant! Your Intelligent Search Partner.",)

    # Message paragraph
    st.write("""
    We bring you AI-innovated smart search, which supports multilingual and voice search to streamline your searches.
    """)

    st.subheader("Key Features")
    st.markdown("""
    - **Semantic search** across PDFs, images, and official documents.
    - **Quick, relevant responses** with document page highlights.
    - **Multilingual and voice-based search** capabilities.
    - **Integration with company’s data systems** for high relevance.
    """)

    st.markdown("<hr>", unsafe_allow_html=True)
    # Buttons for Search and Upload
    nav_search, nav_upload = st.columns([1,7])

    with nav_search:
        if st.button("Search",help="Smart Search"):
            st.session_state.page = "search"
            st.rerun()

    with nav_upload:
        if st.button("Upload PDF/Images",help="Upload PDFs and Images"):
            st.session_state.page = "upload_main"
            st.rerun()


# Page-name -> renderer dispatch table (replaces the previous if/elif chain;
# an unknown page name renders nothing, exactly as before).
_PAGES = {
    "upload_main": upload_main_page,
    "upload_image": upload,
    "view_image": view_images,
    "search": search,
    "upload_pdf": upload_pdf,
    "view_pdf": view_pdfs,
}

if st.session_state.page == "home":
    _render_home()
elif st.session_state.page in _PAGES:
    _PAGES[st.session_state.page]()
requirements.txt ADDED
Binary file (3.9 kB). View file
 
search_page.py ADDED
@@ -0,0 +1,534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime
from pymongo import MongoClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
import re
import json
import streamlit as st
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
import pinecone
from dotenv import load_dotenv
from bson import ObjectId
import google.generativeai as genai

# --- Configuration: everything secret comes from the environment (.env). ---
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
FLASH_API = os.getenv("FLASH_API")

# MongoDB: `collection` stores per-file metadata; `collection2` stores the
# company profile plus tag/category option documents.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
# Honour COMPANY_COLLECTION_NAME from .env; fall back to the previously
# hard-coded name so existing deployments keep working.
collection2 = db[os.getenv("COMPANY_COLLECTION_NAME", "about_company")]

genai.configure(api_key=FLASH_API)

# LLM stack: embeddings for vector search, `model` (LangChain) for text query
# reformulation, `model2` (raw SDK) for audio transcription.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=FLASH_API)
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0, max_tokens=None, google_api_key=FLASH_API)
model2 = genai.GenerativeModel('models/gemini-1.5-flash')

# SECURITY: the Pinecone API key used to be hard-coded (and committed) here.
# It is now read from the environment; the old key must be rotated.
pc = pinecone.Pinecone(
    api_key=os.getenv("PINECONE_API_KEY")
)
index_name = "mospi"
index = pc.Index(index_name)

# Scratch directory for recorded audio queries (emptied after each search).
temp_audio_folder = "temp-audio"
os.makedirs(temp_audio_folder, exist_ok=True)

# Company description injected into LLM prompts for context. Default to ""
# so the module still imports when the document is absent — previously
# `about_company` was simply undefined in that case, causing a NameError
# the first time search() ran.
about_company = ""
about_company_doc = collection2.find_one({"type": "about_company"})
if about_company_doc:
    about_company = about_company_doc.get('company_description', '')
41
def process_user_query(user_query, about_company=""):
    """Translate and reformulate a text query via the LLM and extract keywords.

    Args:
        user_query: raw user input, any language.
        about_company: company description giving the model domain context.

    Returns:
        (enhanced_query, keywords) on success, (None, None) on any failure
        (LLM error, or no parseable payload in the response).
    """
    import ast  # local import: only needed for the lenient fallback parse below

    try:
        # No f-string here, so we avoid additional formatting complications
        prompt_template = ChatPromptTemplate.from_template("""
        Given is a user query. Your task is to first translate the user query from any other language to English if not already in English.
        Then you have to reformulate that translated query such that it is contextually rich and consistent.Do not add too much extra information in the query.
        Also extract important keywords from this query. Return the result in the format given below.

        Expected output format:
        {{'query':'String',
        'keywords':['String']
        }}
        Given below is the user query which is related to a company about which is given below, but this company information is just to understand what the query might be related to, until explicitly asked you do not need to include company information in query:
        "About Company": {about_company}
        "Query":
        {user_query}
        """)

        # Chain the prompt with LLM for response generation
        chain = prompt_template | model
        result = chain.invoke({
            "about_company": about_company,
            "user_query": user_query
        })
        print(f"Model response for reformulated query is {result.content}")

        # Use non-greedy regex and handle multiline content
        match = re.search(r"\{[\s\S]*?\}", result.content.strip())
        if not match:
            print("No JSON data found in the model response.")
            return None, None

        payload = match.group(0)
        # The model is asked for JSON but often emits a Python-style dict with
        # single quotes. The old blind `'` -> `"` swap corrupted any value
        # containing an apostrophe (e.g. "India's"), so try tolerant parsers
        # in order of strictness instead.
        data = None
        for parse in (json.loads, ast.literal_eval,
                      lambda s: json.loads(s.replace("'", '"'))):
            try:
                data = parse(payload)
                break
            except (ValueError, SyntaxError):
                continue

        if not isinstance(data, dict):
            print("No JSON data found in the model response.")
            return None, None

        enhanced_query = data.get('query', '')
        keywords = data.get('keywords', [])
        return enhanced_query, keywords

    except Exception as e:
        print(f"Error occurred while processing query using LLM: {e}")
        return None, None
83
+
84
+
85
+
86
+
87
def filter_chunks_by_keywords_images(chunks, keywords):
    """Rank image chunks by keyword hits, then similarity; return up to three.

    Chunks whose description contains at least one keyword (case-insensitive
    substring match) win outright; if none match, fall back to the three
    highest-scoring chunks overall.
    """
    normalized = {kw.strip().lower() for kw in keywords}

    # Pair every chunk with how many distinct keywords its description hits.
    scored = [
        {
            'chunk': candidate,
            'keyword_count': sum(
                1 for kw in normalized
                if kw in candidate['metadata'].get('description', '').lower()
            ),
        }
        for candidate in chunks
    ]

    # Keyword hits dominate; the vector similarity score breaks ties.
    ranked = sorted(
        scored,
        key=lambda entry: (entry['keyword_count'], entry['chunk']['score']),
        reverse=True,
    )

    hits = [entry for entry in ranked if entry['keyword_count'] > 0]
    if hits:
        # At most three keyword-matching chunks (fewer if fewer matched).
        return hits[:3]

    # Nothing matched any keyword: rank purely by vector similarity instead.
    by_similarity = sorted(
        scored,
        key=lambda entry: entry['chunk']['score'],
        reverse=True,
    )
    return by_similarity[:3]
123
+
124
def filter_chunks_by_keywords_pdf(chunks, keywords):
    """Pick the single best chunk per PDF, then return the top three PDFs.

    "Best" is lexicographic on (keyword hits, similarity score): keyword
    matches dominate and the vector score breaks ties. Chunks are grouped by
    their PDF's URL so each result comes from a different document.
    """
    normalized = {kw.strip().lower() for kw in keywords}
    best_per_pdf = {}

    for candidate in chunks:
        text = candidate['metadata'].get('description', '').lower()
        pdf_url = candidate['metadata'].get('url')  # unique id per source PDF
        entry = {
            'chunk': candidate,
            'keyword_count': sum(1 for kw in normalized if kw in text),
            'similarity_score': candidate['score'],
        }

        # Keep only the most relevant chunk seen so far for this PDF.
        current = best_per_pdf.get(pdf_url)
        if current is None or (
            (entry['keyword_count'], entry['similarity_score'])
            > (current['keyword_count'], current['similarity_score'])
        ):
            best_per_pdf[pdf_url] = entry

    # Rank the per-PDF winners and keep at most three.
    ranked = sorted(
        best_per_pdf.values(),
        key=lambda e: (e['keyword_count'], e['similarity_score']),
        reverse=True,
    )
    return ranked[:3]
161
+
162
+
163
def get_images_from_chunks(chunks):
    """Resolve ranked chunks to their MongoDB image documents.

    Each input item is {'chunk': pinecone_match, ...}; the chunk metadata's
    `mongo_id` points at the image document in `collection`. Chunks without a
    `mongo_id`, or whose document is missing, are skipped silently.
    """
    images = []
    for entry in chunks:
        chunk = entry['chunk']
        doc_id = chunk['metadata'].get('mongo_id')
        if not doc_id:
            continue  # chunk was indexed without a backing Mongo document
        document = collection.find_one({"_id": ObjectId(doc_id)})
        if document:
            images.append({
                'image': document,
                'similarity_score': chunk['score'],
            })
    return images
178
+
179
def get_pdfs_from_chunks(chunks):
    """Resolve ranked chunks to their MongoDB PDF documents.

    Mirrors get_images_from_chunks but also carries the matching page number
    from the chunk metadata, so the UI can point at the relevant page.
    """
    pdfs = []
    for entry in chunks:
        chunk = entry['chunk']
        doc_id = chunk['metadata'].get('mongo_id')
        if not doc_id:
            continue  # no backing Mongo document for this chunk
        document = collection.find_one({"_id": ObjectId(doc_id)})
        if document:
            pdfs.append({
                'pdf': document,
                'similarity_score': chunk['score'],
                'page_number': chunk['metadata'].get('page_number'),
            })
    return pdfs
197
+
198
+
199
def format_date(timestamp):
    """Render a Unix timestamp as e.g. 'January 05, 2024' (local time)."""
    return f"{datetime.fromtimestamp(timestamp):%B %d, %Y}"
202
+
203
+ # def display_images(images):
204
+ # images = sorted(images, key=lambda x: x['similarity_score'], reverse=True)
205
+ # num_images = len(images)
206
+ # if num_images == 0:
207
+ # st.write("No images to display.")
208
+ # return
209
+ #
210
+ # # Iterate over the images in steps of 3 to create rows
211
+ # st.write("Here are the matching images for your query")
212
+ # for start_idx in range(0, num_images, 3):
213
+ # # Determine the number of columns for this row (could be less than 3 in the last row)
214
+ # num_cols = min(3, num_images - start_idx)
215
+ # cols = st.columns(num_cols)
216
+ #
217
+ # # Display images in the current row
218
+ # for idx in range(num_cols):
219
+ # img_info = images[start_idx + idx]
220
+ # col = cols[idx]
221
+ # with col:
222
+ # image_data = img_info['image']
223
+ # similarity_score = img_info['similarity_score']
224
+ #
225
+ # # Display the image using object_url directly with consistent sizing
226
+ # st.markdown(
227
+ # f"""
228
+ # <div style='text-align: center;'>
229
+ # <img src='{image_data['object_url']}' alt='Image' style='width:250px; height:250px; object-fit: cover; border-radius: 8px;' />
230
+ # <p><strong>Similarity Score:</strong> {similarity_score:.4f}</p>
231
+ # </div>
232
+ # """,
233
+ # unsafe_allow_html=True
234
+ # )
235
+ #
236
+ # # Expander for image details
237
+ # with st.expander("View Image Details"):
238
+ # st.write(f"**File Name:** {image_data.get('name', 'N/A')}")
239
+ # st.write(f"**Date Uploaded:** {format_date(image_data.get('upload_date', datetime.now().timestamp()))}")
240
+ # st.write(f"**Description:** {image_data.get('description', 'No description available')}")
241
+ #
242
+ # # Display tags if available
243
+ # tags = ", ".join(image_data.get("tags", []))
244
+ # st.write(f"**Tags:** {tags if tags else 'No tags'}")
245
+ #
246
+ # # Display categories if available
247
+ # categories = ", ".join(image_data.get("categories", []))
248
+ # st.write(f"**Categories:** {categories if categories else 'No categories'}")
249
+ #
250
+ # # Download link
251
+ # st.markdown(
252
+ # f"<a href='{image_data['object_url']}' class='download-link' download>Download Image</a>",
253
+ # unsafe_allow_html=True
254
+ # )
255
+
256
def display_results(images, pdfs):
    """Render image and PDF search hits in rows of up to three columns.

    Args:
        images: list of {'image': mongo_doc, 'similarity_score': float}.
        pdfs: list of {'pdf': mongo_doc, 'similarity_score': float,
            'page_number': number}.

    Each section is sorted by similarity (best first) before rendering; an
    empty section renders a "nothing to display" line instead.
    """
    # --- Images section -----------------------------------------------------
    images = sorted(images, key=lambda x: x['similarity_score'], reverse=True)
    num_images = len(images)

    if num_images > 0:
        st.write("### Here are the matching images for your query")
        # Walk the list three at a time; the last row may have fewer columns.
        for start_idx in range(0, num_images, 3):
            num_cols = min(3, num_images - start_idx)
            cols = st.columns(num_cols)

            # Display images in the current row
            for idx in range(num_cols):
                img_info = images[start_idx + idx]
                col = cols[idx]
                with col:
                    image_data = img_info['image']
                    similarity_score = img_info['similarity_score']

                    # Fixed 250x250 thumbnail served straight from the stored
                    # object_url (presumably an S3 link — confirm with uploader).
                    st.markdown(
                        f"""
                        <div style='text-align: center;'>
                            <img src='{image_data['object_url']}' alt='Image' style='width:250px; height:250px; object-fit: cover; border-radius: 8px;' />
                            <p><strong>Similarity Score:</strong> {similarity_score:.4f}</p>
                        </div>
                        """,
                        unsafe_allow_html=True
                    )

                    with st.expander("View Image Details"):
                        st.write(f"**File Name:** {image_data.get('name', 'N/A')}")
                        # NOTE(review): a missing upload_date falls back to
                        # "now", so it displays today's date, not "unknown".
                        st.write(
                            f"**Date Uploaded:** {format_date(image_data.get('upload_date', datetime.now().timestamp()))}")
                        st.write(f"**Description:** {image_data.get('description', 'No description available')}")

                        tags = ", ".join(image_data.get("tags", []))
                        st.write(f"**Tags:** {tags if tags else 'No tags'}")

                        categories = ", ".join(image_data.get("categories", []))
                        st.write(f"**Categories:** {categories if categories else 'No categories'}")

                        st.markdown(
                            f"<a href='{image_data['object_url']}' class='download-link' download>Download Image</a>",
                            unsafe_allow_html=True
                        )
    else:
        st.write("No images to display.")

    # --- PDFs section (same three-column layout) ----------------------------
    pdfs = sorted(pdfs, key=lambda x: x['similarity_score'], reverse=True)
    num_pdfs = len(pdfs)

    if num_pdfs > 0:
        st.write("### Here are the matching PDFs for your query")

        for start_idx in range(0, num_pdfs, 3):
            num_cols = min(3, num_pdfs - start_idx)
            cols = st.columns(num_cols)

            for idx in range(num_cols):
                pdf_info = pdfs[start_idx + idx]
                col = cols[idx]
                with col:
                    pdf_data = pdf_info['pdf']
                    similarity_score = pdf_info['similarity_score']

                    # Expander for each PDF in a column
                    with st.expander(f"{pdf_data.get('name', 'PDF Document')}"):
                        st.write(f"**File Name:** {pdf_data.get('name', 'N/A')}")
                        # int() here assumes page_number is always present and
                        # numeric for PDF chunks — TODO confirm with indexer.
                        st.write(f"**Page Number:** {int(pdf_info['page_number'])}")
                        st.write(
                            f"**Date Uploaded:** {format_date(pdf_data.get('upload_date', datetime.now().timestamp()))}")
                        tags = ", ".join(pdf_data.get("tags", []))
                        st.write(f"**Tags:** {tags if tags else 'No tags'}")

                        categories = ", ".join(pdf_data.get("categories", []))
                        st.write(f"**Categories:** {categories if categories else 'No categories'}")

                        st.markdown(
                            f"<a href='{pdf_data['object_url']}' class='download-link' download>Download PDF</a>",
                            unsafe_allow_html=True
                        )

                    # Similarity score shown under the expander.
                    st.markdown(
                        f"""<div style='text-align: center;'>
                        <p><strong>Similarity Score:</strong> {similarity_score:.4f}</p></div>""",
                        unsafe_allow_html=True
                    )
    else:
        st.write("No PDFs to display.")
347
+
348
+
349
def upload_audio_google(audio_path):
    """Upload a local audio file to Google for Gemini to consume.

    Returns the uploaded file handle on success, or None if the upload (or
    reading its attributes) raises.
    """
    try:
        uploaded = genai.upload_file(path=audio_path, display_name="Query Audio")
        print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
        return uploaded
    except Exception as e:
        print(f"error occured while uploading audio to google : {e}")
        return None
358
+
359
def extract_query_from_audio(audio_file):
    """Transcribe an uploaded audio query via Gemini and extract keywords.

    Args:
        audio_file: handle returned by upload_audio_google().

    Returns:
        (enhanced_query, keywords) on success, (None, None) on any failure.
        Previously an empty/falsy model response fell off the end of the
        function and returned a bare None, which crashed callers that unpack
        two values — now every path returns a 2-tuple.
    """
    import ast  # local import: only needed for the lenient fallback parse

    try:
        prompt=f""" Given is a user query related to a company in form of audio, your task is to understand the user query and convert it to text. If the audio is not in english then transalte it to english textual query. Make sure the generated query is consistent and contextual.Also extract important keywords from the query.
        For the context I am providing with company information {about_company}
        Expected output format : {{
        "query":"String",
        "keywords":["String"]
        }}

        """
        response = model2.generate_content(
            [prompt, audio_file]
        )

        if not response:
            # Explicit 2-tuple (bug fix: used to implicitly return None).
            return None, None

        print(response.text)
        match = re.search(r"\{[\s\S]*?\}", response.text)
        if not match:
            print("No JSON data found in the model response.")
            return None, None

        payload = match.group(0)
        # Same tolerant parsing as process_user_query: the model may emit
        # strict JSON or a single-quoted Python-style dict; a blind quote
        # swap corrupts values containing apostrophes.
        data = None
        for parse in (json.loads, ast.literal_eval,
                      lambda s: json.loads(s.replace("'", '"'))):
            try:
                data = parse(payload)
                break
            except (ValueError, SyntaxError):
                continue

        if not isinstance(data, dict):
            print("No JSON data found in the model response.")
            return None, None

        enhanced_query = data.get('query', '')
        keywords = data.get('keywords', [])
        return enhanced_query, keywords

    except Exception as e:
        print(f"error occured in extracting query from audio {e}")
        return None, None
393
+
394
+
395
+
396
+
397
def search_pinecone(k, filetype, query_embedding):
    """Query the Pinecone index for the top-k chunks tagged `filetype`.

    `filetype` is "Image" or "PDF" (matches the `tag` metadata written at
    indexing time); metadata is included so callers can resolve Mongo ids.
    """
    return index.query(
        vector=query_embedding,
        top_k=k,
        include_metadata=True,
        filter={"tag": filetype},
    )
406
+
407
+
408
def search():
    """Render the search page: text search on top, audio search below.

    Both paths share the same pipeline: reformulate the query with the LLM,
    embed it, query Pinecone separately for images (top 5) and PDFs (top 20),
    re-rank by keyword matches, resolve to Mongo documents, and render via
    display_results(). The audio path additionally records, uploads, and then
    cleans up the temporary audio file.
    """
    if st.button("Back",key="back_button"):
        st.session_state.page="home"
        st.rerun()

    st.title("AI Inspired Smart Search Engine")
    st.subheader("Multilingual text search 🖊️")
    user_query = st.text_input("Enter your search query:")

    # --- Text search path ---------------------------------------------------
    if user_query and st.button("submit query",key="submit_query"):
        with st.spinner("Processing your query, please wait"):

            # LLM translates/reformulates the query and extracts keywords.
            enhanced_query,keywords=process_user_query(user_query,about_company)

            if enhanced_query and keywords:
                query_embedding = embeddings.embed_query(enhanced_query)
                search_results_image = search_pinecone(5,"Image",query_embedding)
                search_result_pdfs = search_pinecone(20, "PDF", query_embedding)

                matches_pdf = search_result_pdfs['matches']
                matches_image = search_results_image['matches']
                images=[]
                pdfs=[]
                if not matches_image and not matches_pdf:
                    print(f"No matching PDFs and Images found for your query")
                    st.write(f"No matching PDFs and Images found for your query")
                else:
                    if matches_image:
                        # Re-rank vector hits by keyword overlap, keep top 3.
                        top_chunks_images = filter_chunks_by_keywords_images(matches_image, keywords)

                        if top_chunks_images:
                            # Resolve chunks to image documents in MongoDB.
                            images = get_images_from_chunks(top_chunks_images)

                    if matches_pdf:
                        # Best chunk per PDF, at most 3 distinct PDFs.
                        top_chunks_pdf=filter_chunks_by_keywords_pdf(matches_pdf,keywords)
                        if top_chunks_pdf:
                            pdfs=get_pdfs_from_chunks(top_chunks_pdf)

                    display_results(images,pdfs)

            else:
                # LLM reformulation failed (returned None, None).
                st.error(f"Sorry could not process your request, please try again later!")

    st.markdown("<hr>", unsafe_allow_html=True)

    # --- Audio search path --------------------------------------------------
    st.subheader("Multilingual Audio Search 🗣️")
    audio_value = st.audio_input("Record your query")
    if audio_value and st.button("Submit Audio",key="audio-button"):
        with st.spinner("Processing your query, please wait"):
            # Persist the recording to a timestamped temp file so it can be
            # uploaded to Google for transcription.
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            audio_file_path = os.path.join(temp_audio_folder, f"audio_query_{timestamp}.wav")

            # Save the audio input to the file
            with open(audio_file_path, "wb") as f:
                f.write(audio_value.getvalue())

            print(f"Audio saved to {audio_file_path}")

            audio_file=upload_audio_google(audio_file_path)
            if audio_file:
                # Gemini transcribes/translates and extracts keywords.
                audio_query,audio_keywords=extract_query_from_audio(audio_file)

                if audio_query and audio_keywords:
                    query_embedding = embeddings.embed_query(audio_query)
                    search_results_image = search_pinecone(5, "Image", query_embedding)
                    search_result_pdfs = search_pinecone(20, "PDF", query_embedding)

                    matches_pdf = search_result_pdfs['matches']
                    matches_image = search_results_image['matches']
                    images = []
                    pdfs = []
                    if not matches_image and not matches_pdf:
                        print(f"No matching PDFs and Images found for your query")
                        st.write(f"No matching PDFs and Images found for your query")
                    else:
                        if matches_image:
                            top_chunks_images = filter_chunks_by_keywords_images(matches_image, audio_keywords)

                            if not top_chunks_images:
                                st.write("No chunks matched the keywords.")
                            else:
                                # Resolve chunks to image documents in MongoDB.
                                images = get_images_from_chunks(top_chunks_images)

                        if matches_pdf:
                            top_chunks_pdf = filter_chunks_by_keywords_pdf(matches_pdf, audio_keywords)
                            if top_chunks_pdf:
                                pdfs = get_pdfs_from_chunks(top_chunks_pdf)

                        display_results(images,pdfs)

                else:
                    st.error(f"Sorry could not process your request, please try again later!")

                # Best-effort: delete the uploaded audio from Google storage.
                try:
                    genai.delete_file(audio_file.name)
                    print(f"deleted audio file from google storage")
                except Exception as e:
                    print(f"failed to delete audio file from google storage")

            # Best-effort cleanup of the local temp-audio directory.
            for filename in os.listdir(temp_audio_folder):
                file_path = os.path.join(temp_audio_folder, filename)
                try:
                    if os.path.isfile(file_path) or os.path.islink(file_path):
                        os.unlink(file_path)  # Remove the file
                        print(f"Deleted file: {file_path}")
                except Exception as e:
                    print(f"Failed to delete {file_path}. Reason: {e}")
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
upload_image_page.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from helper.upload_file_to_s3 import upload_file
from helper.process_image import process_image_using_llm
from helper.create_embeddings import create_embedding
import time

# Load environment variables (.env): AWS credentials are consumed by the
# helper.upload_file_to_s3 module; Mongo settings are used directly here.
load_dotenv()
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
COLLECTION_NAME2=os.getenv("COMPANY_COLLECTION_NAME")

# `collection` stores per-file metadata; `collection2` stores the company
# profile plus the tag/category option documents used by the pickers below.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
collection2=db[COLLECTION_NAME2]
24
+
25
def upload():
    """Image upload page: pick a file plus tags/categories, then process it.

    Pipeline on submit: upload to S3 -> describe via LLM -> create vector
    embedding -> persist tags/categories on the Mongo document -> redirect to
    the image gallery. Each stage surfaces an st.error and aborts on failure.
    """
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()

    # File uploader (image files only)
    uploaded_image = st.file_uploader("Choose an image file to upload", type=["png", "jpg", "jpeg"],
                                      accept_multiple_files=False)

    # Fetch tags and categories from MongoDB
    tags_doc = collection2.find_one({"type": "tags"})
    categories_doc = collection2.find_one({"type": "categories"})

    tags_options = tags_doc["tags"] if tags_doc and "tags" in tags_doc else []
    categories_options = categories_doc["categories"] if categories_doc and "categories" in categories_doc else []

    # Multi-select dropdowns for tags and categories
    selected_tags = st.multiselect("Select Tags", options=tags_options)
    selected_categories = st.multiselect("Select Categories", options=categories_options)

    # Submit only becomes available once a file AND at least one tag and one
    # category are chosen.
    if uploaded_image and selected_tags and selected_categories:
        flag=False  # set True only after the whole pipeline succeeds
        if st.button("Submit"):

            with st.spinner(text="Uploading and Processing Image"):
                # Upload file to S3
                metadata = upload_file(uploaded_image,"Image")
                if metadata:
                    object_url = metadata.get("object_url")
                    filename = metadata.get("name")

                    # Process image with LLM for description
                    # (presumably also persists the description — the return
                    # value is only used as a success flag here; confirm in
                    # helper.process_image)
                    llm_processed = process_image_using_llm(object_url)
                    if llm_processed:
                        # Create embedding with tags and categories in metadata
                        embedding_created = create_embedding(
                            object_url,
                            selected_tags,
                            selected_categories
                        )
                        if embedding_created:
                            # Save tags and categories to MongoDB document for the uploaded image
                            collection.update_one(
                                {"object_url": object_url},
                                {"$set": {
                                    "tags": selected_tags,
                                    "categories": selected_categories
                                }}
                            )
                            st.success("Image has been successfully uploaded and processed.")
                            flag=True
                        else:
                            st.error("Could not create embedding. Please try again.")
                    else:
                        st.error("Could not process the image description. Please try again.")
                else:
                    st.error("Could not upload the image. Please try again.")

        # On success, give the user a moment to read the message, then
        # navigate to the gallery page.
        if flag:
            st.write("Redirecting to View Page to view all uploaded images")
            time.sleep(2)
            st.session_state.page = "view_image"
            st.rerun()
upload_main.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
# Load .env before reading any configuration below.
load_dotenv()
import time

# AWS settings are loaded for parity with the other pages (the S3 helpers
# read them); this page itself only talks to MongoDB.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
# Note: this page works against the *company* collection (description,
# tags, categories), not the files collection.
COLLECTION_NAME = os.getenv("COMPANY_COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
+
19
+
20
+
21
def upload_main_page():
    """Upload hub page: navigation buttons plus company/tag/category admin.

    Sections, top to bottom: navigation to the four upload/view pages,
    company-description editor, tag manager, category manager. All admin
    data lives in the company collection as typed documents
    ({"type": "about_company"|"tags"|"categories"}).
    """
    #
    if st.button("Back",key="Back_button"):
        st.session_state.page = "home"
        st.rerun()

    st.markdown("---")
    # Row 1: image actions.
    col1, col2 = st.columns([1,6])
    with col1:
        if st.button("Upload Image",key="upload_image_button"):
            st.session_state.page="upload_image"
            st.rerun()

    # Placeholder for image upload function
    with col2:
        if st.button("View Images",key="view_image_button"):
            st.session_state.page = "view_image"
            st.rerun()

    # Row 2: Buttons for PDF actions
    col3, col4 = st.columns([1,6])
    with col3:
        if st.button("Upload PDF",key="upload_pdf_button"):
            st.session_state.page="upload_pdf"
            st.rerun()
    with col4:
        if st.button("View PDFs",key="view_pdf_button"):
            st.session_state.page="view_pdf"
            st.rerun()

    # Line break
    st.markdown("---")

    # Section for company description
    st.subheader("Tell us about your company in a few lines")

    # Fetch the "about_company" document from MongoDB
    about_company_doc = collection.find_one({"type": "about_company"})

    if about_company_doc and "company_description" in about_company_doc:
        # If company_description exists, show it and allow for updates
        st.write("### Current Description")
        st.write(about_company_doc["company_description"])  # Display current description (non-editable)

        # Show an empty text box for new input
        new_description = st.text_area("Enter new description to update", "")
        if st.button("Update",key="update_company"):
            # Update the company description in MongoDB
            collection.update_one(
                {"_id": about_company_doc["_id"]},
                {"$set": {"company_description": new_description}}
            )
            st.success("Company description updated successfully.")
            time.sleep(3)
            st.rerun()
    else:
        # If company_description does not exist, show an empty text box for initial input
        new_description = st.text_area("Enter company description", "")
        if st.button("Save",key="save_description"):
            # Insert a new company description field in MongoDB
            if about_company_doc:
                # Update the existing document
                collection.update_one(
                    {"_id": about_company_doc["_id"]},
                    {"$set": {"company_description": new_description}}
                )
            else:
                # Insert a new document if it doesn't exist
                collection.insert_one({"type": "about_company", "company_description": new_description})
            # NOTE(review): unlike the Update branch, Save does not st.rerun(),
            # so the new description only appears after the next interaction.
            st.success("Company description saved successfully.")

    st.markdown("---")

    # Section for adding and displaying tags
    st.subheader("Manage Tags and Categories")

    # Part 1: Tags
    st.write("#### Add Tags")

    # Fetch tags document
    tags_doc = collection.find_one({"type": "tags"})
    current_tags = tags_doc["tags"] if tags_doc and "tags" in tags_doc else []

    # Display current tags with horizontal alignment and black-blue text color
    if current_tags:
        st.write("Current Tags:")
        tag_html = " ".join([
            f"<span style='display: inline-block; padding: 5px 10px; margin: 5px; border-radius: 15px; background-color: #e0e0e0; color: #0000FF;'>{tag}</span>"
            for tag in current_tags
        ])
        st.markdown(tag_html, unsafe_allow_html=True)
    else:
        st.write("No tags available.")

    # Input box to add new tags
    new_tags = st.text_input("Enter new tags separated by commas", "")
    if st.button("Add Tags",key="add_tags"):
        # Split the input tags by commas, strip whitespace, and remove any duplicates
        tags_to_add = list(set([tag.strip() for tag in new_tags.split(",") if tag.strip()]))

        # Update the MongoDB document with new tags ($addToSet keeps the
        # stored list duplicate-free even across submissions)
        if tags_doc:
            # Update the existing tags document
            collection.update_one(
                {"_id": tags_doc["_id"]},
                {"$addToSet": {"tags": {"$each": tags_to_add}}}
            )
        else:
            # Insert a new tags document if it doesn't exist
            collection.insert_one({"type": "tags", "tags": tags_to_add})

        st.success("Tags added successfully.")
        st.rerun()  # Refresh the page to display updated tags

    # Part 2: Categories (mirrors the tag flow above)
    st.write("#### Add Categories")

    # Fetch categories document
    categories_doc = collection.find_one({"type": "categories"})
    current_categories = categories_doc["categories"] if categories_doc and "categories" in categories_doc else []

    # Display current categories with horizontal alignment and black-blue text color
    if current_categories:
        st.write("Current Categories:")
        category_html = " ".join([
            f"<span style='display: inline-block; padding: 5px 10px; margin: 5px; border-radius: 15px; background-color: #e0e0e0; color: #0000FF;'>{category}</span>"
            for category in current_categories
        ])
        st.markdown(category_html, unsafe_allow_html=True)
    else:
        st.write("No categories available.")

    # Input box to add new categories
    new_categories = st.text_input("Enter new categories separated by commas", "")
    if st.button("Add Categories",key="add_categories"):
        # Split the input categories by commas, strip whitespace, and remove any duplicates
        categories_to_add = list(set([category.strip() for category in new_categories.split(",") if category.strip()]))

        # Update the MongoDB document with new categories
        if categories_doc:
            # Update the existing categories document
            collection.update_one(
                {"_id": categories_doc["_id"]},
                {"$addToSet": {"categories": {"$each": categories_to_add}}}
            )
        else:
            # Insert a new categories document if it doesn't exist
            collection.insert_one({"type": "categories", "categories": categories_to_add})

        st.success("Categories added successfully.")
        st.rerun()  # Refresh the page to display updated categories
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
upload_pdf_page.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pymongo import MongoClient
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from helper.upload_file_to_s3 import upload_file
6
+ from helper.process_pdf import process_pdf
7
+ import time
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+ AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
12
+ AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
13
+ AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
14
+ MONGO_URI = os.getenv("MONGO_URI")
15
+ DB_NAME = os.getenv("DB_NAME")
16
+ COLLECTION_NAME = os.getenv("COLLECTION_NAME")
17
+ COLLECTION_NAME2=os.getenv("COMPANY_COLLECTION_NAME")
18
+
19
+ mongo_client = MongoClient(MONGO_URI)
20
+ db = mongo_client[DB_NAME]
21
+ collection = db[COLLECTION_NAME]
22
+ collection2=db[COLLECTION_NAME2]
23
+
24
+ def upload_pdf():
25
+ if st.button("Back"):
26
+ st.session_state.page = "upload_main"
27
+ st.rerun()
28
+
29
+ # File uploader (image files only)
30
+ uploaded_pdf = st.file_uploader("Choose a PDF file to upload", type=["pdf"],
31
+ accept_multiple_files=False)
32
+
33
+ # Fetch tags and categories from MongoDB
34
+ tags_doc = collection2.find_one({"type": "tags"})
35
+ categories_doc = collection2.find_one({"type": "categories"})
36
+
37
+ tags_options = tags_doc["tags"] if tags_doc and "tags" in tags_doc else []
38
+ categories_options = categories_doc["categories"] if categories_doc and "categories" in categories_doc else []
39
+
40
+ # Multi-select dropdowns for tags and categories
41
+ selected_tags = st.multiselect("Select Tags", options=tags_options)
42
+ selected_categories = st.multiselect("Select Categories", options=categories_options)
43
+
44
+ if uploaded_pdf and selected_tags and selected_categories:
45
+ flag=False
46
+ if st.button("Submit"):
47
+
48
+ with st.spinner(text="Uploading and Processing Image"):
49
+ # Upload file to S3
50
+ metadata = upload_file(uploaded_pdf,"PDF")
51
+ if metadata:
52
+ object_url = metadata.get("object_url")
53
+ filename = metadata.get("name")
54
+
55
+ # Process image with LLM for description
56
+ pdf_processed = process_pdf(object_url,selected_tags,selected_categories)
57
+ if pdf_processed:
58
+ collection.update_one(
59
+ {"object_url": object_url},
60
+ {"$set": {
61
+ "tags": selected_tags,
62
+ "categories": selected_categories,
63
+ "status": "processed"
64
+ }}
65
+ )
66
+ st.success("PDF has been successfully uploaded and processed.")
67
+ flag = True
68
+ else:
69
+ st.error("Could Not Process the PDF. Please try again.")
70
+ collection.update_one(
71
+ {"object_url": object_url},
72
+ {"$set": {
73
+ "tags": selected_tags,
74
+ "categories": selected_categories,
75
+ "status": "failed"
76
+ }}
77
+ )
78
+
79
+
80
+ if flag:
81
+ st.write("Redirecting to View Page to view all uploaded pdfs")
82
+ time.sleep(2)
83
+ st.session_state.page = "view_pdf"
84
+ st.rerun()
85
+
view_images.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables from the local .env file.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handle for the uploaded-files collection,
# used by view_images() below.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
16
+
17
def format_date(timestamp):
    """Render a POSIX timestamp as e.g. 'January 01, 2024' (local time)."""
    moment = datetime.fromtimestamp(timestamp)
    return moment.strftime("%B %d, %Y")
20
+
21
+ # Custom CSS to control image and expander container width and styling
22
+
23
+
24
def view_images():
    """List all uploaded images from MongoDB in a 4-column grid.

    Each cell shows the image (via raw HTML for fixed 250x250 sizing) plus an
    expander with its metadata and a download link. Documents are expected to
    carry at least ``object_url``; ``name``, ``description``, ``tags``,
    ``categories`` and ``upload_date`` are optional and defaulted.
    """
    # Navigation back to the upload landing page.
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()
    st.title("Your Uploaded Images")

    # Fetch all uploaded images from MongoDB.
    images = list(collection.find({"type": "Image"}))

    if not images:
        st.write("You have not uploaded any images yet.")
        return

    # Display images in a grid (4 images per row).
    cols = st.columns(4)
    for idx, image in enumerate(images):
        col = cols[idx % 4]

        with col:
            # Container for each image and its expander.
            st.markdown("<div class='image-wrapper'>", unsafe_allow_html=True)

            # Display the image using HTML so width/height/object-fit can be
            # controlled exactly (st.image does not offer object-fit: cover).
            st.markdown(
                f"""
                <div style='text-align: center;'>
                    <img src='{image['object_url']}' alt='{image.get('name','Image')}' style='width:250px; height:250px; object-fit: cover; border-radius: 8px;' />

                </div>
                """,
                unsafe_allow_html=True
            )

            st.markdown("</div>", unsafe_allow_html=True)  # Close image container

            # Expander for image details.
            with st.expander("View Image Details"):
                st.write(f"**File Name:** {image.get('name', 'N/A')}")
                # NOTE(review): falls back to "now" when upload_date is missing,
                # which displays today's date rather than an unknown marker.
                st.write(f"**Date Uploaded:** {format_date(image.get('upload_date', datetime.now().timestamp()))}")
                st.write(f"**Description:** {image.get('description', 'No description available')}")

                # Display tags if available.
                tags = ", ".join(image.get("tags", []))
                st.write(f"**Tags:** {tags if tags else 'No tags'}")

                # Display categories if available.
                categories = ", ".join(image.get("categories", []))
                st.write(f"**Categories:** {categories if categories else 'No categories'}")

                # Download link (served straight from the S3 object URL).
                st.markdown(
                    f"<a href='{image['object_url']}' class='download-link' download>Download Image</a>",
                    unsafe_allow_html=True
                )

        # Move to a new row after every 4 images.
        if (idx + 1) % 4 == 0:
            st.write("")  # Line break to move to the next row
84
+
view_pdf.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables from the local .env file.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handle for the uploaded-files collection,
# used by view_pdfs() below.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
16
+
17
+
18
def format_date(timestamp):
    """Return the given POSIX timestamp formatted like 'January 01, 2024'."""
    return datetime.fromtimestamp(timestamp).strftime("%B %d, %Y")
21
+
22
+
23
+ # Custom CSS to control image and expander container width and styling
24
+
25
+
26
def view_pdfs():
    """List all uploaded PDFs from MongoDB in a 4-column grid.

    Each PDF gets an expander (titled with its file name) holding its
    metadata and a download link to the S3 object URL.
    """
    # Navigation back to the upload landing page.
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()
    st.title("Your Uploaded PDFs")

    # Fetch all uploaded PDFs from MongoDB.
    pdfs = list(collection.find({"type": "PDF"}))

    if not pdfs:
        st.write("You have not uploaded any PDFs yet.")
        return

    # Display PDFs in a grid (4 per row).
    cols = st.columns(4)
    for idx, pdf in enumerate(pdfs):
        col = cols[idx % 4]

        with col:
            filename = pdf.get('name', 'N/A')
            # BUG FIX: the expander title was the literal placeholder
            # "(unknown)"; `filename` was computed but never used.
            with st.expander(filename):
                st.write(f"**File Name:** {pdf.get('name', 'N/A')}")
                # NOTE(review): falls back to "now" when upload_date is missing,
                # which displays today's date rather than an unknown marker.
                st.write(f"**Date Uploaded:** {format_date(pdf.get('upload_date', datetime.now().timestamp()))}")

                # Display tags if available.
                tags = ", ".join(pdf.get("tags", []))
                st.write(f"**Tags:** {tags if tags else 'No tags'}")

                # Display categories if available.
                categories = ", ".join(pdf.get("categories", []))
                st.write(f"**Categories:** {categories if categories else 'No categories'}")

                # Download link (served straight from the S3 object URL).
                st.markdown(
                    f"<a href='{pdf['object_url']}' class='download-link' download>Download PDF</a>",
                    unsafe_allow_html=True
                )

        # Move to a new row after every 4 PDFs.
        if (idx + 1) % 4 == 0:
            st.write("")  # Line break to move to the next row
+
70
+