# noticeboard / app.py
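"""Streamlit app that searches noticeboard PDFs hosted on Google Drive for a
user-supplied name and/or form number and displays any matching rows."""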
import streamlit as st
import pdfplumber
import requests
from io import BytesIO
import pandas as pd
import re
# Streamlit UI
st.title("📜 Find Information From Noticeboard")
st.write("Provide at least one user input (Name or Form Number).")
# Google Drive links mapped to meaningful names
google_drive_files = {
    "CCN Updated Attendance": "https://drive.google.com/file/d/17OqE5oSlXHlPlIzu7TLhnQXRWsE6TxQ1/view?usp=sharing",
}
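# Additional noticeboard PDFs can be added above as "Display Name": share-link pairs;
# every listed file is downloaded and searched on each request.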
# User inputs
name = st.text_input("Enter Name:", placeholder="Enter the full name to search for (e.g., John Doe).")
form_no = st.text_input("Enter Form Number:", placeholder="Enter the form number as it appears in the document.")
# Function to download PDFs from Google Drive links
def download_pdf_from_drive(link):
    try:
        file_id = link.split("/")[-2]  # Extract the file ID from the share link
        download_url = f"https://drive.google.com/uc?id={file_id}&export=download"
        response = requests.get(download_url, stream=True, timeout=30)
        if response.status_code == 200:
            return BytesIO(response.content)
        else:
            st.warning(f"⚠ Failed to download file: {link} (Status code: {response.status_code})")
            return None
    except Exception as e:
        st.warning(f"⚠ Error: {e}")
        return None
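# Note: the uc?export=download endpoint returns the file directly for documents shared
# as "anyone with the link"; very large files may instead return an HTML confirmation
# page, which this simple downloader does not handle.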
# Function to extract column headers dynamically
def detect_column_headers(pdf_file):
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages[:3]:  # Check the first 3 pages for headers
            table = page.extract_table()
            if table and len(table) > 1 and table[0] is not None:
                return table[0]  # First row is the header
            text = page.extract_text()
            if text:
                lines = text.split("\n")
                for line in lines:
                    columns = re.split(r'\s{2,}|\t', line.strip())
                    if len(columns) > 2:
                        return columns
    return None
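# Falls back to splitting extracted text on runs of two or more spaces (or tabs) when
# pdfplumber cannot detect a table; a line with more than two such fields is treated
# as the header row.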
# Function to extract relevant rows dynamically
def extract_relevant_rows(pdf_file, search_terms, file_name, detected_headers):
    relevant_rows = []
    with pdfplumber.open(pdf_file) as pdf:
        for page_num, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            if table and len(table) > 1:
                for row in table[1:]:
                    if row and any(term.lower() in " ".join(map(str, row)).lower() for term in search_terms):
                        row_dict = {"File Name": file_name, "Page": page_num}
                        row_dict.update({detected_headers[i]: row[i] if i < len(row) else None for i in range(len(detected_headers))})
                        relevant_rows.append(row_dict)
            else:
                text = page.extract_text()
                if text:
                    lines = text.split("\n")
                    for line in lines:
                        for term in search_terms:
                            if term.lower() in line.lower():
                                columns = re.split(r'\s{2,}|\t', line.strip())
                                row_dict = {"File Name": file_name, "Page": page_num}
                                row_dict.update({detected_headers[i]: columns[i] if i < len(columns) else None for i in range(len(detected_headers))})
                                relevant_rows.append(row_dict)
                                break  # Avoid appending the same line twice when both terms match
    return relevant_rows
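# Matching is case-insensitive substring matching, so partial names or form numbers
# will also return rows.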
# Process the request when the button is clicked
if st.button("🔍 Find Information"):
    if not (name or form_no):
        st.error("❌ Please provide at least one input (Name or Form Number).")
    else:
        search_terms = [term for term in [name, form_no] if term]
        all_relevant_rows = []
        for file_name, link in google_drive_files.items():
            pdf_file = download_pdf_from_drive(link)
            if pdf_file:
                detected_headers = detect_column_headers(pdf_file)
                if not detected_headers:
                    st.warning(f"⚠ Unable to detect headers in {file_name}.")
                    continue
                pdf_file.seek(0)  # Rewind the buffer before reading the PDF a second time
                relevant_rows = extract_relevant_rows(pdf_file, search_terms, file_name, detected_headers)
                if relevant_rows:
                    all_relevant_rows.extend(relevant_rows)
        if all_relevant_rows:
            st.success("✅ Relevant information found:")
            df = pd.DataFrame(all_relevant_rows).fillna("-")  # Fill missing values for readability
            st.dataframe(df)
        else:
            st.warning("⚠ No matching information found in the provided files.")
# Footer
st.markdown("---")
st.markdown(
    "<p style='text-align: center; font-size: 14px;'>Designed by: <b>Engr. Makhdoom Muhammad Naeem Hashmi</b></p>",
    unsafe_allow_html=True,
)