import base64
import re
import time
from os import getenv
import py7zr
import requests
import streamlit as st
import streamlit_authenticator as stauth
from bs4 import BeautifulSoup
from planning_ai.common.utils import Paths
from planning_ai.main import main as report_main
from planning_ai.preprocessing.azure_doc import azure_process_pdfs
from planning_ai.preprocessing.gcpt3 import main as preprocess_main
st.set_page_config(layout="wide")
st.markdown(
"""
""",
unsafe_allow_html=True,
)
# Encode the image to base64
def get_image_base64(path):
with open(path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
logo_base64 = get_image_base64("logo.png")
# Add the logo in the footer using base64
st.markdown(
f"""
""",
unsafe_allow_html=True,
)
# Top bar content
st.markdown(
"""
""",
unsafe_allow_html=True,
)
# Load authentication secrets
auth = st.secrets.to_dict()
if "credentials" not in auth:
auth["credentials"] = {
"usernames": {
"admin": {
"email": getenv("EMAIL"),
"password": getenv("PASSWORD"),
"first_name": "Admin",
"last_name": "Admin",
"logged_in": False,
"roles": ["viewer"],
}
}
}
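    # Placeholder cookie settings; replace the name and signing key in deployment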
auth["cookie"] = {
"name": "some_cookie_name",
"key": "some_signature_key",
"expiry_days": 30,
}
# Initialize the authenticator
authenticator = stauth.Authenticate(
auth["credentials"],
auth["cookie"]["name"],
auth["cookie"]["key"],
auth["cookie"]["expiry_days"],
)
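# Uploaded JDi .json files are extracted from the .7z archive into this directory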
UPLOAD_DIR = Paths.RAW / "gcpt3"
def initialize_session_state():
"""Initialize session state variables."""
if "chapters" not in st.session_state:
st.session_state["chapters"] = False
if "files_extracted" not in st.session_state:
st.session_state["files_extracted"] = False
if "completed" not in st.session_state:
st.session_state["completed"] = False
if "start_time" not in st.session_state:
st.session_state["start_time"] = None
if "end_time" not in st.session_state:
st.session_state["end_time"] = None
def get_chapters(consultation_url: str):
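    """Scrape a Consultation Hub page for the report title and chapter headings.

    The page's first <h2> is taken as the title, and links between the first
    and second <h2> are taken as chapter headings. Returns a (title, chapters)
    tuple: placeholder values when no URL has been entered yet, empty values
    on failure so the caller can fall back to manual entry.

    Hypothetical example:
        get_chapters("https://oc2.greatercambridgeplanning.org/document/1314")
        # -> ("Greater Cambridge Local Plan", ["Introduction", "Spatial Strategy", ...])
    """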
if not consultation_url:
return "None", ["None"]
    try:
        response = requests.get(consultation_url, timeout=30)
    except requests.exceptions.RequestException:
        st.error("Please enter a valid URL.")
        return "", []
if not response.ok:
st.error("Failed to fetch consultation document")
return "", []
soup = BeautifulSoup(response.text, "html.parser")
h2_tags = soup.find_all("h2")
    if len(h2_tags) < 2:
st.error("Invalid page format - not enough headers")
return "", []
first_h2 = h2_tags[0]
second_h2 = h2_tags[1]
# Collect links between the first and second
links_between = []
for sibling in first_h2.find_all_next():
if sibling == second_h2: # Stop when reaching the second
break
if sibling.name == "a": # If it's a link
link_text = sibling.text.strip()
if link_text:
links_between.append(link_text)
cleaned_links = [re.sub(r"\s*\(.*?\)$", "", link) for link in links_between]
cleaned_title = first_h2.text.strip()
return cleaned_title, cleaned_links
def specify_chapters():
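    """Collect the consultation URL, preview the scraped title and chapters,
    and persist the final title and chapter list for the report build."""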
st.title("Specify Chapters")
st.write(
"Please specify the Consultation Document URL from the Consultation Hub. This will autopopulate the chapter headings for the final document. \n\n**Please ensure that the final chapter headings are correct.**"
)
chapters = []
consultation_url = st.text_input(
"Consultation Document URL",
key="consultation_url",
placeholder="https://oc2.greatercambridgeplanning.org/document/1314",
)
title, chapters = get_chapters(consultation_url)
st.write(f"**Title:** {title}")
st.write("**Chapters:**", "\n- " + "\n- ".join(chapters))
st.write(
"**If the chapter headings are incorrect, please add them manually below, separated by commas.**"
)
chapters = st.text_input(
"Chapter Headings",
key="chapter_headings",
placeholder=", ".join(chapters),
value=", ".join(chapters),
)
chapters = [chapter.strip() for chapter in chapters.split(",")]
with open(Paths.RAW / "chapters.txt", "w") as f:
f.write("\n".join(chapters))
with open(Paths.RAW / "title.txt", "w") as f:
f.write(title)
st.button(
"Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
)
def upload_and_extract_files():
"""Handle file upload and extraction."""
main1, main2 = st.columns(2)
with main1:
st.title("Introduction")
st.write(
"""
This program processes JDi `.json` files automatically, using AI to extract detailed information and produce comprehensive reports. For each _'representation document'_, two AI-generated documents are produced.

1. **Representation Summary** documents contain automatically generated summaries of each representation; representations are numbered both sequentially and by their unique ID.

2. **Executive Report** documents open with an executive summary of the key points extracted from response documents. Following this, a **Profile of Submissions** plots the demographic and geographic distribution of responses. Finally, a **Themes and Policies** section highlights the key themes and policies raised by responses, with notable information bullet-pointed. This document contains inline citations that relate back to the numbers assigned to responses in the **Representation Summary** documents, allowing readers to manually verify the claims and points made by the AI model.
"""
)
with main2:
st.title("Upload JDi files")
        st.write(
            """
1. Upload your `.json` files here as a `.7z` archive.
2. Please ensure that the `.json` files follow the correct format:"""
        )
with st.expander("**File Format example**"):
st.write(
r"""
```json
{
  "id": 10008,
  "method": "Paper",
  "respondentpostcode": "CB2 9NE",
  "text": "",
  "attachments": [
    {
      "id": 3803,
      "url": "http://www.cambridge.gov.uk/public/ldf/localplan2031/15417.pdf",
      "published": false
    }
  ],
  "representations": [
    {
      "id": 15417,
      "support/object": "Object",
      "document": "Issues and Options Report",
      "documentelementid": 29785,
      "documentelementtitle": "3 - Spatial Strategy, Question 3.10",
      "summary": "No more green belt taken away, which is prime agricultural land. Noise pollution & light pollution for surrounding villages and new houses being built, no bus services either!"
    }
  ]
}
```
"""
)
if uploaded_file := st.file_uploader("Choose a `.7z` file:", type="7z"):
with st.spinner("Extracting files...", show_time=True):
try:
                # Remove any previously extracted files
                for file in UPLOAD_DIR.glob("*.json"):
                    file.unlink()
# Extract new files
with py7zr.SevenZipFile(uploaded_file, mode="r") as archive:
archive.extractall(path=UPLOAD_DIR)
st.session_state["files_extracted"] = True
st.success(
f"Extracted `{len(list(UPLOAD_DIR.glob('*.json')))}` files."
)
except Exception as e:
st.error(f"Failed to extract files {e}")
def build_report():
"""Build the report from extracted files."""
    # Remove summary PDFs left over from previous runs
    for file in (Paths.OUT / "summaries").rglob("*.pdf"):
        file.unlink()
st.title("Build Report")
st.write(
"Once the files are extracted, click the button below to build the report.\n\n"
"Do **not** close this page while the report is being built."
)
if not st.session_state["start_time"]:
if st.button("Build Report", type="primary"):
st.session_state["start_time"] = time.time()
with st.spinner("Preprocessing files...", show_time=True):
try:
preprocess_main()
time_taken = time.time() - st.session_state["start_time"]
st.success(
f"Preprocessing completed successfully in {time_taken:.1f} seconds!"
)
except Exception as e:
st.error(f"An error occurred during preprocessing: {e}")
with st.spinner("Extracting text from PDFs...", show_time=True):
try:
azure_process_pdfs()
time_taken = time.time() - st.session_state["start_time"]
st.success(
f"Text extraction completed successfully in {time_taken:.1f} seconds!"
)
except Exception as e:
st.error(f"An error occurred during PDF text extraction: {e}")
with st.spinner("Building report...", show_time=True):
report_main()
st.session_state["end_time"] = time.time()
st.session_state["completed"] = True
total_time = (
st.session_state["end_time"] - st.session_state["start_time"]
)
st.success(f"Report building completed in {total_time:.1f} seconds!")
def display_download_buttons(rep):
"""Display download buttons for the generated reports."""
    # Remove intermediate Azure-processed PDFs from previous runs
    for file in (Paths.STAGING / "pdfs_azure").glob("*.pdf"):
        file.unlink()
    # Truncate the failed-downloads log from the previous run
    with open(Paths.RAW / "failed_downloads.txt", "w") as f:
        f.write("")
st.success("Reports built successfully! Please click download buttons below.")
st.write("---")
st.header("Download Reports")
st.markdown(
"""
The following download buttons provide links to the final report,
alongside summaries of the representations used to build it.
"""
)
# Add some spacing and better organization
st.markdown("---")
# Create a container for the Executive Reports
with st.expander("**Executive Reports**"):
summaries_pdf_path = Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.pdf"
summaries_docx_path = (
Paths.SUMMARY / f"Overview_of_Public_Submissions-{rep}.docx"
)
with st.container():
st.subheader(f"Executive Report for {rep}")
col1, col2 = st.columns(2)
with col1:
with open(summaries_pdf_path, "rb") as pdf_file:
st.download_button(
label="Download PDF Version",
data=pdf_file,
file_name=f"Overview_of_Public_Submissions-{rep}.pdf",
mime="application/pdf",
use_container_width=True,
key=f"exec_pdf_{hash(rep)}",
)
with col2:
with open(summaries_docx_path, "rb") as docx_file:
st.download_button(
label="Download DOCX Version",
data=docx_file,
file_name=f"Overview_of_Public_Submissions-{rep}.docx",
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
use_container_width=True,
key=f"exec_docx_{hash(rep)}",
)
st.markdown("---")
# Create a container for the Representation Summaries
with st.expander("**Representation Summaries**"):
report_pdf_path = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.pdf"
report_docx_path = Paths.SUMMARY / f"Summaries_of_Public_Submissions-{rep}.docx"
with st.container():
st.subheader(f"Representation Summary for {rep}")
col1, col2 = st.columns(2)
with col1:
with open(report_pdf_path, "rb") as pdf_file:
st.download_button(
label="Download PDF Version",
data=pdf_file,
file_name=f"Summaries_of_Public_Submissions-{rep}.pdf",
mime="application/pdf",
use_container_width=True,
key=f"rep_pdf_{hash(rep)}",
)
with col2:
with open(report_docx_path, "rb") as docx_file:
st.download_button(
label="Download DOCX Version",
data=docx_file,
file_name=f"Summaries_of_Public_Submissions-{rep}.docx",
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
use_container_width=True,
key=f"rep_docx_{hash(rep)}",
)
st.markdown("---")
def reset_session():
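    """Reset all workflow flags so the app returns to the first step."""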
st.session_state["chapters"] = False
st.session_state["files_extracted"] = False
st.session_state["completed"] = False
st.session_state["start_time"] = None
st.session_state["end_time"] = None
def main():
"""Main function to run the Streamlit app."""
authenticator.login()
initialize_session_state()
# Handle authentication states
if st.session_state["authentication_status"] is False:
st.error("Username/password is incorrect")
elif st.session_state["authentication_status"] is None:
st.warning("Please enter your username and password")
# Reset session if not authenticated
if not st.session_state["authentication_status"]:
reset_session()
return
if st.session_state["authentication_status"]:
authenticator.logout() # show logout button
# Step 1: Specify chapters
if not st.session_state["chapters"]:
specify_chapters()
# Step 2: Upload and extract files
if not st.session_state["files_extracted"] and st.session_state["chapters"]:
upload_and_extract_files()
# Step 3: Build report if files are ready
if st.session_state["files_extracted"]:
build_report()
        # Step 4: Show download buttons when complete
        if st.session_state["completed"]:
            with open(Paths.RAW / "title.txt", "r") as f:
                rep = f.read().strip()
            display_download_buttons(rep)
if __name__ == "__main__":
main()