Spaces:

cjber
/

planning-ai

Build error

cjber committed on Feb 20

Commit

7ba5cef

1 Parent(s): 07f0f16

fix: remove caching as it doesn't regenerate automatically with new files

Former-commit-id: 22799dd0ee191972843945e1fa918ac0d8667ad4 [formerly b24513f77292e73dffdc51ccc84a096143561a6a]
Former-commit-id: b708c40962ede960a2550b6cbaa34222e1ee9d2a

Files changed (1) hide show

app.py +56 -67

app.py CHANGED Viewed

@@ -86,28 +86,16 @@ def upload_and_extract_files():
                     archive.extractall(path=UPLOAD_DIR)
                 st.session_state["files_extracted"] = True
                 st.success(f"Extracted `{len(list(UPLOAD_DIR.glob('*.json')))}` files.")
             except Exception as e:
                 st.error(f"Failed to extract files {e}")
-@st.cache_data
-def cache_preprocess_main():
-    return preprocess_main()
-@st.cache_data
-def cache_process_pdfs():
-    return azure_process_pdfs()
-@st.cache_resource
-def cache_report_main():
-    return report_main()
 def build_report():
     """Build the report from extracted files."""
     if st.session_state["files_extracted"] and not st.session_state["completed"]:
         st.title("Build Report")
         st.write(
             "Once the files are extracted, click the button below to build the report."
@@ -115,64 +103,71 @@ def build_report():
         if st.button("Build Report", type="primary"):
             with st.spinner("Preprocessing files..."):
                 try:
-                    cache_preprocess_main()
                     st.success("Preprocessing completed successfully!")
                 except Exception as e:
                     st.error(f"An error occurred during preprocessing: {e}")
             with st.spinner("Extracting text from PDFs..."):
                 try:
-                    cache_process_pdfs()
                     st.success("Text extraction completed successfully!")
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report..."):
-                cache_report_main()
                 st.session_state["completed"] = True
 def display_download_buttons():
     """Display download buttons for the generated reports."""
-    if st.session_state["completed"]:
-        representations_documents = (
-            pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
-            .select(pl.col("representations_document"))
-            .unique()
-            .collect()["representations_document"]
-            .to_list()
-        )
-        st.success("Reports built successfully! Please click download buttons below.")
-        for rep in representations_documents:
-            report_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
-            summaries_path = Paths.SUMMARY / f"Summary_of_Submitted_Responses-{rep}.pdf"
-            col1, col2 = st.columns(2, border=True)
-            with col1:
-                with open(summaries_path, "rb") as pdf_file:
-                    st.markdown("**Executive Report Download**")
-                    st.download_button(
-                        label=f"{rep}",
-                        data=pdf_file,
-                        file_name=f"Summary_of_Submitted_Responses-{rep}.pdf",
-                        mime="application/pdf",
-                        type="primary",
-                    )
-            with col2:
-                with open(report_path, "rb") as pdf_file:
-                    st.markdown("**Represtations Summary Download**")
-                    st.download_button(
-                        label=f"{rep}",
-                        data=pdf_file,
-                        file_name=f"Summary_Documents-{rep}.pdf",
-                        mime="application/pdf",
-                        type="primary",
-                    )
-def reset_session_state():
-    """Reset session state after report generation."""
-    st.session_state["files_extracted"] = False
-    st.session_state["completed"] = False
 def main():
@@ -190,14 +185,8 @@ def main():
     elif st.session_state["authentication_status"] is None:
         st.warning("Please enter your username and password")
-    display_download_buttons()
-    st.write("---")
-    if st.session_state.get("completed"):
-        st.warning(
-            "You **must** clear the memory if you are processing a new collection of representations."
-        )
-        st.button("Clear Memory", on_click=reset_session_state, type="primary")
 if __name__ == "__main__":

                     archive.extractall(path=UPLOAD_DIR)
                 st.session_state["files_extracted"] = True
                 st.success(f"Extracted `{len(list(UPLOAD_DIR.glob('*.json')))}` files.")
+                st.session_state["completed"] = False
             except Exception as e:
                 st.error(f"Failed to extract files {e}")
 def build_report():
     """Build the report from extracted files."""
     if st.session_state["files_extracted"] and not st.session_state["completed"]:
+        # Remove old files
+        _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
         st.title("Build Report")
         st.write(
             "Once the files are extracted, click the button below to build the report."
         if st.button("Build Report", type="primary"):
             with st.spinner("Preprocessing files..."):
                 try:
+                    preprocess_main()
                     st.success("Preprocessing completed successfully!")
                 except Exception as e:
                     st.error(f"An error occurred during preprocessing: {e}")
             with st.spinner("Extracting text from PDFs..."):
                 try:
+                    azure_process_pdfs()
                     st.success("Text extraction completed successfully!")
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report..."):
+                report_main()
                 st.session_state["completed"] = True
 def display_download_buttons():
     """Display download buttons for the generated reports."""
+    representations_documents = (
+        pl.scan_parquet(Paths.STAGING / "gcpt3.parquet")
+        .select(pl.col("representations_document"))
+        .unique()
+        .collect()["representations_document"]
+        .to_list()
+    )
+    # remove some old intermediate files
+    _ = [file.unlink() for file in (Paths.STAGING / "pdfs_azure").glob("*.pdf")]
+    with open((Paths.RAW / "failed_downloads.txt"), "w") as f:
+        f.write("")
+    st.success("Reports built successfully! Please click download buttons below.")
+    st.write("---")
+    st.header("Download Reports")
+    st.markdown(
+        f"""
+        The processing has produced {len(representations_documents)} reports based on the different
+        representation documents. The following download buttons provides links to all of these reports,
+        alongside summaries for each representation used to form those reports.
+        """
+    )
+    for rep in representations_documents:
+        report_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
+        summaries_path = Paths.SUMMARY / f"Summary_of_Submitted_Responses-{rep}.pdf"
+        col1, col2 = st.columns(2, border=True)
+        with col1:
+            with open(summaries_path, "rb") as pdf_file:
+                st.markdown("**Executive Report Download**")
+                st.download_button(
+                    label=f"{rep}",
+                    data=pdf_file,
+                    file_name=f"Summary_of_Submitted_Responses-{rep}.pdf",
+                    mime="application/pdf",
+                    type="primary",
+                )
+        with col2:
+            with open(report_path, "rb") as pdf_file:
+                st.markdown("**Represtations Summary Download**")
+                st.download_button(
+                    label=f"{rep}",
+                    data=pdf_file,
+                    file_name=f"Summary_Documents-{rep}.pdf",
+                    mime="application/pdf",
+                    type="primary",
+                )
 def main():
     elif st.session_state["authentication_status"] is None:
         st.warning("Please enter your username and password")
+    if st.session_state["completed"]:
+        display_download_buttons()
 if __name__ == "__main__":