Spaces:
Build error
Build error
feat: single click to process doc
Browse files
app.py
CHANGED
@@ -10,52 +10,48 @@ UPLOAD_DIR = Paths.RAW / "gcpt3"
|
|
10 |
|
11 |
if "files_extracted" not in st.session_state:
|
12 |
st.session_state["files_extracted"] = False
|
13 |
-
if "files_processed" not in st.session_state:
|
14 |
-
st.session_state["files_processed"] = False
|
15 |
-
if "pdfs_processed" not in st.session_state:
|
16 |
-
st.session_state["pdfs_processed"] = False
|
17 |
|
18 |
st.title("Planning AI")
|
19 |
|
20 |
|
21 |
-
st.header("
|
22 |
st.write(
|
23 |
-
""
|
24 |
-
|
25 |
-
|
26 |
-
The `.json` files should look like the following:
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
"respondentpostcode": "CB2 9NE",
|
33 |
-
"text": "",
|
34 |
-
"attachments": [
|
35 |
-
{
|
36 |
-
"id": 3803,
|
37 |
-
"url": "http:\/\/www.cambridge.gov.uk\/public\/ldf\/localplan2031\/15417.pdf",
|
38 |
-
"published": false
|
39 |
-
}
|
40 |
-
],
|
41 |
-
"representations": [
|
42 |
-
{
|
43 |
-
"id": 15417,
|
44 |
-
"support\/object": "Object",
|
45 |
-
"document": "Issues and Options Report",
|
46 |
-
"documentelementid": 29785,
|
47 |
-
"documentelementtitle": "3 - Spatial Strategy, Question 3.10",
|
48 |
-
"summary": "No more green belt taken away, which is prime agricultural land. Noise pollution & light pollution for surrounding villages and new houses being built, no bus services either!"
|
49 |
-
},
|
50 |
-
]
|
51 |
-
}
|
52 |
-
```
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
"""
|
55 |
-
)
|
56 |
-
uploaded_file
|
57 |
-
|
58 |
-
if uploaded_file and not st.session_state["files_extracted"]:
|
59 |
with st.spinner("Extracting files..."):
|
60 |
try:
|
61 |
with py7zr.SevenZipFile(uploaded_file, mode="r") as archive:
|
@@ -70,49 +66,28 @@ if uploaded_file and not st.session_state["files_extracted"]:
|
|
70 |
if not st.session_state["files_extracted"]:
|
71 |
st.write("No files uploaded yet.")
|
72 |
|
|
|
|
|
73 |
if st.session_state["files_extracted"]:
|
74 |
-
st.
|
75 |
st.write(
|
76 |
-
"Once the files are extracted, click the button below to
|
77 |
)
|
78 |
-
if st.button("
|
79 |
-
with st.spinner("
|
80 |
try:
|
81 |
preprocess_main()
|
82 |
-
st.session_state["files_processed"] = True
|
83 |
st.success("Preprocessing completed successfully!")
|
84 |
except Exception as e:
|
85 |
st.error(f"An error occurred during preprocessing: {e}")
|
86 |
-
|
87 |
-
if st.session_state["files_extracted"] and st.session_state["files_processed"]:
|
88 |
-
st.header("3. Extract text from PDFs.")
|
89 |
-
st.write(
|
90 |
-
"After preprocessing the `.json` files, you can now extract text from the PDFs by clicking the button below."
|
91 |
-
)
|
92 |
-
if st.button("Process PDFs"):
|
93 |
with st.spinner("Extracting text from PDFs..."):
|
94 |
try:
|
95 |
azure_process_pdfs()
|
96 |
-
st.session_state["pdfs_processed"] = True
|
97 |
st.success("Text extraction completed successfully!")
|
98 |
except Exception as e:
|
99 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
100 |
-
|
101 |
-
if (
|
102 |
-
st.session_state["files_extracted"]
|
103 |
-
and st.session_state["files_processed"]
|
104 |
-
and st.session_state["pdfs_processed"]
|
105 |
-
):
|
106 |
-
st.title("Build final report.")
|
107 |
-
st.write(
|
108 |
-
"After extracting text from PDFs, you can now run the full report building pipeline!"
|
109 |
-
)
|
110 |
-
if st.button("Build Report", type="primary"):
|
111 |
with st.spinner("Building report..."):
|
112 |
-
|
113 |
-
report_main()
|
114 |
-
except Exception as e:
|
115 |
-
st.error(f"An error occurred during report building: {e}")
|
116 |
report_path = Paths.SUMMARY / "Summary_Documents.pdf"
|
117 |
summaries_path = Paths.SUMMARY / "Summary_of_Submitted_Responses.pdf"
|
118 |
|
|
|
10 |
|
11 |
if "files_extracted" not in st.session_state:
|
12 |
st.session_state["files_extracted"] = False
|
|
|
|
|
|
|
|
|
13 |
|
14 |
st.title("Planning AI")
|
15 |
|
16 |
|
17 |
+
st.header("Upload JDL response `.json` files")
|
18 |
st.write(
|
19 |
+
"Upload your `.json` files here as a `7zip` file, they will be saved to the `data/raw/gcpt3` directory."
|
20 |
+
)
|
|
|
|
|
21 |
|
22 |
+
with st.expander("File Format"):
|
23 |
+
st.write(
|
24 |
+
"""
|
25 |
+
The `.json` files should look like the following:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
+
```json
|
28 |
+
{
|
29 |
+
"id": 10008,
|
30 |
+
"method": "Paper",
|
31 |
+
"respondentpostcode": "CB2 9NE",
|
32 |
+
"text": "",
|
33 |
+
"attachments": [
|
34 |
+
{
|
35 |
+
"id": 3803,
|
36 |
+
"url": "http:\/\/www.cambridge.gov.uk\/public\/ldf\/localplan2031\/15417.pdf",
|
37 |
+
"published": false
|
38 |
+
}
|
39 |
+
],
|
40 |
+
"representations": [
|
41 |
+
{
|
42 |
+
"id": 15417,
|
43 |
+
"support\/object": "Object",
|
44 |
+
"document": "Issues and Options Report",
|
45 |
+
"documentelementid": 29785,
|
46 |
+
"documentelementtitle": "3 - Spatial Strategy, Question 3.10",
|
47 |
+
"summary": "No more green belt taken away, which is prime agricultural land. Noise pollution & light pollution for surrounding villages and new houses being built, no bus services either!"
|
48 |
+
},
|
49 |
+
]
|
50 |
+
}
|
51 |
+
```
|
52 |
"""
|
53 |
+
)
|
54 |
+
if uploaded_file := st.file_uploader("Choose a `.7z` file:", type="7z"):
|
|
|
|
|
55 |
with st.spinner("Extracting files..."):
|
56 |
try:
|
57 |
with py7zr.SevenZipFile(uploaded_file, mode="r") as archive:
|
|
|
66 |
if not st.session_state["files_extracted"]:
|
67 |
st.write("No files uploaded yet.")
|
68 |
|
69 |
+
st.write("---")
|
70 |
+
|
71 |
if st.session_state["files_extracted"]:
|
72 |
+
st.title("Build Report")
|
73 |
st.write(
|
74 |
+
"Once the files are extracted, click the button below to build the report."
|
75 |
)
|
76 |
+
if st.button("Build Report", type="primary"):
|
77 |
+
with st.spinner("Preprocessing files..."):
|
78 |
try:
|
79 |
preprocess_main()
|
|
|
80 |
st.success("Preprocessing completed successfully!")
|
81 |
except Exception as e:
|
82 |
st.error(f"An error occurred during preprocessing: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
with st.spinner("Extracting text from PDFs..."):
|
84 |
try:
|
85 |
azure_process_pdfs()
|
|
|
86 |
st.success("Text extraction completed successfully!")
|
87 |
except Exception as e:
|
88 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
with st.spinner("Building report..."):
|
90 |
+
report_main()
|
|
|
|
|
|
|
91 |
report_path = Paths.SUMMARY / "Summary_Documents.pdf"
|
92 |
summaries_path = Paths.SUMMARY / "Summary_of_Submitted_Responses.pdf"
|
93 |
|