Spaces:
Build error
Build error
feat: add themes breakdown
Browse files- planning_ai/document.py +123 -17
planning_ai/document.py
CHANGED
@@ -1,9 +1,13 @@
|
|
|
|
|
|
1 |
import re
|
|
|
2 |
|
3 |
import geopandas as gpd
|
4 |
import matplotlib.pyplot as plt
|
5 |
import numpy as np
|
6 |
import polars as pl
|
|
|
7 |
|
8 |
from planning_ai.common.utils import Paths
|
9 |
|
@@ -17,29 +21,99 @@ def _process_postcodes(final):
|
|
17 |
.with_columns(pl.col("postcode").str.replace_all(" ", ""))
|
18 |
)
|
19 |
onspd = pl.read_csv(
|
20 |
-
Paths.RAW / "onspd" / "ONSPD_FEB_2024.csv",
|
|
|
21 |
).with_columns(pl.col("PCD").str.replace_all(" ", "").alias("postcode"))
|
22 |
postcodes = postcodes.join(onspd, on="postcode")
|
23 |
return postcodes
|
24 |
|
25 |
|
26 |
def _process_policies(final):
|
27 |
-
|
28 |
-
|
29 |
-
all_policies = ""
|
30 |
-
for (theme, stance), policy in policies_df.group_by(
|
31 |
-
["themes", "stance"], maintain_order=True
|
32 |
-
):
|
33 |
details = "".join(
|
34 |
f'\n### {row["policies"]}\n\n'
|
35 |
+ "".join(
|
36 |
f"- {detail} {doc_id}\n"
|
37 |
for detail, doc_id in zip(row["detail"], row["doc_id"])
|
38 |
)
|
39 |
-
for row in
|
40 |
)
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
|
45 |
def fig_wards(postcodes):
|
@@ -89,13 +163,13 @@ def fig_wards(postcodes):
|
|
89 |
ax=ax,
|
90 |
column="count",
|
91 |
legend=True,
|
92 |
-
|
93 |
legend_kwds={"label": "Number of Representations"},
|
94 |
)
|
95 |
ward_boundaries.plot(ax=ax, color="none", edgecolor="gray")
|
96 |
camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black")
|
97 |
|
98 |
-
bounds =
|
99 |
buffer = 10_000
|
100 |
ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
|
101 |
ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
|
@@ -175,20 +249,26 @@ def build_final_report(out):
|
|
175 |
This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce an abstractive summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate with the 'Summary Responses' document, to increase transparency.
|
176 |
"""
|
177 |
figures_paragraph = """
|
178 |
-
@fig-wards shows the percentage of responses by total population within each Ward that had at least one response. @fig-imd shows the percentage of responses by total population within each IMD quintile.
|
|
|
|
|
|
|
179 |
"""
|
180 |
final = out["generate_final_report"]
|
181 |
-
|
182 |
postcodes = _process_postcodes(final)
|
|
|
|
|
183 |
|
184 |
fig_wards(postcodes)
|
|
|
185 |
fig_imd(postcodes)
|
186 |
|
187 |
quarto_doc = (
|
188 |
"---\n"
|
189 |
f"title: 'Summary of Submitted Responses'\n"
|
190 |
"format:\n"
|
191 |
-
"
|
192 |
" papersize: A4\n"
|
193 |
"execute:\n"
|
194 |
" freeze: auto\n"
|
@@ -198,16 +278,36 @@ This report was produced using a generative pre-trained transformer (GPT) large-
|
|
198 |
" - Scale=0.55\n"
|
199 |
"---\n\n"
|
200 |
f"{final['executive']}\n\n"
|
|
|
|
|
201 |
f"{introduction_paragraph}\n\n"
|
202 |
"\n# Figures\n\n"
|
|
|
203 |
f"{{#fig-wards}}\n\n"
|
|
|
204 |
f"{{#fig-imd}}\n\n"
|
205 |
"# Themes and Policies\n\n"
|
206 |
-
f"{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
)
|
208 |
|
209 |
with open(Paths.SUMMARY / "Summary_of_Submitted_Responses.qmd", "w") as f:
|
210 |
f.write(quarto_doc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
|
213 |
def build_summaries_document(out):
|
@@ -223,7 +323,7 @@ def build_summaries_document(out):
|
|
223 |
"---\n"
|
224 |
"title: 'Summary Documents'\n"
|
225 |
"format:\n"
|
226 |
-
"
|
227 |
" papersize: A4\n"
|
228 |
"execute:\n"
|
229 |
" freeze: auto\n"
|
@@ -235,3 +335,9 @@ def build_summaries_document(out):
|
|
235 |
)
|
236 |
with open(Paths.SUMMARY / "Summary_Documents.qmd", "w") as f:
|
237 |
f.write(f"{quarto_header}{full_text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import itertools
|
2 |
+
import logging
|
3 |
import re
|
4 |
+
from collections import Counter
|
5 |
|
6 |
import geopandas as gpd
|
7 |
import matplotlib.pyplot as plt
|
8 |
import numpy as np
|
9 |
import polars as pl
|
10 |
+
from polars.dependencies import subprocess
|
11 |
|
12 |
from planning_ai.common.utils import Paths
|
13 |
|
|
|
21 |
.with_columns(pl.col("postcode").str.replace_all(" ", ""))
|
22 |
)
|
23 |
onspd = pl.read_csv(
|
24 |
+
Paths.RAW / "onspd" / "ONSPD_FEB_2024.csv",
|
25 |
+
columns=["PCD", "OSWARD", "LSOA11", "OA21"],
|
26 |
).with_columns(pl.col("PCD").str.replace_all(" ", "").alias("postcode"))
|
27 |
postcodes = postcodes.join(onspd, on="postcode")
|
28 |
return postcodes
|
29 |
|
30 |
|
31 |
def _process_policies(final):
    """Render the policy details as Markdown, split by stance.

    ``final["policies"]`` is grouped by ``themes`` and ``stance`` (with
    ``maintain_order=True``), and every group becomes a
    ``## <theme> - <stance>`` section holding one ``###`` sub-heading per
    policy row, followed by a bullet for each ``(detail, doc_id)`` pair.

    Args:
        final: Mapping whose ``"policies"`` entry supports
            ``group_by([...], maintain_order=True)`` and yields
            ``((theme, stance), group)`` pairs, where ``group`` exposes
            ``rows(named=True)`` with ``policies``, ``detail`` and ``doc_id``
            fields. NOTE(review): presumably a polars DataFrame -- confirm
            against the caller.

    Returns:
        A ``(support, object, other)`` tuple of Markdown strings. Groups
        whose stance is neither ``"Support"`` nor ``"Object"`` are collected
        in ``other``.
    """

    def process_policy_group(policy_group, theme, stance):
        """Render a single (theme, stance) group as a Markdown section."""
        details = "".join(
            f'\n### {row["policies"]}\n\n'
            + "".join(
                f"- {detail} {doc_id}\n"
                for detail, doc_id in zip(row["detail"], row["doc_id"])
            )
            for row in policy_group.rows(named=True)
        )
        return f"## {theme} - {stance}\n\n{details}\n"

    policies_df = final["policies"]

    # Collect rendered sections in lists and join once at the end instead of
    # growing three strings with ``+=`` inside the loop.
    support_sections = []
    object_sections = []
    other_sections = []

    for (theme, stance), policy in policies_df.group_by(
        ["themes", "stance"], maintain_order=True
    ):
        section = process_policy_group(policy, theme, stance)
        if stance == "Support":
            support_sections.append(section)
        elif stance == "Object":
            object_sections.append(section)
        else:
            other_sections.append(section)

    return (
        "".join(support_sections),
        "".join(object_sections),
        "".join(other_sections),
    )
|
60 |
+
|
61 |
+
|
62 |
+
def _process_stances(final):
    """Summarise how many representations fall under each stance.

    Reads the ``representations_support/object`` metadata value of every
    entry in ``final["documents"]`` and reports each distinct value with its
    share of the total and its raw count, most frequent first.

    Args:
        final: Mapping whose ``"documents"`` entry is an iterable of
            mappings, each holding a ``"document"`` object that has a
            ``metadata`` mapping.

    Returns:
        One Markdown line of ``**<stance>**: <share> _(<count>)_`` items
        joined by ``" | "``. An empty string when there are no documents.

    Raises:
        KeyError: If a document lacks the ``"document"`` entry or its
            metadata lacks ``representations_support/object``.
    """
    stance_counts = Counter(
        doc["document"].metadata["representations_support/object"]
        for doc in final["documents"]
    )
    total = sum(stance_counts.values())

    # most_common() orders by count, descending, and keeps first-seen order
    # for ties -- the same ordering as sorting by share of the total. With no
    # documents the generator is empty, so ``total == 0`` is never divided by.
    return " | ".join(
        f"**{stance}**: {count / total:.2%} _({count})_"
        for stance, count in stance_counts.most_common()
    )
|
82 |
+
|
83 |
+
|
84 |
+
def _process_themes(final):
    """Build a Markdown table of how often each theme occurs.

    Every entry of ``final["documents"]`` contributes the items of its
    ``"themes"`` value; the occurrences are tallied across all documents and
    expressed as a percentage of all theme occurrences, rounded to two
    decimals and sorted from most to least frequent.

    Args:
        final: Mapping whose ``"documents"`` entry is an iterable of mappings
            with an iterable ``"themes"`` value.

    Returns:
        The table (columns ``Theme``, ``Count``, ``Percentage``) rendered as
        a Markdown string without an index column.
    """
    theme_counts = Counter(
        theme for doc in final["documents"] for theme in doc["themes"]
    )

    # After the transpose the theme names are in "column" and their counts in
    # "column_0"; both are renamed to reader-facing headers further down.
    counts_frame = pl.DataFrame(theme_counts).transpose(include_header=True)

    share_of_total = (pl.col("column_0") / pl.sum("column_0")) * 100
    ranked = counts_frame.with_columns(
        share_of_total.round(2).alias("percentage")
    ).sort("percentage", descending=True)

    table = ranked.rename(
        {"column": "Theme", "column_0": "Count", "percentage": "Percentage"}
    )
    return table.to_pandas().to_markdown(index=False)
|
96 |
+
|
97 |
+
|
98 |
+
def fig_oa(postcodes):
    """Save a bar chart of representations per OAC supergroup.

    ``postcodes`` is joined to the classification table read from
    ``Paths.RAW / "oac21ew.csv"`` (``OA21`` against ``oa21cd``), the rows are
    counted per ``supergroup``, and the counts are drawn as a bar chart that
    is written to ``Paths.SUMMARY / "figs" / "oas.png"``.

    Args:
        postcodes: Frame with an ``OA21`` column that supports ``join``.
            NOTE(review): presumably the polars frame returned by
            ``_process_postcodes`` -- confirm against the caller.

    Returns:
        None. The figure is written to disk as a side effect.
    """
    oac = pl.read_csv(Paths.RAW / "oac21ew.csv")
    supergroup_counts = (
        postcodes.join(oac, left_on="OA21", right_on="oa21cd")
        .group_by("supergroup")
        .len()
        .sort("supergroup")
        .to_pandas()
    )

    fig, ax = plt.subplots()
    try:
        ax.bar(supergroup_counts["supergroup"], supergroup_counts["len"])
        ax.set_xlabel("Output Area Classification (OAC) Supergroup")
        ax.set_ylabel("Number of Representations")

        fig.tight_layout()
        fig.savefig(Paths.SUMMARY / "figs" / "oas.png")
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # release it here so repeated report builds do not accumulate figures.
        plt.close(fig)
|
117 |
|
118 |
|
119 |
def fig_wards(postcodes):
|
|
|
163 |
ax=ax,
|
164 |
column="count",
|
165 |
legend=True,
|
166 |
+
vmax=20,
|
167 |
legend_kwds={"label": "Number of Representations"},
|
168 |
)
|
169 |
ward_boundaries.plot(ax=ax, color="none", edgecolor="gray")
|
170 |
camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black")
|
171 |
|
172 |
+
bounds = np.array([541419.8982, 253158.2036, 549420.4025, 262079.7998])
|
173 |
buffer = 10_000
|
174 |
ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
|
175 |
ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
|
|
|
249 |
This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce an abstractive summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate with the 'Summary Responses' document, to increase transparency.
|
250 |
"""
|
251 |
figures_paragraph = """
|
252 |
+
@fig-wards shows the percentage of responses by total population within each Ward that had at least one response. This figure helps to identify which Wards are more active in terms of participation and representation. @fig-imd shows the percentage of responses by total population within each IMD quintile. This figure provides insight into the socio-economic distribution of the respondents, highlighting any potential demographic imbalances. @fig-oas displays the total number of representations submitted by Output Area (OA 2021). This figure offers a detailed geographic overview of the responses, allowing for a more granular analysis of participation across different areas.
|
253 |
+
"""
|
254 |
+
themes_paragraph = """
|
255 |
+
The following section provides a detailed breakdown of notable details from responses, grouped by themes and policies. Each theme is grouped by whether a responses is supporting, opposed, or a general comment. This section aims to give a comprehensive view of the key issues raised by the respondents with respect to the themes and policies outlined.
|
256 |
"""
|
257 |
final = out["generate_final_report"]
|
258 |
+
support_policies, object_policies, other_policies = _process_policies(final)
|
259 |
postcodes = _process_postcodes(final)
|
260 |
+
stances = _process_stances(final)
|
261 |
+
themes = _process_themes(final)
|
262 |
|
263 |
fig_wards(postcodes)
|
264 |
+
fig_oa(postcodes)
|
265 |
fig_imd(postcodes)
|
266 |
|
267 |
quarto_doc = (
|
268 |
"---\n"
|
269 |
f"title: 'Summary of Submitted Responses'\n"
|
270 |
"format:\n"
|
271 |
+
" pdf:\n"
|
272 |
" papersize: A4\n"
|
273 |
"execute:\n"
|
274 |
" freeze: auto\n"
|
|
|
278 |
" - Scale=0.55\n"
|
279 |
"---\n\n"
|
280 |
f"{final['executive']}\n\n"
|
281 |
+
f"{stances}\n\n"
|
282 |
+
"# Introduction\n\n"
|
283 |
f"{introduction_paragraph}\n\n"
|
284 |
"\n# Figures\n\n"
|
285 |
+
f"{figures_paragraph}\n\n"
|
286 |
f"{{#fig-wards}}\n\n"
|
287 |
+
f"{{#fig-oas}}\n\n"
|
288 |
f"{{#fig-imd}}\n\n"
|
289 |
"# Themes and Policies\n\n"
|
290 |
+
f"{themes_paragraph}\n\n"
|
291 |
+
f"{themes}{{#tbl-themes}}\n\n"
|
292 |
+
"## Support\n\n"
|
293 |
+
f"{support_policies}\n\n"
|
294 |
+
"## Object\n\n"
|
295 |
+
f"{object_policies}\n\n"
|
296 |
+
"## Other\n\n"
|
297 |
+
f"{other_policies}\n\n"
|
298 |
)
|
299 |
|
300 |
with open(Paths.SUMMARY / "Summary_of_Submitted_Responses.qmd", "w") as f:
|
301 |
f.write(quarto_doc)
|
302 |
+
command = [
|
303 |
+
"quarto",
|
304 |
+
"render",
|
305 |
+
f"{Paths.SUMMARY / 'Summary_of_Submitted_Responses.qmd'}",
|
306 |
+
]
|
307 |
+
try:
|
308 |
+
subprocess.run(command, check=True, capture_output=True)
|
309 |
+
except subprocess.CalledProcessError as e:
|
310 |
+
logging.error(f"Error during Summary_of_Submitted_Responses.qmd render: {e}")
|
311 |
|
312 |
|
313 |
def build_summaries_document(out):
|
|
|
323 |
"---\n"
|
324 |
"title: 'Summary Documents'\n"
|
325 |
"format:\n"
|
326 |
+
" pdf:\n"
|
327 |
" papersize: A4\n"
|
328 |
"execute:\n"
|
329 |
" freeze: auto\n"
|
|
|
335 |
)
|
336 |
with open(Paths.SUMMARY / "Summary_Documents.qmd", "w") as f:
|
337 |
f.write(f"{quarto_header}{full_text}")
|
338 |
+
|
339 |
+
command = ["quarto", "render", f"{Paths.SUMMARY / 'Summary_Documents.qmd'}"]
|
340 |
+
try:
|
341 |
+
subprocess.run(command, check=True, capture_output=True)
|
342 |
+
except subprocess.CalledProcessError as e:
|
343 |
+
logging.error(f"Error during Summary_Documents.qmd render: {e}")
|