cjber committed on
Commit
0d53040
·
1 Parent(s): 5277da5

feat: add themes breakdown

Browse files
Files changed (1) hide show
  1. planning_ai/document.py +123 -17
planning_ai/document.py CHANGED
@@ -1,9 +1,13 @@
 
 
1
  import re
 
2
 
3
  import geopandas as gpd
4
  import matplotlib.pyplot as plt
5
  import numpy as np
6
  import polars as pl
 
7
 
8
  from planning_ai.common.utils import Paths
9
 
@@ -17,29 +21,99 @@ def _process_postcodes(final):
17
  .with_columns(pl.col("postcode").str.replace_all(" ", ""))
18
  )
19
  onspd = pl.read_csv(
20
- Paths.RAW / "onspd" / "ONSPD_FEB_2024.csv", columns=["PCD", "OSWARD", "LSOA11"]
 
21
  ).with_columns(pl.col("PCD").str.replace_all(" ", "").alias("postcode"))
22
  postcodes = postcodes.join(onspd, on="postcode")
23
  return postcodes
24
 
25
 
26
  def _process_policies(final):
27
- policies_df = final["policies"]
28
-
29
- all_policies = ""
30
- for (theme, stance), policy in policies_df.group_by(
31
- ["themes", "stance"], maintain_order=True
32
- ):
33
  details = "".join(
34
  f'\n### {row["policies"]}\n\n'
35
  + "".join(
36
  f"- {detail} {doc_id}\n"
37
  for detail, doc_id in zip(row["detail"], row["doc_id"])
38
  )
39
- for row in policy.rows(named=True)
40
  )
41
- all_policies += f"## {theme} - {stance}\n\n{details}\n"
42
- return all_policies
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
  def fig_wards(postcodes):
@@ -89,13 +163,13 @@ def fig_wards(postcodes):
89
  ax=ax,
90
  column="count",
91
  legend=True,
92
- # vmax=0.05,
93
  legend_kwds={"label": "Number of Representations"},
94
  )
95
  ward_boundaries.plot(ax=ax, color="none", edgecolor="gray")
96
  camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black")
97
 
98
- bounds = camb_ward_boundaries.total_bounds
99
  buffer = 10_000
100
  ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
101
  ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
@@ -175,20 +249,26 @@ def build_final_report(out):
175
  This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce an abstractive summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate with the 'Summary Responses' document, to increase transparency.
176
  """
177
  figures_paragraph = """
178
- @fig-wards shows the percentage of responses by total population within each Ward that had at least one response. @fig-imd shows the percentage of responses by total population within each IMD quintile.
 
 
 
179
  """
180
  final = out["generate_final_report"]
181
- policies = _process_policies(final)
182
  postcodes = _process_postcodes(final)
 
 
183
 
184
  fig_wards(postcodes)
 
185
  fig_imd(postcodes)
186
 
187
  quarto_doc = (
188
  "---\n"
189
  f"title: 'Summary of Submitted Responses'\n"
190
  "format:\n"
191
- " PrettyPDF-pdf:\n"
192
  " papersize: A4\n"
193
  "execute:\n"
194
  " freeze: auto\n"
@@ -198,16 +278,36 @@ This report was produced using a generative pre-trained transformer (GPT) large-
198
  " - Scale=0.55\n"
199
  "---\n\n"
200
  f"{final['executive']}\n\n"
 
 
201
  f"{introduction_paragraph}\n\n"
202
  "\n# Figures\n\n"
 
203
  f"![Total number of representations submitted by Ward.](./figs/wards.png){{#fig-wards}}\n\n"
 
204
  f"![Percentage of representations submitted by quintile of index of multiple deprivation (2019)](./figs/imd_decile.png){{#fig-imd}}\n\n"
205
  "# Themes and Policies\n\n"
206
- f"{policies}"
 
 
 
 
 
 
 
207
  )
208
 
209
  with open(Paths.SUMMARY / "Summary_of_Submitted_Responses.qmd", "w") as f:
210
  f.write(quarto_doc)
 
 
 
 
 
 
 
 
 
211
 
212
 
213
  def build_summaries_document(out):
@@ -223,7 +323,7 @@ def build_summaries_document(out):
223
  "---\n"
224
  "title: 'Summary Documents'\n"
225
  "format:\n"
226
- " PrettyPDF-pdf:\n"
227
  " papersize: A4\n"
228
  "execute:\n"
229
  " freeze: auto\n"
@@ -235,3 +335,9 @@ def build_summaries_document(out):
235
  )
236
  with open(Paths.SUMMARY / "Summary_Documents.qmd", "w") as f:
237
  f.write(f"{quarto_header}{full_text}")
 
 
 
 
 
 
 
1
+ import itertools
2
+ import logging
3
  import re
4
+ from collections import Counter
5
 
6
  import geopandas as gpd
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import polars as pl
10
+ from polars.dependencies import subprocess
11
 
12
  from planning_ai.common.utils import Paths
13
 
 
21
  .with_columns(pl.col("postcode").str.replace_all(" ", ""))
22
  )
23
  onspd = pl.read_csv(
24
+ Paths.RAW / "onspd" / "ONSPD_FEB_2024.csv",
25
+ columns=["PCD", "OSWARD", "LSOA11", "OA21"],
26
  ).with_columns(pl.col("PCD").str.replace_all(" ", "").alias("postcode"))
27
  postcodes = postcodes.join(onspd, on="postcode")
28
  return postcodes
29
 
30
 
31
  def _process_policies(final):
32
+ def process_policy_group(policy_group, theme, stance):
 
 
 
 
 
33
  details = "".join(
34
  f'\n### {row["policies"]}\n\n'
35
  + "".join(
36
  f"- {detail} {doc_id}\n"
37
  for detail, doc_id in zip(row["detail"], row["doc_id"])
38
  )
39
+ for row in policy_group.rows(named=True)
40
  )
41
+ return f"## {theme} - {stance}\n\n{details}\n"
42
+
43
+ policies_df = final["policies"]
44
+
45
+ support_policies = ""
46
+ object_policies = ""
47
+ other_policies = ""
48
+
49
+ for (theme, stance), policy in policies_df.group_by(
50
+ ["themes", "stance"], maintain_order=True
51
+ ):
52
+ if stance == "Support":
53
+ support_policies += process_policy_group(policy, theme, stance)
54
+ elif stance == "Object":
55
+ object_policies += process_policy_group(policy, theme, stance)
56
+ else:
57
+ other_policies += process_policy_group(policy, theme, stance)
58
+
59
+ return support_policies, object_policies, other_policies
60
+
61
+
62
+ def _process_stances(final):
63
+ documents = final["documents"]
64
+ stances = [
65
+ doc["document"].metadata["representations_support/object"] for doc in documents
66
+ ]
67
+ value_counts = Counter(stances)
68
+ total_values = sum(value_counts.values())
69
+ percentages = {
70
+ key: {"count": count, "percentage": (count / total_values)}
71
+ for key, count in value_counts.items()
72
+ }
73
+ stances_top = sorted(
74
+ percentages.items(), key=lambda x: x[1]["percentage"], reverse=True
75
+ )
76
+ return " | ".join(
77
+ [
78
+ f"**{item}**: {stance['percentage']:.2%} _({stance['count']})_"
79
+ for item, stance in stances_top
80
+ ]
81
+ )
82
+
83
+
84
+ def _process_themes(final):
85
+ documents = final["documents"]
86
+ themes = [list(doc["themes"]) for doc in documents]
87
+ themes = Counter(list(itertools.chain.from_iterable(themes)))
88
+ themes = pl.DataFrame(themes).transpose(include_header=True)
89
+ themes_breakdown = themes.with_columns(
90
+ ((pl.col("column_0") / pl.sum("column_0")) * 100).round(2).alias("percentage")
91
+ ).sort("percentage", descending=True)
92
+ themes_breakdown = themes_breakdown.rename(
93
+ {"column": "Theme", "column_0": "Count", "percentage": "Percentage"}
94
+ )
95
+ return themes_breakdown.to_pandas().to_markdown(index=False)
96
+
97
+
98
def fig_oa(postcodes):
    """Save a bar chart of representation counts per OAC supergroup.

    Reads ``oac21ew.csv`` from ``Paths.RAW``, joins it to ``postcodes`` on
    ``OA21`` == ``oa21cd``, counts rows per ``supergroup`` and writes the chart
    to ``Paths.SUMMARY / "figs" / "oas.png"``.

    Args:
        postcodes: Frame with an ``OA21`` column, supporting ``join``
            (presumably the polars DataFrame built by ``_process_postcodes``
            -- confirm against the caller).

    Returns:
        None. The only output is the PNG written to disk.
    """
    oac = pl.read_csv(Paths.RAW / "oac21ew.csv")
    supergroup_counts = (
        postcodes.join(oac, left_on="OA21", right_on="oa21cd")
        .group_by("supergroup")
        .len()
        .sort("supergroup")
    ).to_pandas()

    fig, ax = plt.subplots()
    try:
        ax.bar(supergroup_counts["supergroup"], supergroup_counts["len"])
        ax.set_xlabel("Output Area Classification (OAC) Supergroup")
        ax.set_ylabel("Number of Representations")

        fig.tight_layout()
        fig.savefig(Paths.SUMMARY / "figs" / "oas.png")
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # close this one so it is released and later plotting calls cannot
        # pick it up as the current figure.
        plt.close(fig)
117
 
118
 
119
  def fig_wards(postcodes):
 
163
  ax=ax,
164
  column="count",
165
  legend=True,
166
+ vmax=20,
167
  legend_kwds={"label": "Number of Representations"},
168
  )
169
  ward_boundaries.plot(ax=ax, color="none", edgecolor="gray")
170
  camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black")
171
 
172
+ bounds = np.array([541419.8982, 253158.2036, 549420.4025, 262079.7998])
173
  buffer = 10_000
174
  ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
175
  ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
 
249
  This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce an abstractive summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate with the 'Summary Responses' document, to increase transparency.
250
  """
251
  figures_paragraph = """
252
+ @fig-wards shows the percentage of responses by total population within each Ward that had at least one response. This figure helps to identify which Wards are more active in terms of participation and representation. @fig-imd shows the percentage of responses by total population within each IMD quintile. This figure provides insight into the socio-economic distribution of the respondents, highlighting any potential demographic imbalances. @fig-oas displays the total number of representations submitted by Output Area (OA 2021). This figure offers a detailed geographic overview of the responses, allowing for a more granular analysis of participation across different areas.
253
+ """
254
+ themes_paragraph = """
255
+ The following section provides a detailed breakdown of notable details from responses, grouped by themes and policies. Each theme is grouped by whether a responses is supporting, opposed, or a general comment. This section aims to give a comprehensive view of the key issues raised by the respondents with respect to the themes and policies outlined.
256
  """
257
  final = out["generate_final_report"]
258
+ support_policies, object_policies, other_policies = _process_policies(final)
259
  postcodes = _process_postcodes(final)
260
+ stances = _process_stances(final)
261
+ themes = _process_themes(final)
262
 
263
  fig_wards(postcodes)
264
+ fig_oa(postcodes)
265
  fig_imd(postcodes)
266
 
267
  quarto_doc = (
268
  "---\n"
269
  f"title: 'Summary of Submitted Responses'\n"
270
  "format:\n"
271
+ " pdf:\n"
272
  " papersize: A4\n"
273
  "execute:\n"
274
  " freeze: auto\n"
 
278
  " - Scale=0.55\n"
279
  "---\n\n"
280
  f"{final['executive']}\n\n"
281
+ f"{stances}\n\n"
282
+ "# Introduction\n\n"
283
  f"{introduction_paragraph}\n\n"
284
  "\n# Figures\n\n"
285
+ f"{figures_paragraph}\n\n"
286
  f"![Total number of representations submitted by Ward.](./figs/wards.png){{#fig-wards}}\n\n"
287
+ f"![Total number of representations submitted by Output Area (OA 2021).](./figs/oas.png){{#fig-oas}}\n\n"
288
  f"![Percentage of representations submitted by quintile of index of multiple deprivation (2019)](./figs/imd_decile.png){{#fig-imd}}\n\n"
289
  "# Themes and Policies\n\n"
290
+ f"{themes_paragraph}\n\n"
291
+ f"{themes}{{#tbl-themes}}\n\n"
292
+ "## Support\n\n"
293
+ f"{support_policies}\n\n"
294
+ "## Object\n\n"
295
+ f"{object_policies}\n\n"
296
+ "## Other\n\n"
297
+ f"{other_policies}\n\n"
298
  )
299
 
300
  with open(Paths.SUMMARY / "Summary_of_Submitted_Responses.qmd", "w") as f:
301
  f.write(quarto_doc)
302
+ command = [
303
+ "quarto",
304
+ "render",
305
+ f"{Paths.SUMMARY / 'Summary_of_Submitted_Responses.qmd'}",
306
+ ]
307
+ try:
308
+ subprocess.run(command, check=True, capture_output=True)
309
+ except subprocess.CalledProcessError as e:
310
+ logging.error(f"Error during Summary_of_Submitted_Responses.qmd render: {e}")
311
 
312
 
313
  def build_summaries_document(out):
 
323
  "---\n"
324
  "title: 'Summary Documents'\n"
325
  "format:\n"
326
+ " pdf:\n"
327
  " papersize: A4\n"
328
  "execute:\n"
329
  " freeze: auto\n"
 
335
  )
336
  with open(Paths.SUMMARY / "Summary_Documents.qmd", "w") as f:
337
  f.write(f"{quarto_header}{full_text}")
338
+
339
+ command = ["quarto", "render", f"{Paths.SUMMARY / 'Summary_Documents.qmd'}"]
340
+ try:
341
+ subprocess.run(command, check=True, capture_output=True)
342
+ except subprocess.CalledProcessError as e:
343
+ logging.error(f"Error during Summary_Documents.qmd render: {e}")