cjber committed on
Commit
ccf9cd4
·
1 Parent(s): b1ad960

fix: update document format

Browse files
.gitattributes CHANGED
@@ -6,4 +6,5 @@ data/raw/camb_wards.parquet filter=lfs diff=lfs merge=lfs -text
6
  data/raw/imd_camb.parquet filter=lfs diff=lfs merge=lfs -text
7
  data/raw/lsoa_camb.parquet filter=lfs diff=lfs merge=lfs -text
8
  data/raw/onspd_cambridge.parquet filter=lfs diff=lfs merge=lfs -text
 
9
  data/covers/*.docx filter=lfs diff=lfs merge=lfs -text
 
6
  data/raw/imd_camb.parquet filter=lfs diff=lfs merge=lfs -text
7
  data/raw/lsoa_camb.parquet filter=lfs diff=lfs merge=lfs -text
8
  data/raw/onspd_cambridge.parquet filter=lfs diff=lfs merge=lfs -text
9
+ data/raw/camb_lads.parquet filter=lfs diff=lfs merge=lfs -text
10
  data/covers/*.docx filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -194,35 +194,71 @@ def display_download_buttons():
194
  # Create a container for the Executive Reports
195
  with st.expander("**Executive Reports**"):
196
  for i, rep in enumerate(representations_documents):
197
- summaries_path = Paths.SUMMARY / f"Summary_of_Submitted_Responses-{rep}.pdf"
 
 
 
 
 
198
  with st.container():
199
  st.subheader(f"Executive Report for {rep}")
200
- with open(summaries_path, "rb") as pdf_file:
201
- st.download_button(
202
- label="Download Executive Report",
203
- data=pdf_file,
204
- file_name=f"Summary_of_Submitted_Responses-{rep}.pdf",
205
- mime="application/pdf",
206
- use_container_width=True,
207
- key=f"exec_{i}_{hash(rep)}", # Ensure key uniqueness with index + hash
208
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  st.markdown("---")
210
 
211
  # Create a container for the Representation Summaries
212
  with st.expander("**Representation Summaries**"):
213
  for i, rep in enumerate(representations_documents):
214
- report_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
 
215
  with st.container():
216
  st.subheader(f"Representation Summary for {rep}")
217
- with open(report_path, "rb") as pdf_file:
218
- st.download_button(
219
- label="Download Representation Summary",
220
- data=pdf_file,
221
- file_name=f"Summary_Documents-{rep}.pdf",
222
- mime="application/pdf",
223
- use_container_width=True,
224
- key=f"rep_{i}_{hash(rep)}", # Ensure key uniqueness with index + hash
225
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  st.markdown("---")
227
 
228
 
 
194
  # Create a container for the Executive Reports
195
  with st.expander("**Executive Reports**"):
196
  for i, rep in enumerate(representations_documents):
197
+ summaries_pdf_path = (
198
+ Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.pdf"
199
+ )
200
+ summaries_docx_path = (
201
+ Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.docx"
202
+ )
203
  with st.container():
204
  st.subheader(f"Executive Report for {rep}")
205
+ col1, col2 = st.columns(2)
206
+ with col1:
207
+ with open(summaries_pdf_path, "rb") as pdf_file:
208
+ st.download_button(
209
+ label="Download PDF Version",
210
+ data=pdf_file,
211
+ file_name=f"Summary_of_Submitted_Representations-{rep}.pdf",
212
+ mime="application/pdf",
213
+ use_container_width=True,
214
+ key=f"exec_pdf_{i}_{hash(rep)}",
215
+ )
216
+ with col2:
217
+ if summaries_docx_path.exists():
218
+ with open(summaries_docx_path, "rb") as docx_file:
219
+ st.download_button(
220
+ label="Download DOCX Version",
221
+ data=docx_file,
222
+ file_name=f"Summary_of_Submitted_Representations-{rep}.docx",
223
+ mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
224
+ use_container_width=True,
225
+ key=f"exec_docx_{i}_{hash(rep)}",
226
+ )
227
+ else:
228
+ st.warning("DOCX version not available")
229
  st.markdown("---")
230
 
231
  # Create a container for the Representation Summaries
232
  with st.expander("**Representation Summaries**"):
233
  for i, rep in enumerate(representations_documents):
234
+ report_pdf_path = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
235
+ report_docx_path = Paths.SUMMARY / f"Summary_Documents-{rep}.docx"
236
  with st.container():
237
  st.subheader(f"Representation Summary for {rep}")
238
+ col1, col2 = st.columns(2)
239
+ with col1:
240
+ with open(report_pdf_path, "rb") as pdf_file:
241
+ st.download_button(
242
+ label="Download PDF Version",
243
+ data=pdf_file,
244
+ file_name=f"Summary_Documents-{rep}.pdf",
245
+ mime="application/pdf",
246
+ use_container_width=True,
247
+ key=f"rep_pdf_{i}_{hash(rep)}",
248
+ )
249
+ with col2:
250
+ if report_docx_path.exists():
251
+ with open(report_docx_path, "rb") as docx_file:
252
+ st.download_button(
253
+ label="Download DOCX Version",
254
+ data=docx_file,
255
+ file_name=f"Summary_Documents-{rep}.docx",
256
+ mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
257
+ use_container_width=True,
258
+ key=f"rep_docx_{i}_{hash(rep)}",
259
+ )
260
+ else:
261
+ st.warning("DOCX version not available")
262
  st.markdown("---")
263
 
264
 
packages.txt CHANGED
@@ -1,5 +1,5 @@
1
  texlive-latex-extra
2
- texlive-fonts-extra
3
  cm-super
4
  dvipng
5
  pandoc
 
1
  texlive-latex-extra
2
+ ttf-liberation
3
  cm-super
4
  dvipng
5
  pandoc
planning_ai/documents/document.py CHANGED
@@ -3,7 +3,6 @@ import re
3
  from collections import Counter
4
 
5
  import geopandas as gpd
6
- import matplotlib as mpl
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
@@ -13,25 +12,9 @@ from polars.dependencies import subprocess
13
 
14
  from planning_ai.common.utils import Paths
15
 
16
- mpl.rcParams["text.usetex"] = True
17
- mpl.rcParams["text.latex.preamble"] = r"\usepackage{libertine}"
18
-
19
- WARDS = [
20
- "E05013050",
21
- "E05013051",
22
- "E05013052",
23
- "E05013053",
24
- "E05013054",
25
- "E05013055",
26
- "E05013056",
27
- "E05013057",
28
- "E05013058",
29
- "E05013059",
30
- "E05013060",
31
- "E05013061",
32
- "E05013062",
33
- "E05013063",
34
- ]
35
 
36
 
37
  def _process_postcodes(final):
@@ -52,8 +35,8 @@ def _process_postcodes(final):
52
  )
53
  onspd = pl.read_parquet(
54
  Paths.RAW / "onspd_cambridge.parquet",
55
- columns=["PCD", "OSWARD", "LSOA11", "OA21"],
56
- ).with_columns(pl.col("PCD").str.replace_all(" ", "").alias("postcode"))
57
  postcodes = postcodes.join(onspd, on="postcode")
58
  return postcodes
59
 
@@ -177,7 +160,7 @@ def fig_oa(postcodes, rep):
177
  )
178
  oac = oac.join(oa_pop, left_on="oa21cd", right_on="OA2021")
179
  oac = (
180
- postcodes.join(oac, left_on="OA21", right_on="oa21cd", how="right")
181
  .group_by(["supergroup", "supergroup_name"])
182
  .sum()
183
  .select(["supergroup", "supergroup_name", "population", "count"])
@@ -210,20 +193,21 @@ def fig_oa(postcodes, rep):
210
  bars1 = ax1.bar(
211
  oa_pd["supergroup"],
212
  oa_pd["perc_diff"],
213
- label="Percentage of Representations (\%)",
214
  color=colors[: len(oa_pd)],
215
- edgecolor="black",
 
216
  )
217
 
218
  # Add centerline at y=0
219
- ax1.axhline(0, color="black", linewidth=1.5)
220
 
221
  # Annotate bars with percentage values
222
  for bar in bars1:
223
  height = bar.get_height()
224
  if height > 0:
225
  ax1.annotate(
226
- f"{height:.0f}\%",
227
  xy=(bar.get_x() + bar.get_width() / 2, height),
228
  xytext=(0, 3), # 3 points vertical offset
229
  textcoords="offset points",
@@ -232,16 +216,16 @@ def fig_oa(postcodes, rep):
232
  )
233
  else:
234
  ax1.annotate(
235
- f"{height:.0f}\%",
236
  xy=(bar.get_x() + bar.get_width() / 2, height),
237
- xytext=(0, -6), # 10 points vertical offset
238
  textcoords="offset points",
239
  ha="center",
240
  va="top",
241
  )
242
 
243
  ax1.set_xlabel("Output Area Classification (OAC) Supergroup")
244
- ax1.set_ylabel("Difference from national average (\%)")
245
 
246
  supergroup_names = [
247
  f"{i}: {name}"
@@ -259,10 +243,14 @@ def fig_oa(postcodes, rep):
259
 
260
 
261
  def fig_wards(postcodes, rep):
 
262
  ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
263
- camb_ward_boundaries = ward_boundaries[ward_boundaries["WD21CD"].isin(WARDS)]
 
 
 
264
  ward_boundaries_prop = ward_boundaries.merge(
265
- postcodes.to_pandas(), left_on="WD21CD", right_on="OSWARD"
266
  )
267
 
268
  _, ax = plt.subplots(figsize=(8, 8))
@@ -270,15 +258,10 @@ def fig_wards(postcodes, rep):
270
  ax=ax,
271
  column="count",
272
  legend=True,
273
- legend_kwds={"label": "Number of Representations"},
274
  )
275
- ward_boundaries.plot(ax=ax, color="none", edgecolor="gray")
276
- camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black")
277
-
278
- bounds = np.array([541419.8982, 253158.2036, 549420.4025, 262079.7998])
279
- buffer = 20_000
280
- ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
281
- ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
282
 
283
  plt.axis("off")
284
  plt.tight_layout()
@@ -290,7 +273,7 @@ def fig_imd(postcodes, rep):
290
  imd = pl.read_parquet(Paths.RAW / "imd_camb.parquet")
291
  pops = pl.read_parquet(Paths.RAW / "pops_camb.parquet")
292
  imd = (
293
- postcodes.join(imd, left_on="LSOA11", right_on="LSOA", how="right")
294
  .join(pops, left_on="LSOA", right_on="LSOA 2021 Code")
295
  .group_by("SOA_decile")
296
  .agg(pl.col("count").sum(), pl.col("LSOA").count(), pl.col("Total").sum())
@@ -317,15 +300,16 @@ def fig_imd(postcodes, rep):
317
  ax1.bar(
318
  x, # Shift to the left
319
  postcodes_pd["perc_diff"],
320
- edgecolor="black",
321
  color=colors,
 
322
  )
323
 
324
  # Set labels and ticks
325
  ax1.set_xlabel("Deprivation Quintile")
326
- ax1.set_ylabel("Difference from national average (\%)")
327
  ax1.set_xticks(x)
328
- ax1.axhline(0, color="black", linewidth=1.5)
329
 
330
  # ax1.legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=5, frameon=False)
331
  plt.tight_layout()
@@ -361,16 +345,40 @@ def build_final_report(out, rep):
361
  fig_imd(postcodes, rep)
362
 
363
  quarto_doc = (
364
- "---\n"
365
- f"title: 'Summary of Submitted Representations: {rep}'\n"
366
- "geometry: a4paper\n"
367
- "margin: 2cm\n"
368
- "fontfamily: libertinus\n"
369
- "monofont: 'JetBrains Mono'\n"
370
- "monofontoptions:\n"
371
- " - Scale=0.55\n"
372
- "---\n\n"
373
- "# Executive Summary\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  f"{final['executive']}\n\n"
375
  f"There were a total of {len(responses):,} responses. Of these, submissions indicated "
376
  "the following support and objection of the plan:\n\n"
@@ -380,8 +388,8 @@ def build_final_report(out, rep):
380
  "\n# Profile of Submissions\n\n"
381
  f"{figures_paragraph}\n\n"
382
  f"![Total number of representations submitted by Ward\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
383
- f"![Total number of representations submitted by Output Area (OA 2021)\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
384
- f"![Percentage of representations submitted by quintile of index of multiple deprivation (2019)\\label{{fig-imd}}](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
385
  r"\newpage"
386
  "\n\n# Themes and Policies\n\n"
387
  f"{themes_paragraph}\n\n"
@@ -408,15 +416,25 @@ def build_final_report(out, rep):
408
  "to page 12 of the document attached to representation 175933."
409
  )
410
 
411
- out_path = Paths.SUMMARY / f"Summary_of_Submitted_Responses-{rep}.md"
412
- out_file = Paths.SUMMARY / f"Summary_of_Submitted_Responses-{rep}.pdf"
413
  with open(out_path, "w") as f:
414
  f.write(quarto_doc)
415
- command = ["pandoc", f"{out_path}", "-o", f"{out_file}"]
416
  try:
 
 
 
 
 
 
 
 
 
417
  subprocess.run(command, check=True, capture_output=True)
418
  except subprocess.CalledProcessError as e:
419
- logging.error(f"Error during Summary_of_Submitted_Responses.md render: {e}")
 
 
420
 
421
 
422
  def build_summaries_document(out, rep):
@@ -430,24 +448,56 @@ def build_summaries_document(out, rep):
430
  for document in out["generate_final_report"]["documents"]
431
  )
432
  header = (
433
- "---\n"
434
- f"title: 'Summary Documents: {rep}'\n"
435
- "fontfamily: libertinus\n"
436
- "geometry: a4paper\n"
437
- "margin: 2cm\n"
438
- "monofont: 'JetBrains Mono'\n"
439
- "monofontoptions:\n"
440
- " - Scale=0.55\n"
441
- "---\n\n"
442
- f"{summary_intro}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  )
444
  out_path = Paths.SUMMARY / f"Summary_Documents-{rep}.md"
445
- out_file = Paths.SUMMARY / f"Summary_Documents-{rep}.pdf"
446
  with open(out_path, "w") as f:
447
  f.write(f"{header}{full_text}")
448
 
449
- command = ["pandoc", f"{out_path}", "-o", f"{out_file}"]
450
  try:
 
 
 
 
 
 
 
 
 
451
  subprocess.run(command, check=True, capture_output=True)
452
  except subprocess.CalledProcessError as e:
453
  logging.error(f"Error during render: {e}")
 
3
  from collections import Counter
4
 
5
  import geopandas as gpd
 
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
  import pandas as pd
 
12
 
13
  from planning_ai.common.utils import Paths
14
 
15
+ plt.rcParams.update(
16
+ {"font.family": "sans-serif", "font.sans-serif": ["Liberation Sans"]}
17
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def _process_postcodes(final):
 
35
  )
36
  onspd = pl.read_parquet(
37
  Paths.RAW / "onspd_cambridge.parquet",
38
+ columns=["pcd", "osward", "lsoa11", "oa21"],
39
+ ).with_columns(pl.col("pcd").str.replace_all(" ", "").alias("postcode"))
40
  postcodes = postcodes.join(onspd, on="postcode")
41
  return postcodes
42
 
 
160
  )
161
  oac = oac.join(oa_pop, left_on="oa21cd", right_on="OA2021")
162
  oac = (
163
+ postcodes.join(oac, left_on="oa21", right_on="oa21cd", how="right")
164
  .group_by(["supergroup", "supergroup_name"])
165
  .sum()
166
  .select(["supergroup", "supergroup_name", "population", "count"])
 
193
  bars1 = ax1.bar(
194
  oa_pd["supergroup"],
195
  oa_pd["perc_diff"],
196
+ label="Percentage of Representations (%)",
197
  color=colors[: len(oa_pd)],
198
+ edgecolor="none",
199
+ width=0.9,
200
  )
201
 
202
  # Add centerline at y=0
203
+ ax1.axhline(0, color="black", linewidth=1)
204
 
205
  # Annotate bars with percentage values
206
  for bar in bars1:
207
  height = bar.get_height()
208
  if height > 0:
209
  ax1.annotate(
210
+ f"{height:.0f}%",
211
  xy=(bar.get_x() + bar.get_width() / 2, height),
212
  xytext=(0, 3), # 3 points vertical offset
213
  textcoords="offset points",
 
216
  )
217
  else:
218
  ax1.annotate(
219
+ f"{height:.0f}%",
220
  xy=(bar.get_x() + bar.get_width() / 2, height),
221
+ xytext=(0, -6),
222
  textcoords="offset points",
223
  ha="center",
224
  va="top",
225
  )
226
 
227
  ax1.set_xlabel("Output Area Classification (OAC) Supergroup")
228
+ ax1.set_ylabel("Difference from national average (%)")
229
 
230
  supergroup_names = [
231
  f"{i}: {name}"
 
243
 
244
 
245
  def fig_wards(postcodes, rep):
246
+ camb_lads = gpd.read_parquet(Paths.RAW / "camb_lads.parquet")
247
  ward_boundaries = gpd.read_parquet(Paths.RAW / "camb_wards.parquet")
248
+ ward_pcs = postcodes.group_by("osward").sum()
249
+ camb_ward_boundaries = ward_boundaries[
250
+ ward_boundaries["WD21CD"].isin(postcodes["osward"].unique())
251
+ ]
252
  ward_boundaries_prop = ward_boundaries.merge(
253
+ ward_pcs.to_pandas(), left_on="WD21CD", right_on="osward"
254
  )
255
 
256
  _, ax = plt.subplots(figsize=(8, 8))
 
258
  ax=ax,
259
  column="count",
260
  legend=True,
261
+ legend_kwds={"label": "Number of Representations", "fmt": "{:.0f}"},
262
  )
263
+ camb_lads.plot(ax=ax, color="none", edgecolor="gray", linewidth=0.5)
264
+ camb_ward_boundaries.plot(ax=ax, color="none", edgecolor="black", linewidth=0.5)
 
 
 
 
 
265
 
266
  plt.axis("off")
267
  plt.tight_layout()
 
273
  imd = pl.read_parquet(Paths.RAW / "imd_camb.parquet")
274
  pops = pl.read_parquet(Paths.RAW / "pops_camb.parquet")
275
  imd = (
276
+ postcodes.join(imd, left_on="lsoa11", right_on="LSOA", how="right")
277
  .join(pops, left_on="LSOA", right_on="LSOA 2021 Code")
278
  .group_by("SOA_decile")
279
  .agg(pl.col("count").sum(), pl.col("LSOA").count(), pl.col("Total").sum())
 
300
  ax1.bar(
301
  x, # Shift to the left
302
  postcodes_pd["perc_diff"],
303
+ edgecolor="none",
304
  color=colors,
305
+ width=0.9,
306
  )
307
 
308
  # Set labels and ticks
309
  ax1.set_xlabel("Deprivation Quintile")
310
+ ax1.set_ylabel("Difference from national average (%)")
311
  ax1.set_xticks(x)
312
+ ax1.axhline(0, color="black", linewidth=1)
313
 
314
  # ax1.legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=5, frameon=False)
315
  plt.tight_layout()
 
345
  fig_imd(postcodes, rep)
346
 
347
  quarto_doc = (
348
+ f"---\ntitle: '**Summary of Submitted Representations: {rep}**'\n"
349
+ r"""
350
+ mainfont: Liberation Sans
351
+ fontsize: 11pt
352
+ margin: 2cm
353
+ geometry: a4paper
354
+ header-includes: |
355
+ \usepackage{graphicx}
356
+ \usepackage{fancyhdr}
357
+ \usepackage{geometry}
358
+ \usepackage{sectsty}
359
+ \geometry{top=1in, bottom=1in, left=1in, right=1in}
360
+ \pagestyle{fancy}
361
+ \fancyhead[L]{}
362
+ \fancyhead[C]{}
363
+ \fancyhead[R]{\includegraphics[width=3cm]{logo.png}}
364
+ \fancyfoot[L]{}
365
+ \fancyfoot[C]{}
366
+ \fancyfoot[R]{\thepage}
367
+ \renewcommand{\headrulewidth}{0pt}
368
+ \renewcommand{\footrulewidth}{0pt}
369
+ \linespread{1.2}
370
+ \usepackage{titlesec}
371
+ \usepackage{xcolor}
372
+ \definecolor{uolblue}{HTML}{1F2B7D}
373
+ \titleformat{\section}[block]{\normalfont\Large\bfseries\color{uolblue}}{}{0em}{}
374
+ \titleformat{\subsection}[block]{\normalfont\large\bfseries\color{uolblue}}{}{0em}{}
375
+ \fancypagestyle{plain}{\fancyhf{}\fancyfoot[R]{\thepage}\fancyhead[R]{\includegraphics[width=3cm]{logo.png}}}
376
+ \usepackage{titling}
377
+ \pretitle{\begin{flushleft}\Huge\color{uolblue}}
378
+ \posttitle{\end{flushleft}\vspace{-2em}}
379
+ ---
380
+ """
381
+ "\n# Executive Summary\n\n"
382
  f"{final['executive']}\n\n"
383
  f"There were a total of {len(responses):,} responses. Of these, submissions indicated "
384
  "the following support and objection of the plan:\n\n"
 
388
  "\n# Profile of Submissions\n\n"
389
  f"{figures_paragraph}\n\n"
390
  f"![Total number of representations submitted by Ward\\label{{fig-wards}}](./data/out/summary/figs/wards-{rep}.pdf)\n\n"
391
+ f"![Proportional frequency of representations submitted by 2021 Output Area\\label{{fig-oas}}](./data/out/summary/figs/oas-{rep}.pdf)\n\n"
392
+ f"![Distribution of representations submitted by quintile of index of multiple deprivation (2019)\\label{{fig-imd}}](./data/out/summary/figs/imd_decile-{rep}.pdf)\n\n"
393
  r"\newpage"
394
  "\n\n# Themes and Policies\n\n"
395
  f"{themes_paragraph}\n\n"
 
416
  "to page 12 of the document attached to representation 175933."
417
  )
418
 
419
+ out_path = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}.md"
420
+ out_file = Paths.SUMMARY / f"Summary_of_Submitted_Representations-{rep}"
421
  with open(out_path, "w") as f:
422
  f.write(quarto_doc)
 
423
  try:
424
+ command = [
425
+ "pandoc",
426
+ f"{out_path}",
427
+ "-o",
428
+ f"{out_file}.pdf",
429
+ "--pdf-engine=xelatex",
430
+ ]
431
+ subprocess.run(command, check=True, capture_output=True)
432
+ command = ["pandoc", f"{out_path}", "-o", f"{out_file}.docx"]
433
  subprocess.run(command, check=True, capture_output=True)
434
  except subprocess.CalledProcessError as e:
435
+ logging.error(
436
+ f"Error during Summary_of_Submitted_Representations.md render: {e}"
437
+ )
438
 
439
 
440
  def build_summaries_document(out, rep):
 
448
  for document in out["generate_final_report"]["documents"]
449
  )
450
  header = (
451
+ f"---\ntitle: '**Summary Documents: {rep}**'\n"
452
+ r"""
453
+ mainfont: Liberation Sans
454
+ fontsize: 11pt
455
+ margin: 2cm
456
+ geometry: a4paper
457
+ header-includes: |
458
+ \usepackage{graphicx}
459
+ \usepackage{fancyhdr}
460
+ \usepackage{geometry}
461
+ \usepackage{sectsty}
462
+ \geometry{top=1in, bottom=1in, left=1in, right=1in}
463
+ \pagestyle{fancy}
464
+ \fancyhead[L]{}
465
+ \fancyhead[C]{}
466
+ \fancyhead[R]{\includegraphics[width=3cm]{logo.png}}
467
+ \fancyfoot[L]{}
468
+ \fancyfoot[C]{}
469
+ \fancyfoot[R]{\thepage}
470
+ \renewcommand{\headrulewidth}{0pt}
471
+ \renewcommand{\footrulewidth}{0pt}
472
+ \linespread{1.2}
473
+ \usepackage{titlesec}
474
+ \usepackage{xcolor}
475
+ \definecolor{uolblue}{HTML}{1F2B7D}
476
+ \titleformat{\section}[block]{\normalfont\Large\bfseries\color{uolblue}}{}{0em}{}
477
+ \titleformat{\subsection}[block]{\normalfont\large\bfseries\color{uolblue}}{}{0em}{}
478
+ \fancypagestyle{plain}{\fancyhf{}\fancyfoot[R]{\thepage}\fancyhead[R]{\includegraphics[width=3cm]{logo.png}}}
479
+ \usepackage{titling}
480
+ \pretitle{\begin{flushleft}\Huge\color{uolblue}}
481
+ \posttitle{\end{flushleft}\vspace{-2em}}
482
+ ---
483
+ """
484
+ f"\n{summary_intro}\n\n"
485
  )
486
  out_path = Paths.SUMMARY / f"Summary_Documents-{rep}.md"
487
+ out_file = Paths.SUMMARY / f"Summary_Documents-{rep}"
488
  with open(out_path, "w") as f:
489
  f.write(f"{header}{full_text}")
490
 
 
491
  try:
492
+ command = [
493
+ "pandoc",
494
+ f"{out_path}",
495
+ "-o",
496
+ f"{out_file}.pdf",
497
+ "--pdf-engine=xelatex",
498
+ ]
499
+ subprocess.run(command, check=True, capture_output=True)
500
+ command = ["pandoc", f"{out_path}", "-o", f"{out_file}.docx"]
501
  subprocess.run(command, check=True, capture_output=True)
502
  except subprocess.CalledProcessError as e:
503
  logging.error(f"Error during render: {e}")
planning_ai/documents/figures.txt CHANGED
@@ -1,5 +1,5 @@
1
  This section describes the characteristics of where submissions were received from. This can help to identify how representative submissions were and whether there were any communities whose views were not being considered. Figure \ref{fig-wards} shows the number (frequency) of submitted representations by Ward based on the address attached to the submission. To interpret the figure, areas which are coloured white had no submissions from residents, and then areas are coloured in based on the total number of submissions with yellows and greens representing the largest numbers. This figure helps to identify which Wards are more active in terms of participation and representation in this report.
2
 
3
- Figure \ref{fig-oas} displays the percentage of representations submitted by the Output Area Classification (2021). The Output Area Classification is the Office for National Statistics preferred classification of neighbourhoods. This measure groups neighbourhoods (here defined as Output Areas, typically containing 100 people) into categories that capture similar types of people based on population, demographic and socioeconomic characteristics. It therefore provides an insightful view of the types of communities who submitted representations. To interpret the figure, where bars extend higher/upwards, this represents a larger population share within a specific area type. The blue bars represent the characteristics of who submitted representations, and the orange bars represent the underlying population – allowing one to compare whether the profile of submissions matched the characteristics of the local population. This figure uses OAC 'Supergroups', which are the highest level of the hierarchy, and provide information relative to the average values for the UK population at large.
4
 
5
- Figure \ref{fig-imd} shows the percentage of responses by level of neighbourhood socioeconomic deprivation. The information is presented using the 2019 Index of Multiple Deprivation, divided into quintiles (i.e., dividing the English population into equal fifths). This measure is the UK Government’s preferred measure of socioeconomic deprivation and is based on information about income, employment, education, health, crime, housing and the local environment for small areas (Lower Super Output Areas, typically containing 1600 people). To interpret the graph, bars represent the share of population from each quintile. Quintile 1 represents the most deprived 20% of areas, and quintile 5 the least deprived 20% of areas. The orange bars represent the distribution of people who submitted representations (i.e., larger bars mean that more people from these areas submitted representations). The blue bars show the distribution of the local population, allowing one to evaluate whether the evidence submitted was from the same communities in the area.
 
1
  This section describes the characteristics of where submissions were received from. This can help to identify how representative submissions were and whether there were any communities whose views were not being considered. Figure \ref{fig-wards} shows the number (frequency) of submitted representations by Ward based on the address attached to the submission. To interpret the figure, areas which are coloured white had no submissions from residents, and then areas are coloured in based on the total number of submissions with yellows and greens representing the largest numbers. This figure helps to identify which Wards are more active in terms of participation and representation in this report.
2
 
3
+ Figure \ref{fig-oas} displays the percentage of representations submitted by the Output Area Classification (2021). The Output Area Classification is the Office for National Statistics' preferred classification of neighbourhoods. This measure groups neighbourhoods (here defined as Output Areas, typically containing 100 people) into categories that capture similar types of people based on population, demographic and socio-economic characteristics. It therefore provides an insightful view of the types of communities who submitted representations. To interpret the figure, where bars extend higher/upwards, this represents a larger population share within a specific area type. The blue bars represent the characteristics of those who submitted representations, and the orange bars represent the underlying population – allowing one to compare whether the profile of submissions matched the characteristics of the local population. This figure uses OAC 'Supergroups', which are the highest level of the hierarchy, and provide information relative to the average values for the UK population at large.
4
 
5
+ Figure \ref{fig-imd} shows the percentage of responses by level of neighbourhood socio-economic deprivation. The information is presented using the 2019 Index of Multiple Deprivation, divided into quintiles (i.e., dividing the English population into equal fifths). This measure is the UK Government’s preferred measure of socio-economic deprivation and is based on information about income, employment, education, health, crime, housing and the local environment for small areas (Lower Super Output Areas, typically containing 1600 people). To interpret the graph, bars represent the share of population from each quintile. Quintile 1 represents the most deprived 20% of areas, and quintile 5 the least deprived 20% of areas. The orange bars represent the distribution of people who submitted representations (i.e., larger bars mean that more people from these areas submitted representations). The blue bars show the distribution of the local population, allowing one to evaluate whether the evidence submitted was from the same communities in the area.
planning_ai/documents/introduction.txt CHANGED
@@ -1 +1 @@
1
- This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce an abstractive summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate with the 'Summary Responses' document, to increase transparency.
 
1
+ This report was produced using a generative pre-trained transformer (GPT) large-language model (LLM) to produce a summary of all responses to the related planning application. This model automatically reviews every response in detail, and extracts key information to inform decision making. This document first consolidates this information into a single-page executive summary, highlighting areas of particular interest to consider, and the broad consensus of responses. Figures generated from responses then give both a geographic and statistical overview, highlighting any demographic imbalances in responses. The document then extracts detailed information from responses, grouped by theme and policy. In this section we incorporate citations which relate to the 'Summary Responses' document, to increase transparency.
planning_ai/documents/summary_intro.txt CHANGED
@@ -1,3 +1,7 @@
1
- This document provides a summary of each representation, along with the **Document ID** which corresponds with the citations in the coresponding **Summary of Submitted Representations** document. Each summary also provides the **Representations ID** which correponds with the `id` column in the `.json` JDL files. This allows the user to link the produced summaries back to the original documents.
 
 
 
 
 
2
 
3
- Note that PDF documents have been split by pages into multiple **Document ID**, but will share the same **Representations ID**+
 
1
+ This document provides a summary of each representation, along with the **Document ID** which corresponds with the citations in the corresponding **Summary of Submitted Representations** document. Each summary also provides the **Representations ID** which corresponds with the `id` column in the `.json` JDI files. This allows the user to link the produced summaries back to the original documents.
2
+
3
+ Note that PDF documents have been split by pages into multiple **Document ID**, but will share the same **Representations ID**.
4
+
5
+ ---
6
+
7
 
 
planning_ai/documents/themes.txt CHANGED
@@ -1 +1 @@
1
- The following section provides a detailed breakdown of notable details from responses, grouped by themes and policies. Both themes and associated policies are automatically determined through an analysis of the summary content by an LLM agent. Each theme is grouped by whether a responses is supporting, opposed, or a general comment. This section aims to give a comprehensive view of the key issues raised by the respondents with respect to the themes and policies outlined. We have incorporated citations into eac hpoint (see numbers in square brackets) which relate to the specific document they were made in, to promote the transparency of where information was sourced from. @tbl-themes gives a breakdown of the number of submissions that relate with each theme, submissions may relate to more than one theme.
 
1
+ The following section provides a detailed breakdown of notable details from responses, grouped by themes and policies. Both themes and associated policies are automatically determined through an analysis of the summary content by an LLM agent. Each theme is grouped by whether responses are supporting, opposed, or a general comment. This section aims to give a comprehensive view of the key issues raised by the respondents with respect to the themes and policies outlined. We have incorporated citations into each point (see numbers in square brackets) which relate to the specific document they were made in, to promote the transparency of where information was sourced from. @tbl-themes gives a breakdown of the number of submissions that relate to each theme; submissions may relate to more than one theme.