cjber committed on
Commit
134bc43
·
1 Parent(s): 999a3f7

feat: swap to gpt4o

Browse files
planning_ai/graph.py CHANGED
@@ -10,7 +10,6 @@ from planning_ai.nodes.hallucination_node import (
10
  from planning_ai.nodes.map_node import (
11
  add_entities,
12
  generate_summary,
13
- map_retrieve_themes,
14
  map_summaries,
15
  retrieve_themes,
16
  )
@@ -33,9 +32,6 @@ def create_graph():
33
  map_summaries,
34
  ["generate_summary"],
35
  )
36
- graph.add_conditional_edges(
37
- "retrieve_themes",
38
- )
39
  graph.add_conditional_edges(
40
  "generate_summary",
41
  map_hallucinations,
 
10
  from planning_ai.nodes.map_node import (
11
  add_entities,
12
  generate_summary,
 
13
  map_summaries,
14
  retrieve_themes,
15
  )
 
32
  map_summaries,
33
  ["generate_summary"],
34
  )
 
 
 
35
  graph.add_conditional_edges(
36
  "generate_summary",
37
  map_hallucinations,
planning_ai/llms/llm.py CHANGED
@@ -3,4 +3,4 @@ from langchain_openai import ChatOpenAI
3
 
4
  load_dotenv()
5
 
6
- LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")
 
3
 
4
  load_dotenv()
5
 
6
+ LLM = ChatOpenAI(temperature=0, model="gpt-4o")
planning_ai/main.py CHANGED
@@ -58,7 +58,7 @@ def read_docs():
58
  int(pdf.stem) if pdf.stem.isdigit() else 0
59
  for pdf in (Paths.STAGING / "pdfs_azure").glob("*.pdf")
60
  ]
61
- pdf_loader = PyPDFDirectoryLoader(Paths.STAGING / "pdfs_azure")
62
  out = pdf_loader.load()
63
 
64
  pdfs_combined = {}
@@ -129,12 +129,30 @@ def wards_pop(postcodes):
129
  ward_boundaries = gpd.read_file(
130
  Paths.RAW / "Wards_December_2021_GB_BFE_2022_7523259277605796091.zip"
131
  )
132
- ward_boundaries = ward_boundaries.merge(
 
 
 
 
 
 
 
 
 
 
133
  postcodes.to_pandas(), left_on="WD21CD", right_on="OSWARD"
134
  )
135
 
136
  _, ax = plt.subplots()
137
- ward_boundaries.plot(ax=ax, column="prop", legend=True)
 
 
 
 
 
 
 
 
138
 
139
  plt.axis("off")
140
  plt.savefig(Paths.SUMMARY / "figs" / "wards.png")
@@ -180,7 +198,7 @@ def imd_bar(postcodes):
180
 
181
 
182
  def main():
183
- docs = read_docs()
184
  n_docs = len(docs)
185
 
186
  logging.warning(f"{n_docs} documents being processed!")
 
58
  int(pdf.stem) if pdf.stem.isdigit() else 0
59
  for pdf in (Paths.STAGING / "pdfs_azure").glob("*.pdf")
60
  ]
61
+ pdf_loader = PyPDFDirectoryLoader(Paths.STAGING / "pdfs_azure", silent_errors=True)
62
  out = pdf_loader.load()
63
 
64
  pdfs_combined = {}
 
129
  ward_boundaries = gpd.read_file(
130
  Paths.RAW / "Wards_December_2021_GB_BFE_2022_7523259277605796091.zip"
131
  )
132
+ camb_ward_codes = (
133
+ wards.filter(pl.col("Electoral wards and divisions").str.contains("Cambridge"))[
134
+ "Electoral wards and divisions Code"
135
+ ]
136
+ .unique()
137
+ .to_list()
138
+ )
139
+ camb_ward_boundaries = ward_boundaries[
140
+ ward_boundaries["WD21CD"].isin(camb_ward_codes)
141
+ ]
142
+ ward_boundaries_prop = ward_boundaries.merge(
143
  postcodes.to_pandas(), left_on="WD21CD", right_on="OSWARD"
144
  )
145
 
146
  _, ax = plt.subplots()
147
+ ward_boundaries.plot(ax=ax, color="white", edgecolor="gray")
148
+ camb_ward_boundaries.plot(ax=ax, color="white", edgecolor="black")
149
+ ward_boundaries_prop.plot(ax=ax, column="prop", legend=True)
150
+
151
+ __import__("ipdb").set_trace()
152
+ bounds = camb_ward_boundaries.total_bounds
153
+ buffer = 0.1
154
+ ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
155
+ ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
156
 
157
  plt.axis("off")
158
  plt.savefig(Paths.SUMMARY / "figs" / "wards.png")
 
198
 
199
 
200
  def main():
201
+ docs = read_docs()[:5]
202
  n_docs = len(docs)
203
 
204
  logging.warning(f"{n_docs} documents being processed!")
planning_ai/preprocessing/geo.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ import polars as pl
2
+
3
+ df = pl.read_parquet("./data/staging/gcpt3.parquet")