cjber committed on
Commit
0ed214c
·
1 Parent(s): 563c398

add paths utility class

Browse files
planning_ai/chains/fix_chain.py CHANGED
@@ -1,8 +1,9 @@
1
  from langchain_core.prompts import ChatPromptTemplate
2
 
3
  from planning_ai.chains.map_chain import SLLM
 
4
 
5
- with open("./planning_ai/chains/prompts/fix_hallucination.txt", "r") as f:
6
  map_template = f.read()
7
 
8
  map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
 
1
  from langchain_core.prompts import ChatPromptTemplate
2
 
3
  from planning_ai.chains.map_chain import SLLM
4
+ from planning_ai.common.utils import Paths
5
 
6
+ with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
7
  map_template = f.read()
8
 
9
  map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
planning_ai/chains/hallucination_chain.py CHANGED
@@ -1,9 +1,10 @@
1
  from langchain_core.prompts import ChatPromptTemplate
2
  from langchain_core.pydantic_v1 import BaseModel, Field
3
 
 
4
  from planning_ai.llms.llm import LLM
5
 
6
- with open("./planning_ai/chains/prompts/hallucination.txt", "r") as f:
7
  reduce_template = f.read()
8
 
9
 
 
1
  from langchain_core.prompts import ChatPromptTemplate
2
  from langchain_core.pydantic_v1 import BaseModel, Field
3
 
4
+ from planning_ai.common.utils import Paths
5
  from planning_ai.llms.llm import LLM
6
 
7
+ with open(Paths.PROMPTS / "hallucination.txt", "r") as f:
8
  reduce_template = f.read()
9
 
10
 
planning_ai/chains/map_chain.py CHANGED
@@ -2,11 +2,12 @@ from enum import Enum
2
  from typing import Literal, Optional
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.pydantic_v1 import BaseModel, Field, validator
6
 
 
7
  from planning_ai.llms.llm import LLM
8
 
9
- with open("./planning_ai/chains/prompts/map.txt", "r") as f:
10
  map_template = f.read()
11
 
12
 
 
2
  from typing import Literal, Optional
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
+ from langchain_core.pydantic_v1 import BaseModel, Field
6
 
7
+ from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import LLM
9
 
10
+ with open(Paths.PROMPTS / "map.txt", "r") as f:
11
  map_template = f.read()
12
 
13
 
planning_ai/chains/prompts/extract.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Extract the relevant text **verbatim** relating to the following aims:
planning_ai/chains/reduce_chain.py CHANGED
@@ -1,20 +1,29 @@
1
  from langchain_core.output_parsers import StrOutputParser
2
  from langchain_core.prompts import ChatPromptTemplate
3
 
 
4
  from planning_ai.llms.llm import LLM
5
 
6
- with open("./planning_ai/chains/prompts/reduce.txt", "r") as f:
7
  reduce_template = f.read()
8
 
9
- reduce_prompt = ChatPromptTemplate([("human", reduce_template)])
 
10
  reduce_chain = reduce_prompt | LLM | StrOutputParser()
11
 
 
12
  if __name__ == "__main__":
13
  test_summary = """
 
 
14
  The author expresses concern over the proposed mass development north-west of Cambridge,
15
- highlighting significant growth in the area over the past twenty years, particularly with
16
- the creation of Cambourne and the expansion of Papworth Everard.
17
- Related Aims: [Homes, Infrastructure]
 
 
 
 
18
  """
19
 
20
  result = reduce_chain.invoke({"context": test_summary})
 
1
  from langchain_core.output_parsers import StrOutputParser
2
  from langchain_core.prompts import ChatPromptTemplate
3
 
4
+ from planning_ai.common.utils import Paths
5
  from planning_ai.llms.llm import LLM
6
 
7
+ with open(Paths.PROMPTS / "reduce.txt", "r") as f:
8
  reduce_template = f.read()
9
 
10
+
11
+ reduce_prompt = ChatPromptTemplate([("system", reduce_template)])
12
  reduce_chain = reduce_prompt | LLM | StrOutputParser()
13
 
14
+
15
  if __name__ == "__main__":
16
  test_summary = """
17
+ Summary:
18
+
19
  The author expresses concern over the proposed mass development north-west of Cambridge,
20
+ highlighting the significant growth in the area over the past twenty years,
21
+ particularly with the establishment of Cambourne and the expansion of Papworth Everard.
22
+
23
+ Related Aims:
24
+
25
+ 1: Homes
26
+ 2: Infrastructure
27
  """
28
 
29
  result = reduce_chain.invoke({"context": test_summary})
planning_ai/common/utils.py CHANGED
@@ -13,6 +13,19 @@ pl.Config(
13
 
14
  class Paths:
15
  DATA = Path("data")
 
16
  RAW = DATA / "raw"
17
  STAGING = DATA / "staging"
18
  OUT = DATA / "out"
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  class Paths:
15
  DATA = Path("data")
16
+
17
  RAW = DATA / "raw"
18
  STAGING = DATA / "staging"
19
  OUT = DATA / "out"
20
+
21
+ SUMMARY = OUT / "summary"
22
+
23
+ PROMPTS = Path("planning_ai/chains/prompts")
24
+
25
+ @classmethod
26
+ def ensure_directories_exist(cls):
27
+ for path in [cls.DATA, cls.RAW, cls.STAGING, cls.OUT, cls.SUMMARY]:
28
+ path.mkdir(parents=True, exist_ok=True)
29
+
30
+
31
+ Paths.ensure_directories_exist()
planning_ai/graph.py CHANGED
@@ -39,6 +39,7 @@ def create_graph():
39
  map_hallucinations,
40
  ["check_hallucination"],
41
  )
 
42
  graph.add_edge("check_hallucination", "generate_final_summary")
43
  graph.add_edge("generate_final_summary", END)
44
 
 
39
  map_hallucinations,
40
  ["check_hallucination"],
41
  )
42
+
43
  graph.add_edge("check_hallucination", "generate_final_summary")
44
  graph.add_edge("generate_final_summary", END)
45
 
planning_ai/llms/llm.py CHANGED
@@ -1,11 +1,6 @@
1
  from dotenv import load_dotenv
2
- from langchain_core.rate_limiters import InMemoryRateLimiter
3
  from langchain_openai import ChatOpenAI
4
 
5
  load_dotenv()
6
 
7
- # rate_limiter = InMemoryRateLimiter(
8
- # requests_per_second=50,
9
- # check_every_n_seconds=0.1,
10
- # )
11
  LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")
 
1
  from dotenv import load_dotenv
 
2
  from langchain_openai import ChatOpenAI
3
 
4
  load_dotenv()
5
 
 
 
 
 
6
  LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")
planning_ai/main.py CHANGED
@@ -18,7 +18,7 @@ load_dotenv()
18
 
19
 
20
  def map_locations(places_df: pl.DataFrame):
21
- lad = gpd.read_file("./data/raw/LAD_BUC_2022.gpkg").to_crs("epsg:4326")
22
  lad_camb = lad[lad["LAD22NM"].str.contains("Cambridge")]
23
  api_key = os.getenv("OPENCAGE_API_KEY")
24
  geocoder = OpenCageGeocode(key=api_key)
@@ -45,12 +45,14 @@ def map_locations(places_df: pl.DataFrame):
45
  lad.plot(ax=ax, color="white", edgecolor="gray")
46
  lad_camb.plot(ax=ax, color="white", edgecolor="black")
47
  places_gdf.plot(ax=ax, column="Mean Sentiment", markersize=5, legend=True)
 
 
48
  bounds = lad_camb.total_bounds
49
  buffer = 0.1
50
  ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
51
  ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
52
  plt.axis("off")
53
- plt.savefig("./reports/figs/places.png")
54
 
55
 
56
  def build_quarto_doc(doc_title, out):
@@ -164,7 +166,7 @@ def build_quarto_doc(doc_title, out):
164
  f"{short_summaries}"
165
  )
166
 
167
- with open(f"./reports/{doc_title.replace(' ', '_')}.qmd", "w") as f:
168
  f.write(quarto_doc)
169
 
170
 
@@ -176,7 +178,7 @@ def main():
176
  loader_cls=TextLoader,
177
  recursive=True,
178
  )
179
- docs = [doc for doc in loader.load() if doc.page_content]
180
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
181
  chunk_size=1000, chunk_overlap=0
182
  )
@@ -201,10 +203,10 @@ def main():
201
 
202
  if __name__ == "__main__":
203
  doc_title = "Cambridge Response Summary"
 
204
  tic = time.time()
205
  out = main()
206
  build_quarto_doc(doc_title, out)
207
- print(out["generate_final_summary"]["final_summary"])
208
  toc = time.time()
209
 
210
  print(f"Time taken: {(toc - tic) / 60:.2f} minutes.")
 
18
 
19
 
20
  def map_locations(places_df: pl.DataFrame):
21
+ lad = gpd.read_file(Paths.RAW / "LAD_BUC_2022.gpkg").to_crs("epsg:4326")
22
  lad_camb = lad[lad["LAD22NM"].str.contains("Cambridge")]
23
  api_key = os.getenv("OPENCAGE_API_KEY")
24
  geocoder = OpenCageGeocode(key=api_key)
 
45
  lad.plot(ax=ax, color="white", edgecolor="gray")
46
  lad_camb.plot(ax=ax, color="white", edgecolor="black")
47
  places_gdf.plot(ax=ax, column="Mean Sentiment", markersize=5, legend=True)
48
+
49
+ ax = geoplot.kdeplot(places_gdf, projection=gcrs.AlbersEqualArea())
50
  bounds = lad_camb.total_bounds
51
  buffer = 0.1
52
  ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
53
  ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
54
  plt.axis("off")
55
+ plt.savefig(Paths.SUMMARY / "figs" / "places.png")
56
 
57
 
58
  def build_quarto_doc(doc_title, out):
 
166
  f"{short_summaries}"
167
  )
168
 
169
+ with open(Paths.SUMMARY / f"{doc_title.replace(' ', '_')}.qmd", "w") as f:
170
  f.write(quarto_doc)
171
 
172
 
 
178
  loader_cls=TextLoader,
179
  recursive=True,
180
  )
181
+ docs = [doc for doc in loader.load()[:10] if doc.page_content]
182
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
183
  chunk_size=1000, chunk_overlap=0
184
  )
 
203
 
204
  if __name__ == "__main__":
205
  doc_title = "Cambridge Response Summary"
206
+
207
  tic = time.time()
208
  out = main()
209
  build_quarto_doc(doc_title, out)
 
210
  toc = time.time()
211
 
212
  print(f"Time taken: {(toc - tic) / 60:.2f} minutes.")
planning_ai/preprocessing/gclp.py CHANGED
@@ -2,15 +2,21 @@ import polars as pl
2
 
3
  from planning_ai.common.utils import Paths
4
 
5
- df = pl.read_excel(
6
- Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
7
- )
8
 
9
- free_cols = [df.columns[0]] + df.columns[6:13] + [df.columns[33]]
10
- df = df[free_cols]
 
 
11
 
12
- for row in df.rows(named=True):
13
- user = row.pop("UserNo")
14
- content = "\n\n".join([f"**{k}**\n\n{v}" for k, v in row.items() if v != "-"])
15
- with open(Paths.STAGING / "gclp" / f"{user}.txt", "w") as f:
16
- f.write(content)
 
 
 
 
 
 
 
 
2
 
3
  from planning_ai.common.utils import Paths
4
 
 
 
 
5
 
6
+ def main():
7
+ df = pl.read_excel(
8
+ Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
9
+ )
10
 
11
+ free_cols = [df.columns[0]] + df.columns[6:13] + [df.columns[33]]
12
+ df = df[free_cols]
13
+
14
+ for row in df.rows(named=True):
15
+ user = row.pop("UserNo")
16
+ content = "\n\n".join([f"**{k}**\n\n{v}" for k, v in row.items() if v != "-"])
17
+ with open(Paths.STAGING / "gclp" / f"{user}.txt", "w") as f:
18
+ f.write(content)
19
+
20
+
21
+ if __name__ == "__main__":
22
+ main()
planning_ai/preprocessing/web_comments.py CHANGED
@@ -2,13 +2,19 @@ import polars as pl
2
 
3
  from planning_ai.common.utils import Paths
4
 
5
- dfs = pl.read_excel(Paths.RAW / "web comments.xlsx", sheet_id=0)
6
-
7
- for sheet_name, df in dfs.items():
8
- string_df = df.select(pl.col(pl.String)).drop_nulls()
9
- for col in string_df.columns:
10
- series = string_df[col]
11
- name = series.name
12
- content = f"**{name}**" + "\n\n* ".join(["\n"] + series.to_list())
13
- with open(Paths.STAGING / "web" / f"{sheet_name}.txt", "w") as f:
14
- f.write(content)
 
 
 
 
 
 
 
2
 
3
  from planning_ai.common.utils import Paths
4
 
5
+
6
+ def main():
7
+ dfs = pl.read_excel(Paths.RAW / "web comments.xlsx", sheet_id=0)
8
+
9
+ for sheet_name, df in dfs.items():
10
+ string_df = df.select(pl.col(pl.String)).drop_nulls()
11
+ for col in string_df.columns:
12
+ series = string_df[col]
13
+ name = series.name
14
+ content = f"**{name}**" + "\n\n* ".join(["\n"] + series.to_list())
15
+ with open(Paths.STAGING / "web" / f"{sheet_name}.txt", "w") as f:
16
+ f.write(content)
17
+
18
+
19
+ if __name__ == "__main__":
20
+ main()