add paths utility class
Files changed:

- planning_ai/chains/fix_chain.py +2 -1
- planning_ai/chains/hallucination_chain.py +2 -1
- planning_ai/chains/map_chain.py +3 -2
- planning_ai/chains/prompts/extract.txt +1 -0
- planning_ai/chains/reduce_chain.py +14 -5
- planning_ai/common/utils.py +13 -0
- planning_ai/graph.py +1 -0
- planning_ai/llms/llm.py +0 -5
- planning_ai/main.py +7 -5
- planning_ai/preprocessing/gclp.py +16 -10
- planning_ai/preprocessing/web_comments.py +16 -10
planning_ai/chains/fix_chain.py
CHANGED
@@ -1,8 +1,9 @@
 from langchain_core.prompts import ChatPromptTemplate

 from planning_ai.chains.map_chain import SLLM
+from planning_ai.common.utils import Paths

-with open("
+with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
     map_template = f.read()

 map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])
planning_ai/chains/hallucination_chain.py
CHANGED
@@ -1,9 +1,10 @@
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.pydantic_v1 import BaseModel, Field

+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM

-with open("
+with open(Paths.PROMPTS / "hallucination.txt", "r") as f:
     reduce_template = f.read()


planning_ai/chains/map_chain.py
CHANGED
@@ -2,11 +2,12 @@ from enum import Enum
 from typing import Literal, Optional

 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain_core.pydantic_v1 import BaseModel, Field

+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM

-with open("
+with open(Paths.PROMPTS / "map.txt", "r") as f:
     map_template = f.read()


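The reshuffled pydantic import suggests map_chain.py defines a structured-output schema, and fix_chain.py imports an `SLLM` object from it that is not visible in this hunk. Below is a minimal sketch of how such a structured model is commonly built in LangChain; the `BriefSummary` schema is purely hypothetical and only stands in for whatever map_chain.py actually declares.

from langchain_core.pydantic_v1 import BaseModel, Field

from planning_ai.llms.llm import LLM


class BriefSummary(BaseModel):
    """Hypothetical schema; the real fields live in map_chain.py."""

    summary: str = Field(description="Concise summary of a planning response")
    aims: list[int] = Field(description="IDs of the aims the response touches on")


# with_structured_output() wraps the chat model so replies are parsed into the
# pydantic object; presumably something like this is how SLLM is constructed.
SLLM = LLM.with_structured_output(BriefSummary)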
planning_ai/chains/prompts/extract.txt
ADDED
@@ -0,0 +1 @@
+Extract the relevant text **verbatim** relating to the following aims:
planning_ai/chains/reduce_chain.py
CHANGED
@@ -1,20 +1,29 @@
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate

+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM

-with open("
+with open(Paths.PROMPTS / "reduce.txt", "r") as f:
     reduce_template = f.read()

-
+
+reduce_prompt = ChatPromptTemplate([("system", reduce_template)])
 reduce_chain = reduce_prompt | LLM | StrOutputParser()

+
 if __name__ == "__main__":
     test_summary = """
+    Summary:
+
     The author expresses concern over the proposed mass development north-west of Cambridge,
-    highlighting significant growth in the area over the past twenty years,
-    the
-
+    highlighting the significant growth in the area over the past twenty years,
+    particularly with the establishment of Cambourne and the expansion of Papworth Everard.
+
+    Related Aims:
+
+    1: Homes
+    2: Infrastructure
     """

     result = reduce_chain.invoke({"context": test_summary})
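A side note on the new `reduce_prompt` line: passing the message list positionally to `ChatPromptTemplate([...])` relies on a recent langchain-core accepting messages as the first constructor argument. The `from_messages` classmethod used in the other chains is the more conservative, equivalent spelling:

from langchain_core.prompts import ChatPromptTemplate

from planning_ai.common.utils import Paths

with open(Paths.PROMPTS / "reduce.txt", "r") as f:
    reduce_template = f.read()

# Equivalent to ChatPromptTemplate([("system", reduce_template)]) but also
# works on older langchain-core releases.
reduce_prompt = ChatPromptTemplate.from_messages([("system", reduce_template)])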
planning_ai/common/utils.py
CHANGED
@@ -13,6 +13,19 @@ pl.Config(

 class Paths:
     DATA = Path("data")
+
     RAW = DATA / "raw"
     STAGING = DATA / "staging"
     OUT = DATA / "out"
+
+    SUMMARY = OUT / "summary"
+
+    PROMPTS = Path("planning_ai/chains/prompts")
+
+    @classmethod
+    def ensure_directories_exist(cls):
+        for path in [cls.DATA, cls.RAW, cls.STAGING, cls.OUT, cls.SUMMARY]:
+            path.mkdir(parents=True, exist_ok=True)
+
+
+Paths.ensure_directories_exist()
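For reference, a small sketch of how the new helper is consumed elsewhere in this commit; the output filename is illustrative only. Importing the module runs ensure_directories_exist(), which creates data/ with its raw, staging, out and out/summary subfolders, while PROMPTS still resolves relative to the working directory.

from planning_ai.common.utils import Paths

# Read a prompt template shipped with the package source tree.
map_template = (Paths.PROMPTS / "map.txt").read_text()

# Write an artefact into the summary output folder (name is hypothetical).
(Paths.SUMMARY / "example.txt").write_text("placeholder")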
planning_ai/graph.py
CHANGED
@@ -39,6 +39,7 @@ def create_graph():
         map_hallucinations,
         ["check_hallucination"],
     )
+
     graph.add_edge("check_hallucination", "generate_final_summary")
     graph.add_edge("generate_final_summary", END)

planning_ai/llms/llm.py
CHANGED
@@ -1,11 +1,6 @@
 from dotenv import load_dotenv
-from langchain_core.rate_limiters import InMemoryRateLimiter
 from langchain_openai import ChatOpenAI

 load_dotenv()

-# rate_limiter = InMemoryRateLimiter(
-#     requests_per_second=50,
-#     check_every_n_seconds=0.1,
-# )
 LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")
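The commented-out rate limiter is removed outright rather than enabled. Should throttling be needed again, here is a sketch of how it could be reattached, assuming a langchain-core version whose chat models accept a rate_limiter argument; the 50 requests/second figure simply mirrors the deleted comment.

from dotenv import load_dotenv
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_openai import ChatOpenAI

load_dotenv()

# Values taken from the old commented-out block; tune to the deployment's quota.
rate_limiter = InMemoryRateLimiter(requests_per_second=50, check_every_n_seconds=0.1)
LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini", rate_limiter=rate_limiter)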
planning_ai/main.py
CHANGED
@@ -18,7 +18,7 @@ load_dotenv()


 def map_locations(places_df: pl.DataFrame):
-    lad = gpd.read_file("
+    lad = gpd.read_file(Paths.RAW / "LAD_BUC_2022.gpkg").to_crs("epsg:4326")
     lad_camb = lad[lad["LAD22NM"].str.contains("Cambridge")]
     api_key = os.getenv("OPENCAGE_API_KEY")
     geocoder = OpenCageGeocode(key=api_key)
@@ -45,12 +45,14 @@ def map_locations(places_df: pl.DataFrame):
     lad.plot(ax=ax, color="white", edgecolor="gray")
     lad_camb.plot(ax=ax, color="white", edgecolor="black")
     places_gdf.plot(ax=ax, column="Mean Sentiment", markersize=5, legend=True)
+
+    ax = geoplot.kdeplot(places_gdf, projection=gcrs.AlbersEqualArea())
     bounds = lad_camb.total_bounds
     buffer = 0.1
     ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
     ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
     plt.axis("off")
-    plt.savefig("
+    plt.savefig(Paths.SUMMARY / "figs" / "places.png")


 def build_quarto_doc(doc_title, out):
@@ -164,7 +166,7 @@ def build_quarto_doc(doc_title, out):
         f"{short_summaries}"
     )

-    with open(f"
+    with open(Paths.SUMMARY / f"{doc_title.replace(' ', '_')}.qmd", "w") as f:
         f.write(quarto_doc)


@@ -176,7 +178,7 @@ def main():
         loader_cls=TextLoader,
         recursive=True,
     )
-    docs = [doc for doc in loader.load() if doc.page_content]
+    docs = [doc for doc in loader.load()[:10] if doc.page_content]
     text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
         chunk_size=1000, chunk_overlap=0
     )
@@ -201,10 +203,10 @@ def main():

 if __name__ == "__main__":
     doc_title = "Cambridge Response Summary"
+
     tic = time.time()
     out = main()
     build_quarto_doc(doc_title, out)
-    print(out["generate_final_summary"]["final_summary"])
     toc = time.time()

     print(f"Time taken: {(toc - tic) / 60:.2f} minutes.")
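One caveat with the new savefig target: ensure_directories_exist() creates data/out/summary but not its figs/ subfolder, so the call assumes that directory already exists. A small guard along these lines would make the save robust:

import matplotlib.pyplot as plt

from planning_ai.common.utils import Paths

figs_dir = Paths.SUMMARY / "figs"
figs_dir.mkdir(parents=True, exist_ok=True)  # not covered by ensure_directories_exist()
plt.savefig(figs_dir / "places.png")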
planning_ai/preprocessing/gclp.py
CHANGED
@@ -2,15 +2,21 @@ import polars as pl

 from planning_ai.common.utils import Paths

-df = pl.read_excel(
-    Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
-)

-
-df =
+def main():
+    df = pl.read_excel(
+        Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
+    )

-
-
-
-
-
+    free_cols = [df.columns[0]] + df.columns[6:13] + [df.columns[33]]
+    df = df[free_cols]
+
+    for row in df.rows(named=True):
+        user = row.pop("UserNo")
+        content = "\n\n".join([f"**{k}**\n\n{v}" for k, v in row.items() if v != "-"])
+        with open(Paths.STAGING / "gclp" / f"{user}.txt", "w") as f:
+            f.write(content)
+
+
+if __name__ == "__main__":
+    main()
planning_ai/preprocessing/web_comments.py
CHANGED
@@ -2,13 +2,19 @@ import polars as pl

 from planning_ai.common.utils import Paths

-
-
-
-
-for
-
-
-
-
-    f.
+
+def main():
+    dfs = pl.read_excel(Paths.RAW / "web comments.xlsx", sheet_id=0)
+
+    for sheet_name, df in dfs.items():
+        string_df = df.select(pl.col(pl.String)).drop_nulls()
+        for col in string_df.columns:
+            series = string_df[col]
+            name = series.name
+            content = f"**{name}**" + "\n\n* ".join(["\n"] + series.to_list())
+            with open(Paths.STAGING / "web" / f"{sheet_name}.txt", "w") as f:
+                f.write(content)
+
+
+if __name__ == "__main__":
+    main()
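Both preprocessing scripts now write into subfolders of the staging area (data/staging/gclp and data/staging/web) that ensure_directories_exist() does not create. A small sketch of preparing them before running either script:

from planning_ai.common.utils import Paths

# Create the per-source staging folders the preprocessing scripts write into.
for sub in ("gclp", "web"):
    (Paths.STAGING / sub).mkdir(parents=True, exist_ok=True)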