Spaces:

cjber
/

planning-ai

Build error

App Files Files Community

cjber committed on Sep 18, 2024

Commit

0ed214c

1 Parent(s): 563c398

add paths utility class

Browse files

Files changed (11) hide show

planning_ai/chains/fix_chain.py +2 -1
planning_ai/chains/hallucination_chain.py +2 -1
planning_ai/chains/map_chain.py +3 -2
planning_ai/chains/prompts/extract.txt +1 -0
planning_ai/chains/reduce_chain.py +14 -5
planning_ai/common/utils.py +13 -0
planning_ai/graph.py +1 -0
planning_ai/llms/llm.py +0 -5
planning_ai/main.py +7 -5
planning_ai/preprocessing/gclp.py +16 -10
planning_ai/preprocessing/web_comments.py +16 -10

planning_ai/chains/fix_chain.py CHANGED Viewed

@@ -1,8 +1,9 @@
 from langchain_core.prompts import ChatPromptTemplate
 from planning_ai.chains.map_chain import SLLM
-with open("./planning_ai/chains/prompts/fix_hallucination.txt", "r") as f:
     map_template = f.read()
 map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])

 from langchain_core.prompts import ChatPromptTemplate
 from planning_ai.chains.map_chain import SLLM
+from planning_ai.common.utils import Paths
+with open(Paths.PROMPTS / "fix_hallucination.txt", "r") as f:
     map_template = f.read()
 map_prompt = ChatPromptTemplate.from_messages([("system", map_template)])

planning_ai/chains/hallucination_chain.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.pydantic_v1 import BaseModel, Field
 from planning_ai.llms.llm import LLM
-with open("./planning_ai/chains/prompts/hallucination.txt", "r") as f:
     reduce_template = f.read()

 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.pydantic_v1 import BaseModel, Field
+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM
+with open(Paths.PROMPTS / "hallucination.txt", "r") as f:
     reduce_template = f.read()

planning_ai/chains/map_chain.py CHANGED Viewed

@@ -2,11 +2,12 @@ from enum import Enum
 from typing import Literal, Optional
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field, validator
 from planning_ai.llms.llm import LLM
-with open("./planning_ai/chains/prompts/map.txt", "r") as f:
     map_template = f.read()

 from typing import Literal, Optional
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.pydantic_v1 import BaseModel, Field
+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM
+with open(Paths.PROMPTS / "map.txt", "r") as f:
     map_template = f.read()

planning_ai/chains/prompts/extract.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ Extract the relevant text verbatim relating to the following aims:

planning_ai/chains/reduce_chain.py CHANGED Viewed

@@ -1,20 +1,29 @@
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from planning_ai.llms.llm import LLM
-with open("./planning_ai/chains/prompts/reduce.txt", "r") as f:
     reduce_template = f.read()
-reduce_prompt = ChatPromptTemplate([("human", reduce_template)])
 reduce_chain = reduce_prompt | LLM | StrOutputParser()
 if __name__ == "__main__":
     test_summary = """
         The author expresses concern over the proposed mass development north-west of Cambridge,
-        highlighting significant growth in the area over the past twenty years, particularly with
-        the creation of Cambourne and the expansion of Papworth Everard.
-        Related Aims: [Homes, Infrastructure]
         """
     result = reduce_chain.invoke({"context": test_summary})

 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
+from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import LLM
+with open(Paths.PROMPTS / "reduce.txt", "r") as f:
     reduce_template = f.read()
+reduce_prompt = ChatPromptTemplate([("system", reduce_template)])
 reduce_chain = reduce_prompt | LLM | StrOutputParser()
 if __name__ == "__main__":
     test_summary = """
+        Summary:
         The author expresses concern over the proposed mass development north-west of Cambridge,
+        highlighting the significant growth in the area over the past twenty years,
+        particularly with the establishment of Cambourne and the expansion of Papworth Everard.
+        Related Aims:
+        1: Homes
+        2: Infrastructure
         """
     result = reduce_chain.invoke({"context": test_summary})

planning_ai/common/utils.py CHANGED Viewed

@@ -13,6 +13,19 @@ pl.Config(
 class Paths:
     DATA = Path("data")
     RAW = DATA / "raw"
     STAGING = DATA / "staging"
     OUT = DATA / "out"

 class Paths:
     DATA = Path("data")
     RAW = DATA / "raw"
     STAGING = DATA / "staging"
     OUT = DATA / "out"
+    SUMMARY = OUT / "summary"
+    PROMPTS = Path("planning_ai/chains/prompts")
+    @classmethod
+    def ensure_directories_exist(cls):
+        for path in [cls.DATA, cls.RAW, cls.STAGING, cls.OUT, cls.SUMMARY]:
+            path.mkdir(parents=True, exist_ok=True)
+Paths.ensure_directories_exist()

planning_ai/graph.py CHANGED Viewed

@@ -39,6 +39,7 @@ def create_graph():
         map_hallucinations,
         ["check_hallucination"],
     )
     graph.add_edge("check_hallucination", "generate_final_summary")
     graph.add_edge("generate_final_summary", END)

         map_hallucinations,
         ["check_hallucination"],
     )
     graph.add_edge("check_hallucination", "generate_final_summary")
     graph.add_edge("generate_final_summary", END)

planning_ai/llms/llm.py CHANGED Viewed

@@ -1,11 +1,6 @@
 from dotenv import load_dotenv
-from langchain_core.rate_limiters import InMemoryRateLimiter
 from langchain_openai import ChatOpenAI
 load_dotenv()
-# rate_limiter = InMemoryRateLimiter(
-#     requests_per_second=50,
-#     check_every_n_seconds=0.1,
-# )
 LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")

 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
 load_dotenv()
 LLM = ChatOpenAI(temperature=0, model="gpt-4o-mini")

planning_ai/main.py CHANGED Viewed

@@ -18,7 +18,7 @@ load_dotenv()
 def map_locations(places_df: pl.DataFrame):
-    lad = gpd.read_file("./data/raw/LAD_BUC_2022.gpkg").to_crs("epsg:4326")
     lad_camb = lad[lad["LAD22NM"].str.contains("Cambridge")]
     api_key = os.getenv("OPENCAGE_API_KEY")
     geocoder = OpenCageGeocode(key=api_key)
@@ -45,12 +45,14 @@ def map_locations(places_df: pl.DataFrame):
     lad.plot(ax=ax, color="white", edgecolor="gray")
     lad_camb.plot(ax=ax, color="white", edgecolor="black")
     places_gdf.plot(ax=ax, column="Mean Sentiment", markersize=5, legend=True)
     bounds = lad_camb.total_bounds
     buffer = 0.1
     ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
     ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
     plt.axis("off")
-    plt.savefig("./reports/figs/places.png")
 def build_quarto_doc(doc_title, out):
@@ -164,7 +166,7 @@ def build_quarto_doc(doc_title, out):
         f"{short_summaries}"
     )
-    with open(f"./reports/{doc_title.replace(' ', '_')}.qmd", "w") as f:
         f.write(quarto_doc)
@@ -176,7 +178,7 @@ def main():
         loader_cls=TextLoader,
         recursive=True,
     )
-    docs = [doc for doc in loader.load() if doc.page_content]
     text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
         chunk_size=1000, chunk_overlap=0
     )
@@ -201,10 +203,10 @@ def main():
 if __name__ == "__main__":
     doc_title = "Cambridge Response Summary"
     tic = time.time()
     out = main()
     build_quarto_doc(doc_title, out)
-    print(out["generate_final_summary"]["final_summary"])
     toc = time.time()
     print(f"Time taken: {(toc - tic) / 60:.2f} minutes.")

 def map_locations(places_df: pl.DataFrame):
+    lad = gpd.read_file(Paths.RAW / "LAD_BUC_2022.gpkg").to_crs("epsg:4326")
     lad_camb = lad[lad["LAD22NM"].str.contains("Cambridge")]
     api_key = os.getenv("OPENCAGE_API_KEY")
     geocoder = OpenCageGeocode(key=api_key)
     lad.plot(ax=ax, color="white", edgecolor="gray")
     lad_camb.plot(ax=ax, color="white", edgecolor="black")
     places_gdf.plot(ax=ax, column="Mean Sentiment", markersize=5, legend=True)
+    ax = geoplot.kdeplot(places_gdf, projection=gcrs.AlbersEqualArea())
     bounds = lad_camb.total_bounds
     buffer = 0.1
     ax.set_xlim([bounds[0] - buffer, bounds[2] + buffer])
     ax.set_ylim([bounds[1] - buffer, bounds[3] + buffer])
     plt.axis("off")
+    plt.savefig(Paths.SUMMARY / "figs" / "places.png")
 def build_quarto_doc(doc_title, out):
         f"{short_summaries}"
     )
+    with open(Paths.SUMMARY / f"{doc_title.replace(' ', '_')}.qmd", "w") as f:
         f.write(quarto_doc)
         loader_cls=TextLoader,
         recursive=True,
     )
+    docs = [doc for doc in loader.load()[:10] if doc.page_content]
     text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
         chunk_size=1000, chunk_overlap=0
     )
 if __name__ == "__main__":
     doc_title = "Cambridge Response Summary"
     tic = time.time()
     out = main()
     build_quarto_doc(doc_title, out)
     toc = time.time()
     print(f"Time taken: {(toc - tic) / 60:.2f} minutes.")

planning_ai/preprocessing/gclp.py CHANGED Viewed

@@ -2,15 +2,21 @@ import polars as pl
 from planning_ai.common.utils import Paths
-df = pl.read_excel(
-    Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
-)
-free_cols = [df.columns[0]] + df.columns[6:13] + [df.columns[33]]
-df = df[free_cols]
-for row in df.rows(named=True):
-    user = row.pop("UserNo")
-    content = "\n\n".join([f"**{k}**\n\n{v}" for k, v in row.items() if v != "-"])
-    with open(Paths.STAGING / "gclp" / f"{user}.txt", "w") as f:
-        f.write(content)

 from planning_ai.common.utils import Paths
+def main():
+    df = pl.read_excel(
+        Paths.RAW / "gclp-first-proposals-questionnaire-responses-redacted.xlsx"
+    )
+    free_cols = [df.columns[0]] + df.columns[6:13] + [df.columns[33]]
+    df = df[free_cols]
+    for row in df.rows(named=True):
+        user = row.pop("UserNo")
+        content = "\n\n".join([f"**{k}**\n\n{v}" for k, v in row.items() if v != "-"])
+        with open(Paths.STAGING / "gclp" / f"{user}.txt", "w") as f:
+            f.write(content)
+if __name__ == "__main__":
+    main()

planning_ai/preprocessing/web_comments.py CHANGED Viewed

@@ -2,13 +2,19 @@ import polars as pl
 from planning_ai.common.utils import Paths
-dfs = pl.read_excel(Paths.RAW / "web comments.xlsx", sheet_id=0)
-for sheet_name, df in dfs.items():
-    string_df = df.select(pl.col(pl.String)).drop_nulls()
-    for col in string_df.columns:
-        series = string_df[col]
-        name = series.name
-        content = f"**{name}**" + "\n\n* ".join(["\n"] + series.to_list())
-        with open(Paths.STAGING / "web" / f"{sheet_name}.txt", "w") as f:
-            f.write(content)

 from planning_ai.common.utils import Paths
+def main():
+    dfs = pl.read_excel(Paths.RAW / "web comments.xlsx", sheet_id=0)
+    for sheet_name, df in dfs.items():
+        string_df = df.select(pl.col(pl.String)).drop_nulls()
+        for col in string_df.columns:
+            series = string_df[col]
+            name = series.name
+            content = f"**{name}**" + "\n\n* ".join(["\n"] + series.to_list())
+            with open(Paths.STAGING / "web" / f"{sheet_name}.txt", "w") as f:
+                f.write(content)
+if __name__ == "__main__":
+    main()