cjber committed on
Commit
3a57990
·
1 Parent(s): bed065c

feat: add scores for theme selection to allow filtering

Browse files
planning_ai/chains/map_chain.py CHANGED
@@ -2,7 +2,7 @@ from enum import Enum, auto
2
  from typing import Optional, Set, Type
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
- from pydantic import BaseModel, create_model
6
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import LLM
@@ -13,13 +13,13 @@ with open(Paths.PROMPTS / "map.txt", "r") as f:
13
 
14
 
15
  def create_policy_enum(
16
- policy_groups: Set[str], name: str = "DynamicPolicyEnum"
17
  ) -> Enum:
18
  """
19
  Create a dynamic enum for policies based on the given policy groups.
20
 
21
  Args:
22
- policy_groups (Set[str]): A set of policy group names.
23
  name (str): Name of the enum to be created.
24
 
25
  Returns:
@@ -39,29 +39,24 @@ def create_brief_summary_model(policy_enum: Enum) -> Type[BaseModel]:
39
  Type[BaseModel]: A dynamically generated Pydantic model for BriefSummary.
40
  """
41
 
42
- # NOTE: For some reason GPT4o doesn't work if we use too much structure
43
- DynamicPolicy = create_model(
44
- "DynamicPolicy",
45
- # policy=(policy_enum, ...),
46
- policy=(str, ...),
47
- note=(str, ...),
48
- __config__={"extra": "forbid"},
49
- )
50
 
51
  return create_model(
52
  "DynamicBriefSummary",
53
  summary=(str, ...),
54
- policies=(Optional[list[DynamicPolicy]], ...),
55
  __module__=__name__,
56
  __config__={"extra": "forbid"},
57
  )
58
 
59
 
60
  def create_dynamic_map_chain(themes, prompt: str):
61
- policy_groups = set()
62
  for theme in themes:
63
  if theme in THEMES_AND_POLICIES:
64
- policy_groups.update(THEMES_AND_POLICIES[theme])
65
 
66
  PolicyEnum = create_policy_enum(policy_groups)
67
  DynamicBriefSummary = create_brief_summary_model(PolicyEnum)
 
2
  from typing import Optional, Set, Type
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
+ from pydantic import BaseModel, Field, create_model
6
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import LLM
 
13
 
14
 
15
  def create_policy_enum(
16
+ policy_groups: list[str], name: str = "DynamicPolicyEnum"
17
  ) -> Enum:
18
  """
19
  Create a dynamic enum for policies based on the given policy groups.
20
 
21
  Args:
22
+ policy_groups (list[str]): A list of policy group names.
23
  name (str): Name of the enum to be created.
24
 
25
  Returns:
 
39
  Type[BaseModel]: A dynamically generated Pydantic model for BriefSummary.
40
  """
41
 
42
+ class Policy(BaseModel):
43
+ policy: policy_enum
44
+ note: str
 
 
 
 
 
45
 
46
  return create_model(
47
  "DynamicBriefSummary",
48
  summary=(str, ...),
49
+ policies=(list[Policy], ...),
50
  __module__=__name__,
51
  __config__={"extra": "forbid"},
52
  )
53
 
54
 
55
  def create_dynamic_map_chain(themes, prompt: str):
56
+ policy_groups = []
57
  for theme in themes:
58
  if theme in THEMES_AND_POLICIES:
59
+ policy_groups.extend(THEMES_AND_POLICIES[theme])
60
 
61
  PolicyEnum = create_policy_enum(policy_groups)
62
  DynamicBriefSummary = create_brief_summary_model(PolicyEnum)
planning_ai/chains/themes_chain.py CHANGED
@@ -2,13 +2,13 @@ from enum import Enum
2
  from typing import Optional
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
- from pydantic import BaseModel
6
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import LLM
9
 
10
 
11
- class Themes(Enum):
12
  climate_change = "Climate Change"
13
  biodiversity = "Biodiversity and Green Spaces"
14
  wellbeing = "Wellbeing and Social Inclusion"
@@ -18,8 +18,13 @@ class Themes(Enum):
18
  infrastructure = "Infrastructure"
19
 
20
 
 
 
 
 
 
21
  class ThemeSelector(BaseModel):
22
- themes: Optional[list[Themes]]
23
 
24
 
25
  with open(Paths.PROMPTS / "themes.txt", "r") as f:
 
2
  from typing import Optional
3
 
4
  from langchain_core.prompts import ChatPromptTemplate
5
+ from pydantic import BaseModel, Field
6
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import LLM
9
 
10
 
11
+ class Theme(Enum):
12
  climate_change = "Climate Change"
13
  biodiversity = "Biodiversity and Green Spaces"
14
  wellbeing = "Wellbeing and Social Inclusion"
 
18
  infrastructure = "Infrastructure"
19
 
20
 
21
+ class ThemeScore(BaseModel):
22
+ theme: Theme
23
+ score: int
24
+
25
+
26
  class ThemeSelector(BaseModel):
27
+ themes: Optional[list[ThemeScore]]
28
 
29
 
30
  with open(Paths.PROMPTS / "themes.txt", "r") as f:
planning_ai/nodes/hallucination_node.py CHANGED
@@ -83,7 +83,8 @@ def fix_hallucination(state: DocumentState):
83
  hallucinations.
84
  """
85
  logger.warning(f"Fixing hallucinations for document {state['filename']}")
86
- fix_chain = create_dynamic_map_chain(state["themes"], fix_template)
 
87
  try:
88
  response = fix_chain.invoke(
89
  {
 
83
  hallucinations.
84
  """
85
  logger.warning(f"Fixing hallucinations for document {state['filename']}")
86
+ themes = [theme["theme"].value for theme in state["themes"]]
87
+ fix_chain = create_dynamic_map_chain(themes, fix_template)
88
  try:
89
  response = fix_chain.invoke(
90
  {
planning_ai/nodes/map_node.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import spacy
2
  from langgraph.types import Send
3
  from presidio_analyzer import AnalyzerEngine
@@ -14,19 +15,24 @@ anonymizer = AnonymizerEngine()
14
  nlp = spacy.load("en_core_web_lg")
15
 
16
 
17
-
18
  def retrieve_themes(state: DocumentState) -> DocumentState:
19
  try:
20
  result = themes_chain.invoke({"document": state["document"].page_content})
21
  if not result.themes:
22
- state["themes"] = set()
23
  return state
24
- themes = [theme.value for theme in result.themes]
25
  except Exception as e:
26
  logger.error(f"Theme selection error: {e}")
27
  themes = []
28
-
29
- state["themes"] = set(themes)
 
 
 
 
 
 
30
  return state
31
 
32
 
@@ -98,7 +104,8 @@ def generate_summary(state: DocumentState) -> dict:
98
  ]
99
  }
100
 
101
- map_chain = create_dynamic_map_chain(themes=state["themes"], prompt=map_template)
 
102
  try:
103
  response = map_chain.invoke({"context": state["document"].page_content})
104
  except Exception as e:
 
1
+ import numpy as np
2
  import spacy
3
  from langgraph.types import Send
4
  from presidio_analyzer import AnalyzerEngine
 
15
  nlp = spacy.load("en_core_web_lg")
16
 
17
 
 
18
  def retrieve_themes(state: DocumentState) -> DocumentState:
19
  try:
20
  result = themes_chain.invoke({"document": state["document"].page_content})
21
  if not result.themes:
22
+ state["themes"] = []
23
  return state
24
+ themes = [theme.model_dump() for theme in result.themes]
25
  except Exception as e:
26
  logger.error(f"Theme selection error: {e}")
27
  themes = []
28
+ state["themes"] = themes
29
+ state["themes"] = [d for d in state["themes"] if d["score"] > 2]
30
+ state["score"] = np.mean([theme["score"] for theme in state["themes"]])
31
+ if state["score"] < 3:
32
+ state["processed"] = True
33
+ state["failed"] = True
34
+
35
+ logger.info(f"Document {state['filename']} theme score: {state['score']}")
36
  return state
37
 
38
 
 
104
  ]
105
  }
106
 
107
+ themes = [theme["theme"].value for theme in state["themes"]]
108
+ map_chain = create_dynamic_map_chain(themes=themes, prompt=map_template)
109
  try:
110
  response = map_chain.invoke({"context": state["document"].page_content})
111
  except Exception as e:
planning_ai/nodes/reduce_node.py CHANGED
@@ -23,7 +23,7 @@ def save_summaries_to_json(docs):
23
  **doc["document"].metadata,
24
  "filename": doc["filename"],
25
  "entities": doc["entities"],
26
- "themes": list(doc["themes"]),
27
  "summary": doc["summary"].model_dump()["summary"],
28
  "policies": doc["policies"],
29
  "notes": doc["notes"],
@@ -47,10 +47,10 @@ def extract_policies_from_docs(docs):
47
  continue
48
  for policy in doc["summary"].policies:
49
  for theme, p in THEMES_AND_POLICIES.items():
50
- if policy.policy in p:
51
  policies["doc_id"].append(doc["doc_id"])
52
  policies["themes"].append(theme)
53
- policies["policies"].append(policy.policy)
54
  policies["details"].append(policy.note)
55
  policies["stance"].append(
56
  doc["document"].metadata["representations_support/object"]
@@ -131,7 +131,6 @@ def generate_final_report(state: OverallState):
131
 
132
  def final_output(final_docs):
133
  docs = [doc for doc in final_docs if not doc["failed"]]
134
-
135
  docs = add_doc_id(docs)
136
 
137
  policy_groups = extract_policies_from_docs(docs)
 
23
  **doc["document"].metadata,
24
  "filename": doc["filename"],
25
  "entities": doc["entities"],
26
+ "themes": doc["themes"].model_dump(),
27
  "summary": doc["summary"].model_dump()["summary"],
28
  "policies": doc["policies"],
29
  "notes": doc["notes"],
 
47
  continue
48
  for policy in doc["summary"].policies:
49
  for theme, p in THEMES_AND_POLICIES.items():
50
+ if policy.policy.name in p:
51
  policies["doc_id"].append(doc["doc_id"])
52
  policies["themes"].append(theme)
53
+ policies["policies"].append(policy.policy.name)
54
  policies["details"].append(policy.note)
55
  policies["stance"].append(
56
  doc["document"].metadata["representations_support/object"]
 
131
 
132
  def final_output(final_docs):
133
  docs = [doc for doc in final_docs if not doc["failed"]]
 
134
  docs = add_doc_id(docs)
135
 
136
  policy_groups = extract_policies_from_docs(docs)