Spaces:

cjber
/

planning-ai

Build error

App Files Community

cjber committed on Mar 7

Commit

fbb5eac

1 Parent(s): 347760a

fix: allow chapters to dynamically update

Browse files

Files changed (5) hide show

app.py +14 -5
planning_ai/chains/themes_chain.py +14 -24
planning_ai/main.py +4 -2
planning_ai/nodes/map_node.py +2 -1
planning_ai/states.py +1 -0

app.py CHANGED Viewed

@@ -158,6 +158,9 @@ def initialize_session_state():
     if "end_time" not in st.session_state:
         st.session_state["end_time"] = None
 def get_chapters(consultation_url: str):
     if not consultation_url:
@@ -230,6 +233,8 @@ def specify_chapters():
         "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
     )
 def upload_and_extract_files():
     """Handle file upload and extraction."""
@@ -302,7 +307,7 @@ def upload_and_extract_files():
                     st.error(f"Failed to extract files {e}")
-def build_report():
     """Build the report from extracted files."""
     # Remove old files
     _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
@@ -333,7 +338,7 @@ def build_report():
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report...", show_time=True):
-                report_main()
                 st.session_state["end_time"] = time.time()
                 st.session_state["completed"] = True
                 total_time = (
@@ -459,12 +464,16 @@ def main():
             specify_chapters()
         # Step 2: Upload and extract files
-        if not st.session_state["files_extracted"] and st.session_state["chapters"]:
             upload_and_extract_files()
         # Step 3: Build report if files are ready
-        if st.session_state["files_extracted"]:
-            build_report()
         # Step 4: Show download buttons when complete
         with open(Paths.RAW / "title.txt", "r") as f:

     if "end_time" not in st.session_state:
         st.session_state["end_time"] = None
+    if "chapters_list" not in st.session_state:
+        st.session_state["chapters_list"] = []
 def get_chapters(consultation_url: str):
     if not consultation_url:
         "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
     )
+    st.session_state["chapters_list"] = chapters
 def upload_and_extract_files():
     """Handle file upload and extraction."""
                     st.error(f"Failed to extract files {e}")
+def build_report(chapters):
     """Build the report from extracted files."""
     # Remove old files
     _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
                 except Exception as e:
                     st.error(f"An error occurred during PDF text extraction: {e}")
             with st.spinner("Building report...", show_time=True):
+                report_main(chapters)
                 st.session_state["end_time"] = time.time()
                 st.session_state["completed"] = True
                 total_time = (
             specify_chapters()
         # Step 2: Upload and extract files
+        if (
+            not st.session_state["files_extracted"]
+            and st.session_state["chapters"]
+            and st.session_state["chapters_list"]
+        ):
             upload_and_extract_files()
         # Step 3: Build report if files are ready
+        if st.session_state["files_extracted"] and st.session_state["chapters_list"]:
+            build_report(st.session_state["chapters_list"])
         # Step 4: Show download buttons when complete
         with open(Paths.RAW / "title.txt", "r") as f:

planning_ai/chains/themes_chain.py CHANGED Viewed

@@ -7,13 +7,6 @@ from pydantic import BaseModel
 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
-# Read the chapter lines from the file
-if (Paths.RAW / "chapters.txt").exists():
-    with open(Paths.RAW / "chapters.txt", "r") as f:
-        chapters = [line.strip() for line in f.readlines() if line.strip()]
-else:
-    chapters = []
 def create_dynamic_enum(chapters):
     """
@@ -33,28 +26,25 @@ def create_dynamic_enum(chapters):
     return Enum("Chapter", enum_members)
-# Create the dynamic enum
-Chapter = create_dynamic_enum(chapters)
-class ChapterScore(BaseModel):
-    chapter: Chapter
-    score: int
-    description: str
-class ChapterSelector(BaseModel):
-    chapters: Optional[list[ChapterScore]]
-with open(Paths.PROMPTS / "chapters.txt", "r") as f:
-    chapters_template = f.read()
-chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
-SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
-chapters_chain = chapters_prompt | SLLM
 if __name__ == "__main__":

 from planning_ai.common.utils import Paths
 from planning_ai.llms.llm import GPT4o
 def create_dynamic_enum(chapters):
     """
     return Enum("Chapter", enum_members)
+def create_dynamic_chain(chapters: list[str]):
+    Chapter = create_dynamic_enum(chapters)
+    class ChapterScore(BaseModel):
+        chapter: Chapter
+        score: int
+        description: str
+    class ChapterSelector(BaseModel):
+        chapters: Optional[list[ChapterScore]]
+    with open(Paths.PROMPTS / "chapters.txt", "r") as f:
+        chapters_template = f.read()
+    chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
+    SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
+    chapters_chain = chapters_prompt | SLLM
+    return chapters_chain
 if __name__ == "__main__":

planning_ai/main.py CHANGED Viewed

@@ -86,11 +86,13 @@ def read_docs(representations_document: str):
     return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
-def main():
     with open(Paths.RAW / "title.txt", "r") as f:
         rep = f.read().strip()
-    docs = read_docs(rep)
     n_docs = len(docs)
     logger.info(f"{n_docs} documents being processed!")

     return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
+def main(chapters: list[str]):
     with open(Paths.RAW / "title.txt", "r") as f:
         rep = f.read().strip()
+    docs = read_docs(rep)[:1]
+    for doc in docs:
+        doc["chapters"] = chapters
     n_docs = len(docs)
     logger.info(f"{n_docs} documents being processed!")

planning_ai/nodes/map_node.py CHANGED Viewed

@@ -5,7 +5,7 @@ from presidio_analyzer import AnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine
 from planning_ai.chains.map_chain import map_chain
-from planning_ai.chains.themes_chain import chapters_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
@@ -28,6 +28,7 @@ def retrieve_chapters(state: DocumentState) -> DocumentState:
         DocumentState: The updated document state with themes and scores.
     """
     try:
         result = chapters_chain.invoke({"document": state["document"].page_content})
         if not result.chapters:
             state["themes"] = []

 from presidio_anonymizer import AnonymizerEngine
 from planning_ai.chains.map_chain import map_chain
+from planning_ai.chains.themes_chain import create_dynamic_chain
 from planning_ai.logging import logger
 from planning_ai.states import DocumentState, OverallState
         DocumentState: The updated document state with themes and scores.
     """
     try:
+        chapters_chain = create_dynamic_chain(state["chapters"])
         result = chapters_chain.invoke({"document": state["document"].page_content})
         if not result.chapters:
             state["themes"] = []

planning_ai/states.py CHANGED Viewed

@@ -18,6 +18,7 @@ class DocumentState(TypedDict):
     summary: BaseModel
     hallucination: HallucinationChecker
     is_hallucinated: bool
     refinement_attempts: int

     summary: BaseModel
     hallucination: HallucinationChecker
+    chapters: list[str]
     is_hallucinated: bool
     refinement_attempts: int