cjber committed on
Commit
fbb5eac
·
1 Parent(s): 347760a

fix: allow chapters to dynamically update

Browse files
app.py CHANGED
@@ -158,6 +158,9 @@ def initialize_session_state():
158
  if "end_time" not in st.session_state:
159
  st.session_state["end_time"] = None
160
 
 
 
 
161
 
162
  def get_chapters(consultation_url: str):
163
  if not consultation_url:
@@ -230,6 +233,8 @@ def specify_chapters():
230
  "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
231
  )
232
 
 
 
233
 
234
  def upload_and_extract_files():
235
  """Handle file upload and extraction."""
@@ -302,7 +307,7 @@ def upload_and_extract_files():
302
  st.error(f"Failed to extract files {e}")
303
 
304
 
305
- def build_report():
306
  """Build the report from extracted files."""
307
  # Remove old files
308
  _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
@@ -333,7 +338,7 @@ def build_report():
333
  except Exception as e:
334
  st.error(f"An error occurred during PDF text extraction: {e}")
335
  with st.spinner("Building report...", show_time=True):
336
- report_main()
337
  st.session_state["end_time"] = time.time()
338
  st.session_state["completed"] = True
339
  total_time = (
@@ -459,12 +464,16 @@ def main():
459
  specify_chapters()
460
 
461
  # Step 2: Upload and extract files
462
- if not st.session_state["files_extracted"] and st.session_state["chapters"]:
 
 
 
 
463
  upload_and_extract_files()
464
 
465
  # Step 3: Build report if files are ready
466
- if st.session_state["files_extracted"]:
467
- build_report()
468
 
469
  # Step 4: Show download buttons when complete
470
  with open(Paths.RAW / "title.txt", "r") as f:
 
158
  if "end_time" not in st.session_state:
159
  st.session_state["end_time"] = None
160
 
161
+ if "chapters_list" not in st.session_state:
162
+ st.session_state["chapters_list"] = []
163
+
164
 
165
  def get_chapters(consultation_url: str):
166
  if not consultation_url:
 
233
  "Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
234
  )
235
 
236
+ st.session_state["chapters_list"] = chapters
237
+
238
 
239
  def upload_and_extract_files():
240
  """Handle file upload and extraction."""
 
307
  st.error(f"Failed to extract files {e}")
308
 
309
 
310
+ def build_report(chapters):
311
  """Build the report from extracted files."""
312
  # Remove old files
313
  _ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
 
338
  except Exception as e:
339
  st.error(f"An error occurred during PDF text extraction: {e}")
340
  with st.spinner("Building report...", show_time=True):
341
+ report_main(chapters)
342
  st.session_state["end_time"] = time.time()
343
  st.session_state["completed"] = True
344
  total_time = (
 
464
  specify_chapters()
465
 
466
  # Step 2: Upload and extract files
467
+ if (
468
+ not st.session_state["files_extracted"]
469
+ and st.session_state["chapters"]
470
+ and st.session_state["chapters_list"]
471
+ ):
472
  upload_and_extract_files()
473
 
474
  # Step 3: Build report if files are ready
475
+ if st.session_state["files_extracted"] and st.session_state["chapters_list"]:
476
+ build_report(st.session_state["chapters_list"])
477
 
478
  # Step 4: Show download buttons when complete
479
  with open(Paths.RAW / "title.txt", "r") as f:
planning_ai/chains/themes_chain.py CHANGED
@@ -7,13 +7,6 @@ from pydantic import BaseModel
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import GPT4o
9
 
10
- # Read the chapter lines from the file
11
- if (Paths.RAW / "chapters.txt").exists():
12
- with open(Paths.RAW / "chapters.txt", "r") as f:
13
- chapters = [line.strip() for line in f.readlines() if line.strip()]
14
- else:
15
- chapters = []
16
-
17
 
18
  def create_dynamic_enum(chapters):
19
  """
@@ -33,28 +26,25 @@ def create_dynamic_enum(chapters):
33
  return Enum("Chapter", enum_members)
34
 
35
 
36
- # Create the dynamic enum
37
- Chapter = create_dynamic_enum(chapters)
38
-
39
-
40
- class ChapterScore(BaseModel):
41
- chapter: Chapter
42
- score: int
43
- description: str
44
-
45
-
46
- class ChapterSelector(BaseModel):
47
- chapters: Optional[list[ChapterScore]]
48
 
 
 
 
 
49
 
50
- with open(Paths.PROMPTS / "chapters.txt", "r") as f:
51
- chapters_template = f.read()
52
 
53
- chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
 
54
 
55
- SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
 
56
 
57
- chapters_chain = chapters_prompt | SLLM
 
58
 
59
 
60
  if __name__ == "__main__":
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.llms.llm import GPT4o
9
 
 
 
 
 
 
 
 
10
 
11
  def create_dynamic_enum(chapters):
12
  """
 
26
  return Enum("Chapter", enum_members)
27
 
28
 
29
+ def create_dynamic_chain(chapters: list[str]):
30
+ Chapter = create_dynamic_enum(chapters)
 
 
 
 
 
 
 
 
 
 
31
 
32
+ class ChapterScore(BaseModel):
33
+ chapter: Chapter
34
+ score: int
35
+ description: str
36
 
37
+ class ChapterSelector(BaseModel):
38
+ chapters: Optional[list[ChapterScore]]
39
 
40
+ with open(Paths.PROMPTS / "chapters.txt", "r") as f:
41
+ chapters_template = f.read()
42
 
43
+ chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
44
+ SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
45
 
46
+ chapters_chain = chapters_prompt | SLLM
47
+ return chapters_chain
48
 
49
 
50
  if __name__ == "__main__":
planning_ai/main.py CHANGED
@@ -86,11 +86,13 @@ def read_docs(representations_document: str):
86
  return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
87
 
88
 
89
- def main():
90
  with open(Paths.RAW / "title.txt", "r") as f:
91
  rep = f.read().strip()
92
 
93
- docs = read_docs(rep)
 
 
94
  n_docs = len(docs)
95
 
96
  logger.info(f"{n_docs} documents being processed!")
 
86
  return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
87
 
88
 
89
+ def main(chapters: list[str]):
90
  with open(Paths.RAW / "title.txt", "r") as f:
91
  rep = f.read().strip()
92
 
93
+ docs = read_docs(rep)[:1]
94
+ for doc in docs:
95
+ doc["chapters"] = chapters
96
  n_docs = len(docs)
97
 
98
  logger.info(f"{n_docs} documents being processed!")
planning_ai/nodes/map_node.py CHANGED
@@ -5,7 +5,7 @@ from presidio_analyzer import AnalyzerEngine
5
  from presidio_anonymizer import AnonymizerEngine
6
 
7
  from planning_ai.chains.map_chain import map_chain
8
- from planning_ai.chains.themes_chain import chapters_chain
9
  from planning_ai.logging import logger
10
  from planning_ai.states import DocumentState, OverallState
11
 
@@ -28,6 +28,7 @@ def retrieve_chapters(state: DocumentState) -> DocumentState:
28
  DocumentState: The updated document state with themes and scores.
29
  """
30
  try:
 
31
  result = chapters_chain.invoke({"document": state["document"].page_content})
32
  if not result.chapters:
33
  state["themes"] = []
 
5
  from presidio_anonymizer import AnonymizerEngine
6
 
7
  from planning_ai.chains.map_chain import map_chain
8
+ from planning_ai.chains.themes_chain import create_dynamic_chain
9
  from planning_ai.logging import logger
10
  from planning_ai.states import DocumentState, OverallState
11
 
 
28
  DocumentState: The updated document state with themes and scores.
29
  """
30
  try:
31
+ chapters_chain = create_dynamic_chain(state["chapters"])
32
  result = chapters_chain.invoke({"document": state["document"].page_content})
33
  if not result.chapters:
34
  state["themes"] = []
planning_ai/states.py CHANGED
@@ -18,6 +18,7 @@ class DocumentState(TypedDict):
18
 
19
  summary: BaseModel
20
  hallucination: HallucinationChecker
 
21
 
22
  is_hallucinated: bool
23
  refinement_attempts: int
 
18
 
19
  summary: BaseModel
20
  hallucination: HallucinationChecker
21
+ chapters: list[str]
22
 
23
  is_hallucinated: bool
24
  refinement_attempts: int