Spaces:
Build error
Build error
fix: allow chapters to dynamically update
Browse files
- app.py +14 -5
- planning_ai/chains/themes_chain.py +14 -24
- planning_ai/main.py +4 -2
- planning_ai/nodes/map_node.py +2 -1
- planning_ai/states.py +1 -0
app.py
CHANGED
@@ -158,6 +158,9 @@ def initialize_session_state():
|
|
158 |
if "end_time" not in st.session_state:
|
159 |
st.session_state["end_time"] = None
|
160 |
|
|
|
|
|
|
|
161 |
|
162 |
def get_chapters(consultation_url: str):
|
163 |
if not consultation_url:
|
@@ -230,6 +233,8 @@ def specify_chapters():
|
|
230 |
"Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
|
231 |
)
|
232 |
|
|
|
|
|
233 |
|
234 |
def upload_and_extract_files():
|
235 |
"""Handle file upload and extraction."""
|
@@ -302,7 +307,7 @@ def upload_and_extract_files():
|
|
302 |
st.error(f"Failed to extract files {e}")
|
303 |
|
304 |
|
305 |
-
def build_report():
|
306 |
"""Build the report from extracted files."""
|
307 |
# Remove old files
|
308 |
_ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
|
@@ -333,7 +338,7 @@ def build_report():
|
|
333 |
except Exception as e:
|
334 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
335 |
with st.spinner("Building report...", show_time=True):
|
336 |
-
report_main()
|
337 |
st.session_state["end_time"] = time.time()
|
338 |
st.session_state["completed"] = True
|
339 |
total_time = (
|
@@ -459,12 +464,16 @@ def main():
|
|
459 |
specify_chapters()
|
460 |
|
461 |
# Step 2: Upload and extract files
|
462 |
-
if
|
|
|
|
|
|
|
|
|
463 |
upload_and_extract_files()
|
464 |
|
465 |
# Step 3: Build report if files are ready
|
466 |
-
if st.session_state["files_extracted"]:
|
467 |
-
build_report()
|
468 |
|
469 |
# Step 4: Show download buttons when complete
|
470 |
with open(Paths.RAW / "title.txt", "r") as f:
|
|
|
158 |
if "end_time" not in st.session_state:
|
159 |
st.session_state["end_time"] = None
|
160 |
|
161 |
+
if "chapters_list" not in st.session_state:
|
162 |
+
st.session_state["chapters_list"] = []
|
163 |
+
|
164 |
|
165 |
def get_chapters(consultation_url: str):
|
166 |
if not consultation_url:
|
|
|
233 |
"Save Chapters", on_click=lambda: st.session_state.update({"chapters": True})
|
234 |
)
|
235 |
|
236 |
+
st.session_state["chapters_list"] = chapters
|
237 |
+
|
238 |
|
239 |
def upload_and_extract_files():
|
240 |
"""Handle file upload and extraction."""
|
|
|
307 |
st.error(f"Failed to extract files {e}")
|
308 |
|
309 |
|
310 |
+
def build_report(chapters):
|
311 |
"""Build the report from extracted files."""
|
312 |
# Remove old files
|
313 |
_ = [file.unlink() for file in (Paths.OUT / "summaries").rglob("*.pdf")]
|
|
|
338 |
except Exception as e:
|
339 |
st.error(f"An error occurred during PDF text extraction: {e}")
|
340 |
with st.spinner("Building report...", show_time=True):
|
341 |
+
report_main(chapters)
|
342 |
st.session_state["end_time"] = time.time()
|
343 |
st.session_state["completed"] = True
|
344 |
total_time = (
|
|
|
464 |
specify_chapters()
|
465 |
|
466 |
# Step 2: Upload and extract files
|
467 |
+
if (
|
468 |
+
not st.session_state["files_extracted"]
|
469 |
+
and st.session_state["chapters"]
|
470 |
+
and st.session_state["chapters_list"]
|
471 |
+
):
|
472 |
upload_and_extract_files()
|
473 |
|
474 |
# Step 3: Build report if files are ready
|
475 |
+
if st.session_state["files_extracted"] and st.session_state["chapters_list"]:
|
476 |
+
build_report(st.session_state["chapters_list"])
|
477 |
|
478 |
# Step 4: Show download buttons when complete
|
479 |
with open(Paths.RAW / "title.txt", "r") as f:
|
planning_ai/chains/themes_chain.py
CHANGED
@@ -7,13 +7,6 @@ from pydantic import BaseModel
|
|
7 |
from planning_ai.common.utils import Paths
|
8 |
from planning_ai.llms.llm import GPT4o
|
9 |
|
10 |
-
# Read the chapter lines from the file
|
11 |
-
if (Paths.RAW / "chapters.txt").exists():
|
12 |
-
with open(Paths.RAW / "chapters.txt", "r") as f:
|
13 |
-
chapters = [line.strip() for line in f.readlines() if line.strip()]
|
14 |
-
else:
|
15 |
-
chapters = []
|
16 |
-
|
17 |
|
18 |
def create_dynamic_enum(chapters):
|
19 |
"""
|
@@ -33,28 +26,25 @@ def create_dynamic_enum(chapters):
|
|
33 |
return Enum("Chapter", enum_members)
|
34 |
|
35 |
|
36 |
-
|
37 |
-
Chapter = create_dynamic_enum(chapters)
|
38 |
-
|
39 |
-
|
40 |
-
class ChapterScore(BaseModel):
|
41 |
-
chapter: Chapter
|
42 |
-
score: int
|
43 |
-
description: str
|
44 |
-
|
45 |
-
|
46 |
-
class ChapterSelector(BaseModel):
|
47 |
-
chapters: Optional[list[ChapterScore]]
|
48 |
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
52 |
|
53 |
-
|
|
|
54 |
|
55 |
-
|
|
|
56 |
|
57 |
-
chapters_chain = chapters_prompt | SLLM
|
|
|
58 |
|
59 |
|
60 |
if __name__ == "__main__":
|
|
|
7 |
from planning_ai.common.utils import Paths
|
8 |
from planning_ai.llms.llm import GPT4o
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def create_dynamic_enum(chapters):
|
12 |
"""
|
|
|
26 |
return Enum("Chapter", enum_members)
|
27 |
|
28 |
|
29 |
+
def create_dynamic_chain(chapters: list[str]):
|
30 |
+
Chapter = create_dynamic_enum(chapters)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
class ChapterScore(BaseModel):
|
33 |
+
chapter: Chapter
|
34 |
+
score: int
|
35 |
+
description: str
|
36 |
|
37 |
+
class ChapterSelector(BaseModel):
|
38 |
+
chapters: Optional[list[ChapterScore]]
|
39 |
|
40 |
+
with open(Paths.PROMPTS / "chapters.txt", "r") as f:
|
41 |
+
chapters_template = f.read()
|
42 |
|
43 |
+
chapters_prompt = ChatPromptTemplate.from_messages([("system", chapters_template)])
|
44 |
+
SLLM = GPT4o.with_structured_output(ChapterSelector, strict=True)
|
45 |
|
46 |
+
chapters_chain = chapters_prompt | SLLM
|
47 |
+
return chapters_chain
|
48 |
|
49 |
|
50 |
if __name__ == "__main__":
|
planning_ai/main.py
CHANGED
@@ -86,11 +86,13 @@ def read_docs(representations_document: str):
|
|
86 |
return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
|
87 |
|
88 |
|
89 |
-
def main():
|
90 |
with open(Paths.RAW / "title.txt", "r") as f:
|
91 |
rep = f.read().strip()
|
92 |
|
93 |
-
docs = read_docs(rep)
|
|
|
|
|
94 |
n_docs = len(docs)
|
95 |
|
96 |
logger.info(f"{n_docs} documents being processed!")
|
|
|
86 |
return [{"document": doc, "filename": doc.metadata["filename"]} for doc in docs]
|
87 |
|
88 |
|
89 |
+
def main(chapters: list[str]):
|
90 |
with open(Paths.RAW / "title.txt", "r") as f:
|
91 |
rep = f.read().strip()
|
92 |
|
93 |
+
docs = read_docs(rep)[:1]
|
94 |
+
for doc in docs:
|
95 |
+
doc["chapters"] = chapters
|
96 |
n_docs = len(docs)
|
97 |
|
98 |
logger.info(f"{n_docs} documents being processed!")
|
planning_ai/nodes/map_node.py
CHANGED
@@ -5,7 +5,7 @@ from presidio_analyzer import AnalyzerEngine
|
|
5 |
from presidio_anonymizer import AnonymizerEngine
|
6 |
|
7 |
from planning_ai.chains.map_chain import map_chain
|
8 |
-
from planning_ai.chains.themes_chain import chapters_chain
|
9 |
from planning_ai.logging import logger
|
10 |
from planning_ai.states import DocumentState, OverallState
|
11 |
|
@@ -28,6 +28,7 @@ def retrieve_chapters(state: DocumentState) -> DocumentState:
|
|
28 |
DocumentState: The updated document state with themes and scores.
|
29 |
"""
|
30 |
try:
|
|
|
31 |
result = chapters_chain.invoke({"document": state["document"].page_content})
|
32 |
if not result.chapters:
|
33 |
state["themes"] = []
|
|
|
5 |
from presidio_anonymizer import AnonymizerEngine
|
6 |
|
7 |
from planning_ai.chains.map_chain import map_chain
|
8 |
+
from planning_ai.chains.themes_chain import create_dynamic_chain
|
9 |
from planning_ai.logging import logger
|
10 |
from planning_ai.states import DocumentState, OverallState
|
11 |
|
|
|
28 |
DocumentState: The updated document state with themes and scores.
|
29 |
"""
|
30 |
try:
|
31 |
+
chapters_chain = create_dynamic_chain(state["chapters"])
|
32 |
result = chapters_chain.invoke({"document": state["document"].page_content})
|
33 |
if not result.chapters:
|
34 |
state["themes"] = []
|
planning_ai/states.py
CHANGED
@@ -18,6 +18,7 @@ class DocumentState(TypedDict):
|
|
18 |
|
19 |
summary: BaseModel
|
20 |
hallucination: HallucinationChecker
|
|
|
21 |
|
22 |
is_hallucinated: bool
|
23 |
refinement_attempts: int
|
|
|
18 |
|
19 |
summary: BaseModel
|
20 |
hallucination: HallucinationChecker
|
21 |
+
chapters: list[str]
|
22 |
|
23 |
is_hallucinated: bool
|
24 |
refinement_attempts: int
|