cjber committed on
Commit
06d47d7
·
1 Parent(s): c0faedc

remove token limit

Browse files
planning_ai/common/utils.py CHANGED
@@ -1,10 +1,6 @@
1
  from pathlib import Path
2
- from typing import List
3
 
4
  import polars as pl
5
- from langchain_core.documents import Document
6
-
7
- from planning_ai.llms.llm import LLM
8
 
9
  pl.Config(
10
  fmt_str_lengths=9,
@@ -20,12 +16,3 @@ class Paths:
20
  RAW = DATA / "raw"
21
  STAGING = DATA / "staging"
22
  OUT = DATA / "out"
23
-
24
-
25
- class Consts:
26
- TOKEN_MAX = 100_000
27
-
28
-
29
- def length_function(documents: List[Document]) -> int:
30
- """Get number of tokens for input contents."""
31
- return sum(LLM.get_num_tokens(doc.page_content) for doc in documents)
 
1
  from pathlib import Path
 
2
 
3
  import polars as pl
 
 
 
4
 
5
  pl.Config(
6
  fmt_str_lengths=9,
 
16
  RAW = DATA / "raw"
17
  STAGING = DATA / "staging"
18
  OUT = DATA / "out"
 
 
 
 
 
 
 
 
 
planning_ai/graph.py CHANGED
@@ -4,9 +4,8 @@ from planning_ai.nodes.map_node import (
4
  collect_summaries,
5
  generate_summary,
6
  map_summaries,
7
- should_collapse,
8
  )
9
- from planning_ai.nodes.reduce_node import collapse_summaries, generate_final_summary
10
  from planning_ai.states import OverallState
11
 
12
 
@@ -14,13 +13,11 @@ def create_graph():
14
  graph = StateGraph(OverallState)
15
  graph.add_node("generate_summary", generate_summary)
16
  graph.add_node("collect_summaries", collect_summaries)
17
- graph.add_node("collapse_summaries", collapse_summaries)
18
  graph.add_node("generate_final_summary", generate_final_summary)
19
 
20
  graph.add_conditional_edges(START, map_summaries, ["generate_summary"])
21
  graph.add_edge("generate_summary", "collect_summaries")
22
- graph.add_conditional_edges("collect_summaries", should_collapse)
23
- graph.add_conditional_edges("collapse_summaries", should_collapse)
24
  graph.add_edge("generate_final_summary", END)
25
 
26
  return graph.compile()
 
4
  collect_summaries,
5
  generate_summary,
6
  map_summaries,
 
7
  )
8
+ from planning_ai.nodes.reduce_node import generate_final_summary
9
  from planning_ai.states import OverallState
10
 
11
 
 
13
  graph = StateGraph(OverallState)
14
  graph.add_node("generate_summary", generate_summary)
15
  graph.add_node("collect_summaries", collect_summaries)
 
16
  graph.add_node("generate_final_summary", generate_final_summary)
17
 
18
  graph.add_conditional_edges(START, map_summaries, ["generate_summary"])
19
  graph.add_edge("generate_summary", "collect_summaries")
20
+ graph.add_edge("collect_summaries", "generate_final_summary")
 
21
  graph.add_edge("generate_final_summary", END)
22
 
23
  return graph.compile()
planning_ai/main.py CHANGED
@@ -7,8 +7,6 @@ from langchain_text_splitters import CharacterTextSplitter
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.graph import create_graph
9
 
10
- doc_title = "Cambridge Response Summary"
11
-
12
 
13
  def build_quarto_doc(doc_title, out):
14
  final = out["generate_final_summary"]
@@ -120,7 +118,7 @@ def main():
120
  loader_cls=TextLoader,
121
  recursive=True,
122
  )
123
- docs = [doc for doc in loader.load()[:200] if doc.page_content]
124
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
125
  chunk_size=1000, chunk_overlap=0
126
  )
@@ -145,5 +143,6 @@ def main():
145
 
146
 
147
  if __name__ == "__main__":
 
148
  out = main()
149
  build_quarto_doc(doc_title, out)
 
7
  from planning_ai.common.utils import Paths
8
  from planning_ai.graph import create_graph
9
 
 
 
10
 
11
  def build_quarto_doc(doc_title, out):
12
  final = out["generate_final_summary"]
 
118
  loader_cls=TextLoader,
119
  recursive=True,
120
  )
121
+ docs = [doc for doc in loader.load()[:20] if doc.page_content]
122
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
123
  chunk_size=1000, chunk_overlap=0
124
  )
 
143
 
144
 
145
  if __name__ == "__main__":
146
+ doc_title = "Cambridge Response Summary"
147
  out = main()
148
  build_quarto_doc(doc_title, out)
planning_ai/nodes/map_node.py CHANGED
@@ -1,10 +1,7 @@
1
- from typing import Literal
2
-
3
  from langchain_core.documents import Document
4
  from langgraph.constants import Send
5
 
6
  from planning_ai.chains.map_chain import map_chain
7
- from planning_ai.common.utils import Consts, length_function
8
  from planning_ai.states import OverallState, SummaryState
9
 
10
 
@@ -48,13 +45,3 @@ def collect_summaries(state: OverallState):
48
  for idx, summary in enumerate(state["summaries"], start=1)
49
  ]
50
  }
51
-
52
-
53
- def should_collapse(
54
- state: OverallState,
55
- ) -> Literal["collapse_summaries", "generate_final_summary"]:
56
- num_tokens = length_function(state["collapsed_summaries"])
57
- if num_tokens > Consts.TOKEN_MAX:
58
- return "collapse_summaries"
59
- else:
60
- return "generate_final_summary"
 
 
 
1
  from langchain_core.documents import Document
2
  from langgraph.constants import Send
3
 
4
  from planning_ai.chains.map_chain import map_chain
 
5
  from planning_ai.states import OverallState, SummaryState
6
 
7
 
 
45
  for idx, summary in enumerate(state["summaries"], start=1)
46
  ]
47
  }
 
 
 
 
 
 
 
 
 
 
planning_ai/nodes/reduce_node.py CHANGED
@@ -1,21 +1,7 @@
1
- from langchain.chains.combine_documents.reduce import collapse_docs, split_list_of_docs
2
-
3
  from planning_ai.chains.reduce_chain import reduce_chain
4
- from planning_ai.common.utils import Consts, length_function
5
  from planning_ai.states import OverallState
6
 
7
 
8
- def collapse_summaries(state: OverallState):
9
- doc_lists = split_list_of_docs(
10
- state["collapsed_summaries"], length_function, Consts.TOKEN_MAX
11
- )
12
- results = []
13
- for doc_list in doc_lists:
14
- results.append(collapse_docs(doc_list, reduce_chain.invoke))
15
-
16
- return {"collapsed_summaries": results}
17
-
18
-
19
  def generate_final_summary(state: OverallState):
20
  response = reduce_chain.invoke({"context": state["collapsed_summaries"]})
21
  return {
 
 
 
1
  from planning_ai.chains.reduce_chain import reduce_chain
 
2
  from planning_ai.states import OverallState
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
5
  def generate_final_summary(state: OverallState):
6
  response = reduce_chain.invoke({"context": state["collapsed_summaries"]})
7
  return {