mitulagr2 committed on
Commit
881c0e5
·
1 Parent(s): 8b90c15
Files changed (1) hide show
  1. app/rag.py +4 -4
app/rag.py CHANGED
@@ -52,7 +52,7 @@ class ChatPDF:
52
 
53
 
54
  def __init__(self):
55
- text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
56
 
57
  self.logger.info("initializing the vector store related objects")
58
  # client = QdrantClient(host="localhost", port=6333)
@@ -78,11 +78,11 @@ class ChatPDF:
78
  # tokenizer.save_pretrained("./models/tokenizer/")
79
 
80
  self.logger.info("initializing the global settings")
81
- Settings.text_splitter = text_parser
82
  Settings.embed_model = self.embed_model
83
  Settings.llm = llm
84
  # Settings.tokenzier = tokenizer
85
- Settings.transformations = [text_parser]
86
 
87
  def ingest(self, files_dir: str):
88
  text_chunks = []
@@ -93,7 +93,7 @@ class ChatPDF:
93
 
94
  self.logger.info("enumerating docs")
95
  for doc_idx, doc in enumerate(docs):
96
- curr_text_chunks = text_parser.split_text(doc.text)
97
  text_chunks.extend(curr_text_chunks)
98
  doc_ids.extend([doc_idx] * len(curr_text_chunks))
99
 
 
52
 
53
 
54
  def __init__(self):
55
+ self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
56
 
57
  self.logger.info("initializing the vector store related objects")
58
  # client = QdrantClient(host="localhost", port=6333)
 
78
  # tokenizer.save_pretrained("./models/tokenizer/")
79
 
80
  self.logger.info("initializing the global settings")
81
+ Settings.text_splitter = self.text_parser
82
  Settings.embed_model = self.embed_model
83
  Settings.llm = llm
84
  # Settings.tokenzier = tokenizer
85
+ Settings.transformations = [self.text_parser]
86
 
87
  def ingest(self, files_dir: str):
88
  text_chunks = []
 
93
 
94
  self.logger.info("enumerating docs")
95
  for doc_idx, doc in enumerate(docs):
96
+ curr_text_chunks = self.text_parser.split_text(doc.text)
97
  text_chunks.extend(curr_text_chunks)
98
  doc_ids.extend([doc_idx] * len(curr_text_chunks))
99