Spaces:
Running
Running
minor fix
Browse files
- app/rag.py +4 -4
app/rag.py
CHANGED
@@ -52,7 +52,7 @@ class ChatPDF:
|
|
52 |
|
53 |
|
54 |
def __init__(self):
|
55 |
-
text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
|
56 |
|
57 |
self.logger.info("initializing the vector store related objects")
|
58 |
# client = QdrantClient(host="localhost", port=6333)
|
@@ -78,11 +78,11 @@ class ChatPDF:
|
|
78 |
# tokenizer.save_pretrained("./models/tokenizer/")
|
79 |
|
80 |
self.logger.info("initializing the global settings")
|
81 |
-
Settings.text_splitter = text_parser
|
82 |
Settings.embed_model = self.embed_model
|
83 |
Settings.llm = llm
|
84 |
# Settings.tokenzier = tokenizer
|
85 |
-
Settings.transformations = [text_parser]
|
86 |
|
87 |
def ingest(self, files_dir: str):
|
88 |
text_chunks = []
|
@@ -93,7 +93,7 @@ class ChatPDF:
|
|
93 |
|
94 |
self.logger.info("enumerating docs")
|
95 |
for doc_idx, doc in enumerate(docs):
|
96 |
-
curr_text_chunks = text_parser.split_text(doc.text)
|
97 |
text_chunks.extend(curr_text_chunks)
|
98 |
doc_ids.extend([doc_idx] * len(curr_text_chunks))
|
99 |
|
|
|
52 |
|
53 |
|
54 |
def __init__(self):
|
55 |
+
self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
|
56 |
|
57 |
self.logger.info("initializing the vector store related objects")
|
58 |
# client = QdrantClient(host="localhost", port=6333)
|
|
|
78 |
# tokenizer.save_pretrained("./models/tokenizer/")
|
79 |
|
80 |
self.logger.info("initializing the global settings")
|
81 |
+
Settings.text_splitter = self.text_parser
|
82 |
Settings.embed_model = self.embed_model
|
83 |
Settings.llm = llm
|
84 |
# Settings.tokenzier = tokenizer
|
85 |
+
Settings.transformations = [self.text_parser]
|
86 |
|
87 |
def ingest(self, files_dir: str):
|
88 |
text_chunks = []
|
|
|
93 |
|
94 |
self.logger.info("enumerating docs")
|
95 |
for doc_idx, doc in enumerate(docs):
|
96 |
+
curr_text_chunks = self.text_parser.split_text(doc.text)
|
97 |
text_chunks.extend(curr_text_chunks)
|
98 |
doc_ids.extend([doc_idx] * len(curr_text_chunks))
|
99 |
|