Spaces:
Sleeping
Sleeping
Update helpers.py
Browse files
- helpers.py +1 -1
helpers.py
CHANGED
@@ -154,7 +154,7 @@ def get_web_documents(base_urls=['https://nct.neu.edu.vn/']):
|
|
154 |
|
155 |
def load_text_data(file_path):
|
156 |
"""Load text content from a DOCX file (tables removed)."""
|
157 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
158 |
loader = DoclingLoader(
|
159 |
file_path=file_path,
|
160 |
export_type=ExportType.MARKDOWN, # Enable internal chunking,
|
|
|
154 |
|
155 |
def load_text_data(file_path):
|
156 |
"""Load text content from a DOCX file (tables removed)."""
|
157 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=1000)
|
158 |
loader = DoclingLoader(
|
159 |
file_path=file_path,
|
160 |
export_type=ExportType.MARKDOWN, # Enable internal chunking,
|