Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,8 @@ def pdf_to_text(pdf_file, query):
|
|
30 |
# Extract the text from the page and add it to the text variable
|
31 |
text += page.extract_text()
|
32 |
#embedding step
|
33 |
-
text_splitter
|
|
|
34 |
texts = text_splitter.split_text(text)
|
35 |
|
36 |
embeddings = HuggingFaceEmbeddings()
|
|
|
30 |
# Extract the text from the page and add it to the text variable
|
31 |
text += page.extract_text()
|
32 |
#embedding step
|
33 |
+
from langchain.text_splitter import CharacterTextSplitter
|
34 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
35 |
texts = text_splitter.split_text(text)
|
36 |
|
37 |
embeddings = HuggingFaceEmbeddings()
|