PDF-QA-Opensource

Sleeping

Noobian committed on Jul 19, 2023

Commit

ec97476

•

1 Parent(s): 8f70ba9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -30,7 +30,8 @@ def pdf_to_text(pdf_file, query):
           # Extract the text from the page and add it to the text variable
           text += page.extract_text()
     #embedding step
-  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
   texts = text_splitter.split_text(text)
   embeddings = HuggingFaceEmbeddings()

           # Extract the text from the page and add it to the text variable
           text += page.extract_text()
     #embedding step
+  from langchain.text_splitter import CharacterTextSplitter
+  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
   texts = text_splitter.split_text(text)
   embeddings = HuggingFaceEmbeddings()