rchrdgwr committed on
Commit
ad0431b
1 Parent(s): 8dd79e8

added pdf processing

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -56,16 +56,15 @@ def process_text_file(file: AskFileResponse):
56
 
57
  file_extension = os.path.splitext(file.name)[1].lower()
58
 
59
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
60
  temp_file_path = temp_file.name
 
61
 
62
  if file_extension == ".txt":
63
- with open(temp_file_path, "wb") as f:
64
- f.write(file.content)
65
-
66
- text_loader = TextFileLoader(temp_file_path)
67
- documents = text_loader.load_documents()
68
- texts = text_splitter.split_texts(documents)
69
 
70
  elif file_extension == ".pdf":
71
  pdf_document = fitz.open(temp_file_path)
 
56
 
57
  file_extension = os.path.splitext(file.name)[1].lower()
58
 
59
+ with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file_extension) as temp_file:
60
  temp_file_path = temp_file.name
61
+ temp_file.write(file.content)
62
 
63
  if file_extension == ".txt":
64
+ with open(temp_file_path, "r", encoding="utf-8") as f:
65
+ text_loader = TextFileLoader(temp_file_path)
66
+ documents = text_loader.load_documents()
67
+ texts = text_splitter.split_texts(documents)
 
 
68
 
69
  elif file_extension == ".pdf":
70
  pdf_document = fitz.open(temp_file_path)