rchrdgwr committed on
Commit
f4ce1d6
1 Parent(s): f2aaa42

allow pdfs

Browse files
Files changed (1) hide show
  1. app.py +25 -8
app.py CHANGED
@@ -52,16 +52,33 @@ text_splitter = CharacterTextSplitter()
52
 
53
  def process_text_file(file: AskFileResponse):
54
  import tempfile
 
 
 
 
55
 
56
  with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
57
  temp_file_path = temp_file.name
58
 
59
- with open(temp_file_path, "wb") as f:
60
- f.write(file.content)
61
-
62
- text_loader = TextFileLoader(temp_file_path)
63
- documents = text_loader.load_documents()
64
- texts = text_splitter.split_texts(documents)
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  return texts
66
 
67
 
@@ -72,8 +89,8 @@ async def on_chat_start():
72
  # Wait for the user to upload a file
73
  while files == None:
74
  files = await cl.AskFileMessage(
75
- content="Please upload a .txt or .pdf File file to begin!",
76
- accept=["text/plain"],
77
  max_size_mb=2,
78
  timeout=180,
79
  ).send()
 
52
 
53
  def process_text_file(file: AskFileResponse):
54
  import tempfile
55
+ import fitz
56
+ import os
57
+
58
+ file_extension = os.path.splitext(file.name)[1].lower()
59
 
60
  with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
61
  temp_file_path = temp_file.name
62
 
63
+ if file_extension == ".txt":
64
+ with open(temp_file_path, "wb") as f:
65
+ f.write(file.content)
66
+
67
+ text_loader = TextFileLoader(temp_file_path)
68
+ documents = text_loader.load_documents()
69
+ texts = text_splitter.split_texts(documents)
70
+
71
+ elif file_extension == ".pdf":
72
+ pdf_document = fitz.open(temp_file_path)
73
+ documents = []
74
+ for page_num in range(len(pdf_document)):
75
+ page = pdf_document.load_page(page_num)
76
+ text = page.get_text()
77
+ documents.append(text)
78
+ texts = text_splitter.split_texts(documents)
79
+ else:
80
+ raise ValueError("Unsupported file type")
81
+
82
  return texts
83
 
84
 
 
89
  # Wait for the user to upload a file
90
  while files == None:
91
  files = await cl.AskFileMessage(
92
+ content="Please upload a .txt or .pdf file to begin!",
93
+ accept=["text/plain", "pdf"],
94
  max_size_mb=2,
95
  timeout=180,
96
  ).send()