anmolsahai committed on
Commit
8024e72
1 Parent(s): 53cccdd
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -14,11 +14,11 @@ def pdf_to_text_with_layout(pdf_file):
14
  for page_num in range(doc.page_count):
15
  page = doc.load_page(page_num)
16
  text.append(page.get_text("text"))
17
- return "\n".join(text)
18
 
19
  def clean_text(text):
20
  # Remove non-ASCII and control characters
21
- return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
22
 
23
  def text_to_word_with_formatting(text, word_path):
24
  doc = Document()
 
14
  for page_num in range(doc.page_count):
15
  page = doc.load_page(page_num)
16
  text.append(page.get_text("text"))
17
+ return "\n".join(text)
18
 
19
  def clean_text(text):
20
  # Remove non-ASCII and control characters
21
+ return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
22
 
23
  def text_to_word_with_formatting(text, word_path):
24
  doc = Document()