Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
aa1d5d7
1
Parent(s):
9270790
mere
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ def pdf_to_text_with_layout(pdf_file):
|
|
14 |
for page_num in range(doc.page_count):
|
15 |
page = doc.load_page(page_num)
|
16 |
text.append(page.get_text("text"))
|
17 |
-
return \"
|
18 |
def clean_text(text):
|
19 |
# Remove non-ASCII and control characters
|
20 |
return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
|
|
|
14 |
for page_num in range(doc.page_count):
|
15 |
page = doc.load_page(page_num)
|
16 |
text.append(page.get_text("text"))
|
17 |
+
return "\n".join(text)
|
18 |
def clean_text(text):
|
19 |
# Remove non-ASCII and control characters
|
20 |
return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
|