Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
8024e72
1
Parent(s):
53cccdd
update
Browse files
app.py
CHANGED
@@ -14,11 +14,11 @@ def pdf_to_text_with_layout(pdf_file):
|
|
14 |
for page_num in range(doc.page_count):
|
15 |
page = doc.load_page(page_num)
|
16 |
text.append(page.get_text("text"))
|
17 |
-
return "\n
|
18 |
|
19 |
def clean_text(text):
|
20 |
# Remove non-ASCII and control characters
|
21 |
-
return ''.join(c for c in text if c.
|
22 |
|
23 |
def text_to_word_with_formatting(text, word_path):
|
24 |
doc = Document()
|
|
|
14 |
for page_num in range(doc.page_count):
|
15 |
page = doc.load_page(page_num)
|
16 |
text.append(page.get_text("text"))
|
17 |
+
return "\n".join(text)
|
18 |
|
19 |
def clean_text(text):
|
20 |
# Remove non-ASCII and control characters
|
21 |
+
return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
|
22 |
|
23 |
def text_to_word_with_formatting(text, word_path):
|
24 |
doc = Document()
|