Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
28cc4a3
1
Parent(s):
507a7c5
bug12
Browse files
app.py
CHANGED
@@ -16,8 +16,8 @@ def pdf_to_text_with_layout(pdf_file):
|
|
16 |
return "\n".join(text)
|
17 |
|
18 |
def clean_text(text):
|
19 |
-
# Remove non-
|
20 |
-
return
|
21 |
|
22 |
def text_to_word_with_formatting(text, word_path):
|
23 |
doc = Document()
|
|
|
16 |
return "\n".join(text)
|
17 |
|
18 |
def clean_text(text):
|
19 |
+
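# Keep only printable characters in the Basic Multilingual Plane (drops control characters, including newlines and tabs, and code points above U+FFFF; non-ASCII printable characters are kept)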
|
20 |
+
return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
|
21 |
|
22 |
def text_to_word_with_formatting(text, word_path):
|
23 |
doc = Document()
|