kuroiikimono
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -178,7 +178,7 @@ This translation app is useful for people who want to translate something or wan
|
|
178 |
my_bar1 = progressbar1.progress(0)
|
179 |
|
180 |
from bs4 import BeautifulSoup
|
181 |
-
|
182 |
for index, page in enumerate(doc):
|
183 |
#page_text = page.extract_text()
|
184 |
#page_text = page.get_text(sort=True)
|
@@ -188,9 +188,7 @@ This translation app is useful for people who want to translate something or wan
|
|
188 |
|
189 |
for tag0 in soup.find_all("block"):
|
190 |
temp_y_posi = 0.0
|
191 |
-
|
192 |
for tag1 in tag0.find_all("line"):
|
193 |
-
line_text = ""
|
194 |
for tag2 in tag1.find_all("font"):
|
195 |
for tag3 in tag2.find_all("char"):
|
196 |
y_posi = tag3.get("y")
|
@@ -198,10 +196,9 @@ This translation app is useful for people who want to translate something or wan
|
|
198 |
page_text2 += "\n"
|
199 |
temp_y_posi = y_posi
|
200 |
page_text2 += tag3.get("c")
|
201 |
-
|
202 |
-
xml_block_line.write(line_text)
|
203 |
|
204 |
-
|
205 |
|
206 |
#for index, page in enumerate(doc.pages):
|
207 |
#for index, page in enumerate(doc):
|
|
|
178 |
my_bar1 = progressbar1.progress(0)
|
179 |
|
180 |
from bs4 import BeautifulSoup
|
181 |
+
|
182 |
for index, page in enumerate(doc):
|
183 |
#page_text = page.extract_text()
|
184 |
#page_text = page.get_text(sort=True)
|
|
|
188 |
|
189 |
for tag0 in soup.find_all("block"):
|
190 |
temp_y_posi = 0.0
|
|
|
191 |
for tag1 in tag0.find_all("line"):
|
|
|
192 |
for tag2 in tag1.find_all("font"):
|
193 |
for tag3 in tag2.find_all("char"):
|
194 |
y_posi = tag3.get("y")
|
|
|
196 |
page_text2 += "\n"
|
197 |
temp_y_posi = y_posi
|
198 |
page_text2 += tag3.get("c")
|
199 |
+
|
|
|
200 |
|
201 |
+
|
202 |
|
203 |
#for index, page in enumerate(doc.pages):
|
204 |
#for index, page in enumerate(doc):
|