Spaces:
Sleeping
Sleeping
Propagate the figures list and tables list up from utils.py to app.py
Browse files
app.py
CHANGED
@@ -112,6 +112,10 @@ if 'page_count' in st.session_state:
|
|
112 |
|
113 |
st.session_state.color_image_list = []
|
114 |
st.session_state.gray_image_np_list = []
|
|
|
|
|
|
|
|
|
115 |
for page_number in range(st.session_state.num_pages_to_extract):
|
116 |
image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
|
117 |
st.session_state.color_image_list.append(image[0])
|
@@ -127,7 +131,10 @@ if 'page_count' in st.session_state:
|
|
127 |
for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
|
128 |
print("index="+str(index))
|
129 |
|
130 |
-
text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
|
|
|
|
|
|
|
131 |
st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
|
132 |
# st.write(text)
|
133 |
# print(text)
|
|
|
112 |
|
113 |
st.session_state.color_image_list = []
|
114 |
st.session_state.gray_image_np_list = []
|
115 |
+
pdf_figures_image_list=[]
|
116 |
+
pdf_tables_image_list=[]
|
117 |
+
pdf_text_list=[]
|
118 |
+
|
119 |
for page_number in range(st.session_state.num_pages_to_extract):
|
120 |
image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
|
121 |
st.session_state.color_image_list.append(image[0])
|
|
|
131 |
for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
|
132 |
print("index="+str(index))
|
133 |
|
134 |
+
figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
|
135 |
+
pdf_figures_image_list.append(figures_image_list)
|
136 |
+
pdf_tables_image_list.append(tables_image_list)
|
137 |
+
pdf_text_list.append(text)
|
138 |
st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
|
139 |
# st.write(text)
|
140 |
# print(text)
|
utils.py
CHANGED
@@ -420,6 +420,6 @@ def gray_pdf_image_np_to_text(image_index,gray_pdf_image_np, debug=False):
|
|
420 |
if text == "error":
|
421 |
return("error")
|
422 |
else:
|
423 |
-
return text
|
424 |
else:
|
425 |
-
return text
|
|
|
420 |
if text == "error":
|
421 |
return("error")
|
422 |
else:
|
423 |
+
return figures_image_list,tables_image_list,text
|
424 |
else:
|
425 |
+
return figures_image_list,tables_image_list,text
|