zmbfeng committed on
Commit
f9cf3d0
1 Parent(s): 25c9941

Propagate the per-page figures list and tables list up to the caller

Browse files
Files changed (2) hide show
  1. app.py +8 -1
  2. utils.py +2 -2
app.py CHANGED
@@ -112,6 +112,10 @@ if 'page_count' in st.session_state:
112
 
113
  st.session_state.color_image_list = []
114
  st.session_state.gray_image_np_list = []
 
 
 
 
115
  for page_number in range(st.session_state.num_pages_to_extract):
116
  image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
117
  st.session_state.color_image_list.append(image[0])
@@ -127,7 +131,10 @@ if 'page_count' in st.session_state:
127
  for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
128
  print("index="+str(index))
129
 
130
- text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
 
 
 
131
  st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
132
  # st.write(text)
133
  # print(text)
 
112
 
113
  st.session_state.color_image_list = []
114
  st.session_state.gray_image_np_list = []
115
+ pdf_figures_image_list=[]
116
+ pdf_tables_image_list=[]
117
+ pdf_text_list=[]
118
+
119
  for page_number in range(st.session_state.num_pages_to_extract):
120
  image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
121
  st.session_state.color_image_list.append(image[0])
 
131
  for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
132
  print("index="+str(index))
133
 
134
+ figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
135
+ pdf_figures_image_list.append(figures_image_list)
136
+ pdf_tables_image_list.append(tables_image_list)
137
+ pdf_text_list.append(text)
138
  st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
139
  # st.write(text)
140
  # print(text)
utils.py CHANGED
@@ -420,6 +420,6 @@ def gray_pdf_image_np_to_text(image_index,gray_pdf_image_np, debug=False):
420
  if text == "error":
421
  return("error")
422
  else:
423
- return text
424
  else:
425
- return text
 
420
  if text == "error":
421
  return("error")
422
  else:
423
+ return figures_image_list,tables_image_list,text
424
  else:
425
+ return figures_image_list,tables_image_list,text