zmbfeng committed on
Commit
cd77918
1 Parent(s): d792040

figures list displayed

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  import cv2
6
  import os
7
  import io
 
8
  import shutil
9
 
10
  import time
@@ -112,7 +113,7 @@ if 'page_count' in st.session_state:
112
 
113
  st.session_state.color_image_list = []
114
  st.session_state.gray_image_np_list = []
115
- pdf_figures_image_list=[]
116
  pdf_tables_image_list=[]
117
  st.session_state.pdf_text_list=[]
118
 
@@ -133,7 +134,7 @@ if 'page_count' in st.session_state:
133
  print("index="+str(index))
134
 
135
  figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
136
- pdf_figures_image_list.append(figures_image_list)
137
  pdf_tables_image_list.append(tables_image_list)
138
  st.session_state.pdf_text_list.append(text)
139
  st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
@@ -151,8 +152,17 @@ if 'page_count' in st.session_state:
151
  data=string_buffer.getvalue(),
152
  file_name=txt_file_path,
153
  mime="text/plain")
154
- for index,pdf_text in enumerate(st.session_state.pdf_text_list):
155
- st.write(f"Page {index+1} \n\n {pdf_text}\n")
 
 
 
 
 
 
 
 
 
156
 
157
  # for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list[0:5], start=0):
158
  # print("index="+str(index))
 
5
  import cv2
6
  import os
7
  import io
8
+ from PIL import Image
9
  import shutil
10
 
11
  import time
 
113
 
114
  st.session_state.color_image_list = []
115
  st.session_state.gray_image_np_list = []
116
+ st.session_state.pdf_figures_image_list=[]
117
  pdf_tables_image_list=[]
118
  st.session_state.pdf_text_list=[]
119
 
 
134
  print("index="+str(index))
135
 
136
  figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
137
+ st.session_state.pdf_figures_image_list.append(figures_image_list)
138
  pdf_tables_image_list.append(tables_image_list)
139
  st.session_state.pdf_text_list.append(text)
140
  st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
 
152
  data=string_buffer.getvalue(),
153
  file_name=txt_file_path,
154
  mime="text/plain")
155
+ # st.image(Image.fromarray(bgr_image))
156
+ # for index,pdf_text in enumerate(st.session_state.pdf_text_list):
157
+ for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
158
+ st.write(f"Page {index+1} \n\n {st.session_state.pdf_text_list[index]}\n")
159
+ if not st.session_state.pdf_figures_image_list[index]:
160
+ st.write("no figures")
161
+ else:
162
+ for pdf_figure_text_image in st.session_state.pdf_figures_image_list[index]:
163
+ st.write(pdf_figure_text_image[0])
164
+ st.image(Image.fromarray(pdf_figure_text_image[1]))
165
+
166
 
167
  # for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list[0:5], start=0):
168
  # print("index="+str(index))