Spaces:
Sleeping
Sleeping
figures list displayed
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import numpy as np
|
|
5 |
import cv2
|
6 |
import os
|
7 |
import io
|
|
|
8 |
import shutil
|
9 |
|
10 |
import time
|
@@ -112,7 +113,7 @@ if 'page_count' in st.session_state:
|
|
112 |
|
113 |
st.session_state.color_image_list = []
|
114 |
st.session_state.gray_image_np_list = []
|
115 |
-
pdf_figures_image_list=[]
|
116 |
pdf_tables_image_list=[]
|
117 |
st.session_state.pdf_text_list=[]
|
118 |
|
@@ -133,7 +134,7 @@ if 'page_count' in st.session_state:
|
|
133 |
print("index="+str(index))
|
134 |
|
135 |
figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
|
136 |
-
pdf_figures_image_list.append(figures_image_list)
|
137 |
pdf_tables_image_list.append(tables_image_list)
|
138 |
st.session_state.pdf_text_list.append(text)
|
139 |
st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
|
@@ -151,8 +152,17 @@ if 'page_count' in st.session_state:
|
|
151 |
data=string_buffer.getvalue(),
|
152 |
file_name=txt_file_path,
|
153 |
mime="text/plain")
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
# for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list[0:5], start=0):
|
158 |
# print("index="+str(index))
|
|
|
5 |
import cv2
|
6 |
import os
|
7 |
import io
|
8 |
+
from PIL import Image
|
9 |
import shutil
|
10 |
|
11 |
import time
|
|
|
113 |
|
114 |
st.session_state.color_image_list = []
|
115 |
st.session_state.gray_image_np_list = []
|
116 |
+
st.session_state.pdf_figures_image_list=[]
|
117 |
pdf_tables_image_list=[]
|
118 |
st.session_state.pdf_text_list=[]
|
119 |
|
|
|
134 |
print("index="+str(index))
|
135 |
|
136 |
figures_image_list,tables_image_list,text=utils.gray_pdf_image_np_to_text(index,gray_pdf_image_np, debug=True)
|
137 |
+
st.session_state.pdf_figures_image_list.append(figures_image_list)
|
138 |
pdf_tables_image_list.append(tables_image_list)
|
139 |
st.session_state.pdf_text_list.append(text)
|
140 |
st.session_state.extracted_text=st.session_state.extracted_text+f"<Page {index+1} start>\n" + text + f"\n<Page {index+1} end>\n>"
|
|
|
152 |
data=string_buffer.getvalue(),
|
153 |
file_name=txt_file_path,
|
154 |
mime="text/plain")
|
155 |
+
# st.image(Image.fromarray(bgr_image))
|
156 |
+
# for index,pdf_text in enumerate(st.session_state.pdf_text_list):
|
157 |
+
for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list):
|
158 |
+
st.write(f"Page {index+1} \n\n {st.session_state.pdf_text_list[index]}\n")
|
159 |
+
if not st.session_state.pdf_figures_image_list[index]:
|
160 |
+
st.write("no figures")
|
161 |
+
else:
|
162 |
+
for pdf_figure_text_image in st.session_state.pdf_figures_image_list[index]:
|
163 |
+
st.write(pdf_figure_text_image[0])
|
164 |
+
st.image(Image.fromarray(pdf_figure_text_image[1]))
|
165 |
+
|
166 |
|
167 |
# for index, gray_pdf_image_np in enumerate(st.session_state.gray_image_np_list[0:5], start=0):
|
168 |
# print("index="+str(index))
|