rizgiak committed on
Commit
10889be
·
1 Parent(s): 1a53410

Add Japanese language support, switch to PP-OCRv4, and fix several padding-related bugs

Browse files
Files changed (2) hide show
  1. app.py +48 -13
  2. test_pdf2img.py +16 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import string
3
  import random
4
  from collections import Counter
5
  from itertools import count, tee
 
6
 
7
  import cv2
8
  import matplotlib.pyplot as plt
@@ -14,7 +15,7 @@ from PIL import Image
14
  from transformers import DetrImageProcessor, TableTransformerForObjectDetection
15
  from paddleocr import PaddleOCR
16
 
17
- ocr = PaddleOCR(use_angle_cls=True, lang="en",use_gpu=False)
18
 
19
  st.set_option('deprecation.showPyplotGlobalUse', False)
20
  st.set_page_config(layout='wide')
@@ -28,6 +29,10 @@ table_detection_model = TableTransformerForObjectDetection.from_pretrained(
28
  table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
29
  "microsoft/table-transformer-structure-recognition")
30
 
 
 
 
 
31
 
32
  def PIL_to_cv(pil_img):
33
  return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
@@ -201,6 +206,32 @@ class TableExtractionPipeline():
201
  result.paste(pil_img, (left, top))
202
  return result
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  def plot_results_detection(self, c1, model, pil_img, prob, boxes,
205
  delta_xmin, delta_ymin, delta_xmax, delta_ymax):
206
  '''
@@ -213,7 +244,7 @@ class TableExtractionPipeline():
213
 
214
  for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
215
  cl = p.argmax()
216
- xmin, ymin, xmax, ymax = xmin - delta_xmin, ymin - delta_ymin, xmax + delta_xmax, ymax + delta_ymax
217
  ax.add_patch(
218
  plt.Rectangle((xmin, ymin),
219
  xmax - xmin,
@@ -238,8 +269,7 @@ class TableExtractionPipeline():
238
  cropped_img_list = []
239
 
240
  for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
241
-
242
- xmin, ymin, xmax, ymax = xmin - delta_xmin, ymin - delta_ymin, xmax + delta_xmax, ymax + delta_ymax
243
  cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
244
  cropped_img_list.append(cropped_img)
245
 
@@ -412,7 +442,8 @@ class TableExtractionPipeline():
412
 
413
  @st.cache
414
  def convert_df(self, df):
415
- return df.to_csv().encode('utf-8')
 
416
 
417
  def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
418
  max_rows: int):
@@ -456,15 +487,15 @@ class TableExtractionPipeline():
456
  csv = self.convert_df(df)
457
 
458
  try:
459
- numkey = df.iloc[0, 0]
460
- except:
461
  numkey = str(0)
462
 
463
- c3.download_button("Download table",
464
- csv,
465
- "file.csv",
466
- "text/csv",
467
- key='download-csv-' + numkey)
468
 
469
  return df
470
 
@@ -548,7 +579,11 @@ class TableExtractionPipeline():
548
 
549
  if __name__ == "__main__":
550
 
551
- img_name = st.file_uploader("Upload an image with table(s)")
 
 
 
 
552
  st1, st2, st3 = st.columns((1, 1, 1))
553
  TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
554
  TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)
 
3
  import random
4
  from collections import Counter
5
  from itertools import count, tee
6
+ import base64
7
 
8
  import cv2
9
  import matplotlib.pyplot as plt
 
15
  from transformers import DetrImageProcessor, TableTransformerForObjectDetection
16
  from paddleocr import PaddleOCR
17
 
18
+ ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=False, ocr_version='PP-OCRv4')
19
 
20
  st.set_option('deprecation.showPyplotGlobalUse', False)
21
  st.set_page_config(layout='wide')
 
29
  table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
30
  "microsoft/table-transformer-structure-recognition")
31
 
32
def reload_ocr(vlang):
    """Rebind the module-level ``ocr`` engine to a new PaddleOCR instance.

    Args:
        vlang: PaddleOCR language code forwarded as ``lang`` (the UI in this
            file offers ``'en'`` and ``'japan'``).

    A new engine is constructed on every call, regardless of which language
    the current engine was built for.
    """
    global ocr
    engine_options = {
        "use_angle_cls": True,
        "lang": vlang,
        "use_gpu": False,
        "ocr_version": 'PP-OCRv4',
    }
    ocr = PaddleOCR(**engine_options)
35
+
36
 
37
  def PIL_to_cv(pil_img):
38
  return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
 
206
  result.paste(pil_img, (left, top))
207
  return result
208
 
209
@staticmethod
def dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img):
    """Grow a bounding box by the given deltas plus 5% of its own size.

    Args:
        xmin, ymin, xmax, ymax: Box edges in pixel coordinates of ``pil_img``.
        delta_xmin, delta_ymin, delta_xmax, delta_ymax: Extra padding added
            outward on the corresponding edge.
        pil_img: Object exposing ``size`` as ``(width, height)``; used only
            for the clamping bounds.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of the padded box, with every edge
        clamped to ``[0, width]`` / ``[0, height]``.
    """
    # Padding proportional to the box itself, so large tables get more margin.
    pad_x = (xmax - xmin) * 0.05
    pad_y = (ymax - ymin) * 0.05

    w_img, h_img = pil_img.size

    def clamp(value, upper):
        # Clamp on BOTH sides: a box lying beyond the image would otherwise
        # produce inverted (min > max) or out-of-image coordinates.
        return min(max(value, 0), upper)

    doxmin = clamp(xmin - (delta_xmin + pad_x), w_img)
    doymin = clamp(ymin - (delta_ymin + pad_y), h_img)
    doxmax = clamp(xmax + (delta_xmax + pad_x), w_img)
    doymax = clamp(ymax + (delta_ymax + pad_y), h_img)

    return doxmin, doymin, doxmax, doymax
234
+
235
  def plot_results_detection(self, c1, model, pil_img, prob, boxes,
236
  delta_xmin, delta_ymin, delta_xmax, delta_ymax):
237
  '''
 
244
 
245
  for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
246
  cl = p.argmax()
247
+ xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
248
  ax.add_patch(
249
  plt.Rectangle((xmin, ymin),
250
  xmax - xmin,
 
269
  cropped_img_list = []
270
 
271
  for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
272
+ xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
 
273
  cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
274
  cropped_img_list.append(cropped_img)
275
 
 
442
 
443
  @st.cache
444
  def convert_df(self, df):
445
+ csv = df.to_csv(index=False, encoding='utf-8-sig') # utf-8-sig to handle BOM for Excel
446
+ return csv.encode('utf-8')
447
 
448
  def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
449
  max_rows: int):
 
487
  csv = self.convert_df(df)
488
 
489
  try:
490
+ numkey = str(df.iloc[0, 0])
491
+ except IndexError:
492
  numkey = str(0)
493
 
494
+ # Create a download link with filename and extension
495
+ filename = f"table_{numkey}.csv" # Adjust the filename as needed
496
+ b64_csv = base64.b64encode(csv).decode() # Encode CSV data to base64
497
+ href = f'<a href="data:file/csv;base64,{b64_csv}" download="{filename}">Download {filename}</a>'
498
+ c3.markdown(href, unsafe_allow_html=True)
499
 
500
  return df
501
 
 
579
 
580
  if __name__ == "__main__":
581
 
582
+ st_up, st_lang = st.columns((1, 1))
583
+ img_name = st_up.file_uploader("Upload an image with table(s)")
584
+ lang = st_lang.selectbox('Language', ('en', 'japan'))
585
+ reload_ocr(lang)
586
+
587
  st1, st2, st3 = st.columns((1, 1, 1))
588
  TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
589
  TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)
test_pdf2img.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Render a page range of a PDF to JPEG files with pdf2image."""
import os
from pdf2image import convert_from_path

# Source PDF; the JPEGs are written next to it using its base name.
pdf_path = 'test.pdf'

# Page range handed to pdf2image as first_page/last_page (here: page 14 only).
first = 14
last = 14

# Poppler location: the POPPLER_PATH environment variable wins; otherwise the
# original Windows install directory is used when it exists; otherwise None,
# which lets pdf2image look Poppler up on the system PATH.
_default_poppler_path = r"C:\poppler-23.07.0\Library\bin"
poppler_path = os.environ.get("POPPLER_PATH") or (
    _default_poppler_path if os.path.isdir(_default_poppler_path) else None
)

images = convert_from_path(pdf_path, dpi=300, first_page=first, last_page=last, poppler_path=poppler_path)

# Output names look like "<pdf base name>p<page number>.jpg".
image_path = os.path.splitext(pdf_path)[0]

for page_number, image in enumerate(images, start=first):
    image.save(f"{image_path}p{page_number}.jpg", 'JPEG')