|
import os, io |
|
from paddleocr import PaddleOCR, draw_ocr,PPStructure |
|
from ppocr.utils.visual import draw_ser_results |
|
from PIL import Image |
|
import gradio as gr |
|
|
|
|
|
def inference__ppocr(img_path):
    """Run PaddleOCR on an image file and return the annotated image and text.

    Args:
        img_path: Path of the image to process. It is passed to
            ``PaddleOCR.ocr`` and opened again with PIL for drawing.

    Returns:
        A ``(annotated, text)`` tuple: ``annotated`` is whatever ``draw_ocr``
        returns for the first page, and ``text`` is the recognised strings of
        that page joined by newlines (empty when nothing was recognised).

    Raises:
        gr.Error: If ``img_path`` is ``None`` (no image was supplied).
    """
    if img_path is None:
        # Surface a readable message in the UI instead of an opaque failure
        # deep inside the OCR pipeline.
        raise gr.Error("Please upload an image before processing.")

    ocr = PaddleOCR(
        rec_char_dict_path='zhtw_common_dict.txt',
        use_gpu=False,
        rec_image_shape="3, 48, 320",
    )

    result = ocr.ocr(img_path)

    # Debug dump of every recognised line on every page.
    # NOTE(review): some PaddleOCR releases report a page without any
    # detected text as None, hence the `or []` guards — confirm against the
    # installed version.
    for page in result or []:
        for line in page or []:
            print(line)

    # Only the first page is visualised and returned.
    lines = (result[0] if result else None) or []

    image = Image.open(img_path).convert('RGB')
    boxes = [line[0] for line in lines]
    txts = [line[1][0] if line[1] else '' for line in lines]
    # Guard scores the same way as txts so the three lists stay aligned.
    scores = [line[1][1] if line[1] else 0.0 for line in lines]
    annotated = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")

    return annotated, "\n".join(txts)
|
|
|
|
|
def inference__ppstructure(img_path):
    """Run the PPStructure pipeline on an image and return its labelled fields.

    Elements whose transcription contains any of the boilerplate substrings
    listed in ``samples`` are relabelled as ``'O'`` (``pred_id`` 0) before the
    results are drawn and exported.

    Args:
        img_path: Path of the image to process; also passed to
            ``draw_ser_results`` for visualisation.

    Returns:
        A ``(image, text)`` tuple: ``image`` is the value returned by
        ``draw_ser_results`` and ``text`` contains one ``"pred:transcription"``
        line for every element whose label is not ``'O'``.

    Raises:
        gr.Error: If ``img_path`` is ``None`` (no image was supplied).
    """
    if img_path is None:
        # Clicking "Process" with no upload hands the callback None; report
        # that clearly instead of failing inside the pipeline.
        raise gr.Error("Please upload an image before processing.")

    ppstructure = PPStructure(
        rec_char_dict_path='zhtw_common_dict.txt',
        use_gpu=False,
        rec_image_shape="3, 48, 320",
        ser_dict_path='ppocr/utils/dict/kie/clinical_class_list.txt',
    )

    # Substrings of form boilerplate (headers, field captions, footers) that
    # must never be reported as extracted values.
    samples = ['病歷','身份','姓名',' Medical','No.','Name','性別','中華民國','002480','身分','Attending','M.D','ID','Medical','by','續上頁診斷書內容','出生地','列印時間','以上','年齡','特予']

    # NOTE(review): assumes the pipeline returns a 2-tuple whose first item
    # is a list of dicts with 'transcription', 'pred' and 'pred_id' keys, as
    # used below — confirm against the installed PPStructure build.
    result, _ = ppstructure(img_path)

    for element in result:
        if any(sample in element['transcription'] for sample in samples):
            element['pred_id'] = 0
            element['pred'] = 'O'

    image = draw_ser_results(img_path, result, font_path='./simfang.ttf')

    labelled = [
        f"{element['pred']}:{element['transcription']}"
        for element in result
        if element['pred'] != 'O'
    ]
    return image, "\n".join(labelled)
|
|
|
# Gradio front end: a header, usage notes, then one row with the upload
# column and the results column.
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>Form Understanding Project - Certificate of Diagnosis</h1>")
    gr.Markdown("Support languages: Traditional Chinese 🇹🇼")
    gr.Markdown("version:0.1")

    # NOTE(review): the text is indented with the block for readability;
    # this assumes gr.Markdown strips the common leading whitespace —
    # confirm the rendering with the installed Gradio version.
    gr.Markdown("""
    ## Usage Description

    This interface is designed to process and extract information from Certificates of Diagnosis.

    To use this tool:

    1. Upload an image of a Certificate of Diagnosis using the 'Upload Image' button.
    2. Click 'Process' to extract information from the uploaded certificate.
    3. The processed image and extracted text will be displayed on the right.
    """)

    with gr.Row():
        # Input column: image upload and the trigger button.
        with gr.Column():
            gr.Markdown("#### Input Image")
            # type='filepath' makes the callback receive a path string.
            image_input = gr.Image(type='filepath', label='Upload Image')
            submit_btn = gr.Button("Process")

        # Output column: annotated image and extracted text.
        with gr.Column():
            gr.Markdown("#### Processed Image")
            image_output = gr.Image(type="pil", label="Processed Image")
            gr.Markdown("#### Extracted Text")
            text_output = gr.Textbox(label="Extracted Text")

    # Wire the button to the PPStructure pipeline; its (image, text) return
    # value fills the two output components in order.
    submit_btn.click(
        inference__ppstructure,
        inputs=[image_input],
        outputs=[image_output, text_output],
    )

demo.launch(debug=True)