Spaces:
Sleeping
Sleeping
File size: 2,404 Bytes
24c4def 09efc91 24c4def 55d9644 24c4def |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os

import cv2
import numpy as np
from PIL import Image

# import sys
# sys.path.append("pipeline/mmocr")
# from mmocr.apis.inferencers import MMOCRInferencer
from pipeline.mmocr.mmocr.apis.inferencers import MMOCRInferencer
# BUILD MMOCR
class MAERec:
    """Scene-text OCR wrapper around an ``MMOCRInferencer``.

    The inferencer is built once in ``__init__`` from a ``dbnetpp`` text
    detection config/checkpoint and a ``maerec`` text recognition
    config/checkpoint, and is reused by every call to :meth:`execute`.
    """

    # Directory the detection visualizations were originally hard-coded to;
    # kept as the default so existing callers see no change.
    DEFAULT_OUTPUT_DIR = "/home/wcx/wcx/Union14M/results"

    def __init__(self, output_dir=DEFAULT_OUTPUT_DIR):
        """Build the MMOCR inferencer.

        Args:
            output_dir (str, optional): Directory that ``execute`` writes
                the visualization image to when the detector is used.
                Defaults to ``DEFAULT_OUTPUT_DIR``.
        """
        self.output_dir = output_dir
        self.mmocr_inferencer = MMOCRInferencer(
            "pipeline/mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py",
            "models/dbnetpp.pth",
            "pipeline/mmocr/configs/textrecog/maerec/maerec_b_union14m.py",
            "models/maerec_b.pth",
        )
        # device="cuda:0"

    def execute(self, image_path, use_detector=False):
        """Run MMOCR on a single image file.

        Args:
            image_path (str): Path of the image to read.
            use_detector (bool, optional): When True the inferencer runs in
                ``'det_rec'`` mode (detection followed by recognition);
                otherwise it runs in ``'rec'`` mode on the whole image.
                Defaults to False.

        Returns:
            tuple: ``(visualization, text)``.

            * ``'rec'`` mode: ``(None, text)`` where ``text`` is the first
              recognized string followed by one space (the trailing space
              is what earlier versions returned and is kept for callers).
            * ``'det_rec'`` mode: ``("Done", text)`` where ``text`` holds one
              ``"<recognized text>: <polygon>"`` line per detected region.
              The visualization is written to ``self.output_dir`` under the
              input file's base name.
        """
        # Image.open is lazy; convert() forces the pixel data to load, so the
        # file handle can be released as soon as the array has been built.
        with Image.open(image_path) as data:
            img = np.array(data.convert("RGB"))

        mode = 'det_rec' if use_detector else 'rec'
        self.mmocr_inferencer.mode = mode
        result = self.mmocr_inferencer(img, return_vis=True)
        prediction = result['predictions'][0]

        if not use_detector:
            rec_text = prediction['rec_texts'][0]
            # NOTE: trailing space preserved for backward compatibility.
            return None, f'{rec_text} '

        det_results = []
        for rec_text, det_polygon in zip(prediction['rec_texts'],
                                         prediction['det_polygons']):
            # Polygon coordinates are truncated to integers for readability.
            det_polygon = np.array(det_polygon).astype(np.int32).tolist()
            det_results.append(f'{rec_text}: {det_polygon}')

        # OpenCV writes images in BGR channel order, so the RGB
        # visualization is converted before saving.
        visualization = cv2.cvtColor(
            np.array(result['visualization'][0]), cv2.COLOR_RGB2BGR)
        out_path = os.path.join(self.output_dir, os.path.basename(image_path))
        cv2.imwrite(out_path, visualization)

        # Return every detection line. The previous `[0][6:]` slice was meant
        # to strip the "pred: " prefix of the recognition-only output and, in
        # this mode, discarded all but a truncated first line.
        return "Done", '\n'.join(det_results)
if __name__ == '__main__':
    # Smoke test: run recognition-only OCR on one image and print the result.
    # An optional first command-line argument overrides the sample image path.
    import sys

    image_path = (
        sys.argv[1] if len(sys.argv) > 1
        else "/newdisk3/wcx/MLLM/text-to-image/dalle3/582.jpg"
    )
    scene_text_model = MAERec()
    vis, res = scene_text_model.execute(image_path)
    print(vis)
    print(res)