Spaces:
Sleeping
Sleeping
File size: 4,042 Bytes
0b4516f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os
import os.path as osp
from argparse import ArgumentParser
from functools import partial
import mmengine
from PIL import Image
from mmocr.utils import dump_ocr_data
def parse_args():
    """Parse the command-line arguments of the converter.

    Returns:
        argparse.Namespace: Namespace with ``root_path`` (str) and
        ``n_proc`` (int, 1 when the argument is omitted).
    """
    parser = ArgumentParser(description='Generate training and validation set '
                            'of OpenVINO annotations for Open '
                            'Images by cropping box image.')
    parser.add_argument(
        'root_path', help='Root dir containing images and annotations')
    # A positional argument only honours ``default`` when it is optional
    # (``nargs='?'``); without it argparse requires the value and the
    # default of 1 could never take effect.
    parser.add_argument(
        'n_proc',
        nargs='?',
        default=1,
        type=int,
        help='Number of processes to run')
    args = parser.parse_args()
    return args
def process_img(args, src_image_root, dst_image_root):
    """Crop every legible English text box of one image into its own file.

    Args:
        args (tuple): ``(img_idx, img_info, anns)`` packed into a single
            argument so the function can be mapped over a list of tasks.
            ``img_info['file_name']`` names the source image; every item of
            ``anns`` provides ``attributes`` and ``bbox`` (x, y, w, h).
        src_image_root (str): Directory containing the source image.
        dst_image_root (str): Directory the cropped images are written to.

    Returns:
        list[dict]: One ``{'file_name': ..., 'anno_info': [{'text': ...}]}``
        entry per saved crop, in annotation order.
    """
    # Dirty hack for multi-processing
    img_idx, img_info, anns = args
    labels = []
    src_img_path = osp.join(src_image_root, img_info['file_name'])
    # The context manager closes the source image even when cropping or
    # saving raises, so a failing task does not leak the file handle.
    with Image.open(src_img_path) as src_img:
        for ann_idx, ann in enumerate(anns):
            attrs = ann['attributes']
            # Ignore illegible or non-English words
            if not attrs['legible'] or attrs['language'] != 'english':
                continue
            # Only read the transcription of annotations that are kept.
            text_label = attrs['transcription']
            x, y, w, h = ann['bbox']
            # Round the box outwards to integer pixels and keep the origin
            # inside the image.
            x, y = max(0, math.floor(x)), max(0, math.floor(y))
            w, h = math.ceil(w), math.ceil(h)
            dst_img = src_img.crop((x, y, x + w, y + h))
            # ann_idx counts all annotations (including skipped ones), so
            # names stay stable regardless of how many boxes are filtered.
            dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            # Preserve JPEG quality
            dst_img.save(dst_img_path, qtables=src_img.quantization)
            labels.append({
                'file_name': dst_img_name,
                'anno_info': [{
                    'text': text_label
                }]
            })
    return labels
def convert_openimages(root_path,
                       dst_image_path,
                       dst_label_filename,
                       annotation_filename,
                       img_start_idx=0,
                       nproc=1):
    """Crop the text boxes of one annotation file and dump their labels.

    Args:
        root_path (str): Directory holding the source images and the
            annotation file; outputs are written below it as well.
        dst_image_path (str): Sub-directory of ``root_path`` that receives
            the cropped images (created if missing).
        dst_label_filename (str): Name of the label file written into
            ``root_path``.
        annotation_filename (str): Name of the annotation file inside
            ``root_path``. It must provide ``images`` and ``annotations``
            lists.
        img_start_idx (int): Offset added to each image's position when
            building the crop file names. Defaults to 0.
        nproc (int): Number of processes handed to
            ``mmengine.track_parallel_progress``. Defaults to 1.

    Returns:
        int: Number of images listed in the annotation file.

    Raises:
        FileNotFoundError: If the annotation file does not exist.
    """
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        # FileNotFoundError is a subclass of Exception, so callers that
        # caught the previous bare Exception keep working.
        raise FileNotFoundError(
            f'{annotation_path} not exists, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmengine.load(annotation_path)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root)

    # Group the annotations by the image they belong to.
    anns = {}
    for ann in annotation['annotations']:
        anns.setdefault(ann['image_id'], []).append(ann)

    # Images without any annotation get an empty list instead of raising
    # KeyError; they simply contribute no crops.
    tasks = [(img_idx + img_start_idx, img_info,
              anns.get(img_info['id'], []))
             for img_idx, img_info in enumerate(annotation['images'])]

    labels_list = mmengine.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)

    # Flatten the per-image label lists into one list for dumping.
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    dump_ocr_data(final_labels, dst_label_file, 'textrecog')
    return len(annotation['images'])
def main():
    """Convert the training splits and the validation split under root_path.

    The training splits ``1``, ``2``, ``5`` and ``f`` are converted one after
    another, followed by the validation set. The number of images processed
    so far is passed on as ``img_start_idx`` so that image indices used in
    the crop file names do not repeat between splits.
    """
    args = parse_args()
    root_path = args.root_path
    print('Processing training set...')
    num_train_imgs = 0
    for s in '125f':
        # convert_openimages returns the image count of this split only;
        # accumulate it so the next split starts after all previous ones
        # (plain assignment would reset the offset to the last split's size).
        num_train_imgs += convert_openimages(
            root_path=root_path,
            dst_image_path=f'image_{s}',
            dst_label_filename=f'train_{s}_label.json',
            annotation_filename=f'text_spotting_openimages_v5_train_{s}.json',
            img_start_idx=num_train_imgs,
            nproc=args.n_proc)
    print('Processing validation set...')
    convert_openimages(
        root_path=root_path,
        dst_image_path='image_val',
        dst_label_filename='val_label.json',
        annotation_filename='text_spotting_openimages_v5_validation.json',
        img_start_idx=num_train_imgs,
        nproc=args.n_proc)
    print('Finish')
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not on import.
    main()
|