# MAERec-Gradio / configs / textdet / maskrcnn / _base_mask-rcnn_resnet50_fpn.py
# Mountchicken's picture
# Upload 704 files
# 9bf4bd7
# raw
# history blame
# 1.73 kB
# Model section of a Mask R-CNN based text-detection config: the detector
# settings are taken from the base config listed below, adjusted, and then
# placed inside a wrapper entry named `model`.
_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']
# NOTE(review): `_base_` is written as a list above, yet `.pop` is called with
# the string key 'model'. This only works if the config loader replaces
# `_base_` with a mapping of the inherited settings before this line runs
# (presumably MMEngine-style config inheritance -- confirm). Executed as plain
# Python, this line raises TypeError.
mask_rcnn = _base_.pop('model')
# Adapt Mask R-CNN model to OCR task
# NOTE(review): whether `update` merges these nested dicts into the inherited
# values or replaces the top-level keys outright depends on the type of
# `mask_rcnn`, which is not visible in this file -- confirm.
mask_rcnn.update(
dict(
# Turn off `pad_mask` in the data preprocessor.
data_preprocessor=dict(pad_mask=False),
rpn_head=dict(
anchor_generator=dict(
# One anchor scale and five aspect ratios spanning 0.17 to 7.46
# (presumably chosen to cover elongated text regions -- confirm).
scales=[4], ratios=[0.17, 0.44, 1.13, 2.90, 7.46])),
roi_head=dict(
# Both the box head and the mask head are set to a single class.
bbox_head=dict(num_classes=1),
mask_head=dict(num_classes=1),
)))
# The adapted detector settings are passed as `cfg` to an 'MMDetWrapper' entry
# with text_repr_type='poly'.
model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)
# Training-time transform sequence, listed in the order the entries appear.
# Each entry names a transform via its 'type' key; the remaining keys are the
# arguments recorded for that transform. The transforms themselves are
# implemented outside this file.
train_pipeline = [
    # 1. Image loading.
    {
        'type': 'LoadImageFromFile',
        'color_type': 'color_ignore_orientation',
    },
    # 2. Annotation loading: polygons, boxes and labels are all requested.
    {
        'type': 'LoadOCRAnnotations',
        'with_polygon': True,
        'with_bbox': True,
        'with_label': True,
    },
    # 3. 'ColorJitter' op routed through the 'TorchVisionWrapper' entry.
    {
        'type': 'TorchVisionWrapper',
        'op': 'ColorJitter',
        'brightness': 32.0 / 255,
        'saturation': 0.5,
        'contrast': 0.5,
    },
    # 4. Random resize around a 640x640 scale, ratio range 1.0 to 4.125,
    #    with keep_ratio enabled.
    {
        'type': 'RandomResize',
        'scale': (640, 640),
        'ratio_range': (1.0, 4.125),
        'keep_ratio': True,
    },
    # 5. Random flip with probability 0.5.
    {'type': 'RandomFlip', 'prob': 0.5},
    # 6. Random crop with a 640x640 target size.
    {'type': 'TextDetRandomCrop', 'target_size': (640, 640)},
    # 7. 'MMOCR2MMDet' entry with poly2mask enabled (presumably converts the
    #    polygon annotations into the mask format the detector consumes --
    #    confirm against the transform's implementation).
    {'type': 'MMOCR2MMDet', 'poly2mask': True},
    # 8. Packing step, with the meta keys to carry along.
    {
        'type': 'mmdet.PackDetInputs',
        'meta_keys': (
            'img_path',
            'ori_shape',
            'img_shape',
            'flip',
            'scale_factor',
            'flip_direction',
        ),
    },
]
# Test-time transform sequence, listed in the order the entries appear.
# Each entry names a transform via its 'type' key; the remaining keys are the
# arguments recorded for that transform. The transforms themselves are
# implemented outside this file.
test_pipeline = [
    # 1. Image loading.
    {
        'type': 'LoadImageFromFile',
        'color_type': 'color_ignore_orientation',
    },
    # 2. Resize to a 1920x1920 scale with keep_ratio enabled.
    {'type': 'Resize', 'scale': (1920, 1920), 'keep_ratio': True},
    # 3. Annotation loading: polygons, boxes and labels are all requested.
    #    NOTE(review): this entry sits *after* the resize step, unlike in the
    #    training pipeline -- presumably so the ground truth is not rescaled;
    #    confirm before reordering.
    {
        'type': 'LoadOCRAnnotations',
        'with_polygon': True,
        'with_bbox': True,
        'with_label': True,
    },
    # 4. Packing step, with the meta keys to carry along.
    {
        'type': 'PackTextDetInputs',
        'meta_keys': (
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
        ),
    },
]