Spaces:

Mountchicken
/

MAERec-Gradio

Running

MAERec-Gradio/configs/textdet/maskrcnn/_base_mask-rcnn_resnet50_fpn.py

Upload 704 files

9bf4bd7 almost 2 years ago

1.73 kB

	# Base text-detection config: a Mask R-CNN detector (taken from an
	# MMDetection base file) adapted for OCR and wrapped for use as `model`.
	#
	# The single base file is the Mask R-CNN R50-FPN model definition.
	# NOTE(review): the 'mmdet::' prefix presumably makes the config loader
	# resolve the path inside the mmdet package - confirm against the MMEngine
	# config documentation.
	_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']

	# Take the 'model' entry out of the base config and keep it under its own
	# name, so it can be edited here and nested inside the wrapper below.
	# NOTE(review): calling .pop('model') only works if the loader rebinds the
	# base variable to the loaded base config (a plain list would reject a
	# string index) - verify before restructuring these two statements.
	mask_rcnn = _base_.pop('model')
	# Adapt Mask R-CNN model to OCR task
	mask_rcnn.update(
	dict(
	# NOTE(review): presumably turns off mask padding in the preprocessor.
	data_preprocessor=dict(pad_mask=False),
	rpn_head=dict(
	anchor_generator=dict(
	# One anchor scale and five aspect ratios ranging from 0.17 to 7.46.
	scales=[4], ratios=[0.17, 0.44, 1.13, 2.90, 7.46])),
	roi_head=dict(
	# Both ROI heads are set to one class (presumably "text" - confirm).
	bbox_head=dict(num_classes=1),
	mask_head=dict(num_classes=1),
	)))

	# The adapted detector config is passed as `cfg` to the 'MMDetWrapper' type,
	# with text_repr_type set to 'poly'.
	model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)

	# Training-time transform sequence; the entries are listed in the order
	# they appear in the pipeline, so the order must be preserved.
	train_pipeline = [
	    # Image loading.
	    dict(
	        type='LoadImageFromFile',
	        color_type='color_ignore_orientation',
	    ),
	    # Annotation loading: polygons, boxes and labels are all requested.
	    dict(
	        type='LoadOCRAnnotations',
	        with_polygon=True,
	        with_bbox=True,
	        with_label=True,
	    ),
	    # Colour augmentation via the 'ColorJitter' op of 'TorchVisionWrapper'.
	    dict(
	        type='TorchVisionWrapper',
	        op='ColorJitter',
	        brightness=32.0 / 255,
	        saturation=0.5,
	        contrast=0.5,
	    ),
	    # Random rescale: base scale 640x640, ratio drawn from [1.0, 4.125],
	    # aspect ratio kept.
	    dict(
	        type='RandomResize',
	        scale=(640, 640),
	        ratio_range=(1.0, 4.125),
	        keep_ratio=True,
	    ),
	    # Random flip with probability 0.5.
	    dict(
	        type='RandomFlip',
	        prob=0.5,
	    ),
	    # Random crop with a 640x640 target size.
	    dict(
	        type='TextDetRandomCrop',
	        target_size=(640, 640),
	    ),
	    # Hand-over from MMOCR to MMDet data format with poly2mask enabled.
	    # NOTE(review): presumably converts polygons into masks for the mask
	    # head - confirm against the MMOCR2MMDet transform documentation.
	    dict(
	        type='MMOCR2MMDet',
	        poly2mask=True,
	    ),
	    # Final packing step; only the listed meta keys are requested.
	    dict(
	        type='mmdet.PackDetInputs',
	        meta_keys=(
	            'img_path',
	            'ori_shape',
	            'img_shape',
	            'flip',
	            'scale_factor',
	            'flip_direction',
	        ),
	    ),
	]

	# Test-time transform sequence; the entries are listed in the order they
	# appear in the pipeline, so the order must be preserved.
	test_pipeline = [
	    # Image loading.
	    dict(
	        type='LoadImageFromFile',
	        color_type='color_ignore_orientation',
	    ),
	    # Resize to a 1920x1920 scale with the aspect ratio kept.
	    dict(
	        type='Resize',
	        scale=(1920, 1920),
	        keep_ratio=True,
	    ),
	    # Annotations are listed after the resize step.
	    # NOTE(review): presumably so the ground truth is not rescaled along
	    # with the image - confirm before reordering.
	    dict(
	        type='LoadOCRAnnotations',
	        with_polygon=True,
	        with_bbox=True,
	        with_label=True,
	    ),
	    # Final packing step; only the listed meta keys are requested.
	    dict(
	        type='PackTextDetInputs',
	        meta_keys=(
	            'img_path',
	            'ori_shape',
	            'img_shape',
	            'scale_factor',
	        ),
	    ),
	]