Spaces:

sunnychenxiwang
/

EasyDetect

Sleeping

App Files Files Community

EasyDetect / pipeline /mmocr /dataset_zoo /icdar2013 /textrecog.py

sunnychenxiwang

Upload 1595 files

0b4516f verified over 1 year ago

raw

history blame

5.12 kB

	# This configuration prepares the ICDAR13 857 and 1015
	# versions, and uses the ICDAR13 1015 version by default.
	# You may uncomment the lines below if you want to use the original version,
	# which contains 1095 samples.
	# You can check out the generated base config and use the 857
	# version by using its corresponding config variables in your model.

	# Dataset root directory. It is not referenced again in this file, so it is
	# presumably read by the tooling that loads this config -- TODO confirm.
	data_root = 'data/icdar2013'
	# Cache directory, passed as ``cache_path`` to the ``NaiveDataObtainer``
	# entries of both ``train_preparer`` and ``test_preparer``.
	cache_path = 'data/cache'

	# Training-split preparer: a single obtainer that lists two files, the
	# image archive hosted on rrc.cvc.uab.es and a JSON label file hosted on
	# download.openmmlab.com.
	train_preparer = {
	    'obtainer': {
	        'type': 'NaiveDataObtainer',
	        'cache_path': cache_path,
	        'files': [
	            # Entry 1: archive declared as image content.
	            {
	                'url': ('https://rrc.cvc.uab.es/downloads/'
	                        'Challenge2_Training_Task3_Images_GT.zip'),
	                'save_name': 'ic13_textrecog_train_img_gt.zip',
	                'md5': '6f0dbc823645968030878df7543f40a4',
	                'content': ['image'],
	                'mapping': [
	                    # Disabled mapping for the gt.txt inside the archive
	                    # (presumably superseded by the JSON labels listed
	                    # below -- confirm before re-enabling):
	                    # ['ic13_textrecog_train_img_gt/gt.txt',
	                    #  'annotations/train.txt'],
	                    ['ic13_textrecog_train_img_gt', 'textrecog_imgs/train'],
	                ],
	            },
	            # Entry 2: label file declared as annotation content.
	            {
	                'url': ('https://download.openmmlab.com/mmocr/data/1.x/recog/'
	                        'icdar_2013/train_labels.json'),
	                'save_name': 'ic13_train_labels.json',
	                'md5': '008fcd0056e72c4cf3064fb4d1fce81b',
	                'content': ['annotation'],
	                'mapping': [
	                    ['ic13_train_labels.json', 'textrecog_train.json'],
	                ],
	            },
	        ],
	    },
	}

	# Note that we offer two versions of test set annotations as follows. Please
	# choose one of them to download and comment out the other. By default, we use
	# the second one.
	# 1. The original official annotation, which contains 1095 test
	# samples.

	# Uncomment the test_preparer if you want to use the original 1095 version.

	# test_preparer = dict(
	# obtainer=dict(
	# type='NaiveDataObtainer',
	# cache_path=cache_path,
	# files=[
	# dict(
	# url='https://rrc.cvc.uab.es/downloads/'
	# 'Challenge2_Test_Task3_Images.zip',
	# save_name='ic13_textrecog_test_img.zip',
	# md5='3206778eebb3a5c5cc15c249010bf77f',
	# split=['test'],
	# content=['image'],
	# mapping=[['ic13_textrecog_test_img',
	# 'textrecog_imgs/test']]),
	# dict(
	# url='https://rrc.cvc.uab.es/downloads/'
	# 'Challenge2_Test_Task3_GT.txt',
	# save_name='ic13_textrecog_test_gt.txt',
	# md5='2634060ed8fe6e7a4a9b8d68785835a1',
	# split=['test'],
	# content=['annotation'],
	# mapping=[[
	# 'ic13_textrecog_test_gt.txt', 'annotations/test.txt'
	# ]]), # noqa
	# # The 857 version further pruned words shorter than 3 characters.
	# dict(
	# url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
	# 'icdar_2013/textrecog_test_857.json',
	# save_name='textrecog_test_857.json',
	# md5='3bed3985b0c51a989ad4006f6de8352b',
	# split=['test'],
	# content=['annotation'],
	# ),
	# ]),
	# gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
	# parser=dict(
	# type='ICDARTxtTextRecogAnnParser', separator=', ',
	# format='img, text'), # noqa
	# packer=dict(type='TextRecogPacker'),
	# dumper=dict(type='JsonDumper'),
	# )

	# 2. The widely-used version for academic purpose, which filters
	# out words with non-alphanumeric characters. This version contains
	# 1015 test samples.
	# Test-split preparer (the 1015-sample variant described above): a single
	# obtainer that lists the test image archive plus two annotation files,
	# every entry tagged with split 'test'.
	test_preparer = {
	    'obtainer': {
	        'type': 'NaiveDataObtainer',
	        'cache_path': cache_path,
	        'files': [
	            # Test image archive hosted on rrc.cvc.uab.es.
	            {
	                'url': ('https://rrc.cvc.uab.es/downloads/'
	                        'Challenge2_Test_Task3_Images.zip'),
	                'save_name': 'ic13_textrecog_test_img.zip',
	                'md5': '3206778eebb3a5c5cc15c249010bf77f',
	                'split': ['test'],
	                'content': ['image'],
	                'mapping': [
	                    ['ic13_textrecog_test_img', 'textrecog_imgs/test'],
	                ],
	            },
	            # 1015-sample annotation file, saved as textrecog_test.json.
	            {
	                'url': ('https://download.openmmlab.com/mmocr/data/1.x/recog/'
	                        'icdar_2013/textrecog_test_1015.json'),
	                'save_name': 'textrecog_test.json',
	                'md5': '68fdd818f63df8b93dc952478952009a',
	                'split': ['test'],
	                'content': ['annotation'],
	            },
	            # The 857 version further pruned words shorter than 3 characters.
	            {
	                'url': ('https://download.openmmlab.com/mmocr/data/1.x/recog/'
	                        'icdar_2013/textrecog_test_857.json'),
	                'save_name': 'textrecog_test_857.json',
	                'md5': '3bed3985b0c51a989ad4006f6de8352b',
	                'split': ['test'],
	                'content': ['annotation'],
	            },
	        ],
	    },
	}

	# Config generator settings: two test annotation entries, the default
	# textrecog_test.json and the 857 file, which carries dataset postfix '857'.
	config_generator = {
	    'type': 'TextRecogConfigGenerator',
	    'test_anns': [
	        {'ann_file': 'textrecog_test.json'},
	        {'dataset_postfix': '857', 'ann_file': 'textrecog_test_857.json'},
	    ],
	}