# This configuration prepares the ICDAR13 857 and 1015 # version, and uses ICDAR13 1015 version by default. # You may uncomment the lines if you want to you the original version, # which contains 1095 samples. # You can check out the generated base config and use the 857 # version by using its corresponding config variables in your model. data_root = 'data/icdar2013' cache_path = 'data/cache' train_preparer = dict( obtainer=dict( type='NaiveDataObtainer', cache_path=cache_path, files=[ dict( url='https://rrc.cvc.uab.es/downloads/' 'Challenge2_Training_Task3_Images_GT.zip', save_name='ic13_textrecog_train_img_gt.zip', md5='6f0dbc823645968030878df7543f40a4', content=['image'], mapping=[ # ['ic13_textrecog_train_img_gt/gt.txt', # 'annotations/train.txt'], ['ic13_textrecog_train_img_gt', 'textrecog_imgs/train'] ]), dict( url='https://download.openmmlab.com/mmocr/data/1.x/recog/' 'icdar_2013/train_labels.json', save_name='ic13_train_labels.json', md5='008fcd0056e72c4cf3064fb4d1fce81b', content=['annotation'], mapping=[['ic13_train_labels.json', 'textrecog_train.json']]), ])) # Note that we offer two versions of test set annotations as follows.Please # choose one of them to download and comment the other. By default, we use the # second one. # 1. The original official annotation, which contains 1095 test # samples. # Uncomment the test_preparer if you want to use the original 1095 version. # test_preparer = dict( # obtainer=dict( # type='NaiveDataObtainer', # cache_path=cache_path, # files=[ # dict( # url='https://rrc.cvc.uab.es/downloads/' # 'Challenge2_Test_Task3_Images.zip', # save_name='ic13_textrecog_test_img.zip', # md5='3206778eebb3a5c5cc15c249010bf77f', # split=['test'], # content=['image'], # mapping=[['ic13_textrecog_test_img', # 'textrecog_imgs/test']]), # dict( # url='https://rrc.cvc.uab.es/downloads/' # 'Challenge2_Test_Task3_GT.txt', # save_name='ic13_textrecog_test_gt.txt', # md5='2634060ed8fe6e7a4a9b8d68785835a1', # split=['test'], # content=['annotation'], # mapping=[[ # 'ic13_textrecog_test_gt.txt', 'annotations/test.txt' # ]]), # noqa # # The 857 version further pruned words shorter than 3 characters. # dict( # url='https://download.openmmlab.com/mmocr/data/1.x/recog/' # 'icdar_2013/textrecog_test_857.json', # save_name='textrecog_test_857.json', # md5='3bed3985b0c51a989ad4006f6de8352b', # split=['test'], # content=['annotation'], # ), # ]), # gatherer=dict(type='MonoGatherer', ann_name='test.txt'), # parser=dict( # type='ICDARTxtTextRecogAnnParser', separator=', ', # format='img, text'), # noqa # packer=dict(type='TextRecogPacker'), # dumper=dict(type='JsonDumper'), # ) # 2. The widely-used version for academic purpose, which filters # out words with non-alphanumeric characters. This version contains # 1015 test samples. test_preparer = dict( obtainer=dict( type='NaiveDataObtainer', cache_path=cache_path, files=[ dict( url='https://rrc.cvc.uab.es/downloads/' 'Challenge2_Test_Task3_Images.zip', save_name='ic13_textrecog_test_img.zip', md5='3206778eebb3a5c5cc15c249010bf77f', split=['test'], content=['image'], mapping=[['ic13_textrecog_test_img', 'textrecog_imgs/test']]), dict( url='https://download.openmmlab.com/mmocr/data/1.x/recog/' 'icdar_2013/textrecog_test_1015.json', save_name='textrecog_test.json', md5='68fdd818f63df8b93dc952478952009a', split=['test'], content=['annotation'], ), # The 857 version further pruned words shorter than 3 characters. dict( url='https://download.openmmlab.com/mmocr/data/1.x/recog/' 'icdar_2013/textrecog_test_857.json', save_name='textrecog_test_857.json', md5='3bed3985b0c51a989ad4006f6de8352b', split=['test'], content=['annotation'], ), ])) config_generator = dict( type='TextRecogConfigGenerator', test_anns=[ dict(ann_file='textrecog_test.json'), dict(dataset_postfix='857', ann_file='textrecog_test_857.json') ])