Spaces:
Sleeping
Sleeping
File size: 5,489 Bytes
0b4516f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import tempfile
import unittest
from mmocr.utils import (check_integrity, get_md5, is_archive, list_files,
list_from_file, list_to_file)
# Flat fixtures for the plain-text round-trip tests: an empty case,
# whitespace-only entries, strings, ints, floats, mixed types and non-ASCII
# text.
lists = [
    [],
    [' '],
    ['\t'],
    ['a'],
    [1],
    [1.],
    ['a', 'b'],
    ['a', 1, 1.],
    [1, 1., 'a'],
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]

# Fixtures for the jsonl round-trip tests: one single-record case per entry of
# ``lists``, with the values stored under the ``text`` key. Each inner list is
# copied so the two fixtures never share mutable objects.
dicts = [[{'text': list(values)}] for values in lists]
def test_list_to_file():
    """Round-trip the fixtures through ``list_to_file`` and read them back.

    For every entry of ``lists`` the written file must contain, line by line,
    the ``str()`` form of each element. For every entry of ``dicts`` the
    records are serialised with ``json.dumps`` before writing, and the
    ``text`` field parsed from the first line must equal the original one.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Read back through a context manager so the handle is closed
            # before the temporary directory is cleaned up.
            with open(filename, encoding='utf-8') as f:
                lines2 = [line.rstrip('\r\n') for line in f]
            assert lines2 == [str(line) for line in lines]
        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            with open(filename, encoding='utf-8') as f:
                records = [json.loads(line.rstrip('\r\n')) for line in f]
            # Every line must carry a ``text`` field; only the first record
            # is compared against the fixture.
            texts = [record['text'] for record in records]
            assert texts[0] == list(lines[0]['text'])
def test_list_from_file():
    """Write the fixtures line by line and read them with ``list_from_file``.

    Each element of a fixture case is written on its own line using its
    default string formatting; the loaded result must then match the
    ``str()`` form of every element, in order.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # The txt cases run first, then the jsonl cases; both follow the same
        # write / load / compare steps and differ only in fixture and suffix.
        for suffix, cases in (('txt', lists), ('jsonl', dicts)):
            for index, case in enumerate(cases):
                filename = f'{tmpdirname}/{index}.{suffix}'
                with open(filename, 'w', encoding='utf-8') as f:
                    f.writelines(f'{entry}\n' for entry in case)
                loaded = list_from_file(filename, encoding='utf-8')
                expected = [str(entry) for entry in case]
                assert len(expected) == len(loaded)
                assert all(
                    want == got for want, got in zip(expected, loaded))
class TestIsArchive(unittest.TestCase):
    """Check which sample paths ``is_archive`` accepts and rejects."""

    def setUp(self) -> None:
        # Paths the test expects to be accepted.
        self.zip = 'data/annotations_123.zip'
        self.tar = 'data/img.abc.tar'
        self.targz = 'data/img12345_.tar.gz'
        # Paths the test expects to be rejected.
        self.rar = '/m/abc/t.rar'
        self.dir = '/a/b/c/'

    def test_is_archive(self):
        """zip, tar and tar.gz paths pass; the rar and directory paths fail."""
        # test zip, tar, tar.gz
        for accepted in (self.zip, self.tar, self.targz):
            self.assertTrue(is_archive(accepted))
        # test rar, dir
        for rejected in (self.rar, self.dir):
            self.assertFalse(is_archive(rejected))
class TestCheckIntegrity(unittest.TestCase):
    """Exercise ``check_integrity`` with matching, wrong and absent md5."""

    def setUp(self) -> None:
        # Image files are used instead of text files: the md5 of a text file
        # differs between platforms because of CR / CRLF line endings.
        self.file1 = ('tests/data/det_toy_dataset/imgs/test/img_2.jpg',
                      '52b28b5dfc92d9027e70ec3ff95d8702')
        self.file2 = ('tests/data/det_toy_dataset/imgs/test/img_1.jpg',
                      'abc123')
        self.file3 = ('abc/abc.jpg', 'abc123')

    def test_check_integrity(self):
        """Cover the matching, mismatching, ``None`` and ``file3`` cases."""
        # Toy image paired with its recorded md5.
        good_path, good_md5 = self.file1
        self.assertTrue(check_integrity(good_path, good_md5))

        # Toy image paired with a wrong md5; passing ``None`` as the md5 for
        # the same path is expected to succeed.
        other_path, wrong_md5 = self.file2
        self.assertFalse(check_integrity(other_path, wrong_md5))
        self.assertTrue(check_integrity(other_path, None))

        # Path outside the toy dataset, expected to be rejected.
        third_path, third_md5 = self.file3
        self.assertFalse(check_integrity(third_path, third_md5))
class TextGetMD5(unittest.TestCase):
    """Compare ``get_md5`` output with a correct and an incorrect digest."""

    def setUp(self) -> None:
        # Image files are used instead of text files: the md5 of a text file
        # differs between platforms because of CR / CRLF line endings.
        self.file1 = ('tests/data/det_toy_dataset/imgs/test/img_2.jpg',
                      '52b28b5dfc92d9027e70ec3ff95d8702')
        self.file2 = ('tests/data/det_toy_dataset/imgs/test/img_1.jpg',
                      'abc123')

    def test_get_md5(self):
        """The first digest must match; the second must differ."""
        matching_path, matching_md5 = self.file1
        self.assertEqual(get_md5(matching_path), matching_md5)

        other_path, bogus_md5 = self.file2
        self.assertNotEqual(get_md5(other_path), bogus_md5)
class TestListFiles(unittest.TestCase):
    """Check ``list_files`` against a plain listing of the image folder."""

    def setUp(self) -> None:
        self.path = 'tests/data/det_toy_dataset/imgs/test'

    def test_check_integrity(self):
        """Every ``jpg`` entry of the folder must appear in the result."""
        suffix = 'jpg'
        files = list_files(self.path, suffix)
        # Build the expected paths from a direct directory listing, keeping
        # only names that end with the suffix.
        expected = [
            os.path.join(self.path, name)
            for name in os.listdir(self.path)
            if name.endswith(suffix)
        ]
        for path in expected:
            self.assertIn(path, files)
|