Spaces:
Runtime error
Runtime error
# Author: Ming Yang | |
# Date: 2023/01/20 | |
# Description: Traverse the zip file but not decompress it. | |
# Suppose the zip file contains yolo format annotation files. | |
# / | |
# βββ classes.txt | |
# βββ images | |
# β βββ 1.jpg | |
# β βββ 2.jpg | |
# β βββ ... | |
# βββ labels | |
# βββ 1.txt | |
# βββ 2.txt | |
# βββ ... | |
import pathlib | |
import shutil | |
from datetime import datetime | |
from typing import Optional | |
from zipfile import ZipFile | |
from PIL import Image | |
from pycocotools.coco import COCO | |
yolo_label = { | |
'class': str, | |
'class_id': int, | |
'x_center': float, | |
'y_center': float, | |
'width': float, | |
'height': float | |
} | |
yolo_image = { | |
'image_name': str, | |
'image': Image, | |
'labels': list[yolo_label] | |
} | |
coco_annotation = { | |
"id": int, | |
"image_id": int, # the id of the image that the annotation belongs to | |
"category_id": int, # the id of the category that the annotation belongs to | |
# "segmentation": RLE or [polygon], | |
"area": float, | |
"bbox": [float, float, float, float], # [x,y,width,height] | |
"iscrowd": bool, # 0 or 1, | |
} | |
coco_category = { | |
"id": int, | |
"name": str, | |
"supercategory": Optional[str], | |
} | |
coco_image = { | |
"id": int, | |
"width": int, | |
"height": int, | |
"file_name": str, | |
"date_captured": Optional[datetime], | |
} | |
coco_dataset = { | |
"images": list[coco_image], # list of all images in the dataset | |
"annotations": list[coco_annotation], # list of all annotations in the dataset | |
"categories": list[coco_category] # list of all categories | |
} | |
class YoloImage: | |
def __init__(self, image_name: str, image: Image, labels: list[yolo_label]): | |
self.image_name = image_name | |
self.image = image | |
self.labels = labels | |
def __repr__(self): | |
return f'YoloImage(image_name={self.image_name}, image={self.image}, labels={self.labels})' | |
def to_coco_image(self, id: int) -> coco_image: | |
return { | |
"id": id, | |
"width": self.image.width, | |
"height": self.image.height, | |
"file_name": self.image_name, | |
} | |
def to_coco_annotations(self, image_id: int, ann_id_start: int) -> list[coco_annotation]: | |
ann_id = ann_id_start | |
annotations: list[coco_annotation] = [] | |
for label in self.labels: | |
ann_id = ann_id + 1 | |
annotations.append({ | |
"id": ann_id, | |
"image_id": image_id, | |
"category_id": label['class_id'], | |
"area": label['width'] * label['height'] * self.image.width * self.image.height, | |
"bbox": [ | |
(label['x_center'] - label['width'] / 2) * self.image.width, | |
(label['y_center'] - label['height'] / 2) * self.image.height, | |
label['width'] * self.image.width, | |
label['height'] * self.image.height | |
], | |
"iscrowd": False, | |
}) | |
return annotations | |
class YoloDataset: | |
_zip_file: ZipFile | |
_classes: list[str] | |
_images: list[str] | |
_labels: list[str] | |
def __init__(self, zip_file: ZipFile, classes=None, images=None, labels=None): | |
if labels is None: | |
labels = [] | |
if images is None: | |
images = [] | |
if classes is None: | |
classes = [] | |
self._zip_file = zip_file | |
self._classes = classes | |
self._images = images | |
self._labels = labels | |
def from_zip_file(zip_file: ZipFile) -> 'YoloDataset': | |
namelist = zip_file.namelist() | |
namelist = list(filter(lambda x: not (x.endswith('.DS_Store') or x.startswith('__MACOSX')), namelist)) | |
root_name = namelist[0] | |
if not zip_file.getinfo(root_name).is_dir(): | |
root_name = root_name.split('/')[0] + '/' | |
namelist = list(filter(lambda x: not zip_file.getinfo(x).is_dir(), namelist)) | |
cls_filename = root_name + 'classes.txt' | |
if cls_filename in namelist: | |
classes = zip_file.read(cls_filename).decode('utf-8').split('\n') | |
else: | |
classes = [] | |
images = list(filter(lambda x: x.startswith(root_name + 'images'), namelist)) | |
labels = list(filter(lambda x: x.startswith(root_name + 'labels'), namelist)) | |
assert len(images) == len(labels) and len(images) > 0 | |
images.sort() | |
labels.sort() | |
for image, label in zip(images, labels): | |
image_name = image.split('/')[-1] | |
label_name = label.split('/')[-1] | |
assert image_name.split('.')[0] == label_name.split('.')[0] | |
return YoloDataset(zip_file, classes, images, labels) | |
def from_path(path: str) -> 'YoloDataset': | |
zip_file = ZipFile(path, 'r') | |
return YoloDataset.from_zip_file(zip_file) | |
def __len__(self): | |
return len(self._images) | |
def __getitem__(self, index: int) -> YoloImage: | |
image_name = self._images[index] | |
labels = self._zip_file.read(self._labels[index]).decode('utf-8').split('\n') | |
labels = list(filter(lambda x: len(x) > 0, labels)) | |
labels = list(map(lambda x: x.split(' '), labels)) | |
labels = list(map(lambda x: { | |
'class': self._classes[int(x[0])] if len(self._classes) > int(x[0]) else 'unknown', | |
'class_id': int(x[0]), | |
'x_center': float(x[1]), | |
'y_center': float(x[2]), | |
'width': float(x[3]), | |
'height': float(x[4]) | |
}, labels)) | |
return YoloImage(image_name, Image.open(self._zip_file.open(self._images[index])), labels) | |
def __iter__(self): | |
for i in range(len(self)): | |
yield self[i] | |
def __deepcopy__(self, memodict=None): | |
return YoloDataset(self._zip_file, self._classes, self._images, self._labels) | |
def load_image(self, image_name: str) -> Image: | |
return Image.open(self._zip_file.open(image_name)) | |
def to_coco(self) -> COCO: | |
images: list[coco_image] = [] | |
annotations: list[coco_annotation] = [] | |
categories: list[coco_category] = [] | |
ann_id = 0 | |
for i in range(len(self)): | |
image = self[i] | |
images.append(image.to_coco_image(i)) | |
annotations.extend(image.to_coco_annotations(i, ann_id)) | |
ann_id = ann_id + len(image.labels) | |
for i in range(len(self._classes)): | |
categories.append({ | |
"id": i, | |
"name": self._classes[i], | |
"supercategory": None, | |
}) | |
coco_ds = COCO() | |
coco_ds.dataset = { | |
"images": images, | |
"annotations": annotations, | |
"categories": categories, | |
} | |
coco_ds.createIndex() | |
return coco_ds | |
def to_material(self): | |
return MaterialYoloDataset(self) | |
def zip_file(self): | |
return self._zip_file | |
def classes(self): | |
return self._classes | |
class MaterialYoloDataset: | |
def __init__(self, dataset: YoloDataset): | |
print(dataset.to_coco().cats) | |
self._classes = dataset.classes | |
self._zip_file = dataset.zip_file | |
first = self._zip_file.namelist()[0] | |
if self._zip_file.getinfo(first).is_dir(): | |
self._root = first[0:-1] | |
else: | |
self._root = first.split('/')[0] | |
def __enter__(self): | |
# recursively create dir | |
dataset_path = pathlib.Path(f'./datasets/') | |
dataset_path.mkdir(parents=True, exist_ok=True) | |
self._zip_file.extractall(f'./datasets/') | |
with open(f'./datasets/{self._root}/data.yaml', 'w+') as f: | |
f.write(f'path: {dataset_path.absolute()}/{self._root}/\n') | |
f.write(f'train: images\n') | |
f.write(f'val: images\n') | |
f.write(f'\n') | |
f.write(f'# Classes\n') | |
f.write(f'names:\n') | |
for i in range(len(self._classes)): | |
f.write(f' {i}: {self._classes[i]}\n') | |
else: | |
for i in range(0, 2): | |
f.write(f' {i}: unknown\n') | |
return self | |
def __exit__(self, exc_type, exc_val, exc_tb): | |
shutil.rmtree(f'./datasets/{self._root}') | |
def yaml(self): | |
return pathlib.Path(f'./datasets/{self._root}/data.yaml').absolute() | |
def main(): | |
dataset = YoloDataset.from_path('tests/coco8.zip') | |
coco = dataset.to_coco() | |
print(coco) | |
print(dataset.to_material().yaml) | |
if __name__ == '__main__': | |
main() | |