|
import argparse |
|
import multiprocessing |
|
from functools import partial |
|
from io import BytesIO |
|
from pathlib import Path |
|
|
|
import lmdb |
|
from PIL import Image |
|
from torch.utils.data import Dataset |
|
from torchvision.transforms import functional as trans_fn |
|
from tqdm import tqdm |
|
import os |
|
|
|
|
|
def resize_and_convert(img, size, resample, quality=100): |
|
img = trans_fn.resize(img, size, resample) |
|
img = trans_fn.center_crop(img, size) |
|
buffer = BytesIO() |
|
img.save(buffer, format="jpeg", quality=quality) |
|
val = buffer.getvalue() |
|
|
|
return val |
|
|
|
|
|
def resize_multiple(img, |
|
sizes=(128, 256, 512, 1024), |
|
resample=Image.LANCZOS, |
|
quality=100): |
|
imgs = [] |
|
|
|
for size in sizes: |
|
imgs.append(resize_and_convert(img, size, resample, quality)) |
|
|
|
return imgs |
|
|
|
|
|
def resize_worker(img_file, sizes, resample): |
|
i, (file, idx) = img_file |
|
img = Image.open(file) |
|
img = img.convert("RGB") |
|
out = resize_multiple(img, sizes=sizes, resample=resample) |
|
|
|
return i, idx, out |
|
|
|
|
|
def prepare(env, |
|
paths, |
|
n_worker, |
|
sizes=(128, 256, 512, 1024), |
|
resample=Image.LANCZOS): |
|
resize_fn = partial(resize_worker, sizes=sizes, resample=resample) |
|
|
|
|
|
indexs = [] |
|
for each in paths: |
|
file = os.path.basename(each) |
|
name, ext = file.split('.') |
|
idx = int(name) |
|
indexs.append(idx) |
|
|
|
|
|
files = sorted(zip(paths, indexs), key=lambda x: x[1]) |
|
files = list(enumerate(files)) |
|
total = 0 |
|
|
|
with multiprocessing.Pool(n_worker) as pool: |
|
for i, idx, imgs in tqdm(pool.imap_unordered(resize_fn, files)): |
|
for size, img in zip(sizes, imgs): |
|
key = f"{size}-{str(idx).zfill(5)}".encode("utf-8") |
|
|
|
with env.begin(write=True) as txn: |
|
txn.put(key, img) |
|
|
|
total += 1 |
|
|
|
with env.begin(write=True) as txn: |
|
txn.put("length".encode("utf-8"), str(total).encode("utf-8")) |
|
|
|
|
|
class ImageFolder(Dataset): |
|
def __init__(self, folder, exts=['jpg']): |
|
super().__init__() |
|
self.paths = [ |
|
p for ext in exts for p in Path(f'{folder}').glob(f'**/*.{ext}') |
|
] |
|
|
|
def __len__(self): |
|
return len(self.paths) |
|
|
|
def __getitem__(self, index): |
|
path = os.path.join(self.folder, self.paths[index]) |
|
img = Image.open(path) |
|
return img |
|
|
|
|
|
if __name__ == "__main__": |
|
""" |
|
converting ffhq images to lmdb |
|
""" |
|
num_workers = 16 |
|
|
|
in_path = 'datasets/ffhq' |
|
|
|
out_path = 'datasets/ffhq.lmdb' |
|
|
|
if not os.path.exists(out_path): |
|
os.makedirs(out_path) |
|
|
|
resample_map = {"lanczos": Image.LANCZOS, "bilinear": Image.BILINEAR} |
|
resample = resample_map['lanczos'] |
|
|
|
sizes = [256] |
|
|
|
print(f"Make dataset of image sizes:", ", ".join(str(s) for s in sizes)) |
|
|
|
|
|
|
|
exts = ['jpg'] |
|
paths = [p for ext in exts for p in Path(f'{in_path}').glob(f'**/*.{ext}')] |
|
|
|
|
|
with lmdb.open(out_path, map_size=1024**4, readahead=False) as env: |
|
prepare(env, paths, num_workers, sizes=sizes, resample=resample) |
|
|