"""Export the "housearch/Park-PFI-Overhead" dataset as image/caption file pairs.

Each record of the ``train`` split is written to ``/workspace/datasets/`` as
``image_NNNNNN.jpg`` plus a matching ``image_NNNNNN.txt`` holding the value of
the record's ``caption_Flux`` field.
"""

import os
import shutil

from datasets import load_dataset

dataset = load_dataset("housearch/Park-PFI-Overhead")

# Destination directory; created up front so the per-record writes can assume
# it exists.
output_dir = "/workspace/datasets/"
os.makedirs(output_dir, exist_ok=True)

for i, item in enumerate(dataset["train"]):
    # Save the image under a zero-padded, index-based name.
    # Assumes item["image"] is a PIL-style image object (the code relies on
    # its .convert() and .save() methods) -- TODO confirm against the dataset.
    image = item["image"]
    image = image.convert("RGB")  # normalise the mode before writing a .jpg
    image_path = os.path.join(output_dir, f"image_{i:06d}.jpg")
    image.save(image_path)

    # Write the caption next to the image, sharing the same file stem so the
    # two files can be paired by name.
    caption = item["caption_Flux"]
    caption_path = os.path.join(output_dir, f"image_{i:06d}.txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption)