thu / trainFlux /download_caption_flux.py
housearch's picture
Upload 3 files
e5615b2 verified
from datasets import load_dataset
import os
import shutil
# Export every sample of the dataset's "train" split as a pair of files that
# share one zero-padded stem inside a single flat directory:
#   image_000000.jpg - the sample's image, converted to RGB
#   image_000000.txt - the sample's "caption_Flux" text, UTF-8 encoded

# Load the Hugging Face dataset.
dataset = load_dataset("housearch/Park-PFI-Overhead")

# Create the output directory (no error if it already exists).
output_dir = "/workspace/datasets/"
os.makedirs(output_dir, exist_ok=True)

# Write the image and the caption of each sample.
for i, item in enumerate(dataset["train"]):
    # Save the image.
    # NOTE(review): assumes item["image"] is a decoded PIL image (it must
    # provide .convert() and .save()) - confirm against the dataset schema.
    image = item["image"]
    # Normalise to RGB before saving under a .jpg name; presumably needed
    # because some source images carry alpha/palette modes - TODO confirm.
    image = image.convert("RGB")
    image_path = os.path.join(output_dir, f"image_{i:06d}.jpg")
    image.save(image_path)

    # Save the caption next to the image, using the same file stem.
    caption = item["caption_Flux"]
    if caption is None:
        # A missing caption would make f.write() raise TypeError and abort
        # the export part-way through; emit an empty caption file instead so
        # every image still has its companion .txt file.
        caption = ""
    caption_path = os.path.join(output_dir, f"image_{i:06d}.txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption)