# Source: thu / download_caption.py
# Uploaded by housearch — commit aaa22b8 (verified): "Upload 3 files"
from datasets import load_dataset
import os
import shutil
# Hugging Face dataset repository, split, and caption column to export.
DATASET_NAME = "housearch/landscape"
DATASET_SPLIT = "train"
CAPTION_COLUMN = "caption_sdxl"

# Directory that receives the image/caption pairs.
OUTPUT_DIR = "/workspace/datasets"

# Image modes Pillow's JPEG writer can encode directly. Any other mode
# (e.g. RGBA, LA, P) makes Image.save() raise, so it is converted first.
JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def save_example(output_dir, index, image, caption):
    """Write one image/caption pair into *output_dir*.

    The image is saved as ``image_<index:06d>.jpeg`` and the caption as
    ``image_<index:06d>.txt`` (UTF-8), so both files share the same stem.

    Args:
        output_dir: Existing directory to write into.
        index: Zero-based position of the example; used in the file names.
        image: Object exposing ``mode``, ``convert`` and ``save`` — assumed
            to be a PIL image as decoded by ``datasets``; confirm for this
            dataset.
        caption: Caption text, or ``None`` when the row has no caption.

    Returns:
        A ``(image_path, caption_path)`` tuple of the files written.
    """
    stem = os.path.join(output_dir, f"image_{index:06d}")
    image_path = f"{stem}.jpeg"
    caption_path = f"{stem}.txt"

    # JPEG has no alpha channel or palette; without this conversion a single
    # RGBA/P image would abort the whole export with an OSError.
    if image.mode not in JPEG_MODES:
        image = image.convert("RGB")
    image.save(image_path)

    with open(caption_path, "w", encoding="utf-8") as caption_file:
        # A missing caption becomes an empty file instead of a TypeError,
        # so every image still has a matching .txt next to it.
        caption_file.write("" if caption is None else caption)

    return image_path, caption_path


def main(
    dataset_name=DATASET_NAME,
    split=DATASET_SPLIT,
    output_dir=OUTPUT_DIR,
    caption_column=CAPTION_COLUMN,
):
    """Load the dataset and export every row of *split* as image + caption.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        split: Name of the split to iterate over.
        output_dir: Directory to write into; created if it does not exist.
        caption_column: Column of each row that holds the caption text.
    """
    # Load the Hugging Face dataset.
    dataset = load_dataset(dataset_name)

    # Create the output directory.
    os.makedirs(output_dir, exist_ok=True)

    # Save each image together with its caption.
    for index, item in enumerate(dataset[split]):
        save_example(output_dir, index, item["image"], item[caption_column])


if __name__ == "__main__":
    main()