"""Export the ``housearch/landscape`` dataset to image/caption file pairs.

For every item of the ``train`` split this script writes two files into
``/workspace/datasets``:

* ``image_NNNNNN.jpeg`` - the item's ``image`` field saved as JPEG
* ``image_NNNNNN.txt``  - the item's ``caption_sdxl`` field as UTF-8 text

``NNNNNN`` is the zero-padded position of the item in the split.
"""
import os
import shutil

from datasets import load_dataset

# Load the Hugging Face dataset.
dataset = load_dataset("housearch/landscape")

# Create the output directory (no error if it already exists).
output_dir = "/workspace/datasets"
os.makedirs(output_dir, exist_ok=True)

# Image modes the JPEG encoder can write directly; anything else (RGBA, P,
# LA, ...) makes ``save`` raise OSError and has to be converted first.
# NOTE(review): mode list taken from Pillow's JPEG plugin - confirm against
# the installed Pillow version.
jpeg_modes = {"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"}

# Save every image together with its caption.
for i, item in enumerate(dataset["train"]):
    # Save the image.
    # Presumably a PIL image (the decoded form of a ``datasets`` Image
    # feature) - verify against the dataset schema.
    image = item["image"]
    if image.mode not in jpeg_modes:
        # JPEG has no alpha channel or palette; RGB is the safe target.
        image = image.convert("RGB")
    image_path = os.path.join(output_dir, f"image_{i:06d}.jpeg")
    image.save(image_path)

    # Save the caption next to the image, under the same base name.
    caption = item["caption_sdxl"]
    caption_path = os.path.join(output_dir, f"image_{i:06d}.txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption)