"""Export the ``housearch/landscape`` Hugging Face dataset to local files.

Every example of the ``train`` split is written to ``/workspace/datasets`` as
a pair of files sharing a zero-padded index: ``image_NNNNNN.jpeg`` holds the
picture and ``image_NNNNNN.txt`` holds the ``caption_sdxl`` text.
"""

import os
import shutil

from datasets import load_dataset

DATASET_NAME = "housearch/landscape"
OUTPUT_DIR = "/workspace/datasets"

# Pixel modes that Pillow's JPEG writer accepts as-is. Anything else (RGBA, P,
# LA, ...) makes ``Image.save`` raise ``OSError`` and must be converted first.
_JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def save_example(output_dir: str, index: int, image, caption) -> None:
    """Write one image/caption pair into *output_dir*.

    Args:
        output_dir: Existing directory that receives both files.
        index: Example number; formatted as six zero-padded digits in the
            file names.
        image: Object exposing ``mode``, ``convert`` and ``save`` --
            presumably a ``PIL.Image.Image`` as decoded by ``datasets``
            (TODO confirm against the dataset's feature schema).
        caption: Caption text. ``None`` is written as an empty file so that
            every image still has a matching ``.txt`` companion.
    """
    stem = os.path.join(output_dir, f"image_{index:06d}")

    # JPEG has no alpha channel or palette support; normalise such images to
    # RGB instead of crashing halfway through the export.
    if image.mode not in _JPEG_MODES:
        image = image.convert("RGB")
    image.save(f"{stem}.jpeg")

    text = "" if caption is None else str(caption)
    with open(f"{stem}.txt", "w", encoding="utf-8") as f:
        f.write(text)


def main() -> None:
    """Load the dataset and export every ``train`` example to ``OUTPUT_DIR``."""
    # Load the Hugging Face dataset.
    dataset = load_dataset(DATASET_NAME)

    # Create the output directory (no error if it already exists).
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Save each image together with its caption.
    for i, item in enumerate(dataset["train"]):
        save_example(OUTPUT_DIR, i, item["image"], item["caption_sdxl"])


if __name__ == "__main__":
    main()