File size: 614 Bytes
aaa22b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from datasets import load_dataset
import os
import shutil

# Load the dataset from the HuggingFace Hub.
dataset = load_dataset("housearch/landscape")

# Create the output directory (no error if it already exists).
output_dir = "/workspace/datasets"
os.makedirs(output_dir, exist_ok=True)

# Image modes that Pillow's JPEG encoder can write directly. Any other mode
# (e.g. RGBA, LA, P) makes `save()` raise OSError, so such images are
# converted to RGB first. Assumes item["image"] is a PIL image, which is what
# the `.save()` call below already relies on.
jpeg_modes = {"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"}

# Write every training example as an image/caption pair that shares the same
# zero-padded file stem: image_000000.jpeg + image_000000.txt, and so on.
for i, item in enumerate(dataset["train"]):
    # Save the image.
    image = item["image"]
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")
    image_path = os.path.join(output_dir, f"image_{i:06d}.jpeg")
    image.save(image_path)

    # Save the caption. A missing (None) caption becomes an empty file so a
    # single bad row does not abort the whole export with a TypeError.
    caption = item["caption_sdxl"]
    caption_path = os.path.join(output_dir, f"image_{i:06d}.txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption if caption is not None else "")