File size: 653 Bytes
e5615b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from datasets import load_dataset
import os
import shutil

# Load the Hugging Face dataset.

dataset = load_dataset("housearch/Park-PFI-Overhead")

# Create the output directory; exist_ok=True keeps re-runs from failing when
# the directory is already there.
output_dir = "/workspace/datasets/"
os.makedirs(output_dir, exist_ok=True)

# Export every example of the "train" split as a pair of files that share the
# same zero-padded stem: image_NNNNNN.jpg (picture) and image_NNNNNN.txt
# (caption).
for i, item in enumerate(dataset["train"]):
    # Save the image. It is converted to RGB before being written under a
    # .jpg name (presumably because the source images may carry an alpha
    # channel or palette mode that JPEG cannot store -- confirm against the
    # dataset).
    image = item["image"]
    image = image.convert("RGB")
    image_path = os.path.join(output_dir, f"image_{i:06d}.jpg")
    image.save(image_path)

    # Save the caption next to the image. A missing (None) caption is written
    # as an empty file so that one bad row does not abort the export with a
    # TypeError after its image has already been saved.
    caption = item["caption_Flux"]
    caption_path = os.path.join(output_dir, f"image_{i:06d}.txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write("" if caption is None else caption)