housearch committed on
Commit
7a408b6
·
verified ·
1 Parent(s): 4317407

Upload 3 files

Browse files
trainSD3/download_caption_sd3.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Export a Hugging Face image/caption dataset as paired files on disk.

Every record of the chosen split is written to the output directory as
``image_NNNNNN.jpg`` together with a same-named ``image_NNNNNN.txt`` that
holds the record's caption, where ``NNNNNN`` is the zero-padded record index.

NOTE(review): the kohya_ss preset that ships next to this script sets
``train_data_dir`` to ``/workspace/train/img`` while this script writes to
``/workspace/datasets`` -- confirm the exported files are moved or linked
there before training.
"""
import os
import shutil  # noqa: F401 -- not used below; kept from the original import list

# Defaults reproduce the values the original script had inline.
DATASET_NAME = "housearch/Park-PFI-Overhead"
OUTPUT_DIR = "/workspace/datasets"
SPLIT = "train"
IMAGE_FIELD = "image"
CAPTION_FIELD = "caption_SD3"
# Pillow's default JPEG quality is 75, which adds visible compression
# artifacts to training images; 95 is the highest value Pillow recommends.
JPEG_QUALITY = 95


def pair_paths(output_dir, index):
    """Return ``(image_path, caption_path)`` for the record at *index*.

    Both paths share the stem ``image_<index zero-padded to 6 digits>`` so
    the caption file sits next to its image with only the extension changed.
    """
    stem = os.path.join(output_dir, f"image_{index:06d}")
    return f"{stem}.jpg", f"{stem}.txt"


def caption_text(caption):
    """Return *caption* ready to be written, mapping ``None`` to ``""``.

    A missing caption value would otherwise make ``file.write`` raise
    ``TypeError`` and abort the export part-way through the dataset.
    """
    return "" if caption is None else caption


def save_pair(item, output_dir, index,
              caption_field=CAPTION_FIELD, jpeg_quality=JPEG_QUALITY):
    """Write one record's image and caption into *output_dir*.

    Args:
        item: Mapping for one dataset record. ``item["image"]`` must provide
            ``convert`` and ``save`` (presumably a ``PIL.Image.Image`` --
            confirm against the dataset schema).
        output_dir: Existing directory that receives both files.
        index: Record index used to build the file names.
        caption_field: Key of the caption column in *item*.
        jpeg_quality: Quality passed to the JPEG encoder.

    Raises:
        KeyError: If the image or caption column is absent from *item*.
    """
    image_path, caption_path = pair_paths(output_dir, index)

    # JPEG has no alpha channel or palette mode, so normalise to RGB first.
    item[IMAGE_FIELD].convert("RGB").save(image_path, quality=jpeg_quality)

    caption = item[caption_field]
    if caption is None:
        print(f"warning: record {index} has no caption; writing an empty file")
    with open(caption_path, "w", encoding="utf-8") as caption_file:
        caption_file.write(caption_text(caption))


def main(dataset_name=DATASET_NAME, output_dir=OUTPUT_DIR, split=SPLIT):
    """Load *dataset_name* and export every record of *split* to *output_dir*."""
    # Imported here rather than at module level so the helpers above can be
    # imported without the third-party ``datasets`` package installed.
    from datasets import load_dataset

    dataset = load_dataset(dataset_name)
    os.makedirs(output_dir, exist_ok=True)
    for index, item in enumerate(dataset[split]):
        save_pair(item, output_dir, index)


if __name__ == "__main__":
    main()
trainSD3/landscapeOverhead_A6000_sd3.json ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LoRA_type": "Standard",
3
+ "LyCORIS_preset": "full",
4
+ "adaptive_noise_scale": 0,
5
+ "additional_parameters": "",
6
+ "ae": "",
7
+ "apply_t5_attn_mask": false,
8
+ "async_upload": false,
9
+ "block_alphas": "",
10
+ "block_dims": "",
11
+ "block_lr_zero_threshold": "",
12
+ "blocks_to_swap": 0,
13
+ "bucket_no_upscale": true,
14
+ "bucket_reso_steps": 64,
15
+ "bypass_mode": false,
16
+ "cache_latents": false,
17
+ "cache_latents_to_disk": true,
18
+ "caption_dropout_every_n_epochs": 0,
19
+ "caption_dropout_rate": 0,
20
+ "caption_extension": ".txt",
21
+ "clip_g": "/workspace/clip_g.safetensors",
22
+ "clip_g_dropout_rate": 0,
23
+ "clip_l": "",
24
+ "clip_skip": 1,
25
+ "color_aug": false,
26
+ "constrain": 0,
27
+ "conv_alpha": 1,
28
+ "conv_block_alphas": "",
29
+ "conv_block_dims": "",
30
+ "conv_dim": 1,
31
+ "cpu_offload_checkpointing": false,
32
+ "dataset_config": "",
33
+ "debiased_estimation_loss": false,
34
+ "decompose_both": false,
35
+ "dim_from_weights": false,
36
+ "discrete_flow_shift": 3,
37
+ "dora_wd": false,
38
+ "double_blocks_to_swap": 0,
39
+ "down_lr_weight": "",
40
+ "dynamo_backend": "no",
41
+ "dynamo_mode": "default",
42
+ "dynamo_use_dynamic": false,
43
+ "dynamo_use_fullgraph": false,
44
+ "enable_all_linear": false,
45
+ "enable_bucket": true,
46
+ "epoch": 10,
47
+ "extra_accelerate_launch_args": "",
48
+ "factor": -1,
49
+ "flip_aug": false,
50
+ "flux1_cache_text_encoder_outputs": false,
51
+ "flux1_cache_text_encoder_outputs_to_disk": false,
52
+ "flux1_checkbox": false,
53
+ "fp8_base": false,
54
+ "fp8_base_unet": false,
55
+ "full_bf16": true,
56
+ "full_fp16": false,
57
+ "gpu_ids": "",
58
+ "gradient_accumulation_steps": 1,
59
+ "gradient_checkpointing": true,
60
+ "guidance_scale": 3.5,
61
+ "highvram": false,
62
+ "huber_c": 0.1,
63
+ "huber_scale": 1,
64
+ "huber_schedule": "snr",
65
+ "huggingface_path_in_repo": "",
66
+ "huggingface_repo_id": "",
67
+ "huggingface_repo_type": "",
68
+ "huggingface_repo_visibility": "",
69
+ "huggingface_token": "",
70
+ "img_attn_dim": "",
71
+ "img_mlp_dim": "",
72
+ "img_mod_dim": "",
73
+ "in_dims": "",
74
+ "ip_noise_gamma": 0,
75
+ "ip_noise_gamma_random_strength": false,
76
+ "keep_tokens": 0,
77
+ "learning_rate": 1e-05,
78
+ "log_config": false,
79
+ "log_tracker_config": "",
80
+ "log_tracker_name": "",
81
+ "log_with": "",
82
+ "logging_dir": "/workspace/train/log",
83
+ "logit_mean": 0,
84
+ "logit_std": 1,
85
+ "loraplus_lr_ratio": 0,
86
+ "loraplus_text_encoder_lr_ratio": 0,
87
+ "loraplus_unet_lr_ratio": 0,
88
+ "loss_type": "l2",
89
+ "lowvram": false,
90
+ "lr_scheduler": "cosine_with_restarts",
91
+ "lr_scheduler_args": "",
92
+ "lr_scheduler_num_cycles": 1,
93
+ "lr_scheduler_power": 1,
94
+ "lr_scheduler_type": "",
95
+ "lr_warmup": 10,
96
+ "lr_warmup_steps": 0,
97
+ "main_process_port": 0,
98
+ "masked_loss": false,
99
+ "max_bucket_reso": 2048,
100
+ "max_data_loader_n_workers": 0,
101
+ "max_grad_norm": 1,
102
+ "max_resolution": "1024,1024",
103
+ "max_timestep": 1000,
104
+ "max_token_length": 75,
105
+ "max_train_epochs": 0,
106
+ "max_train_steps": 1600,
107
+ "mem_eff_attn": false,
108
+ "mem_eff_save": false,
109
+ "metadata_author": "Chris Hsu",
110
+ "metadata_description": "Overhead Landscape LoRA for SD3.5 large",
111
+ "metadata_license": "",
112
+ "metadata_tags": "",
113
+ "metadata_title": "overheadLandscape_sd3",
114
+ "mid_lr_weight": "",
115
+ "min_bucket_reso": 256,
116
+ "min_snr_gamma": 0,
117
+ "min_timestep": 0,
118
+ "mixed_precision": "bf16",
119
+ "mode_scale": 1.29,
120
+ "model_list": "",
121
+ "model_prediction_type": "sigma_scaled",
122
+ "module_dropout": 0,
123
+ "multi_gpu": false,
124
+ "multires_noise_discount": 0.3,
125
+ "multires_noise_iterations": 0,
126
+ "network_alpha": 8,
127
+ "network_dim": 4,
128
+ "network_dropout": 0,
129
+ "network_weights": "",
130
+ "noise_offset": 0,
131
+ "noise_offset_random_strength": false,
132
+ "noise_offset_type": "Original",
133
+ "num_cpu_threads_per_process": 2,
134
+ "num_machines": 1,
135
+ "num_processes": 1,
136
+ "optimizer": "AdamW8bit",
137
+ "optimizer_args": "",
138
+ "output_dir": "/workspace/train/model",
139
+ "output_name": "overheadLandscape",
140
+ "persistent_data_loader_workers": false,
141
+ "pos_emb_random_crop_rate": 0,
142
+ "pretrained_model_name_or_path": "/workspace/sd3.5_large.safetensors",
143
+ "prior_loss_weight": 1,
144
+ "random_crop": false,
145
+ "rank_dropout": 0,
146
+ "rank_dropout_scale": false,
147
+ "reg_data_dir": "",
148
+ "rescaled": false,
149
+ "resume": "",
150
+ "resume_from_huggingface": "",
151
+ "sample_every_n_epochs": 0,
152
+ "sample_every_n_steps": 0,
153
+ "sample_prompts": "A bird's-eye perspective watercolor rendering of a landscape architecture proposal for an urban park development. The illustration depicts a curved contemporary pavilion or cultural center with a distinctive gray roof set within an expansive green space. Wooden boardwalks and meandering pathways flow through the landscape, connecting various garden zones and activity areas. A small water feature with aquatic vegetation is visible in the foreground. The park is framed by dense tree plantings that create a buffer between the green space and the surrounding cityscape. --w 1024 --h 768 --l 7.0 \nA detailed bird's-eye perspective watercolor rendering of a comprehensive waterfront park development featuring spectacular ornamental flower fields. The illustration employs a vibrant color palette that emphasizes the dramatic flowering meadows in yellow, orange, pink, and red hues arranged in geometric patterns throughout the landscape. These colorful plantings create a quilt-like pattern across the site, serving as the primary visual attraction while likely representing seasonal displays such as tulips or wildflowers. The park's circulation system features a network of generous gray pathways that provide clear pedestrian routes through the colorful landscape, with brick or paver edges defining the boundaries between walks and plantings. A large central green lawn provides flexible open space for recreation and gathering, shown populated with numerous visitors relaxing and picnicking. The upper portion of the site reveals infrastructure including two distinctive dome or tensile fabric structures that likely serve as event spaces or weather protection, positioned near a blue water feature that appears to be a swimming pool or small lake with recreational amenities. --w 1024 --h 768 --l 7.0\nA comprehensive aerial perspective watercolor rendering of an ambitious lakefront recreational development or theme park. 
The illustration employs a sophisticated visualization technique that combines precise architectural linework with atmospheric watercolor washes, particularly evident in the dramatic sunset sky rendered in graduated pink, orange, and yellow tones that reflect on the expansive water body. The composition reveals a thoughtfully organized entertainment complex situated on a peninsula or headland extending into a large lake or bay, with distant mountains creating a scenic backdrop along the horizon. The development features multiple programmatic zones: a central circular gathering space or amphitheater area populated with numerous tiny figures suggesting a major event or performance; several water features including a large circular pond likely for water-based activities; various recreational attractions including what appears to be a ferris wheel and playground structures; extensive green open spaces with trees and landscaping; designated parking areas with miniature vehicles; and circulation systems including pathways and roads that connect different program elements. --w 1024 --h 768 --l 7.0",
154
+ "sample_sampler": "euler_a",
155
+ "save_as_bool": false,
156
+ "save_clip": false,
157
+ "save_every_n_epochs": 1,
158
+ "save_every_n_steps": 0,
159
+ "save_last_n_epochs": 0,
160
+ "save_last_n_epochs_state": 0,
161
+ "save_last_n_steps": 0,
162
+ "save_last_n_steps_state": 0,
163
+ "save_model_as": "safetensors",
164
+ "save_precision": "bf16",
165
+ "save_state": false,
166
+ "save_state_on_train_end": false,
167
+ "save_state_to_huggingface": false,
168
+ "save_t5xxl": false,
169
+ "scale_v_pred_loss_like_noise_pred": false,
170
+ "scale_weight_norms": 0,
171
+ "sd3_cache_text_encoder_outputs": false,
172
+ "sd3_cache_text_encoder_outputs_to_disk": false,
173
+ "sd3_checkbox": true,
174
+ "sd3_clip_l": "/workspace/clip_l.safetensors",
175
+ "sd3_clip_l_dropout_rate": 0,
176
+ "sd3_disable_mmap_load_safetensors": false,
177
+ "sd3_enable_scaled_pos_embed": false,
178
+ "sd3_fused_backward_pass": false,
179
+ "sd3_t5_dropout_rate": 0,
180
+ "sd3_t5xxl": "/workspace/t5xxl_fp16.safetensors",
181
+ "sd3_text_encoder_batch_size": 1,
182
+ "sdxl": false,
183
+ "sdxl_cache_text_encoder_outputs": false,
184
+ "sdxl_no_half_vae": false,
185
+ "seed": 0,
186
+ "shuffle_caption": false,
187
+ "single_blocks_to_swap": 0,
188
+ "single_dim": "",
189
+ "single_mod_dim": "",
190
+ "skip_cache_check": false,
191
+ "split_mode": false,
192
+ "split_qkv": false,
193
+ "stop_text_encoder_training": 0,
194
+ "t5xxl": "",
195
+ "t5xxl_device": "",
196
+ "t5xxl_dtype": "bf16",
197
+ "t5xxl_lr": 0,
198
+ "t5xxl_max_token_length": 512,
199
+ "text_encoder_lr": 0,
200
+ "timestep_sampling": "sigma",
201
+ "train_batch_size": 2,
202
+ "train_blocks": "all",
203
+ "train_data_dir": "/workspace/train/img",
204
+ "train_double_block_indices": "all",
205
+ "train_norm": false,
206
+ "train_on_input": true,
207
+ "train_single_block_indices": "all",
208
+ "train_t5xxl": false,
209
+ "training_comment": "",
210
+ "txt_attn_dim": "",
211
+ "txt_mlp_dim": "",
212
+ "txt_mod_dim": "",
213
+ "unet_lr": 1e-05,
214
+ "unit": 1,
215
+ "up_lr_weight": "",
216
+ "use_cp": false,
217
+ "use_scalar": false,
218
+ "use_tucker": false,
219
+ "v2": false,
220
+ "v_parameterization": false,
221
+ "v_pred_like_loss": 0,
222
+ "vae": "",
223
+ "vae_batch_size": 0,
224
+ "wandb_api_key": "",
225
+ "wandb_run_name": "",
226
+ "weighted_captions": false,
227
+ "weighting_scheme": "logit_normal",
228
+ "xformers": "xformers"
229
+ }
trainSD3/trainSD3.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Run this script with bash:  bash trainSD3.sh
#
# Prepares /workspace for SD3.5 LoRA training with kohya_ss:
#   1. downloads the dataset export script and the kohya_ss GUI preset,
#   2. exports the dataset inside a virtualenv,
#   3. clones kohya_ss, switches to the sd3-flux.1 branch, installs it and
#      launches the GUI.

# Stop at the first failing step; otherwise a failed cd or download would let
# the remaining commands run in the wrong directory or with missing files.
set -e

cd /workspace
wget --trust-server-name --content-disposition https://huggingface.co/housearch/thu/resolve/main/trainSD3/download_caption_sd3.py
wget --trust-server-name --content-disposition https://huggingface.co/housearch/thu/resolve/main/trainSD3/landscapeOverhead_A6000_sd3.json

# The export script's dependencies are installed into their own virtualenv.
python -m venv env
source /workspace/env/bin/activate
pip install datasets huggingface_hub Pillow
python download_caption_sd3.py
deactivate

cd /workspace
# git clone refuses to write into an existing directory, so skip it on reruns.
if [ ! -d kohya_ss ]; then
    git clone https://github.com/bmaltais/kohya_ss
fi
cd kohya_ss
# The default branch trains SD1.5 / SDXL; training SD3 or Flux requires
# switching to the sd3-flux.1 branch.
git --no-pager branch -a
git checkout sd3-flux.1
./setup-runpod.sh
./gui.sh --share --headless

# ---------------------------------------------------------------------------
# Manual step (not executed by this script).
# Open another terminal window, paste the commands below and press Enter to
# download the models required for SD3.5 LoRA training in parallel (this
# saves time).
# This block used to be wrapped in ''' ... ''', which bash parses as a quoted
# string and tries to run as a command, so it is now a real comment.
#
#   cd /workspace
#   wget --trust-server-name --content-disposition https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF/resolve/dff185441d61601155591a46f691d7f73151acdd/sd3.5_large.safetensors
#
#   wget --trust-server-name --content-disposition https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF/resolve/dff185441d61601155591a46f691d7f73151acdd/t5xxl_fp16.safetensors
#
#   wget --trust-server-name --content-disposition https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF/resolve/dff185441d61601155591a46f691d7f73151acdd/clip_l.safetensors
#
#   wget --trust-server-name --content-disposition https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF/resolve/dff185441d61601155591a46f691d7f73151acdd/clip_g.safetensors
# ---------------------------------------------------------------------------

# Alternative launch that passes an explicit config file:
#./gui.sh --share --headless --config "/workspace/config.toml"