Model card auto-generated by SimpleTuner
Browse files
README.md
CHANGED
@@ -119,7 +119,7 @@ A photo-realistic image of a cat
|
|
119 |
```
|
120 |
|
121 |
## Validation settings
|
122 |
-
- CFG: `
|
123 |
- CFG Rescale: `0.0`
|
124 |
- Steps: `20`
|
125 |
- Sampler: `None`
|
@@ -143,8 +143,8 @@ You may reuse the base model text encoder for inference.
|
|
143 |
- Training steps: 100
|
144 |
- Learning rate: 0.0001
|
145 |
- Max grad norm: 0.01
|
146 |
-
- Effective batch size:
|
147 |
-
- Micro-batch size:
|
148 |
- Gradient accumulation steps: 1
|
149 |
- Number of GPUs: 3
|
150 |
- Prediction type: flow-matching
|
@@ -159,20 +159,20 @@ You may reuse the base model text encoder for inference.
|
|
159 |
"bypass_mode": true,
|
160 |
"algo": "lokr",
|
161 |
"multiplier": 1.0,
|
|
|
162 |
"linear_dim": 10000,
|
163 |
"linear_alpha": 1,
|
164 |
"factor": 12,
|
165 |
"apply_preset": {
|
166 |
"target_module": [
|
167 |
-
"Attention",
|
168 |
-
"FeedForward"
|
169 |
],
|
170 |
"module_algo_map": {
|
171 |
-
"Attention": {
|
172 |
-
"factor": 12
|
173 |
-
},
|
174 |
"FeedForward": {
|
175 |
"factor": 6
|
|
|
|
|
|
|
176 |
}
|
177 |
}
|
178 |
}
|
@@ -181,33 +181,6 @@ You may reuse the base model text encoder for inference.
|
|
181 |
|
182 |
## Datasets
|
183 |
|
184 |
-
### reg-512
|
185 |
-
- Repeats: 0
|
186 |
-
- Total number of images: ~288
|
187 |
-
- Total number of aspect buckets: 3
|
188 |
-
- Resolution: 0.262144 megapixels
|
189 |
-
- Cropped: False
|
190 |
-
- Crop style: None
|
191 |
-
- Crop aspect: None
|
192 |
-
- Used for regularisation data: Yes
|
193 |
-
### reg-1024
|
194 |
-
- Repeats: 0
|
195 |
-
- Total number of images: ~291
|
196 |
-
- Total number of aspect buckets: 9
|
197 |
-
- Resolution: 1.048576 megapixels
|
198 |
-
- Cropped: False
|
199 |
-
- Crop style: None
|
200 |
-
- Crop aspect: None
|
201 |
-
- Used for regularisation data: Yes
|
202 |
-
### cheechandchong-uncropped-512
|
203 |
-
- Repeats: 10
|
204 |
-
- Total number of images: ~24
|
205 |
-
- Total number of aspect buckets: 5
|
206 |
-
- Resolution: 0.262144 megapixels
|
207 |
-
- Cropped: False
|
208 |
-
- Crop style: None
|
209 |
-
- Crop aspect: None
|
210 |
-
- Used for regularisation data: No
|
211 |
### cheechandchong-cropped-512
|
212 |
- Repeats: 10
|
213 |
- Total number of images: ~24
|
@@ -217,15 +190,6 @@ You may reuse the base model text encoder for inference.
|
|
217 |
- Crop style: None
|
218 |
- Crop aspect: None
|
219 |
- Used for regularisation data: No
|
220 |
-
### cheechandchong-uncropped-1024
|
221 |
-
- Repeats: 10
|
222 |
-
- Total number of images: ~24
|
223 |
-
- Total number of aspect buckets: 7
|
224 |
-
- Resolution: 1.048576 megapixels
|
225 |
-
- Cropped: False
|
226 |
-
- Crop style: None
|
227 |
-
- Crop aspect: None
|
228 |
-
- Used for regularisation data: No
|
229 |
### cheechandchong-cropped-1024
|
230 |
- Repeats: 10
|
231 |
- Total number of images: ~24
|
@@ -261,7 +225,7 @@ image = pipeline(
|
|
261 |
generator=torch.Generator(device='cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu').manual_seed(1641421826),
|
262 |
width=1024,
|
263 |
height=1024,
|
264 |
-
guidance_scale=
|
265 |
).images[0]
|
266 |
image.save("output.png", format="PNG")
|
267 |
```
|
|
|
119 |
```
|
120 |
|
121 |
## Validation settings
|
122 |
+
- CFG: `4.0`
|
123 |
- CFG Rescale: `0.0`
|
124 |
- Steps: `20`
|
125 |
- Sampler: `None`
|
|
|
143 |
- Training steps: 100
|
144 |
- Learning rate: 0.0001
|
145 |
- Max grad norm: 0.01
|
146 |
+
- Effective batch size: 3
|
147 |
+
- Micro-batch size: 1
|
148 |
- Gradient accumulation steps: 1
|
149 |
- Number of GPUs: 3
|
150 |
- Prediction type: flow-matching
|
|
|
159 |
"bypass_mode": true,
|
160 |
"algo": "lokr",
|
161 |
"multiplier": 1.0,
|
162 |
+
"full_matrix": true,
|
163 |
"linear_dim": 10000,
|
164 |
"linear_alpha": 1,
|
165 |
"factor": 12,
|
166 |
"apply_preset": {
|
167 |
"target_module": [
|
168 |
+
"JointTransformerBlock"
|
|
|
169 |
],
|
170 |
"module_algo_map": {
|
|
|
|
|
|
|
171 |
"FeedForward": {
|
172 |
"factor": 6
|
173 |
+
},
|
174 |
+
"JointTransformerBlock": {
|
175 |
+
"factor": 12
|
176 |
}
|
177 |
}
|
178 |
}
|
|
|
181 |
|
182 |
## Datasets
|
183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
### cheechandchong-cropped-512
|
185 |
- Repeats: 10
|
186 |
- Total number of images: ~24
|
|
|
190 |
- Crop style: None
|
191 |
- Crop aspect: None
|
192 |
- Used for regularisation data: No
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
### cheechandchong-cropped-1024
|
194 |
- Repeats: 10
|
195 |
- Total number of images: ~24
|
|
|
225 |
generator=torch.Generator(device='cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu').manual_seed(1641421826),
|
226 |
width=1024,
|
227 |
height=1024,
|
228 |
+
guidance_scale=4.0,
|
229 |
).images[0]
|
230 |
image.save("output.png", format="PNG")
|
231 |
```
|