fix vae nan bug
app.py CHANGED
@@ -228,29 +228,32 @@ if NEW_MODEL:
     print(f"encoder after load_state_dict parameters max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
     autoencoder = autoencoder.to(device)
     autoencoder.eval()
+    print(f"encoder after eval() min: {min([p.min() for p in autoencoder.encoder.parameters()])}")
+    print(f"encoder after eval() max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
+    print(f"autoencoder encoder after eval() dtype: {next(autoencoder.encoder.parameters()).dtype}")
     assert len(missing_keys) == 0
-else:
-    opts = HandDiffOpts()
-    model_path = './finetune_epoch=5-step=130000.ckpt'
-    sd_path = './sd-v1-4.ckpt'
-    print('Load diffusion model...')
-    diffusion = create_diffusion(str(opts.test_sampling_steps))
-    model = vit.DiT_XL_2(
-        input_size=opts.latent_size[0],
-        latent_dim=opts.latent_dim,
-        in_channels=opts.latent_dim+opts.n_keypoints+opts.n_mask,
-        learn_sigma=True,
-    ).to(device)
-    ckpt_state_dict = torch.load(model_path)['state_dict']
-    dit_state_dict = {remove_prefix(k, 'diffusion_backbone.'): v for k, v in ckpt_state_dict.items() if k.startswith('diffusion_backbone')}
-    vae_state_dict = {remove_prefix(k, 'autoencoder.'): v for k, v in ckpt_state_dict.items() if k.startswith('autoencoder')}
-    missing_keys, extra_keys = model.load_state_dict(dit_state_dict, strict=False)
-    model.eval()
-    assert len(missing_keys) == 0 and len(extra_keys) == 0
-    autoencoder = vqvae.create_model(3, 3, opts.latent_dim).eval().requires_grad_(False).to(device)
-    missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
-    autoencoder.eval()
-    assert len(missing_keys) == 0 and len(extra_keys) == 0
+# else:
+#     opts = HandDiffOpts()
+#     model_path = './finetune_epoch=5-step=130000.ckpt'
+#     sd_path = './sd-v1-4.ckpt'
+#     print('Load diffusion model...')
+#     diffusion = create_diffusion(str(opts.test_sampling_steps))
+#     model = vit.DiT_XL_2(
+#         input_size=opts.latent_size[0],
+#         latent_dim=opts.latent_dim,
+#         in_channels=opts.latent_dim+opts.n_keypoints+opts.n_mask,
+#         learn_sigma=True,
+#     ).to(device)
+#     ckpt_state_dict = torch.load(model_path)['state_dict']
+#     dit_state_dict = {remove_prefix(k, 'diffusion_backbone.'): v for k, v in ckpt_state_dict.items() if k.startswith('diffusion_backbone')}
+#     vae_state_dict = {remove_prefix(k, 'autoencoder.'): v for k, v in ckpt_state_dict.items() if k.startswith('autoencoder')}
+#     missing_keys, extra_keys = model.load_state_dict(dit_state_dict, strict=False)
+#     model.eval()
+#     assert len(missing_keys) == 0 and len(extra_keys) == 0
+#     autoencoder = vqvae.create_model(3, 3, opts.latent_dim).eval().requires_grad_(False).to(device)
+#     missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
+#     autoencoder.eval()
+#     assert len(missing_keys) == 0 and len(extra_keys) == 0
 sam_path = hf_hub_download(repo_id="Chaerin5/FoundHand-weights", filename="sam_vit_h_4b8939.pth", token=token)
 sam_predictor = init_sam(ckpt_path=sam_path, device='cpu')

@@ -492,6 +495,7 @@ def get_ref_anno(ref):
     print(f"opts.latent_scaling_factor: {opts.latent_scaling_factor}")
     print(f"autoencoder encoder before operating max: {min([p.min() for p in autoencoder.encoder.parameters()])}")
     print(f"autoencoder encoder before operating min: {max([p.max() for p in autoencoder.encoder.parameters()])}")
+    print(f"autoencoder encoder before operating dtype: {next(autoencoder.encoder.parameters()).dtype}")
     latent = opts.latent_scaling_factor * autoencoder.encode(image).sample()
     print(f"latent.max(): {latent.max()}, latent.min(): {latent.min()}")
     if not REF_POSE_MASK:
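The prints added around `load_state_dict()` / `eval()` dump the encoder's weight range and dtype to localize where the NaNs first appear. A small helper in the same spirit could fold those checks into one reusable call. This is only a sketch, not code from the commit; it reuses the `autoencoder.encoder` module named in the diff and otherwise relies on standard PyTorch (`torch.isfinite`).

```python
import torch
import torch.nn as nn

def report_weight_stats(module: nn.Module, name: str = "module") -> bool:
    """Print weight dtype/min/max for a module and flag non-finite values.

    Sketch only -- a reusable version of the debug prints in this commit,
    not code that exists in app.py.
    """
    params = [p.detach() for p in module.parameters()]
    if not params:
        print(f"{name}: no parameters")
        return True
    flat = torch.cat([p.flatten().float() for p in params])
    print(f"{name}: dtype={params[0].dtype}, "
          f"min={flat.min().item():.4g}, max={flat.max().item():.4g}")
    finite = bool(torch.isfinite(flat).all())
    if not finite:
        bad = int((~torch.isfinite(flat)).sum())
        print(f"{name}: {bad} non-finite weight values -- likely source of NaN latents")
    return finite

# Hypothetical usage at the points instrumented in the diff:
# report_weight_stats(autoencoder.encoder, "encoder after load_state_dict/eval()")
```

Running it once right after loading and once just before `encode()` would show whether the weights themselves or only the activations go non-finite.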
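The second hunk adds a dtype print immediately before `autoencoder.encode(image)`; a mismatch between the image tensor's dtype and the encoder weights (for example fp32 inputs fed to fp16 weights) is one common way the resulting latent ends up NaN. Below is a hedged sketch of that guard, assuming the same `autoencoder`, `image`, and `opts.latent_scaling_factor` names used in the diff; the explicit cast and the finiteness assert are illustrative additions, not part of the commit.

```python
import torch

def encode_to_latent(autoencoder, image: torch.Tensor, scaling_factor: float) -> torch.Tensor:
    """Encode an image to a scaled latent, guarding against dtype mismatch and NaNs.

    Sketch only -- `autoencoder.encode(...).sample()` and the scaling factor follow
    the call in this diff; the cast and the assert are illustrative.
    """
    enc_dtype = next(autoencoder.encoder.parameters()).dtype
    if image.dtype != enc_dtype:
        # Cast explicitly so a silent fp16/fp32 mismatch cannot poison the latent.
        image = image.to(enc_dtype)
    latent = scaling_factor * autoencoder.encode(image).sample()
    assert torch.isfinite(latent).all(), "NaN/Inf detected in VAE latent"
    return latent

# Hypothetical usage matching the diff:
# latent = encode_to_latent(autoencoder, image, opts.latent_scaling_factor)
```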