Model weights do not match.

#2
by lanfz2000 - opened

I ran this code:

import timm
from timm.layers import SwiGLUPacked
import torch
from huggingface_hub import login

model = timm.create_model("hf-hub:paige-ai/Virchow", pretrained=True, mlp_layer=SwiGLUPacked, act_layer=torch.nn.SiLU)

and it raises this error:
RuntimeError: Error(s) in loading state_dict for VisionTransformer:
Unexpected key(s) in state_dict: "blocks.0.ls1.gamma", "blocks.0.ls2.gamma", "blocks.1.ls1.gamma", "blocks.1.ls2.gamma", "blocks.2.ls1.gamma", "blocks.2.ls2.gamma", "blocks.3.ls1.gamma", "blocks.3.ls2.gamma", "blocks.4.ls1.gamma", "blocks.4.ls2.gamma", "blocks.5.ls1.gamma", "blocks.5.ls2.gamma", "blocks.6.ls1.gamma", "blocks.6.ls2.gamma", "blocks.7.ls1.gamma", "blocks.7.ls2.gamma", "blocks.8.ls1.gamma", "blocks.8.ls2.gamma", "blocks.9.ls1.gamma", "blocks.9.ls2.gamma", "blocks.10.ls1.gamma", "blocks.10.ls2.gamma", "blocks.11.ls1.gamma", "blocks.11.ls2.gamma", "blocks.12.ls1.gamma", "blocks.12.ls2.gamma", "blocks.13.ls1.gamma", "blocks.13.ls2.gamma", "blocks.14.ls1.gamma", "blocks.14.ls2.gamma", "blocks.15.ls1.gamma", "blocks.15.ls2.gamma", "blocks.16.ls1.gamma", "blocks.16.ls2.gamma", "blocks.17.ls1.gamma", "blocks.17.ls2.gamma", "blocks.18.ls1.gamma", "blocks.18.ls2.gamma", "blocks.19.ls1.gamma", "blocks.19.ls2.gamma", "blocks.20.ls1.gamma", "blocks.20.ls2.gamma", "blocks.21.ls1.gamma", "blocks.21.ls2.gamma", "blocks.22.ls1.gamma", "blocks.22.ls2.gamma", "blocks.23.ls1.gamma", "blocks.23.ls2.gamma", "blocks.24.ls1.gamma", "blocks.24.ls2.gamma", "blocks.25.ls1.gamma", "blocks.25.ls2.gamma", "blocks.26.ls1.gamma", "blocks.26.ls2.gamma", "blocks.27.ls1.gamma", "blocks.27.ls2.gamma", "blocks.28.ls1.gamma", "blocks.28.ls2.gamma", "blocks.29.ls1.gamma", "blocks.29.ls2.gamma", "blocks.30.ls1.gamma", "blocks.30.ls2.gamma", "blocks.31.ls1.gamma", "blocks.31.ls2.gamma".
size mismatch for blocks.0.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.0.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.0.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.1.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.1.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.1.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.2.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.2.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.2.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.3.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.3.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.3.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.4.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.4.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.4.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.5.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.5.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.5.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.6.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.6.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.6.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.7.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.7.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.7.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.8.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.8.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.8.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.9.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.9.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.9.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.10.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.10.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.10.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.11.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.11.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.11.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.12.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.12.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.12.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.13.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.13.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.13.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.14.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.14.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.14.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.15.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.15.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.15.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.16.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.16.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.16.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.17.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.17.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.17.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.18.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.18.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.18.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.19.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.19.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.19.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.20.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.20.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.20.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.21.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.21.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.21.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.22.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.22.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.22.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.23.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.23.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.23.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.24.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.24.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.24.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.25.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.25.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.25.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.26.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.26.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.26.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.27.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.27.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.27.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.28.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.28.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.28.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.29.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.29.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.29.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.30.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.30.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.30.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).
size mismatch for blocks.31.mlp.fc1.weight: copying a param with shape torch.Size([6832, 1280]) from checkpoint, the shape in current model is torch.Size([5120, 1280]).
size mismatch for blocks.31.mlp.fc1.bias: copying a param with shape torch.Size([6832]) from checkpoint, the shape in current model is torch.Size([5120]).
size mismatch for blocks.31.mlp.fc2.weight: copying a param with shape torch.Size([1280, 3416]) from checkpoint, the shape in current model is torch.Size([1280, 2560]).

Paige AI org

I was able to reproduce this error when using timm versions <= 0.9.10. Please try upgrading your timm installation to the latest version to fix this.
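For reference, a minimal sketch of the upgrade-and-verify flow. It assumes, based on the reply above, that any timm release newer than 0.9.10 loads the checkpoint cleanly; the version check and dummy forward pass are illustrative additions, not part of the original report.

# First upgrade timm, e.g.:  pip install -U timm
import timm
import torch
from timm.layers import SwiGLUPacked
from packaging import version

# Verify the installed timm is newer than the versions known to fail here.
assert version.parse(timm.__version__) > version.parse("0.9.10"), (
    f"timm {timm.__version__} is too old for the Virchow checkpoint; please upgrade."
)

# Same call as in the original report. With a recent timm, the LayerScale
# ("ls1"/"ls2") keys and the SwiGLU MLP shapes in the checkpoint load without
# the state_dict errors above. The checkpoint is gated on the Hub, so
# huggingface_hub.login() may be required beforehand.
model = timm.create_model(
    "hf-hub:paige-ai/Virchow",
    pretrained=True,
    mlp_layer=SwiGLUPacked,
    act_layer=torch.nn.SiLU,
)
model.eval()

# Quick smoke test on a dummy 224x224 image. forward_features returns the
# unpooled token sequence (class token + patch tokens); for this ViT-H/14
# model that should be roughly [1, 257, 1280].
with torch.inference_mode():
    tokens = model.forward_features(torch.zeros(1, 3, 224, 224))
print(tokens.shape)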

Thanks, it works.

adamcasson changed discussion status to closed