Spaces:
Runtime error
Runtime error
File size: 4,683 Bytes
3b96cb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import mmengine
import numpy as np
import torch
def vit_jax_to_torch(jax_weights, num_layer=12):
torch_weights = dict()
# patch embedding
conv_filters = jax_weights['embedding/kernel']
conv_filters = conv_filters.permute(3, 2, 0, 1)
torch_weights['patch_embed.projection.weight'] = conv_filters
torch_weights['patch_embed.projection.bias'] = jax_weights[
'embedding/bias']
# pos embedding
torch_weights['pos_embed'] = jax_weights[
'Transformer/posembed_input/pos_embedding']
# cls token
torch_weights['cls_token'] = jax_weights['cls']
# head
torch_weights['ln1.weight'] = jax_weights['Transformer/encoder_norm/scale']
torch_weights['ln1.bias'] = jax_weights['Transformer/encoder_norm/bias']
# transformer blocks
for i in range(num_layer):
jax_block = f'Transformer/encoderblock_{i}'
torch_block = f'layers.{i}'
# attention norm
torch_weights[f'{torch_block}.ln1.weight'] = jax_weights[
f'{jax_block}/LayerNorm_0/scale']
torch_weights[f'{torch_block}.ln1.bias'] = jax_weights[
f'{jax_block}/LayerNorm_0/bias']
# attention
query_weight = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/query/kernel']
query_bias = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/query/bias']
key_weight = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/key/kernel']
key_bias = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/key/bias']
value_weight = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/value/kernel']
value_bias = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/value/bias']
qkv_weight = torch.from_numpy(
np.stack((query_weight, key_weight, value_weight), 1))
qkv_weight = torch.flatten(qkv_weight, start_dim=1)
qkv_bias = torch.from_numpy(
np.stack((query_bias, key_bias, value_bias), 0))
qkv_bias = torch.flatten(qkv_bias, start_dim=0)
torch_weights[f'{torch_block}.attn.attn.in_proj_weight'] = qkv_weight
torch_weights[f'{torch_block}.attn.attn.in_proj_bias'] = qkv_bias
to_out_weight = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/out/kernel']
to_out_weight = torch.flatten(to_out_weight, start_dim=0, end_dim=1)
torch_weights[
f'{torch_block}.attn.attn.out_proj.weight'] = to_out_weight
torch_weights[f'{torch_block}.attn.attn.out_proj.bias'] = jax_weights[
f'{jax_block}/MultiHeadDotProductAttention_1/out/bias']
# mlp norm
torch_weights[f'{torch_block}.ln2.weight'] = jax_weights[
f'{jax_block}/LayerNorm_2/scale']
torch_weights[f'{torch_block}.ln2.bias'] = jax_weights[
f'{jax_block}/LayerNorm_2/bias']
# mlp
torch_weights[f'{torch_block}.ffn.layers.0.0.weight'] = jax_weights[
f'{jax_block}/MlpBlock_3/Dense_0/kernel']
torch_weights[f'{torch_block}.ffn.layers.0.0.bias'] = jax_weights[
f'{jax_block}/MlpBlock_3/Dense_0/bias']
torch_weights[f'{torch_block}.ffn.layers.1.weight'] = jax_weights[
f'{jax_block}/MlpBlock_3/Dense_1/kernel']
torch_weights[f'{torch_block}.ffn.layers.1.bias'] = jax_weights[
f'{jax_block}/MlpBlock_3/Dense_1/bias']
# transpose weights
for k, v in torch_weights.items():
if 'weight' in k and 'patch_embed' not in k and 'ln' not in k:
v = v.permute(1, 0)
torch_weights[k] = v
return torch_weights
def main():
# stole refactoring code from Robin Strudel, thanks
parser = argparse.ArgumentParser(
description='Convert keys from jax official pretrained vit models to '
'MMSegmentation style.')
parser.add_argument('src', help='src model path or url')
# The dst path must be a full path of the new checkpoint.
parser.add_argument('dst', help='save path')
args = parser.parse_args()
jax_weights = np.load(args.src)
jax_weights_tensor = {}
for key in jax_weights.files:
value = torch.from_numpy(jax_weights[key])
jax_weights_tensor[key] = value
if 'L_16-i21k' in args.src:
num_layer = 24
else:
num_layer = 12
torch_weights = vit_jax_to_torch(jax_weights_tensor, num_layer)
mmengine.mkdir_or_exist(osp.dirname(args.dst))
torch.save(torch_weights, args.dst)
if __name__ == '__main__':
main()
|