stablelm-zephyr-3b-q4f16_1-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
552f2a1 verified
raw
history blame
162 kB
{
"metadata": {
"ParamSize": 390,
"ParamBytes": 1572915200.0,
"BitsPerParam": 4.501369085231279
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 64389120,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
50304,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 64389120,
"byteOffset": 0
}
],
"md5sum": "33bb513b28f36ebb6c9352edf514e607"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 64389120,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
50304,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 64389120,
"byteOffset": 0
}
],
"md5sum": "4c6c33b611b88517efad9ca0b0c38e10"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "92cf54de400a4e0bbb31141cafbc2226"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28282880,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
50304,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8048640,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
50304,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8048640,
"byteOffset": 8048640
},
{
"name": "model.layers.0.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 16097280
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 16102400
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 16107520
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 24954880
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 26060800
},
{
"name": "model.layers.0.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 28272640
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 28277760
}
],
"md5sum": "5bcca722f877257454d7af65489506a5"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "bba45ba957851bfeea1b0776d1324392"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.1.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.1.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "56c70c8ef90b94ce8b9f4cca52130ac3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "0cfcdcba9657928fab48cfd882b93dfd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.10.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.10.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "7273a1f1f9582b275e75e24808f7361a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "4931463e5f784c461e4f59e63a4aee2c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.11.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.11.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "5bc677a59aacc8552f8f35c3bcd02fe9"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "99acef9e7f788aa450afa6dd81a91ce5"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.12.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.12.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "44c41ed8e6b4b83ddee55003cf9e0114"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "d51e6fca6ff6cf3427f5bc65131004d4"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.13.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.13.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "8ff6902fcca489e878edfe289075cd40"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "1cf12630da5a449e124491e2ba583927"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.14.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.14.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "ebdccdef2fcc4b66a19e4de6bbe1ba2c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "46a46dfa568f54180eb34ab92b3b8e53"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.15.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.15.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "c216b9a10b71cd3216d15d975d45541a"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "fb9f33b76a27b0c42cbbbcd8bf64bea2"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.16.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.16.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "e9e5c9757bdaf2b9138780e8902f51d3"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "de1bb327af6fcf9febf1eda8c7d7b959"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.17.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.17.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "d532e6af81ebcf82877a8cbdf6496e82"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "1fd755cc97ad0b885c3afac6e4cdcc26"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.18.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.18.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "deb384ae3635b4cba177e80ed78ebf1c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "5b987afcc196c12dbd647beea2319158"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.19.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.19.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "e9bc895d566d236c5eb14cd03e4bcbe7"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "0b941962d2692fb3933ca78d6d8c99b3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.2.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.2.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "cc00c351e78f57dd5cd6aaf8161a26a0"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "92d2613b3ca45897ad589fd9e78c0c6c"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.20.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.20.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "2bd0671a3e6ff0ced48ded95c10838da"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "24a8cb1f40e995b68c11abcf481e54d7"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.21.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.21.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "7115779964bd09697a9716b2b8ff159f"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "5c6733f9724f3fccc5a14846d7102fec"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.22.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.22.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "b9bc007e9f3fcf2d0d6ae2b002ff43b6"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "d800838d4a53c3b910c62d2715bde687"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.23.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.23.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "452ebe06ec5b577b81a820b5ca4c248d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "382ab9423010e3a4f755249d6237690e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.24.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.24.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "71971bda7d8d1f7b96b2b3d477bfd4f3"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "f34ab2dfec684f3ecf8953fae6f6557f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.25.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.25.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "a25663a81acb8911289d7f92f75c5e7b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "c015cd1ff73f7de72aa2b55988b7ea3b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.26.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.26.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "7c766ae6ba486451e80b5e394f741b35"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "282ceccdcfabf9ff53b2d8885d2c2d07"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.27.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.27.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "472a5d5f8c4c0c145c8c8ed570fbc16c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "4cc3bd4cfee9d1b30a4dd3612e7d7e49"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.28.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.28.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "bc091d5859c2569e14676153dd819caa"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "697f7667dacd874d63be96baf77eb915"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.29.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.29.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "857cdd01c1420567e9ea5f74c3d25b44"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "3b190d8f2179d9ce0c931cc28b7e1c64"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.3.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.3.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "9f304494424994c92fb500044f3538d3"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "81413d32bfa84670dcff7da4012ea78f"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.30.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.30.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "2ece0a5d7949574d1554d1f82242f9a2"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "c623c66c1d6d752fdef8fd3fa4492d07"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.31.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.31.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "44c0b0ca47fa2b735f73fed07ee464b2"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "9d8c42e04066ced497e9eaa6f88718fb"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.4.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.4.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "654a7520c445991c7ada77398fc227c5"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "aa9a6eda73762ad531b256e8637b75ae"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.5.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.5.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "4f69964b0e5e14a98c9b4ec795dfb6bf"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "1d2d72e76c3d037c169a34dd2b231b09"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.6.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.6.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "12cad3b1fb76cac16072846768767f66"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "4c8cbe42728a47514f9ce4a3b3bce476"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.7.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.7.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "c16515be9ab69668a01072dd283ac634"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "62bee20bf1f9fc7f3910c8d8d48ec7b8"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.8.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.8.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "55c32ffbed6edd675625f5646120e94b"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "a20bf1433300232971c943cac71ea494"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 26931200,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.9.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23603200
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24709120
},
{
"name": "model.layers.9.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26920960
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26926080
}
],
"md5sum": "644f39d26f65816bb7f7d277e55c1f4c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 14755840,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.norm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14750720
}
],
"md5sum": "340ab1623dce958bcb319ebe41c5f3af"
}
]
}