CharlieFRuan's picture
Upload folder using huggingface_hub
2a01724 verified
{
"metadata": {
"ParamSize": 399,
"ParamBytes": 1736187904.0,
"BitsPerParam": 4.500900580433049
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 155582464,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 155582464,
"byteOffset": 0
}
],
"md5sum": "a8f329f6ab0b7bc4d3b142d0f990cc24"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "11621cd4b1fbd1a6bf28fa42866098cb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32133120,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 19447808,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19447808
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 19451904
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 30724096
}
],
"md5sum": "0ed8548cb381dec97a27922669579e92"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "18a0910b4e572b42c772684643d8879a"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "f9a226978881ae7ba6458975dd83628e"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b1a02fe0851610a0a181efcae641d1a0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c6b9efc12099b53746e9a3fb22109c90"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "43f810349fc34e73d49773a43b2691be"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c1c0edd7bae6cc9801a19f813f0a875b"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "06f69167e277ec320c3cafef5d258a94"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6c326b56d9e0aa2c1f94569d9807e8da"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "eb1ee7869d5aa07d2ef427eec7cffadf"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "9d72412a3a00300ce4ac2c543fbe912f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c2c410aae1e01fab011410aaaa5b58d2"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "1a2b98459c3177cd2ab18b5c0242e975"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "50fe624cc1f74eb46e2af5fb3e07c673"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "dd0d4af95ba1a50bcbb25647bd4f0e1a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "902f9bdef24591ae5daa11e674598470"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a11e2c9b321ec0a43d1f931023611e23"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "465f73217a90b4e70de377d28356174f"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "dc3dee7770afcc741de2e14a6485c176"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1e91ae347581b376d1b8fcc57efe907e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "82b4c0c1091a6c10cfa47711e4f5427c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b450ad4ffa92212379187d3f9e2979f1"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "dbc992d9c8dfb6cba3984e6366c9ae3f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b2dda29561a436e0592f25875061a161"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "66713c63029ed84e4ec8e0c95328da29"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "1f8f2bc6a5d72b697b96e3fe37228aa9"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "167a25d104ac1458c2ef9334960c6ba8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "9add86ee9dfe64dc40de8fce51f064b8"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f424a91514764c06c68a25352b066fd3"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4e39cf97d8e41898a81e8a199f2d2e1a"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "7808e88cb02e47ecc11f1e09a351535b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "bdba9d8a7afaf56c4eb382cf61af5a38"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "3f00c5ae827a58dad7f9f03cc02cf6f6"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1d67ce67de7114263a6c2e2a4fd62039"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 26134528,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 20816896
},
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 20822016
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 23443456
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23771136
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 25868288
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26130432
}
],
"md5sum": "3f8f45f5c7b83369a9406fdc79562eb6"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "78dd38a3d37a287219193ba99154668e"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f8a54c6535ad0394d0bfb6e151326edc"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "ad03a6be31392805b80a0fd827d3768a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "785188cbd5bb969efec41aba675e9d5a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "666f1060544a4c7cea014814f48ea68a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9c3b0db39639f529038d532ca1316cb7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7934666397ac8d94ffebadb5f7632627"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "ef1d4e22613b54f5dc90f1e89f4865d8"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "194e6dd743ad70d3a9a2bf1c256dc28f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "6e513d4f8419dfebdc1b4e3267205d18"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "13fe31e9bca0aa05a16d9c61f0dec445"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "367c00236a9be720b9611a57f905843c"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "218fdf077b563197fed1678435747e56"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2c696bd5d3ab7950c93b4b1b35de2e38"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 23647232,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2822144
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 2826240
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 14098432
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 15507456
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18325504
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18329600
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 18334720
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 20956160
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 21283840
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 23380992
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23643136
}
],
"md5sum": "794fb0e0419d071443db01f83e0e9714"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "eb94aa834a9edb7f06012d4e0fc8fae6"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6132d9ee48ab43d7e2452d8464a2ce06"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "b9ee4bf3806a0c7e71d5709ba7e5168e"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a2885512bdc11ddd19b5261935637ab0"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "915e7ef089baf02eb9a9a4869a9f230c"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "db12a66f16f53adee8d65bebeb2151ea"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3d3adf979b06fd3ed7d865e86ac103c9"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "f96376d1ea3379487fc101fbf7d15681"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "127cd186bea292847616b183bc4fba64"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "24f2656df90947a5a65c9fe612cc79dc"
}
]
}