Qwen2.5-14B-Instruct-q4f32_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
d766643 verified
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 9234108416.0,
"BitsPerParam": 5.001536828453907
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "718b17ab89995acae054282c19765133"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "323bf7391cc68a49f27fdd5d9eef22d0"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a94d103570dab2f9b057fd15eb82bf10"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "9dba084c48a55f21ea9615c7a46761e0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "75bca7695c154208d8571ec9d60f7d3b"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a45a8a94fbd4a1ba16fd1dd4f870460f"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0842458e253cdb7eec380ecb49cca379"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "57ff5d2fc7f9c2405659fa9bf4ff5cc3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33140736,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4423680
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4433920
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 4444160
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 8867840
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17715200
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 17725440
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 17739776
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 20033536
}
],
"md5sum": "069cd5b16478f27168c9ed5dcd574454"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9869dc1f08de28c8c1e55e0489cebc8b"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6b43522337478a31c77a76f17a7d79db"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33294336,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1638400
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 1648640
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 6072320
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14919680
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14929920
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14944256
}
],
"md5sum": "18f439382b1ee118f2ac6e8fc86c2e80"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "aaeb24b0ef10379da938e1d3f8581fd2"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ed855b9b006ebf3e82b080346e2bb2af"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "915052f8cae808457e60e06ecf45534b"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "7baaa43095a3c4d2413f8feb0f015b04"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "71d48dc956bb1ec7165150a6f32914fd"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5f7725a73dc024b4637287eed3772899"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "00ae263634b7eef6d6346f5ce362e9c3"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "c21b7f72c8db473c82a717b57fbf0aa2"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33130496,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14755840
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14766080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14780416
}
],
"md5sum": "460746b1c9ddda5d8b2c51992dc3c385"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "35f82d6221b93d44b6b24743b2c225ff"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "369164c1ce1e156ac82b2b5ea3e6361b"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "52c111b04aa07a0217070d64bd318290"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "391159aa84bfe9f85c895fa97fece8e2"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "075ca11fda2ace9a9c299e5e30f7fabb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8a077479e77a01c5033390404e833c6a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "abc55fa83de4ff5ec10ba32e22e4dd9f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "acead9c7abd89198186f00af06227726"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f6754d29ee782fcd299a887ffc081180"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2120635450b796fbeeda269572f8983c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c7cfbe372304ec6e8920eefb971c12cf"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19169280
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28026880
}
],
"md5sum": "ace530886709db34627cdb5ae73ba99d"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29515776,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 8857600
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 8871936
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 27222016
}
],
"md5sum": "5894f32993f061365c858e46564363ff"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d503f595d6a606de4e414d2dfa297196"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0cb12c672d9eacfa3baeaed177b71174"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "313b077f332ccd948ae84a8e30705eef"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "4172591c4c70ef65b290e20d503eb609"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a1269ea050e1f3af760508017958e2f5"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "39fb6be79ec8efb22a916a5f4b8b5430"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "38c548fcaa9609c6a2c5dbea9aab8911"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "e15b1c2ddf20f3b9356db0dddef1b9e8"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c2a0f127fb642bb3738c5db66d2e7b6d"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "103ee49177cb76817ca4f5c27479f985"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1ae377a334d3b6338a351c41d107a5d7"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "cfcfb44b6a38d3988958230cbe919b80"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "55c410146ec8e595220ecde24dc88328"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "915b6b0cf93970ed063b2ffba068d5b1"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "88cd11a5ebcea37e4cbf97d0ce41ef87"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "20065c36afa76f524d4b2387ce65e6ae"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2ebdc7b7e80cb32481672bcdc8628922"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4fb6c79d14eaf574fa31bb09587614dc"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f62cb7101c6fae216e9753d4053b781c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "335b4495f15e4e407d0ccb7a3908e7cc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "51a57d4c2c4b21c66e170093297d8c17"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "efdd56cb83117ff2d88af5b716ac3a01"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "476e1bd4d476a84cbee1049283bc6afa"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "dab34a31811c0bf3e256b238404ede30"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "1fa851d7e376ca4a37c6bc18bfd26ebf"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4edad89860deebfdfd6f6efb38d1d704"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "049a8c23b2c65086cc2a1ace5d95b26a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5adf43c3c3873b38047a1b354f7b3e9b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a6ad79debf390825ee1a9b99d9b7901c"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1369a2fc9ce6dc76af0954a80cf27150"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3f2b651fa088465db78e58e01ac336a5"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f4bacf27fa2dacc0829547bf37f3d441"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "817d5283cddcb962ca2da640ad0c99a6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "39b55910a11873519c8161c7471cf9da"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6a6b6d5aa198eac338c506f1c8db7cde"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b4be2cd67a702e2ee05f76a54dbb4a9b"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8e0fe4c8db60f134597eed0e48f313d6"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8934a8cdf0d56372d874b58e4b211953"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b9f954044613f10262e8272977dcfb93"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "27831f8192df5efff9bcdfabe44c7986"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "4e73146c0ae5ad7db266cfd450e1263b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "03915fd37e7c559eb8713ba543516261"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d1090d682cdf81e42f644f146317bd06"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "37b9184311164c8e669d22508113fa76"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b5746b1637def17a737abcd63b352c73"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "77865080587584144a5f91e2c1171ccf"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e4e9a00eab9b30be06fc043546bcc8de"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "fff39629763fa2ba88692f40a0dbfab8"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "93197bc1af6cd4236b784fe5210caff7"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "416396660331e9abef66507d3410dbb4"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "79dbe7522db44af6682fc8c491bfbf39"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "81cfff7579b9e7e1c10456076cfeec35"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "9314570b4550f2b9ae901e1076c2957f"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "16ecab1003706a43fd065e7203e25659"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ef099bacf09e7430ead7ee53c29d695c"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "184a7caaf30554b0bdd43e4e45eaa218"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "f61fcb8f19ea80987c53e07bc8321226"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "293769f22e954de1c06e3f29fd5d9c85"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2c6a7564bc89e79610b25a5e6581178d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c6428193295c50aa837b50552c052e0a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6408e88d3872169041c69688d89cc221"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b697d887312ca39cf83be12e89884804"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bf60b46fa8e3cab8fe1071ff477b1b7a"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "99c683f4b54b91bf566bf54477c52e01"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "b6a8f1da42c8c17015417c9875e98095"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "562479934acf784ff6ae7c778e2a5b0f"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3ca7311f7e6c0112ba4e63b0fdc7a57f"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "9ab020f7a4ffdd08cab27052d6b9030d"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "fd8cc6f8719734584317368e8a822bf8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f37141b92ee14e780e7e653325094499"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "523a09a6b57d426d77853dcc8d0d604d"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ae9af29d14c15f2f74bb36c75edf7ad4"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a03012d87f7266865dc5e6e8daed09e4"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9881372b9c73d4e9d41dcca72fce4d35"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "eda357db40f8c4fb392bd22b756d46b6"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a5a9da3d5240f8b22eddedc67823abab"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "c99dc2e95b6d14acdc12883eae1b7079"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2c523ed56b84313826ac17d6890d8e5d"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "987b092df789070eb1d95b5dbe21dc58"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "bff51da716ffe156fbe6e32b81ea8781"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8e4a60798a3fe8b2a0cf74cb1769de5d"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "66ddbb0830abb192d19538910f26f617"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4aece74c1f78cab4f9f9553b1cd3576e"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ff67cfbe32eea7ae77942499d2d2c9b0"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a740893c94622680bf11a9b32114eadf"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c891de351c9782b2e88598efd2340f37"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a9bc0c53385f083bc82d5a8b572fd65f"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3d494b092eadefc7a5361c14f64d9dcb"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "b80fa4ef0c2763ea81b50f36162f9dbb"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "919e483a9add85d180b7d5adac38e810"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8038c98a14289430ac329f1836dc61dd"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d35d55651ae252b2adcbb0276c5884d1"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8c0cbef8b286b6eafabcca28d39d8713"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f8c202b417645fd220f7d1497fcf05dd"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0a80c3f33c078bf03772f55da2ee4624"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "9f2d9385ec14236dcd423137e7d72c82"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "cca9fc9054d556afe7f51d76f85f907b"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "84c4bede34fd35db551f7cfffd501ca7"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "39366777f5968abcf673f7a4018ed007"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "009bf020f5840bc4eca7c9ab6de57f9d"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "cf4f6515d5ca15cf4b15854dd25e2d92"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 33130496,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14755840
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 14766080
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14780416
}
],
"md5sum": "1d448e5044447721761df1ffbe3c4643"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6a3d1c4855d6ec08132960b56dedbae9"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "949d047600921cb3d3c094f171701e20"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "59b36026d71b31b597529272caccad61"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 30320640,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17039360
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21463040
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30310400
}
],
"md5sum": "01d0f7a949de924b9790cbfaf353d021"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7b1a6db8f1d89b664066d91226abdad7"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 31645696,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 13281280
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 13295616
}
],
"md5sum": "588b8f242eb276652edad1b58dce981d"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "75ecd0b48e816bf33295da455a52f00d"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "47eba4c57c1c084eeb045a33744d339f"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "84a8a4345b826e32d9ae8a83e7514f2a"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "205df9426a65d7fdeb6f85fa45776cd1"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "82ded4f3ccb4e6d5f36f5c083da750d2"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2b1029285e309dfc2c7ca378578b164f"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3f7bec756c4f9ac1b3bfeb07db7d8fe6"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "5431428b9cb110dc2b51a68aa425c63a"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3dda26ab36cdc119dfc84d2d2ba77bba"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f2bcf86a33fa862e99c660571e0933d0"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "6fa3a05f347ced336495cbba974e0544"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "46108ef48121b80ac9d9f3c3a8ffa9d7"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4f3e27ecd33ff18b1e9817a5ae0f9a14"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2ae79c6d31239af078356dd7c24262e5"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "bf49f282db3da04e29ceda0f8cac6074"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1a9966501ec25e8f4e021952fbe70a65"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "30c7c125fe1b97bfc2f80cbeeb2a0036"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c396bc5cd6a80a8b8fbcb20ee5b656af"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ebcacd7f3bc3358e58302bb6d183858c"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "968dfc1e04160049650718d9e28a84fd"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "433f95ab0fc67f32308a76c61878e9f7"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "a015e12d0e4c688eac76abe2600bf619"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "9213b05276df8bd335244e1bf0d3bf60"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c83f676856094b1cfbc1e3912e214c33"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "22c529812c40fe2210068cc992ff2888"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e14410965108c90e843cc058f8fb2a22"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "995cb2ec795235139229d9c1ac1f07f7"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 32475136,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14745600
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19169280
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 19179520
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 23603200
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32450560
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 32460800
}
],
"md5sum": "35c309e5830ac7d81b8607f7218883a1"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "689449f01880f6c9f1f85396cdea2c2e"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5f28ba09beb57d0727bfb55b1031df9e"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b8ec0e49c3afdea77fd05b7d42aa2615"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 32638976,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 0
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2293760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15400960
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17039360
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 17049600
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 21473280
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30320640
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 30330880
},
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 30345216
}
],
"md5sum": "c2d9e5daa2c4b71b723f0fda0b93bbc3"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6728a8818bfb5085a9031689fae0b2be"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2cb0ca634092d1d8bece5df5ec1f5d02"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "65d140a89d070c3e7b6697ff6e81322b"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ce5a6c8160691cbe07484f900c101e55"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "557a24c541bb1c75e6dcfa50108b5ac6"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "da90c84c017b4d9598405207d1a42d68"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "5a9cf503069976ead783e019ffa50ad5"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1b10bf5599a3af4c9af6fec68c2289f6"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "916bc70ae2205034897e4eb6994da501"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e3951d8866a11028e03bb866791da0c1"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3ca62695a28461dc085c00a75b6f5b9f"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "850ad1ef088f64a54e6919dc918bdb4d"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1c1c5faf61e32b8e0b7fb1ca671b9031"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "feedb50cc03aafe7301fd1e513ab0a49"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3d048bba3d6b60d68719887b6838728f"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ec11d9552e584d6bea3739a1ce613f9d"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "011b4a54e197caa8bd5a8b91b19677ca"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d90618aa2eea18a36a28caabc6b57ae1"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 25921536,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14755840
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23603200
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 23613440
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 23627776
}
],
"md5sum": "5ae3d9baf9368a8111674342830b3aab"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 14745600,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
}
],
"md5sum": "adee9458c64fcc1b4fdc1398f0c30055"
}
]
}