imi2's picture
Upload 96 files
f1ac10b verified
{
"metadata": {
"ParamSize": 305,
"ParamBytes": 2245834752.0,
"BitsPerParam": 4.500539815947002
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 77856768,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
50688,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 77856768,
"byteOffset": 0
}
],
"md5sum": "50eeae2d89f03fe1f400632cbf6d4c27"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "7d357d959cf970b2fd6ab5b866715814"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
50688,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9732096,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9732096
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 9738240
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 23894016
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 25663488
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 29202432
}
],
"md5sum": "80d9a3ddc0074df2ab7fd5e40486509e"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "c8d9ac5add61b1560776541fd96aa589"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "d0c2250c1b3bec9e955a6168c6ef8806"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "f669745d2cfb48da642d89b6f50c8f81"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "2e727b0e14604bd270872ae14509e56b"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "f5b654e75c21881c490d00a2b38b89b2"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "ce759f67031227afc47634a367cd5c5e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "5e4c41b8e24934096da22286a1550661"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "c50d74f1563d510e552571ea7e001f12"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "56234680860bfbc0b36fd44344979ef0"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "15e988365f9f3ca13b7421b49e8d6cc7"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "c6d543c2dc4420b534edab7a969e2589"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "f47424645c4efd16575020333019d3d9"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "c6b87d4d9767461091f58bb8eb7b9902"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "2dec7ee8a2d67ff8ae21686cefb5658b"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "ceba982570cb398d2652c23d3336c0df"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "52e1ea6f00b7206966a06427861f55f2"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "32b94f98f2990b789d1995a9ae62f4de"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "9437cac4561899aa0dfd83733b3d08de"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "e8d3adb081bd53635075a85708736390"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "8ea10983ed71553c7f5a14cc73324ad0"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "b350f0cbb9f82086ea66f20904fb19b8"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "8c93c2f7dc57c1d0eba3645a8a4f992a"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "49c1eb15d9905126ead758f16b018022"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "4122bec2cb8246f1550e0702939f1a79"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "b91079bc9c60e85838dd51f6ee8c7c6a"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "1c2187edcc44f9522a9dec9c2bd036b0"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "0409553602a585edf3317d1057f0b7ab"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "4ad146abf88fce69789b372d81721866"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "d8082b1e9d39d663334660888ed39bb9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "c0ef203edde43667909421555741653c"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 77856768,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
50688,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 77856768,
"byteOffset": 0
}
],
"md5sum": "a41df95ca01b20a7e19ab92ed0580c92"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30978048,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
50688,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9732096,
"byteOffset": 21239808
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30971904
}
],
"md5sum": "8fd4a779bad5be62d8a3dce2ac7fa888"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "3251493a763f020f445e5328fc175c1e"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "03c40a246dde3dd00550d6554b4e240f"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "007697a0365e6e413324b40f479b8472"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "1b8af6920865ad64626ec56736386256"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "c353197bd5e74e80eb75688f9347c39e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "7ad6abf1134620850d8144f1b18f9edf"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "2b6975a69fdfd73f6800c8f823511405"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "0eddeee2726bf5f48a1a722eff08ff67"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "670592b36955e8b6aca9d3ac7471078d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "b47a0893ead006542ad7836e8ce52324"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "5061f6c13e232a129d0524a87adaf411"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "4b10e00ef5ed2c4f9fba767b090f4d7f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "08927e86b714bfd71d7c06c95a47eaba"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "74c62eaafd87a52589974902fbff5536"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "35fbc46c93e6a4048ed7bd25df4ccd6f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "5e722631569e151c733d6e2905ac2fa1"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "e80910003a757cd549d81cf63d59f152"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "47e2604e4d59356de3f47de7b40c1dfc"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "6c4036e54e2460a73040e562c4063cf2"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "4573c3e9d3ea456cc5e77720130304fe"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "f5bbed717c71b4ed09eb2e55bbd46d33"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "3a39ff8285867fe404e27f7079f0ee89"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "f00b0349ac860ddd544691ae520e9c00"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "183827c2677b0468140ec84cb0037543"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "2ec18fa423f98be219080d6ae647befb"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "93d9d56dd685d5b0fd647d9f8f73df24"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "a3aee00ad71083e8dc6d5d635ee380ef"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "07b3f33a94d101d3b057bc2c60ae10e2"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "b539e568182fe5460398eb7b6551061d"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "a89979bfb686682ae917ec7520b945b8"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "438d818b42a385cbcf042578835be204"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "d99fa926de2511e306ab831f7f1bb8df"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "984b5f71b4d7b619760602266ebd0a76"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "8fe6455e16d14951b673b97a6b2919c9"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "591e83eb3bdd75618208f89ca72582cd"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "0d5f2367ee46808e9949c477388395fe"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "97160d3a598318db14e9e1e9e3e1960b"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "9ac868e58847661fdc297806b47f7cda"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "7f57edde7ea6964f67799083cad25b5e"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "bccea4684a1b531990da892d06b8bb29"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "0957327ffe74de83363c98f2db74dd74"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "7e066c8d866c2162c1ef36c4a99864f0"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "c0fedc095c1a51e26a284f7b699077f7"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "96b1d0667bcf03c6d95e332cb39133c6"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "67e063e95d4db2e86d1c163722aa6469"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "026e3e84d25cbd30ed2bab95983a3410"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "2a16644455b292684fc0caed599c9595"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "1d8007e2081c5606f29bd394d387ed4b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "a65a72a622690640655b412d1c4094ca"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "b912c77095a953f0110eaabc7ce59ee9"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "9262bd167045f1940b8d0e6bc4d831c0"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "63f9bdd569c833d0fd367a93cec40294"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "7c3dd126898f1876ab3769cd4ca66514"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "9125dfacab24c1f9c318336f099d947a"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 28311552,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
18432,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 28311552,
"byteOffset": 0
}
],
"md5sum": "dfed8d379107b6095776f746619b1f6c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 19470336,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
18432,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15925248
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
}
],
"md5sum": "f32ae56d3f50e0c212f4297bd0823b7e"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
}
],
"md5sum": "e7bb7bd955dea80dc705e0b32f5937af"
}
]
}