imi2's picture
Upload 67 files
9ebb37b verified
{
"metadata": {
"ParamSize": 305,
"ParamBytes": 1801420800.0,
"BitsPerParam": 3.6099566223450714
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 62447616,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
308,
50688
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 62447616,
"byteOffset": 0
}
],
"md5sum": "91c335fd9de370bd474fe8971d2a2fa7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "1a7a84a5093664b9a1c8ca35abbbfead"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 23430144,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
77,
50688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7805952,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7805952
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 7812096
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 19166208
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 20585472
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23424000
}
],
"md5sum": "c829ae187d928c8b48e39596009e7cce"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "c6c74c228c91ce6abe35c6f3f9f9da92"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "e74a4ca2026632c8f90e46dce2f8c8e9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "d9b4fdc07b2e81e600649e75bc2837cf"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "ee1d731a048076f7e502ca23847da72f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "247dc6d313e701bad9f5a40093ee5459"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "1dde16ca8f3e4a64e63d97e41bda3d8b"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "75d25bcb8b6c9de07e6640c765a19aa5"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "47adc9d0a94328236765c3b2552f6412"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "8d0ed414a2e18c71e8665036e4350a4e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "524da43ffad39581badfabeb37e86087"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "acd418c8917f2bf5157e56dbd7297045"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "a74cc899962bfb9d0eb026175fd370d4"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "8b385f6ceab9d746ec0e2099224f5fe0"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "dead80419776be7c48015b5364c00b68"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "1053a184e834b6bcb267a35c7a43bbad"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "7add06c05fa800ed49ac82f34e027f88"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "1a1b5808bc2f9f8171559100797b10cf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "371d4a3a6dceaa7fb204b77b1ab8f158"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "716e310a586505c425143d9d58db2a94"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 17037312
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 28391424
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 29810688
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "071e0b44a5fa53b5edfb101907f9633a"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 62447616,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
50688,
308
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 62447616,
"byteOffset": 0
}
],
"md5sum": "d14a46b199f1189aee934273737e3a81"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24849408,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 12773376
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 16558080
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17031168
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
50688,
77
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7805952,
"byteOffset": 17037312
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24843264
}
],
"md5sum": "736b0fa55c9e70eafd94a03940c5482b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "2b7f941b15567eeef2d9ede5c4031b0e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "e1e7c88d1832d47cfa5c8f5fd7109004"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "f6f59476f93ff2f928e7629fb584a01a"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "9af159944093e4a4297ecabea4531016"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "62881db654be8d0a954afd57668dd68c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "8860cbe57cf220746fd4c2e4b1165a7b"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "e0df0968c2aaf922b1ef9e933461a867"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "edf20728c8eaaff653d4c52b4cd4449e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "ae911ec6a863b853ed5aecf730ef34d5"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "0191d20dd0ea644bad41d1e7e66d4121"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "f9140b6203a517950f4ff46ed3f3f66c"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "0ed73bdaef16f64e931ad0cf0f7d2429"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "ce9f0f5d0fb128447109e9e6b1da2604"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "b9d98ea36dde592f1e9d3d6761b26341"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "43357afd67911005f83964e7c6503b2c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "b9d0a7e51b4d7b09435be5329915f081"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "d3c99dde42b549b9b082bfb5401ace53"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "42b4f795e3e7ac69e1d665e947ec5a7f"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "11eb8e84f58219b14a7784cdcd1e2ab0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "d41cd731a0b9de85cc503bd6a5ab91be"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "2a73c311fd24218090e3515a35b927ea"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "608b8981886465a32ddefc97339e3059"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "76fede07f99ecbc57752b3d9dceed75e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "6fbe20776fac4df5bad9bdcfb53f49e2"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "c0cff2f8bc0934840325164f3089cce4"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "55ba3949dc056e7dca9ed07decfb26ee"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "6eeeb756120f22ce577398706295b577"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "2dbf75c968192a58c56c4857032d1dcd"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "e6e383641f1c0e4854fbab2e2e9daea9"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "a1b2765eedc5f9cd9cb7df82b3aba1b8"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "a830f730ab5f6bd6c33a885623655769"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "ea20cc8d6cdee0d5c5c1e520e52871c1"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "d2f18d1c4860ad6074b9cf1a21a4c52d"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "5178372e80dcc99bddf5eea223683d39"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "01e6314cc10571b232370671faa108df"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 32655360,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32649216
}
],
"md5sum": "444e450aa5fd0e7dd9ddc73ec1b5d613"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22708224,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
308,
18432
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22708224,
"byteOffset": 0
}
],
"md5sum": "88b42f1cf5aa35e4d0d40ee86d295f70"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 32649216,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
924,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
231,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 11354112
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
77,
18432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2838528,
"byteOffset": 12773376
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15611904
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
308,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11354112,
"byteOffset": 15618048
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
77,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1419264,
"byteOffset": 26972160
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
308,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3784704,
"byteOffset": 28391424
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
77,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 473088,
"byteOffset": 32176128
}
],
"md5sum": "c42365757247247c92b3472728babb3e"
}
]
}