imi2's picture
Upload 130 files
63be87e verified
{
"metadata": {
"ParamSize": 507,
"ParamBytes": 5199330304.0,
"BitsPerParam": 4.50075370326778
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 458752000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 458752000,
"byteOffset": 0
}
],
"md5sum": "cc8a6f835293616f2f87160d8ebd2505"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 57344000,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344000,
"byteOffset": 0
}
],
"md5sum": "bab39bf107dfa09d7a390d5c6decbca7"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "e3bbe96b7b09d6b3f5e1fd81b3fa9af0"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28908544,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 7168
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 25697280
}
],
"md5sum": "fa316646e4bbe9a56fd96adeaa569841"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "20cab0153f88f324eba05413fe06de0e"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.0.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.0.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "9cd601d539b8c59b6518d36d57050618"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f4827c728ff87a68b20ba382e4486f13"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.1.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.1.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "ec00dcc3f57feb14159da2bb24b1dd9f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "46628ef9b2c384b03b652b904a547212"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "094d8c75ce282bf5256200a994b308a9"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "53a99527bb88825d35122cdb7014a48c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.2.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.2.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "f2af44eca90cf93dea5366806e85ad9f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "4595ecbe015953e97c179d787c792c53"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.3.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.3.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "9c4f0df762d5f7568c9e2894419b339b"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "c3581d43dbccfa55e15a70318a20d6fa"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "08cf630f24702e0a4c3901990412560a"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "ca3761dc3c42664e2522d876f990907c"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.4.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.4.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "3a0d1144225f43a299309f754c108138"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "46f084f01b2d01b4bdca3a3031d31864"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.5.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.5.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "c5224dd68b4221646e26eb8281aae788"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "fc4af35d3c54b6d170e6dd7ddece7e08"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "bc7ba5578551ba24c04b6964707afb40"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "9d97a4bb6e72e6cd2deca1fa04ebcc2e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 31216640,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.6.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.6.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
}
],
"md5sum": "4f7674d35127af9eeee5e0b1f91565ee"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "702f6bfce20c42b7701f7107c36bc272"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 31202304,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6422528
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21102592
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22937600
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30277632
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31195136
}
],
"md5sum": "4dbe2cf515d982b15c66ec4479aae8d1"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "65f47245cf19603e8e87ce5834925898"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.10.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.10.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "5e1bc06cec1a3eb7c70d481f402f8a21"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "e239fc87dfc278081b0c0ed178776a4a"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "6569e04b53d8cfdddc078c7a8158be2c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "bbbeab1b449929abe6efcf9dc4c8ce39"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.11.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.11.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "5624c9fe129f2f2321e112aa34dbdb69"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "b085c2eeacd13457c2163bb5f2328589"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.12.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.12.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "7af2710fa9027f5c4f640a05c52c21f1"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f0190bbe5c0432f4add6bd23457c54d5"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "eae8ec12ddbd65f2c55d0d144f443b43"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "56cfd6fd8ebaeb3ddf0fbdf53ef47a7c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.13.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.13.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "a4f3743747d462972e8f2744610fd2fa"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "74773019b47424884fec1b5e9a6a3646"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.14.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.14.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "57f0e7011522f1ea75f8a16aaae7ae25"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "26c8a476994b2f46c581d25b9f63eca7"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "8dbf9b9be05b661135ce11e0087e9564"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "435cc845298220f1c2b993a7d1e29cef"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.15.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.15.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "c057f2142d4436d3ff7421bf8f95862c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "5df5301383fbd62a49a7338d10cb8de0"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.16.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.16.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "f2acc968ff3efd5353d8c3fac4f4c228"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "093fe73d619769c08e73c6bc83d95228"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "457dee9241dda19ade39559aebeaa829"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "8cc55456b91fec6c9432f8cfaaff1898"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.17.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.17.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "a0481a8bd942eb57f6140a1c3324c8f8"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d25240d6c5de477b493eca6aec36b9bd"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.18.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.18.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "eada4341335743de0884186ef2516088"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "e44dcdb6ff5090925d258ae467334ea6"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "b0c64d261c9f49f15ab1576be7d344b9"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 31216640,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.19.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.19.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
}
],
"md5sum": "5b2c28c6e98e1c1af6b49074b7a24402"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "7a3b1edac0996ba75fc75356763ffe98"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "a42c6fa9323aca1885569be19cf07f22"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "31e8c217fd31ba1fd9b3544387932de2"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 31230976,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 14680064
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 16515072
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 23855104
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24772608
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 24779776
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27991040
},
{
"name": "model.layers.7.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27998208
},
{
"name": "model.layers.7.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28005376
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28012544
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 28019712
}
],
"md5sum": "f2b89ab553811699310f56fa2d916eb7"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "5777b6c351c0f232b6a79dc80c335bec"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.8.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.8.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "e1340414a82711fa3d1ce8f9afb69cdb"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "0ad69ea375985f504a7374276bced446"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.9.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.9.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "bf5be5a4e92fd50614616049e6e193fb"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f0be6e74f9cbe3544d5a8ad9a93fa20f"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "5a3cf54e00c7fd97d4537fbbe3f6d328"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 32141312,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.20.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.20.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6444032
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 6451200
}
],
"md5sum": "ccb3c061517031714e6d9dac2403b718"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "cd21d5d0d1ab07f89b7b617852ab08cb"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.21.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.21.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "62210c57896f71d24b170b2ea55c4f71"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "9379bbe1366bc5904e31c88697716fb2"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "dc8f92b713a908b453ae27b005fe6ab6"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e8dba38005d48e226c543314a775b1aa"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.22.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.22.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "b2c725704cd7ed619f83bcfe23fda666"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f164753c7867058c559dce398b230693"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.23.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.23.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "44fca240639f13e7972a09bde73695ef"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "3060198fab8cc2cdc8f1730059b3fcba"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "ada3fba742629c1bc22b66d986989290"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "1bcc2561b8722e216194d5a9144f9805"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.24.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.24.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "e7f4b5542b45882ef9b16032f4eefa77"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "4bf45ca01920f6d21965e2ae6d77e852"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.25.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.25.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "b41bcbb384e9c81f9d3fd6727f190df7"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "739f2fef46628c055ce18789c8c58611"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "38bff31cec62a81959259a923c3321c8"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "6dd23557bf67226494dbd0097079bde4"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.26.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.26.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "f387347565fecdc0996807c45af12433"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "ba7e1a6904d3a9d83ac137f4f1fe8c34"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.27.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.27.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "ed4bbfd77f0a8a56e637462176898fc4"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "b6da14c05d5407beabdb7cad28397138"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "716e9a41fbe7259e1552dbb68b8609e2"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "2a1732066e6103b3c77a0b89b27e478b"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.28.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.28.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "1d69823a16953f7fe8d6691acad455cf"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "163d1ff8acbbd6db44af0b65979f3230"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.29.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.29.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "3835387dcf1c911ee6c4a042292b0549"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "45d95d9495facc852468eea0a2097180"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "36bd063eace1dc7034b8decc3adca18b"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "3c06964ff3c21fedb6134ba433531b07"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.30.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.30.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "e6bd9eb718b6942d1e13b0638294c45d"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "771d48ce5b57c29cba6a5de97c315ce8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.31.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.31.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "f8f9622174c19a0935ed93f63016144e"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "c25258a902f584e20fa395718deda0f8"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "305a7e8711f6ae1916954ffc37776f90"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 32119808,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 917504
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 7340032
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22020096
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23855104
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 31195136
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 32112640
}
],
"md5sum": "3b256f149dc5e8b6a55509339446616f"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d9a661b4897ec771e610bb2cf21aad0d"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32141312,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3211264
},
{
"name": "model.layers.32.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3218432
},
{
"name": "model.layers.32.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3225600
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3232768
},
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 3239936
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 28930048
}
],
"md5sum": "eb1896c26bffe58302a8985d66f73f3d"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "4ff67d456207b6c4ce84ddc506258341"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.33.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.33.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "1b253d86f577385660f77c8a43132866"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "0040806223d8c6d9fe90d46bdcb782b6"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.34.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.34.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "c056e0c1fbcc1ab45024b7e240dfe2d7"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d36cc29e107e6ee665535923c5387e6f"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "118fca5ac084b64aa87bbc1953ad9870"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "6a91108ac75918ac3b0bf32832e00e02"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.35.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.35.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "5e2e2ffad6b596df125aeb9193704553"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d0b0e47c7dbf311e8b1e11b588150105"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.36.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.36.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "1055f13b5afc20514d4500debdade9cb"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "4aeba853a35f13a51132f7c43cf2c893"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "6879499d7f50d90eef99580786d10622"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "d89741018f8a6fb6ffccfa058b4b7c24"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.37.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.37.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "aa040ca611085a760a242f6c1efa8c23"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "1a9389de05fc3d5cd2ec7ae9a634ca28"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.38.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.38.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "746ac527eb5ebbf829cf12b513583b66"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "616c75d2200059765f9c7ffc7402bccd"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "233b868ad834a0f5765e243789cf8a81"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "758bd86349c33d8728e11f64d18f1753"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.39.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.39.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "379b329d27edc99d123fbfc288df4384"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "7429f9ee1e1664d3a33f62fede7db226"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.40.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.40.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "a375dfcd7c8984cc08d803b87ed3b31b"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "7048ca3e74db38b6d667b078b2b163e6"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "e19b9b96d3b3d888f1bc4be38fec76d6"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.41.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.41.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "4e1a13c00eb22981f1ff755a6ec18308"
}
]
}