imi2's picture
Upload 150 files
21ee8d1 verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 6889973760.0,
"BitsPerParam": 3.749252192749517
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
640,
131072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "798a23e3a37b8412eee85de28aff4ea2"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
160,
131072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0ad993a9fb103a16becf55e34104e5ec"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ac6a3a15b8aa79468d289db13e7fc3cf"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "99d13d70b73acdc477533105c4ecc3b3"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "280264730026d09a93a42421c1160e65"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7145985332c545fa211ced561add1aa8"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27566080,
"records": [
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 10240
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4597760
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13772800
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13783040
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 13793280
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 18380800
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 27555840
}
],
"md5sum": "4982b9d5cc1d7a6e0fdfdea65e2841b0"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "daef3c83c73af7b59e2ec5a65fbdd9b7"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "f065ebd7a2d94dc3e09511539a4834b8"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c8ce057f088a18b61caf21c5119c6696"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "2b27997e296ed14dddde278f408c2fd5"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "decc62d084582f7d8293a71441c912d6"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "92177e6c5635a67af728879ac9ad2ee5"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "2b3c7597fe5f10207e58ff071c697f27"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "81f870463ce066dcac5c449f4989881e"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "0c0399f0f53d93dfe71e206adf490650"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c3de374cc2e49461dfe3858a35537d53"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "c4524d61b7c5b9fb1712f1d812796ce8"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "802471790f8990d2488fd1e67fdff389"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c4aa60d59a3e17a5556b04674eedf4d2"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "e7e63472339440642047e1a80bfd9395"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "5b1c8339014a23779ecb0f9b160dc867"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "19a9a8f95c111db15d4918693e705429"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8e566e8b41a8a1b07a064631580239ed"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "191991e132a95f2067309034818201ba"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
131072,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "40ad319ba2ec8d5bc15d68585d6456fb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
131072,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1c0a6b70578fa907bf211788c218c48b"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "b54c76d42ba0da92876ac5e5968b0211"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "700cae1908e26c3780bf7bf8a412d407"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25589760,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11806720
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11816960
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16404480
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25579520
}
],
"md5sum": "bae2de1cd36ba18ae0c99b968d038613"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "c5408db5d6679e745877e43e3cc90e99"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "77646894e7249542e5aeb89d90577656"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5dc7a4ee4a3e8224dc89024862e840de"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "ef8c74249bb95b2c8b0fe9f46c2065b2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "1f9dc875c2564a5970a250cd7e513288"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b183aeeec061b18cef944f21144ed0cf"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "f94e2e8f6fb90878d2dc381d037c398b"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "5b379b05d75f04471c3cccfd6a9b1eda"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "5b656630e863dfb391dbdcef44a29992"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "eef8498b7add02c5b71b642f7da18af0"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "16b15eb56fffc07a504d687451e3e46b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "c510e5afdbc5a1041520155a1e683a4a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "199c8eaa81ed460261e1a4133c4fb516"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "862b50730cd208684c0896808d592d69"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "9660c78f906ba41a179fdbc807639ae2"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "f24355739972b6fce630312a14abf55c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f25767395547a11867931a6a018461fa"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "8ddcaef5f71ff6acb59f89bcaf002e38"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "07b9ca292f93d040cfa66bf2aa34acc5"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "6b772cdc3a512ba7f6120b6796e9d503"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "98c0501c7012e81dc6a7d865287b3ad0"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "42a82354b57556438b2cb0f45fda36f4"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "18d939fa7b4be3397c464d0b9803feb5"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "79877a4324cd5caac86ba0577aec658c"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "43dd03ec45a9edf2de1e8fd32e150cfc"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0386b7fe98d1eeaa61c2439bd9afb018"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "6239cabd6c9b59496647dfa959910004"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3229c55531bef053873cb6626bea5232"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "2c8cffde0c58f8c05e187f33c98f20b5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7268aab5f48675c3976a68f83da1b163"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "3f65e0ef85ab8c7ad69f56ce36ec82c2"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "0095e07f5ad2fbcc81c6755f33e5329b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "49143156e16f5c625fc34570ac7e0ba2"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "7c381c9733eca3770f26b03b66be1e7e"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "fe36efbc5dea33f6ee7954cb8cd68ad5"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "d0f75625cbb2ce48775fb8fe4b47bb2c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "bc5cdb6f367c3c41dec44fc15e5b51f1"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "12b8d69ad8e564acb7f343629d899787"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "54ddc9c414ba67cf69ad496ecd49ebc9"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "bf212138fca6aac40e13b4eeecaaf566"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "34550c55a3ebcc0dfd7ea1b022f88ed8"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "ec955a0eae323acbef8366ed1831a977"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "e9ab16a2d58eeeb18f43ecd23f3a3cc3"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "325f4bbf77d8b4fe4ec0bc99d646576a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "3c7919d3a363bee18ea8f4fc697ce800"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "f470675f50189477efa69cfc3e8f58cd"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "01d292189b335a2b07377a2c677fd8d3"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1a648e0e27ef9b37c4129f44151be8f7"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "bee23057c9a3bc4890b3e83eb353aa33"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d6d80683625a45c5c6fcc523b1459eeb"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c15e8b6dec4762c47835ff1770431859"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "7d1d11bc32d740a41e5ae86c74ae442b"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "1f22ec55fcef80eda60956420a35b2c1"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "e8b498ec9f3471c524c61162521762b3"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "e80d13d75bf8e0049759bc7e4734df08"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "15d5845c03f9ed8abb8f505b8905be59"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "4f308a8a54f80eb9ef6d259368812efa"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "1fbdf75f975c3183a1e22ca2d6fa08b6"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "ccd08bf7146040746876289fc27e7e16"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d0a057e1e61e5b15440658da72e539f1"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "e705a50f12ff6cf4432df4f94d3fa367"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "b999c2136cdb89a9cf52b9e0def723af"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "311e2af89b35d5e7951c3a7e73eaaf51"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "f7448246ae5ec2704892ef07be390379"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "013a9f6d89bd1cee706df154a867471b"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "8f7ae98ab4dced4cfd6d3ad6e7ac5e8e"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b603dc89a47d2cdd61d4a43792f86a22"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "b33e4a72819cd23415a3d96ff6e143d5"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "63c87ad70c8042d4dd63d95270760c44"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "66597925d984a5a6f2d799baba7e5197"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "077bfe9ac2abada63bd0689e8bf496a7"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6313d4addc9633a7c99f00b5c8aaf450"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "c9f83884241d87847fa25301e37ceccb"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7fdab5239d0cc572cd4ffb82cb7ab252"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "f37377f0f29666b153b919f08cfda847"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "63411085cd273ca811e628ccf8969912"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7b9a47b8110fbdb7658b31c8e3a57b0b"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "5d7aee6c1946bff7173d19bcb476cb03"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "5c4e7bda54236a13706537a95b8868dc"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "bfe95cf46c835966fcf88f28219de4fd"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8bbc3e0bcb9bfc34065ad8d7f782a964"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "ad215fcbba7fdf894a46e5ac3a8c9e3c"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "50c32e926c85a0ce4729c8c26f262554"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "e3d10cce39438d876125e69c9e6cbf19"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "c19854a7ec2d9955abbe59ae829564e6"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "9a3ba5545117f4a2ca1a5ffddb032b39"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "878b30ef31c3741881dfc21c8323ffdc"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "eb7ed0ab8a7f8c7bbd86ec9172f80007"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "e477876f5b76ddaa2bef1521e90870de"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "0d5dabca9655f1cb1a891076b7aa5e38"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "97a4ac9556012f7f0509e94c05c84bfa"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "959a52a1020100885c986c9a5dcb6323"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "0e1df4f98a70b5f50285eca696d60f73"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2568406c64fbb8b80f0da4455b814996"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f836fe6bf03b97145d5aa018029ecd29"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "b360580587a2e8614a174d2270c6dd87"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6ee95f62208a50acfc8be58f94e1c7e9"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "46e8826a98029f033259bf55e5937b6f"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "31ddd4f1a5b792066d43684b1ba702db"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "d8e190b8704ab4b5bce7607d840eb675"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "a3ed41e31add549798cb69febd21f382"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ef87be9cd3235e1b7b3f4c7e287c9435"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "0810beb497cc30bbc9245e7820133f1b"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "4ab7826e5187b1ddb4035db01b6d2019"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "5aab6d51ee8ae66a88a94af544f5fc92"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "aa86a7e28e19a8d34329ab61831bdedd"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "ed7941698abcc25d352dd8d3041522c1"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "1e80f7ea0b3d20d3254405cc5422070b"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3b177fd8be1bdd6f64844619bec26048"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "7c7db746d5d9fe7934b9e8f84bd51867"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "e078f5204304b362f4bb5813d7e7857c"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "00265dd678857006ccc1869407a2796c"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e5963af040f6a1ade190b2d6a862b696"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "be40921013dca4846acfeff66c3125d4"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 29491200,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 11796480
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 27525120
}
],
"md5sum": "a8a4421ca19780904e24e79f537ebcfe"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 11796480,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
}
],
"md5sum": "ffa409bb82679c5785c655a0e1fe5cdf"
}
]
}