imi2's picture
Upload 152 files
f7e0619 verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 6889973760.0,
"BitsPerParam": 3.749252192749517
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
640,
131072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "8f05fc2acbe89db2caf5cf4dba415d8f"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
160,
131072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5da453c25b0002dfb13f0bdad6c00f82"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d921c8c0c793152e58ac7289035a44a9"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "75e8ca1f9ea76d044aafd35f031afdf7"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "58659ecd976d7c8b2c6ef7f35f569f8d"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cd301ff72eda8055ea1597fc8effeb5d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27566080,
"records": [
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 10240
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4597760
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13772800
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13783040
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 13793280
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 18380800
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 27555840
}
],
"md5sum": "8370f97d39eace87a18b342a1c87126f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "f67607f6020a904fc27131c377bf854b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "0a1e9a4db22631ba3f760136d7ce9120"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6d2fef8e3bc887d02265deda0df6c578"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "b5ebae6d59e9b1f7a572edd9bbe1745c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2b384254269edaaf239510c0966e0dae"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "310efbb1de1b2f3cd25775fa23846532"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "c28b7f0bf14f070ba6865f06c61762ba"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3c92449b192a54867cc7fda678dafc8c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "468f58e12fdd32f32df1a1520a8b4586"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "805be9b58f7b0a0ff952e6e80876c462"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "1b28565ef3d3c1073094fa487d19437c"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3daf41438feeb0a8c7377e8c1218dd01"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9051981cb1624bbf57f3711a235c473d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "59859965fe4ac7b225e8a55e64548cc9"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ed5313ec610fd1bf85cafeeb5509e064"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "4ceae60e35a07f39c1a2378767ab9c37"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1741edc11a8f2d113a9288fc74e4e1e0"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "ce9998214ef114847662703adf8cc6b5"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
131072,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "3c15656f0ded974b83ebcad68c1f4619"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
131072,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6c32d4392a8865376dd946da79a7224e"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "224915919956775480d2c46a4f9147fa"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3dcaf3983c5d32aa033575584981735d"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25589760,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11806720
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11816960
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16404480
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25579520
}
],
"md5sum": "7c846df238b24ef762d94c147070e0c4"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "af72a8bef719fcdde8977f90e1a85c46"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "0f5eb183b36ac816fc8fe40aecd765d4"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ad2309c2fe66f09374af98762413cb9c"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "4562420b92c30ed2c430c02af5381663"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "aa93f4e3fc822ee28ff8533cb1c703aa"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6dc4c4c3afda9d4631ab3ad6e7b8ef5d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "9956ec90c4dacf62650c07ac93574cd1"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2e9815f1a4a9f4d0c76db4fc9963d059"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "b7e04d71584948a76086a272bcf439aa"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "47e8c513ccf5d6ea290357cf6023f86d"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "3ed34e75d066ddad3b969ff7c678f071"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "b7ead68be377e2eb0f43173c370bb962"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "63db111aa404753e439911533293bd6f"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "7751740c7ff84d747e155697709cf1e2"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "1490f39d0be841026c562fe66820f791"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "13ca2bd1ae8ffe7cae2ada493fffde7a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "07be7dcdf053a35775ded056bb4e21ea"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "88f06392a9d0556ef37bab4e237d316b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1dde5fc316e8d2aac0c6e7a8eb012348"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "f97b7fa257da9cb25b994cab8828465b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d4d44d0b4859566b33e7e68387794106"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "98f8ec6be416905eadfdf55f9cea123c"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f99ff5ede3a27611e41960c80047e2b9"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "98d033ce70369bd4d26449b543f992e7"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "e361cafba99cefa64f17d55974ce9b3c"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "728346b8210f7e45b05695b1881dd89c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "4b68959d1e152515f6d32573ebcb4dbf"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2eae04fae4ee60650be1ce08886ec02b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "cda3fdc69876c13079a975fd83fab8e3"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b2f020e21dd91b5d3ee3e7f6ddb113e0"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "5f0bd90e15e7480e2157d69ca3c83046"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3c2d44db9cdc97582f80f4aa73ad5f32"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a38af0aeed4cc817f1ee823fb5430659"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "a938439b94de4a0f6fab831e4dc47533"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "152cd414daac80d9c0fb2159bbec0ad9"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "f91d0038374b6dff284fde01bb27aa1c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f190d2360ca52573632600c030ac58ca"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "22984fc89c8cfb934f7ddf475f76cc65"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "aab76e1045815019cffdd9158abc1eca"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "2ff0ddd27f26beb8575786df63edb769"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "4898d4a53374a51fce3bca716dbce669"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "6b4766ffee5ae0307bf1bc7a94479038"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "0b875e520f7a1d12313f35d60010b9e0"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "150f662cf03b418e7f5f4ecc2350dfb2"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "25c73e7cccb6b8bbe2ee146b80a5af84"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "273aee2b052d5f09167ee4f56814a43d"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "ef3531b15286cdbb1e1fec261b9b105f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "aa58e7a2b7ad23c317f38d76f5736e96"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "4fbf1c3d515716302dcb1ce500e25f78"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "27e66f6cbf03137687db7b8fe197dd24"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7746874e5842febb18db06d7581e9ea5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "8678da3606f42c24df04bd2fb5c2f66c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "28ef04b017624d35e8db01f2fda6dcd8"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "133f57449ee40e20d870b52c49fddf7b"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "4ed686378b82a9a42d6b242b5b2fce0a"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3c9bb36e383b6c2bed28dc0b70681f0f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "c2180dc9781448e1a5051c4adde03558"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "583f14213c6d5cbe99ae277acd55dbaf"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "b7e1d3fa6d060cce3535eb868bba2bc1"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4591f7a3dcdea5d3acb5e7757e447279"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "0c0d80a8e3ccaf9b7a7841848b0801d7"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "9df0db8205d3bdba681a2c3a982c3834"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d812e71e3e28b710e2aaf67c6128a037"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "afafb687ead58d02eca1282b935e7f1c"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d5f9ae1d417e7e5f288e9cf3223565dc"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "de1cb2bfd11c88f4e648bebc4b0c08d8"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1cb992da1f83f4e24c897b6a40dd9c9a"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "d63fc711698372ead5b7ff0f1a8991e2"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "c5d92fd35acdd0a6081978be77f61cbf"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1ee73e0d9a70124648588dfb20357823"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "55bb69abfa99115089dba90de7a38489"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "a873585a02e899238040edfd2a48d2aa"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "2a4c339d8dfda4ee30d52f9f269364a7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0b425d649deb9b64cac848b3388cb015"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "33ac4ead2af90d2618ff5d45ebb1a3ef"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "5f4fdbb10732778f0fa82d03cce8085c"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f22ba0d27eaf6bdafdcf9e6e9ab1b560"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "d7f4b7199d404f4349960380ae0f0613"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "7054c60f279c3308f85960abf580330b"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "fc0ba7312251dbdfffb527ee350fee7d"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f5a79d9811946555fe98767aca2bf92b"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "7f5456523cac86ad0473b4b29065b4f0"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4ace1c6cd77a395f79f138abfcd1ea55"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "7408eb9e882862eab6c93f6e1b3b5a69"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ff3efd391fea26e1fc787d226f9a7611"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "de6ece9fdfdc1bdafd55853795ae294f"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "c8e3ea8c91885b1ac9a8a9c261d27cd9"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "bcbea69873f1a70db4403960d7056a51"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "1ff7795ccf2a11681e01113067ecf64f"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "be19b0aa372c3ce7805358b438a7cc38"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "4abe8f46cbb7eb193ee2c9b5e5ea0e57"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2f2b2d106b3ec590efe91c1b0a0eaace"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "69a85d04a6d78bbe207e45a58158f4b9"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ddbce554d0842de9a9eb1f6d609b5bef"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "97acd73ea289ca67e76506d574c54b3b"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "bb71377feaf69059168f1d035d45d6e8"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "9930f0f601ade069b5a05af03c152a54"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "d38d78b006c0adb5d3212f17819028ba"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b64273147b5b1c5b8881b239697b94db"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "4ba23a051db87d0760080f0ee5b15c8b"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "fdcb4db06a124007804062883888f97b"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "18284766b098b96d08db844d3d8fcc0b"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "0d8ea7b83bb8a752dc0ecd5278e66db1"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "b5ae0cc4f242532b2b49f458086fcad8"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "cf0831196128103a8671fa043af59335"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4be086357d79dec53800ae39089f9dab"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "c8cf24902a08db080187cfd2afb48faa"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "32b1f5b14c9ae3cd21f51b7a295a7d12"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1661f0f8570d42c0dd95f1726a44a707"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "a1b6a86e4100ab7115ab6c1d286253b9"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
1792,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "7a1062b90052a74fc7ca1ba4e892238a"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "4ef6c096f5c485ec6bd41252679e1906"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
640,
28672
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ebefb1c79cfece7b895164bcb4f72617"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
448,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
160,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "100e6e66d805928e26768c74ddc860d1"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 29491200,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
640,
6144
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 11796480
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
160,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 27525120
}
],
"md5sum": "78e20398b51d8fce670c5f271a9a07c2"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 11796480,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
512,
5120
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
128,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
}
],
"md5sum": "6f9c65dc7ac98e87a1636890a33b2f0d"
}
]
}