|
{ |
|
"metadata": { |
|
"ParamSize": 405, |
|
"ParamBytes": 6889973760.0, |
|
"BitsPerParam": 3.749252192749517 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 335544320, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
640, |
|
131072 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 335544320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f05fc2acbe89db2caf5cf4dba415d8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
160, |
|
131072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5da453c25b0002dfb13f0bdad6c00f82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d921c8c0c793152e58ac7289035a44a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75e8ca1f9ea76d044aafd35f031afdf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58659ecd976d7c8b2c6ef7f35f569f8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd301ff72eda8055ea1597fc8effeb5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27566080, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 10240 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4597760 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13783040 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 13793280 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 18380800 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 27555840 |
|
} |
|
], |
|
"md5sum": "8370f97d39eace87a18b342a1c87126f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f67607f6020a904fc27131c377bf854b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "0a1e9a4db22631ba3f760136d7ce9120" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d2fef8e3bc887d02265deda0df6c578" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "b5ebae6d59e9b1f7a572edd9bbe1745c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b384254269edaaf239510c0966e0dae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "310efbb1de1b2f3cd25775fa23846532" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "c28b7f0bf14f070ba6865f06c61762ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c92449b192a54867cc7fda678dafc8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "468f58e12fdd32f32df1a1520a8b4586" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "805be9b58f7b0a0ff952e6e80876c462" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "1b28565ef3d3c1073094fa487d19437c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3daf41438feeb0a8c7377e8c1218dd01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9051981cb1624bbf57f3711a235c473d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "59859965fe4ac7b225e8a55e64548cc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed5313ec610fd1bf85cafeeb5509e064" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "4ceae60e35a07f39c1a2378767ab9c37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1741edc11a8f2d113a9288fc74e4e1e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "ce9998214ef114847662703adf8cc6b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 335544320, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
131072, |
|
640 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 335544320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c15656f0ded974b83ebcad68c1f4619" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
131072, |
|
160 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c32d4392a8865376dd946da79a7224e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "224915919956775480d2c46a4f9147fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3dcaf3983c5d32aa033575584981735d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25589760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11816960 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16404480 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25579520 |
|
} |
|
], |
|
"md5sum": "7c846df238b24ef762d94c147070e0c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af72a8bef719fcdde8977f90e1a85c46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "0f5eb183b36ac816fc8fe40aecd765d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad2309c2fe66f09374af98762413cb9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "4562420b92c30ed2c430c02af5381663" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa93f4e3fc822ee28ff8533cb1c703aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6dc4c4c3afda9d4631ab3ad6e7b8ef5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "9956ec90c4dacf62650c07ac93574cd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e9815f1a4a9f4d0c76db4fc9963d059" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "b7e04d71584948a76086a272bcf439aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47e8c513ccf5d6ea290357cf6023f86d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "3ed34e75d066ddad3b969ff7c678f071" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7ead68be377e2eb0f43173c370bb962" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63db111aa404753e439911533293bd6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "7751740c7ff84d747e155697709cf1e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1490f39d0be841026c562fe66820f791" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "13ca2bd1ae8ffe7cae2ada493fffde7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07be7dcdf053a35775ded056bb4e21ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "88f06392a9d0556ef37bab4e237d316b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1dde5fc316e8d2aac0c6e7a8eb012348" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20971520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 11796480 |
|
} |
|
], |
|
"md5sum": "f97b7fa257da9cb25b994cab8828465b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4d44d0b4859566b33e7e68387794106" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "98f8ec6be416905eadfdf55f9cea123c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f99ff5ede3a27611e41960c80047e2b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "98d033ce70369bd4d26449b543f992e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e361cafba99cefa64f17d55974ce9b3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "728346b8210f7e45b05695b1881dd89c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "4b68959d1e152515f6d32573ebcb4dbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2eae04fae4ee60650be1ce08886ec02b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "cda3fdc69876c13079a975fd83fab8e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2f020e21dd91b5d3ee3e7f6ddb113e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "5f0bd90e15e7480e2157d69ca3c83046" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c2d44db9cdc97582f80f4aa73ad5f32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a38af0aeed4cc817f1ee823fb5430659" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "a938439b94de4a0f6fab831e4dc47533" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "152cd414daac80d9c0fb2159bbec0ad9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "f91d0038374b6dff284fde01bb27aa1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f190d2360ca52573632600c030ac58ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "22984fc89c8cfb934f7ddf475f76cc65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aab76e1045815019cffdd9158abc1eca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20971520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 11796480 |
|
} |
|
], |
|
"md5sum": "2ff0ddd27f26beb8575786df63edb769" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4898d4a53374a51fce3bca716dbce669" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "6b4766ffee5ae0307bf1bc7a94479038" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b875e520f7a1d12313f35d60010b9e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "150f662cf03b418e7f5f4ecc2350dfb2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18380800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4597760 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 4608000 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 9195520 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 18370560 |
|
} |
|
], |
|
"md5sum": "25c73e7cccb6b8bbe2ee146b80a5af84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "273aee2b052d5f09167ee4f56814a43d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "ef3531b15286cdbb1e1fec261b9b105f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa58e7a2b7ad23c317f38d76f5736e96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "4fbf1c3d515716302dcb1ce500e25f78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27e66f6cbf03137687db7b8fe197dd24" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7746874e5842febb18db06d7581e9ea5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "8678da3606f42c24df04bd2fb5c2f66c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28ef04b017624d35e8db01f2fda6dcd8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "133f57449ee40e20d870b52c49fddf7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ed686378b82a9a42d6b242b5b2fce0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c9bb36e383b6c2bed28dc0b70681f0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18380800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4597760 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 4608000 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 9195520 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 18370560 |
|
} |
|
], |
|
"md5sum": "c2180dc9781448e1a5051c4adde03558" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "583f14213c6d5cbe99ae277acd55dbaf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "b7e1d3fa6d060cce3535eb868bba2bc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4591f7a3dcdea5d3acb5e7757e447279" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "0c0d80a8e3ccaf9b7a7841848b0801d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9df0db8205d3bdba681a2c3a982c3834" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d812e71e3e28b710e2aaf67c6128a037" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "afafb687ead58d02eca1282b935e7f1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5f9ae1d417e7e5f288e9cf3223565dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "de1cb2bfd11c88f4e648bebc4b0c08d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1cb992da1f83f4e24c897b6a40dd9c9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "d63fc711698372ead5b7ff0f1a8991e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5d92fd35acdd0a6081978be77f61cbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ee73e0d9a70124648588dfb20357823" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "55bb69abfa99115089dba90de7a38489" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a873585a02e899238040edfd2a48d2aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "2a4c339d8dfda4ee30d52f9f269364a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b425d649deb9b64cac848b3388cb015" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "33ac4ead2af90d2618ff5d45ebb1a3ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f4fdbb10732778f0fa82d03cce8085c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f22ba0d27eaf6bdafdcf9e6e9ab1b560" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "d7f4b7199d404f4349960380ae0f0613" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7054c60f279c3308f85960abf580330b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "fc0ba7312251dbdfffb527ee350fee7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5a79d9811946555fe98767aca2bf92b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "7f5456523cac86ad0473b4b29065b4f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ace1c6cd77a395f79f138abfcd1ea55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20971520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 11796480 |
|
} |
|
], |
|
"md5sum": "7408eb9e882862eab6c93f6e1b3b5a69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff3efd391fea26e1fc787d226f9a7611" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "de6ece9fdfdc1bdafd55853795ae294f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8e3ea8c91885b1ac9a8a9c261d27cd9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcbea69873f1a70db4403960d7056a51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18380800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 4597760 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 4608000 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 9195520 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 18370560 |
|
} |
|
], |
|
"md5sum": "1ff7795ccf2a11681e01113067ecf64f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be19b0aa372c3ce7805358b438a7cc38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "4abe8f46cbb7eb193ee2c9b5e5ea0e57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f2b2d106b3ec590efe91c1b0a0eaace" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "69a85d04a6d78bbe207e45a58158f4b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ddbce554d0842de9a9eb1f6d609b5bef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97acd73ea289ca67e76506d574c54b3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "bb71377feaf69059168f1d035d45d6e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9930f0f601ade069b5a05af03c152a54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "d38d78b006c0adb5d3212f17819028ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b64273147b5b1c5b8881b239697b94db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "4ba23a051db87d0760080f0ee5b15c8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fdcb4db06a124007804062883888f97b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18284766b098b96d08db844d3d8fcc0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "0d8ea7b83bb8a752dc0ecd5278e66db1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5ae0cc4f242532b2b49f458086fcad8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "cf0831196128103a8671fa043af59335" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4be086357d79dec53800ae39089f9dab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "c8cf24902a08db080187cfd2afb48faa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32b1f5b14c9ae3cd21f51b7a295a7d12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1661f0f8570d42c0dd95f1726a44a707" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25579520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 11806720 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 16394240 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 25569280 |
|
} |
|
], |
|
"md5sum": "a1b6a86e4100ab7115ab6c1d286253b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36700160, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1792, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36700160, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a1062b90052a74fc7ca1ba4e892238a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29501440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 15728640 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 17694720 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 28180480 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 29491200 |
|
} |
|
], |
|
"md5sum": "4ef6c096f5c485ec6bd41252679e1906" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
640, |
|
28672 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebefb1c79cfece7b895164bcb4f72617" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31467520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
448, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4587520, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
160, |
|
28672 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9175040, |
|
"byteOffset": 4587520 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 13762560 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 13772800 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 29501440 |
|
} |
|
], |
|
"md5sum": "100e6e66d805928e26768c74ddc860d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29491200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
640, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15728640, |
|
"byteOffset": 11796480 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
160, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1966080, |
|
"byteOffset": 27525120 |
|
} |
|
], |
|
"md5sum": "78e20398b51d8fce670c5f271a9a07c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 11796480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
512, |
|
5120 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
128, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1310720, |
|
"byteOffset": 10485760 |
|
} |
|
], |
|
"md5sum": "6f9c65dc7ac98e87a1636890a33b2f0d" |
|
} |
|
] |
|
} |