{ "metadata": { "ParamSize": 399, "ParamBytes": 1736187904.0, "BitsPerParam": 4.500900580433049 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 155582464, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 155582464, "byteOffset": 0 } ], "md5sum": "a8f329f6ab0b7bc4d3b142d0f990cc24" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "11621cd4b1fbd1a6bf28fa42866098cb" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32133120, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19447808, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19447808 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 19451904 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 30724096 } ], "md5sum": "0ed8548cb381dec97a27922669579e92" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "18a0910b4e572b42c772684643d8879a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "f9a226978881ae7ba6458975dd83628e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b1a02fe0851610a0a181efcae641d1a0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c6b9efc12099b53746e9a3fb22109c90" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "43f810349fc34e73d49773a43b2691be" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c1c0edd7bae6cc9801a19f813f0a875b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "06f69167e277ec320c3cafef5d258a94" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6c326b56d9e0aa2c1f94569d9807e8da" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "eb1ee7869d5aa07d2ef427eec7cffadf" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "9d72412a3a00300ce4ac2c543fbe912f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c2c410aae1e01fab011410aaaa5b58d2" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "1a2b98459c3177cd2ab18b5c0242e975" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "50fe624cc1f74eb46e2af5fb3e07c673" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "dd0d4af95ba1a50bcbb25647bd4f0e1a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "902f9bdef24591ae5daa11e674598470" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a11e2c9b321ec0a43d1f931023611e23" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "465f73217a90b4e70de377d28356174f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "dc3dee7770afcc741de2e14a6485c176" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1e91ae347581b376d1b8fcc57efe907e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "82b4c0c1091a6c10cfa47711e4f5427c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b450ad4ffa92212379187d3f9e2979f1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "dbc992d9c8dfb6cba3984e6366c9ae3f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b2dda29561a436e0592f25875061a161" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "66713c63029ed84e4ec8e0c95328da29" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "1f8f2bc6a5d72b697b96e3fe37228aa9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "167a25d104ac1458c2ef9334960c6ba8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "9add86ee9dfe64dc40de8fce51f064b8" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f424a91514764c06c68a25352b066fd3" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4e39cf97d8e41898a81e8a199f2d2e1a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "7808e88cb02e47ecc11f1e09a351535b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "bdba9d8a7afaf56c4eb382cf61af5a38" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "3f00c5ae827a58dad7f9f03cc02cf6f6" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1d67ce67de7114263a6c2e2a4fd62039" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 26134528, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 20816896 }, { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 20822016 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 23443456 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23771136 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 25868288 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26130432 } ], "md5sum": "3f8f45f5c7b83369a9406fdc79562eb6" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "78dd38a3d37a287219193ba99154668e" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f8a54c6535ad0394d0bfb6e151326edc" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "ad03a6be31392805b80a0fd827d3768a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "785188cbd5bb969efec41aba675e9d5a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "666f1060544a4c7cea014814f48ea68a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9c3b0db39639f529038d532ca1316cb7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7934666397ac8d94ffebadb5f7632627" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "ef1d4e22613b54f5dc90f1e89f4865d8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "194e6dd743ad70d3a9a2bf1c256dc28f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "6e513d4f8419dfebdc1b4e3267205d18" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "13fe31e9bca0aa05a16d9c61f0dec445" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "367c00236a9be720b9611a57f905843c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "218fdf077b563197fed1678435747e56" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2c696bd5d3ab7950c93b4b1b35de2e38" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 23647232, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2822144 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 2826240 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 14098432 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 15507456 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18325504 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18329600 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 18334720 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 20956160 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 21283840 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23380992 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23643136 } ], "md5sum": "794fb0e0419d071443db01f83e0e9714" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "eb94aa834a9edb7f06012d4e0fc8fae6" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6132d9ee48ab43d7e2452d8464a2ce06" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "b9ee4bf3806a0c7e71d5709ba7e5168e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a2885512bdc11ddd19b5261935637ab0" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "915e7ef089baf02eb9a9a4869a9f230c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "db12a66f16f53adee8d65bebeb2151ea" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3d3adf979b06fd3ed7d865e86ac103c9" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33502208, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 11272192 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12681216 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15499264 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15503360 }, { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 15508480 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 18129920 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18457600 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20554752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20816896 }, { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20820992 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 32093184 } ], "md5sum": "f96376d1ea3379487fc101fbf7d15681" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 22016, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "127cd186bea292847616b183bc4fba64" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 28960768, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2818048 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 2822144 }, { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2827264 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5448704 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 5776384 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7873536 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8135680 }, { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 8139776 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 2048, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 19411968 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 22016, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 20820992 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23639040 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23643136 }, { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23648256 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26269696 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26597376 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28694528 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28956672 } ], "md5sum": "24f2656df90947a5a65c9fe612cc79dc" } ] }