{ "metadata": { "ParamSize": 709, "ParamBytes": 16895535104.0, "BitsPerParam": 3.1791405645822484 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 152064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "87af932a11da9ddbf25a81b6f46de56a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b5a7abc95647a505cc9e7c0805fd6b53" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a20094e89f2bb7b118cc34140b791189" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "161185e7bb1e4c1992c14135b24753a9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c391cd020afc25dcffa34846af955f02" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "09207a7499221e442bdf16731c75b62b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26044416, "records": [ { "name": "lm_head.q_scale", "shape": [ 40, 152064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 0 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 12165120 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14376960 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18800640 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 18810880 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 21022720 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25446400 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25456640 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 25470976 } ], "md5sum": "0e7f6c90f4d812bc59368d35f6c2153d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "ba0d270c5131dac68af970d1dd2f7e21" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "747ddfebcfc1b4ff1e0ec3cce96155d1" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "43aa7612f8bb2c7a33020eb5dce7fdda" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "35e3dc94ab6bfe1e00635a3926c25f7b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 32935936, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 40 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 13527040 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25692160 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 25702400 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 27914240 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32337920 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32348160 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 32362496 } ], "md5sum": "99d21746e193bb1352b20cdca681f65e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d1c1eae717e5d27aad1af203b1117028" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "149a859101ece83c9df2bfe6c4b0849b" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cf328b1ce4252e9ef8835b2d8f71326b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "bc5481f4ac83c3e0f5f809b4a01f101e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4d50f0fe4fae0026d6f58b57fe513703" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8ff4de8a05f7caa565394be6349bad37" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6c002d3b9d17ca72596fe563a2391d44" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32075776, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32065536 } ], "md5sum": "1e0b397fb24a4adebde832fd219f6ac9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 23371776, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4423680 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 4433920 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 4448256 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22798336 } ], "md5sum": "f63b839ea61dddad2885fbf42b460855" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e7f860dad6b75dc7316c642c54e859cf" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "800992a652a39a6d7ab83aad17118c17" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "76b7ab6d49b062ab47639901383b68d8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b3615018b67f97cb1159d7c1c6c9adf3" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "65d6d3dd894811ce9045954c31100ecb" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27396096, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13516800 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15728640 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20152320 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 20162560 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 22374400 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26798080 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26808320 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 26822656 } ], "md5sum": "26c0b89bc2473d15efd26b5f9d132b6c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0500ed10a2bf3cfa411a09f3f3a0e43e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "60f87be81d5a7f3f63f149ef11cbecb8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "808d91a0c459c0f9dc20f0355c70480b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "38192a0b18f62ee9002658194f6d0803" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c8461062bb7cd877b5b87ad80f2ac73" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b7a4309e10bf4a4ee432bd94972eeb6a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b87a1afbd5742826b5a0b734f1d5ddf8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "83bcc306cb3d649376a8b6eb0bab9dab" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a334d7cb4efbbadcdd555754bdcd8b4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "09a05661591a4004a42ad5e14703c9ba" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6dd7ae67df45dc9766881b74adbb35f6" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bbc425da87a22042887fadbc41038b10" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22972416, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13516800 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15728640 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15738880 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17950720 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22374400 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22384640 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22398976 } ], "md5sum": "7da1dc73bd4df0a8c762206a6a4905f6" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b54a9ad114900385811911aabaf077ce" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f5354a27c180a22d1216934019ee8dc8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2dfb7138d9c06ecb2cb721a83b0b30b7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "599a54a57f0076a36052d2f4d9de0d41" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a4707e132dffb26a20113ea78dd4f282" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "53130834f9628b0c9f4e309fffc775c0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c3513ccd015ad292feaa72bbed946622" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "f8caba6e27439853927a3f5537acab75" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bf4e520f55fc2b3e8c037723947f391a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2bc0d3b1bc74cea8f9e263ca8594abba" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3fa6f6d1d87bd00541b036b80c51b97f" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "dd8bb259d048b42a4032ddeb38f868fe" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fac8acf6943ceb2619bbd72d6be08c0b" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "43d1a0716180ea53e2b7bfd0f776fd83" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "6234fea28d3e7f6ab7bf33e130be7aff" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "99e1085cadda42215e69609d56b8c8fd" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "763e5a7894847a0b6470f1309c524a18" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1738af24aa1a551ae04f10e778316268" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6d01ce4a702b322efe2e50351943db1e" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5360336b19195d92d55563aab5ff48aa" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e23cf3f12e6019ec61cdc763021dcdf5" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "30f660641c7d14467e2f4f233825b062" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "152c86d20e3d0f7fe6c7df41722a7ef9" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "68ee7fde787f77bfb400898b78c28447" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5ca10e2afe972970e8ef510679f8f72a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6cb0c5b462d581fe57efd2e3846c16e0" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "12030ead7c34e45823d05107cbd09baf" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "777c6b47dae1aca1ce6f4b4b34a94709" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "94cb75efbc8a5940460f1c9696d3e595" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "103db507feb9c29b553ddb9cd59a0d6e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0089d729f5b0c268d93c548af3ab0c95" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a7ee0ee3ca69fc9e65ca1059312f11be" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2afbbe2c8c4862a47a5e1441ecc20d97" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "e1195d370644a063cbd03eb222c29764" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a5ac5ce4900b66338a76d04b644f352b" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "703db33e6ddc9811c67a051cdc1a5ed9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4d6a88a73a15b6fcae511aa54cc4168a" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "f2a21e3cbe7330e0048e5e0554b8b9fa" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "55e3b2e3505f9cdb5a338b9d7bebbdb9" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7a713eb58320dde140e91d75911d7789" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "170a7e750651b0189ccd68670bcfd8c7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "a7c4ed5f0022c32a04cd3a8dea6ab8b6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c64c61d777f48aedc44a1052ae4f6356" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ad625e9bf1dfc084cb415aa334f27788" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "1b4809c0c69382bdb5281d44e9deb0c9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cb9cbd08583b4d0bd9cf945f2b15993c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7e9788ecd3784fd77c6c610e59476260" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4a4d7af922c273c19cd5f0293d2f36f7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "312942f81d8d49c68f26d8db37dd64fd" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b3b47f005e4bcb5dd2c26cd4df3a5b8e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ba383e998ed7638c775a8083eedc329d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2e0612bf9bbca76079d5022140a89cd0" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "741499b55efa3a0ae79a1704ce296de6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32475136, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13527040 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13537280 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13551616 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31901696 } ], "md5sum": "89bdb5d4b4065a179ec297f7328149f0" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bbb8daaa0cfdd24bb2671cd41c070fdb" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "af61274179f53b3b811bf25fa9868966" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "42dd91a67dedbe99b910a5e009426d90" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "733d2788b9283250248b622f65c97a13" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 22972416, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13516800 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15728640 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15738880 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17950720 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22374400 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22384640 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22398976 } ], "md5sum": "169731de59301f164f90fd0b4a0eb495" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "557fa1f7ebada57efb69551026449f6e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cc042e27c30784219c3df0315422595d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "94d56e9fc3baaa0a11c9c95e16de462f" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6e758bcf7c1364ea7d0d461fe32c2efa" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0c79e9ff2d26e827fc94ee2e8b531166" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f387bb61356ea90c2522b58320969bb2" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "239c51f69d64b2f1f51e55ddd7908356" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "abae92e1c6cffc3ccdf3cef7311b6b72" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "df8d539a95fcbdd6ec7adaea9f0e3644" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6487a6cd3d8d1e6e325e91ce48940538" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e321d1da63eb09cd13d2bcad3d5c3601" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "6e3ebbeab1e783a91cd7e66881e3f600" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a4c7329b1995c07b4954308ac6d87ebb" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "76cb0fdf29cbbb9eae5ac2bc42221afe" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "a9c13d5e2615b02522f09105a67493b6" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2d359db9d935171acd76542e9aa57f81" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2f231199ac61c36932e914d050df4c73" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ee037600d475a788d127c184c299c3ef" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b7fc9b5a977a01a55184a5a10ffd6676" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3c07c8a9b168c2dcb032a43400cb836e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6bdf0ddb995d96c91a063624ea0d1640" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "844fda2927cb6f6d8a9007e50e71be1e" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "ba8120ecec6658ff3797648c98b9dcdf" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e55d752df11eb99a76c5fef5701682bb" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5b40f54076efe9b10b79c7c0eb28e92f" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ff5a385bf6d573dc03a0f2629d1f341e" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "ced9673f727d0b5915dfe03d49c45b47" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a5f90a3bf18ce239ec641c2c6e8f74e9" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d41966676c5b259b006c4a636f41454c" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "5f1732740fc0eec65b68240d208a1ed1" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5e7ad36e861be14e84be34776b1a1527" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0ba4e88fc0f7cc598599472a80961fa0" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f2545e8cf589b997fb906ce5d1335a0e" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "2f8f523216a8c96ea53d5fd62d690349" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b104d140e93cfe341989cb448c08bad1" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1c14f097c4284383236972fa6dc85df4" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5d46e284579e4469377a728515c6ee3c" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "8e2ae84f264bb77a2cc4dd2ef5e2db75" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "aca6c9ca55ff9ca06ff63815e77352da" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b1abb82ac1dbf72c4375ea3116f8f310" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d80c8b74484dc4cf2e7160c33cb869d3" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "18e97c4635a208434fe0b156811367bc" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "91c5ad357bbbc51fe5f6dce20f6b62a9" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "345991251a1762f1fa13aea8fc6793f6" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "363cf527d3081d94a28781e62a9d9ad6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "28d5150611a5b754220243fa90aa9d99" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ebf5f5a18f390967a6977d4533255bce" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "35f36f322bc2b18bd8bc5ff76a2171e6" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "75df06daa9998cf8c81b4d975a4c8ea5" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b746effac1c5167e4acdca690c2ef90d" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "53da26c9ee60ce7b3bc437e12d55946b" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d32eadcaf6452a38bca7362d8b7e83d8" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "9fe52e82472dff5bb36e433a268a5936" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "caa06476edc387df7ebf18dce57ebb32" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "075da29c21317c6cc2b583b9b225973e" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "66b2ec224f6f3d2acff4e60395703d6a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "314f2c10e46b4ca65168924bdfa5546f" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "369e988be930599077b3fcafb00bc631" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a3c2a2ab0bd9624a980a446dfb3de868" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "b65241919e32c5075896fcbe5c057e85" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5e418daf7f1f9ec244ff1dcbd6c47181" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0b5cd99baaa6ba89b8136691dbfb508f" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2c7213bdc70771c659b0db044960ee9b" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "c9fa19ef51c9ab4ec73931ebcb5d3319" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d5711d480bcf3b20cad89cecc8a29fad" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "97da612d0127417974c89021b516ffb0" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8d112414e30917bee2109b0b78abf082" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "7bc7f8c4189a226fe440ec65793cff1a" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "43354bf5d5b0115954da94b10cb69cec" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cd85380cf926148870c1df3059ba6962" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "50741e6c69e37fd79b9e06767bdd86e6" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "40a6e7489c146454225498eec6f6f54d" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c3842ddcd5a7e78768d4fa78a3f54005" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f748c4080cbf82386dad3224a0c8ba07" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "86ffb0dbbfa84dd978ca5db3ea17df96" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "406a193029f65d2f449b8458566a9702" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6220e11db2054f0de634c1b93cc69c38" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "27929bc9fac02883157e71431712d9c8" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "3747902449b049d51104b5295ebec664" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "605496941ad38cee49d67bb51e01bc49" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "51d975bafc4ed42c8d5814ac4dc089c3" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "64d2433384aaae2a76ca64ec5f3261ab" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "d12c3bcc496ff9560b01e74ce36f95d0" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a276567ccd3126eee73b579fbae6e886" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6274fb5d09e654896de3952db38f8aef" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "169099a81dc0910c26e81d963d82b83e" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "e56176f3bd4f1b6fe4b801162e7646fc" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "274cacb4823db0be4b04b26dc2ada05c" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "750e02e8a31614ac0761ecfba8df5318" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "57c848f18f25a254d080bd81d2567469" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "23840cf732b139e597c4f33016ca075b" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "23e3a11c87615a021a4714ee9f1a7d44" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b833d61630283b839be3f0ae17d8f53b" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "cc9932e3737c5e9776a91590fd10feea" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c26371254c242d2595715657ae8a08d9" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "333ae717234567dc7ee31e69d792be08" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2e7c188907c1f5f5b2752f3a119422b8" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "19316c321491413c66a6254497ab9824" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ae206331d553848e6825fe460f757934" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "72dd8ef65f7f9ee962dad056938e1907" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cb99b6f8a48903b8596b3a534049275b" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "03a538a411341e37f540f12a3aba8cf3" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "db988375b9e258e1a8bef2f5993c3ab6" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "55cf023f7895bfd1fbb2505f95258ce3" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "8bd4cbc2e8892f9fea2dbfb9cdedb4e4" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e83881f4ad8e29f25b99631404fce0b7" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "661ab1c64146daaa956d9d72d21cd447" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2aa99c9704d9579411623c73c7a46a36" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "14bf82fd3d31a7e61b692dccb7158dcf" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2930f6efad451813162bbe957688eb8a" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5a2648c6c2717dd7cb2f85c88a9fb1df" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cb33dded21f9937ae71c8b71a9adf145" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "382ced539cde5f539aa48038f1b06f07" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2852e79194f86eb89bb97e10b499cce4" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fc2d20b1272276cc396fcfde5539f11a" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0d921ea4c4a5943a6655876d9b67a724" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "8ef31aeaaf4e4a01ad9226c7cdafe108" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e11647a288b51eb49691deb8b11552d0" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "42f92f87157d9650b7085d9b3890b2b7" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "43d678b0ad5e26a2002dc17b13c94b55" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4b78f0f6b02586f89e436d7806739a2e" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "deba369e2283f2604cf255b373f925ac" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b003a924a6770050e24fe92a751efef2" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "958efbdb06af08e3d5fb0762575b8bef" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3c4da9f1b5f506d8bf98f4ff1a92a1c7" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "19ba27a201555a9db0d7516bc59c4d55" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ae70d04d8a0bdd205c3e1cf8e7490bbd" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "c45397a298212aac95d342890fd869be" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "aee1d408325f4efd0606ffdb8479476f" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "595406e1e71fa4de87f8f8c2a908571e" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9d2b5e47268d20d6d28ce24cf0c6ae18" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 32065536, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13527040 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17950720 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17960960 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17975296 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18548736 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31655936 } ], "md5sum": "dd18e6c389c0c348833840f7a84d3a71" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d94cf5a3f4a521919de44ee4e87a25f3" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b1aad3cec25b627707a0f28da0893e19" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 27805696, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2222080 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4433920 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27232256 } ], "md5sum": "d7265bb9056b2786dfb6011622794cff" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0e5b4c9cd1b06f243d192a37af948eec" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5bac96aa10f0b047ee38bba4cf949251" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f59c0f4c95f924ad999b292d2a3af5e0" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "59ce4b2b4756832e78ea60ad83db57ff" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "daccb59fb07dbdc05c71347c05f8db28" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f30e509978d3cbf94cf86a0130dc8efc" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "10f033206aabfab5fe9df5d0be832ae5" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "2c507f125342fbe959c3874a1acada30" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 32475136, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13527040 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13537280 }, { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13551616 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31901696 } ], "md5sum": "ab5c8a9ec882e544df30d7a32210245f" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 13516800, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 } ], "md5sum": "d0ab8c4a0cf6204d52a65542f887e59c" } ] }