{ "metadata": { "ParamSize": 305, "ParamBytes": 2245834752.0, "BitsPerParam": 4.500539815947002 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "lm_head.q_weight", "shape": [ 50688, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "50eeae2d89f03fe1f400632cbf6d4c27" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "7d357d959cf970b2fd6ab5b866715814" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "lm_head.q_scale", "shape": [ 50688, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9732096, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9732096 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 9738240 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 23894016 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 25663488 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29202432 } ], "md5sum": "80d9a3ddc0074df2ab7fd5e40486509e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "c8d9ac5add61b1560776541fd96aa589" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "d0c2250c1b3bec9e955a6168c6ef8806" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "f669745d2cfb48da642d89b6f50c8f81" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "2e727b0e14604bd270872ae14509e56b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "f5b654e75c21881c490d00a2b38b89b2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "ce759f67031227afc47634a367cd5c5e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "5e4c41b8e24934096da22286a1550661" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "c50d74f1563d510e552571ea7e001f12" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "56234680860bfbc0b36fd44344979ef0" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "15e988365f9f3ca13b7421b49e8d6cc7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "c6d543c2dc4420b534edab7a969e2589" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "f47424645c4efd16575020333019d3d9" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "c6b87d4d9767461091f58bb8eb7b9902" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "2dec7ee8a2d67ff8ae21686cefb5658b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "ceba982570cb398d2652c23d3336c0df" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "52e1ea6f00b7206966a06427861f55f2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "32b94f98f2990b789d1995a9ae62f4de" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "9437cac4561899aa0dfd83733b3d08de" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "e8d3adb081bd53635075a85708736390" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "8ea10983ed71553c7f5a14cc73324ad0" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "b350f0cbb9f82086ea66f20904fb19b8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "8c93c2f7dc57c1d0eba3645a8a4f992a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "49c1eb15d9905126ead758f16b018022" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "4122bec2cb8246f1550e0702939f1a79" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "b91079bc9c60e85838dd51f6ee8c7c6a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "1c2187edcc44f9522a9dec9c2bd036b0" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "0409553602a585edf3317d1057f0b7ab" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "4ad146abf88fce69789b372d81721866" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "d8082b1e9d39d663334660888ed39bb9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "c0ef203edde43667909421555741653c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 50688, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "a41df95ca01b20a7e19ab92ed0580c92" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30978048, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 }, { "name": "model.embed_tokens.q_scale", "shape": [ 50688, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9732096, "byteOffset": 21239808 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30971904 } ], "md5sum": "8fd4a779bad5be62d8a3dce2ac7fa888" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "3251493a763f020f445e5328fc175c1e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "03c40a246dde3dd00550d6554b4e240f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "007697a0365e6e413324b40f479b8472" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "1b8af6920865ad64626ec56736386256" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "c353197bd5e74e80eb75688f9347c39e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "7ad6abf1134620850d8144f1b18f9edf" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "2b6975a69fdfd73f6800c8f823511405" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "0eddeee2726bf5f48a1a722eff08ff67" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "670592b36955e8b6aca9d3ac7471078d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "b47a0893ead006542ad7836e8ce52324" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "5061f6c13e232a129d0524a87adaf411" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "4b10e00ef5ed2c4f9fba767b090f4d7f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "08927e86b714bfd71d7c06c95a47eaba" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "74c62eaafd87a52589974902fbff5536" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "35fbc46c93e6a4048ed7bd25df4ccd6f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "5e722631569e151c733d6e2905ac2fa1" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "e80910003a757cd549d81cf63d59f152" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "47e2604e4d59356de3f47de7b40c1dfc" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "6c4036e54e2460a73040e562c4063cf2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "4573c3e9d3ea456cc5e77720130304fe" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "f5bbed717c71b4ed09eb2e55bbd46d33" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "3a39ff8285867fe404e27f7079f0ee89" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "f00b0349ac860ddd544691ae520e9c00" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "183827c2677b0468140ec84cb0037543" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "2ec18fa423f98be219080d6ae647befb" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "93d9d56dd685d5b0fd647d9f8f73df24" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "a3aee00ad71083e8dc6d5d635ee380ef" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "07b3f33a94d101d3b057bc2c60ae10e2" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "b539e568182fe5460398eb7b6551061d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "a89979bfb686682ae917ec7520b945b8" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "438d818b42a385cbcf042578835be204" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "d99fa926de2511e306ab831f7f1bb8df" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "984b5f71b4d7b619760602266ebd0a76" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "8fe6455e16d14951b673b97a6b2919c9" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "591e83eb3bdd75618208f89ca72582cd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "0d5f2367ee46808e9949c477388395fe" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "97160d3a598318db14e9e1e9e3e1960b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "9ac868e58847661fdc297806b47f7cda" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "7f57edde7ea6964f67799083cad25b5e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "bccea4684a1b531990da892d06b8bb29" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "0957327ffe74de83363c98f2db74dd74" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "7e066c8d866c2162c1ef36c4a99864f0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "c0fedc095c1a51e26a284f7b699077f7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "96b1d0667bcf03c6d95e332cb39133c6" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "67e063e95d4db2e86d1c163722aa6469" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "026e3e84d25cbd30ed2bab95983a3410" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "2a16644455b292684fc0caed599c9595" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "1d8007e2081c5606f29bd394d387ed4b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "a65a72a622690640655b412d1c4094ca" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "b912c77095a953f0110eaabc7ce59ee9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "9262bd167045f1940b8d0e6bc4d831c0" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "63f9bdd569c833d0fd367a93cec40294" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "7c3dd126898f1876ab3769cd4ca66514" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "9125dfacab24c1f9c318336f099d947a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 18432, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 28311552, "byteOffset": 0 } ], "md5sum": "dfed8d379107b6095776f746619b1f6c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 19470336, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 18432, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15925248 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 } ], "md5sum": "f32ae56d3f50e0c212f4297bd0823b7e" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 9216, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 } ], "md5sum": "e7bb7bd955dea80dc705e0b32f5937af" } ] }