{ "metadata": { "ParamSize": 305, "ParamBytes": 1801420800.0, "BitsPerParam": 3.6099566223450714 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 62447616, "records": [ { "name": "lm_head.q_weight", "shape": [ 308, 50688 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 62447616, "byteOffset": 0 } ], "md5sum": "91c335fd9de370bd474fe8971d2a2fa7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "1a7a84a5093664b9a1c8ca35abbbfead" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23430144, "records": [ { "name": "lm_head.q_scale", "shape": [ 77, 50688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7805952, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7805952 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 7812096 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 19166208 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 20585472 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23424000 } ], "md5sum": "c829ae187d928c8b48e39596009e7cce" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "c6c74c228c91ce6abe35c6f3f9f9da92" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "e74a4ca2026632c8f90e46dce2f8c8e9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "d9b4fdc07b2e81e600649e75bc2837cf" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "ee1d731a048076f7e502ca23847da72f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "247dc6d313e701bad9f5a40093ee5459" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "1dde16ca8f3e4a64e63d97e41bda3d8b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "75d25bcb8b6c9de07e6640c765a19aa5" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "47adc9d0a94328236765c3b2552f6412" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "8d0ed414a2e18c71e8665036e4350a4e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "524da43ffad39581badfabeb37e86087" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "acd418c8917f2bf5157e56dbd7297045" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "a74cc899962bfb9d0eb026175fd370d4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "8b385f6ceab9d746ec0e2099224f5fe0" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "dead80419776be7c48015b5364c00b68" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "1053a184e834b6bcb267a35c7a43bbad" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "7add06c05fa800ed49ac82f34e027f88" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "1a1b5808bc2f9f8171559100797b10cf" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "371d4a3a6dceaa7fb204b77b1ab8f158" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "716e310a586505c425143d9d58db2a94" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 17037312 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 28391424 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 29810688 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "071e0b44a5fa53b5edfb101907f9633a" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 62447616, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 50688, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 62447616, "byteOffset": 0 } ], "md5sum": "d14a46b199f1189aee934273737e3a81" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24849408, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 12773376 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 16558080 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.embed_tokens.q_scale", "shape": [ 50688, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7805952, "byteOffset": 17037312 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24843264 } ], "md5sum": "736b0fa55c9e70eafd94a03940c5482b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "2b7f941b15567eeef2d9ede5c4031b0e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "e1e7c88d1832d47cfa5c8f5fd7109004" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "f6f59476f93ff2f928e7629fb584a01a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "9af159944093e4a4297ecabea4531016" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "62881db654be8d0a954afd57668dd68c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "8860cbe57cf220746fd4c2e4b1165a7b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "e0df0968c2aaf922b1ef9e933461a867" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "edf20728c8eaaff653d4c52b4cd4449e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "ae911ec6a863b853ed5aecf730ef34d5" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "0191d20dd0ea644bad41d1e7e66d4121" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "f9140b6203a517950f4ff46ed3f3f66c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "0ed73bdaef16f64e931ad0cf0f7d2429" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "ce9f0f5d0fb128447109e9e6b1da2604" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "b9d98ea36dde592f1e9d3d6761b26341" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "43357afd67911005f83964e7c6503b2c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "b9d0a7e51b4d7b09435be5329915f081" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "d3c99dde42b549b9b082bfb5401ace53" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "42b4f795e3e7ac69e1d665e947ec5a7f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "11eb8e84f58219b14a7784cdcd1e2ab0" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "d41cd731a0b9de85cc503bd6a5ab91be" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "2a73c311fd24218090e3515a35b927ea" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "608b8981886465a32ddefc97339e3059" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "76fede07f99ecbc57752b3d9dceed75e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "6fbe20776fac4df5bad9bdcfb53f49e2" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "c0cff2f8bc0934840325164f3089cce4" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "55ba3949dc056e7dca9ed07decfb26ee" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "6eeeb756120f22ce577398706295b577" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "2dbf75c968192a58c56c4857032d1dcd" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "e6e383641f1c0e4854fbab2e2e9daea9" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "a1b2765eedc5f9cd9cb7df82b3aba1b8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "a830f730ab5f6bd6c33a885623655769" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "ea20cc8d6cdee0d5c5c1e520e52871c1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "d2f18d1c4860ad6074b9cf1a21a4c52d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "5178372e80dcc99bddf5eea223683d39" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "01e6314cc10571b232370671faa108df" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 32655360, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32649216 } ], "md5sum": "444e450aa5fd0e7dd9ddc73ec1b5d613" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 22708224, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 308, 18432 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22708224, "byteOffset": 0 } ], "md5sum": "88b42f1cf5aa35e4d0d40ee86d295f70" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32649216, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 924, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 231, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 11354112 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 77, 18432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2838528, "byteOffset": 12773376 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15611904 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 308, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 15618048 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 77, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 26972160 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 308, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 28391424 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 77, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 32176128 } ], "md5sum": "c42365757247247c92b3472728babb3e" } ] }