{ "metadata": { "ParamSize": 390, "ParamBytes": 1572915200.0, "BitsPerParam": 4.501369085231279 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 64389120, "records": [ { "name": "lm_head.q_weight", "shape": [ 50304, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 64389120, "byteOffset": 0 } ], "md5sum": "33bb513b28f36ebb6c9352edf514e607" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 64389120, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 50304, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 64389120, "byteOffset": 0 } ], "md5sum": "4c6c33b611b88517efad9ca0b0c38e10" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "92cf54de400a4e0bbb31141cafbc2226" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28282880, "records": [ { "name": "lm_head.q_scale", "shape": [ 50304, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8048640, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 50304, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8048640, "byteOffset": 8048640 }, { "name": "model.layers.0.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16097280 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16102400 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 16107520 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 24954880 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 26060800 }, { "name": "model.layers.0.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 28272640 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 28277760 } ], "md5sum": "5bcca722f877257454d7af65489506a5" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bba45ba957851bfeea1b0776d1324392" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.1.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.1.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "56c70c8ef90b94ce8b9f4cca52130ac3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0cfcdcba9657928fab48cfd882b93dfd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.10.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.10.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "7273a1f1f9582b275e75e24808f7361a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4931463e5f784c461e4f59e63a4aee2c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.11.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.11.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "5bc677a59aacc8552f8f35c3bcd02fe9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "99acef9e7f788aa450afa6dd81a91ce5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.12.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.12.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "44c41ed8e6b4b83ddee55003cf9e0114" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d51e6fca6ff6cf3427f5bc65131004d4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.13.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.13.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "8ff6902fcca489e878edfe289075cd40" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1cf12630da5a449e124491e2ba583927" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.14.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.14.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "ebdccdef2fcc4b66a19e4de6bbe1ba2c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "46a46dfa568f54180eb34ab92b3b8e53" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.15.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.15.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "c216b9a10b71cd3216d15d975d45541a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "fb9f33b76a27b0c42cbbbcd8bf64bea2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.16.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.16.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "e9e5c9757bdaf2b9138780e8902f51d3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "de1bb327af6fcf9febf1eda8c7d7b959" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.17.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.17.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "d532e6af81ebcf82877a8cbdf6496e82" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1fd755cc97ad0b885c3afac6e4cdcc26" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.18.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.18.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "deb384ae3635b4cba177e80ed78ebf1c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5b987afcc196c12dbd647beea2319158" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.19.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.19.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "e9bc895d566d236c5eb14cd03e4bcbe7" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0b941962d2692fb3933ca78d6d8c99b3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.2.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.2.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "cc00c351e78f57dd5cd6aaf8161a26a0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "92d2613b3ca45897ad589fd9e78c0c6c" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.20.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.20.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "2bd0671a3e6ff0ced48ded95c10838da" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "24a8cb1f40e995b68c11abcf481e54d7" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.21.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.21.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "7115779964bd09697a9716b2b8ff159f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5c6733f9724f3fccc5a14846d7102fec" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.22.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.22.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "b9bc007e9f3fcf2d0d6ae2b002ff43b6" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d800838d4a53c3b910c62d2715bde687" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.23.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.23.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "452ebe06ec5b577b81a820b5ca4c248d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "382ab9423010e3a4f755249d6237690e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.24.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.24.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "71971bda7d8d1f7b96b2b3d477bfd4f3" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f34ab2dfec684f3ecf8953fae6f6557f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.25.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.25.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "a25663a81acb8911289d7f92f75c5e7b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c015cd1ff73f7de72aa2b55988b7ea3b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.26.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.26.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "7c766ae6ba486451e80b5e394f741b35" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "282ceccdcfabf9ff53b2d8885d2c2d07" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.27.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.27.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "472a5d5f8c4c0c145c8c8ed570fbc16c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4cc3bd4cfee9d1b30a4dd3612e7d7e49" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.28.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.28.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "bc091d5859c2569e14676153dd819caa" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "697f7667dacd874d63be96baf77eb915" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.29.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.29.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "857cdd01c1420567e9ea5f74c3d25b44" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3b190d8f2179d9ce0c931cc28b7e1c64" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.3.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.3.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "9f304494424994c92fb500044f3538d3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "81413d32bfa84670dcff7da4012ea78f" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.30.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.30.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "2ece0a5d7949574d1554d1f82242f9a2" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c623c66c1d6d752fdef8fd3fa4492d07" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.31.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.31.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "44c0b0ca47fa2b735f73fed07ee464b2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9d8c42e04066ced497e9eaa6f88718fb" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.4.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.4.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "654a7520c445991c7ada77398fc227c5" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "aa9a6eda73762ad531b256e8637b75ae" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.5.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.5.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "4f69964b0e5e14a98c9b4ec795dfb6bf" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1d2d72e76c3d037c169a34dd2b231b09" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.6.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.6.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "12cad3b1fb76cac16072846768767f66" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4c8cbe42728a47514f9ce4a3b3bce476" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.7.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.7.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "c16515be9ab69668a01072dd283ac634" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "62bee20bf1f9fc7f3910c8d8d48ec7b8" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.8.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.8.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "55c32ffbed6edd675625f5646120e94b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a20bf1433300232971c943cac71ea494" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 26931200, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.9.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23603200 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24709120 }, { "name": "model.layers.9.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26920960 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26926080 } ], "md5sum": "644f39d26f65816bb7f7d277e55c1f4c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 14755840, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.norm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.norm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 } ], "md5sum": "340ab1623dce958bcb319ebe41c5f3af" } ] }