{ "metadata": { "ParamSize": 885, "ParamBytes": 68971290624.0, "BitsPerParam": 4.32957972211083 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "lm_head.q_weight", "shape": [ 32768, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "0cd6402e860fb9b1c6b6c2dd94639b1f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.87.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "e6beb30473a5b6b6919dc496aa27c588" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.87.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "28139b70c92024360fd1ce28aa4b874c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.87.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "fbdaa48f2c176522fa5749c1152f4ae6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.87.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5c05ff37f65fcc1db199bbbf73536aaa" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32768, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "6ddfbf5a7d856f226f3d63ff7e0d61bc" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32768, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6799bcc4180681747367f4a21d1dc202" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6b3a3240335e3bd60f7732a0f94ff88f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "cda5d4e5d4f73ec1dd19f0bb3b83d520" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8c1f4bf07839ce729ace4d533d736ae8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a05fe0f4121b1901e52d5b0ddcacaabb" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7f3f2a214a46e7bd05c1410c60632d12" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25288704, "records": [ { "name": "lm_head.q_scale", "shape": [ 32768, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 }, { "name": "model.layers.87.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25165824 }, { "name": "model.layers.87.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25190400 }, { "name": "model.norm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25214976 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25239552 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25264128 } ], "md5sum": "b2d59e3419c273eef8c0e05dcdf803c0" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "426113e98baa2acbfe17b0644c1c99bd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "37e410ea0a4ac92974a2002d9859855a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e7a6181d20978ae0208435d7672f60ac" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c3e3850e903560c3efeb02e0e5c0340d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "deff02523b2f35651b2865ee3b2a2b25" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "7b8464503e8b439360c410e582933a07" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "dedaa39576105b330cf2a0fda68869c4" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ce647ffd9b1c1aaf1df448757ebec36d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "2a2e45f70c6c867bc438f02d50565eae" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bf4fee0e14fc8022ad17e318a56c3268" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f73c70df376095d98b998d696b898d8c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "64d6e2815a76ad7819288fa19a7d5849" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "778ff32636c875727ec0e543671d0130" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a0fcc2537fff49883c51623f0bebd631" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a228a7379d0a68437155c5838fc3862d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "89ead8af3adb95b05f84816e7374a3d1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "74069485734c98a75df1f06c8f41d8d6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "24386a5bd87dc580ce053a4f9ac1782e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "49ed079eabf5e18c05bcd5cbba1967b0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "077dc053cb06989ec00e53a03bacd265" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a355446b11e99ab33bd891e76dff69c8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f6e9e7f4baf9f2e9a58322c683d37b4a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "6c0e420fce2856a4d567f4261f5f3b95" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4925e658be0c80183aa6f081a5388d82" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9afb434027912879901e323f42d91e1d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "e9d4de6bc9a467c42e0b33e151ffbf80" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b17f1d99c7592e0f96bf53c1a3466849" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "462f2cbd4997e84a8951838d5570584a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "60d0cb8dbc51efd991ca77025a1f14dc" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "d9956333562623a7a1c49f35e28d6a99" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "84fbeae6b74e18f5eb2c74c3034c4b37" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "fb0d8c2aa5961e5f3fb19346a7596be0" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "030351cc01ddaec832e7ba0f1b0f9b53" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "8aee2edb32afd5c3c4b96baf16148f0d" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ead4c9e3652ea4644f68d279406a307d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "436b46144544e040f0191a294fd0496e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ddf53537b3dba09cb2f6e68a30e7d1c3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "23ba4806fded2a0cede6bd1538c7029a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "7d9a1f601b74c2dafa4c548cb676ef32" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3e457ce1c548540b9e136b40c6ff4dac" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9846db667d1d05d885a6f7284aec49df" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ce323e157e9ecb976e155101bc77b1ca" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "44cd7535364a6efe637930c579c5257e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d8f465f100164b3e57084dbe2dd9599a" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9a1c5696f20c9b116065f6fead19a199" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bcba3ab162b4a1cb91d602ebfa55afd2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "a751fdcc5aa86764f7635aabd95b5970" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3e50f8460f86f2c74b5c5fa7d4dbce76" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "077eb7d539dfa6bf03f56609dccb67bd" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a8ebbbf7c507507a213777cb43068957" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "075b43ffdc979c54b34ed491e21f3b02" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "8de224d10672503d0e5e5ceaeee364d0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "084f9ab759d702553326bbafdde73619" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "dbfd4f81b5abea3506a70b795e6a6376" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a96427da9740a7224bf56b3d2c64d0e3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "05a461d0887b3b8a53a0e11186114cef" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "48803d43ee94fa5381f7f99b7b82cdf7" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "126887684323915aec30faf2e5e88ea3" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "3c82572d6654994f8581d9d1b784c8dc" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "517c0c2a4c1fe2d0532141b4feb8dac2" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3768197cfe36f6b5cf9abdfac133dfaf" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2f4c110340330e2fae8e1955c4d3f925" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "75f733b647fe84fedc44e452033afa10" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "82f344aae9dafdc1cef65bdc3c727f86" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "1c3990306b56073e2ffc863a9f07fc7d" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5e7752a9edef5a454b774c3391415480" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0ac4aa90ce441ab42a937044a1ed90e9" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c413c15c45d99d69367740df01737afa" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ffafda46de652976de759ce1763068f3" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5af6b8a82b9b3004470ca266cb5a6eb0" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "537ab500bdbf568a477c4212294da05d" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "96c9cd72c404f1b13e1b42dce0cccdbe" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7bca7ca469211be555b40b3865eddf99" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "59c1fbdc6745c9e655da3cf90bd53613" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d423efac2ab83f177ec174208541c1ce" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "c9ceb31bcf09c5c68de4652d65933adc" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "1f4c5776e6188f1fb80f12df1cc77ad6" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e1897860789b25f5dd2f009cea666e40" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "93972bb01f1f69b8918bfcce7f487705" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "247910a97dd243131c4d2ebcbcac8f7e" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "df440753ba57133bef70bdedc7c9471c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7986086d33be440870287c4446734ca8" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "dcf2f229eecbb01e27e4623a4d8189a8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "938fab17cdb44f52d2bfc589094fcf92" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "40ec9d9257f64424118f83f0cd33ab8b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b0bd4df66f6c8a061a35556276d225f3" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7d187c047bb59d9c7f2f3882f350d5ea" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ecd36d04cd5728e1ab6fa1feef95d8af" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "987fb326fba8d4dc4fbc83428a771389" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "be0906d33f18b51aa9f06d1f03130c8a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d4003d7346c14cb70a507ab519f52a00" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fc3d6d2977c5daf654663570919e02d8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "a166747db5cb76f8a967943b5d264525" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "94e8a29e09bee84ddb07b93acef0a567" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "da34ce810947c9010d5322023927d1c1" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a76eda6f8cf03507c04ba6256afa0990" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "07df096a79a05d17ada59b50351992b2" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "bec2044aa86cd8bf534d81c8d011afa5" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9f6921d075d20d17c8daf0d58271cdad" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d0f5a0f3876c61bc88a6790aa7db50ac" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "103b2e2dc94e3ac853a74e8ffd3cead3" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8db29b1995296ae05cac2df97c4e7e04" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "02c3b0f27286a560dd5064f35a77c3e4" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "77e3ebe2059d64aa2d0a964df69152cc" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "66de1d8ac518dd9343b94dd436f4195c" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c8d37910c0128f6dd4a55074e12e62c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "280b43d7471fbc1cfa24a5439949db0c" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7dd02449444fd64cfeb58b830a03904b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7de625d6ba1cc98dd20ad69fc01d0d69" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "289216625e21fd77788fc566546119a3" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "e304b464a62c409bf64b3250024f7801" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a70e5abd695e421604cc6986aa8cd4ca" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "530962a175f242d348873eebfe4b4d69" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "636635c2176334e00f87917acced9076" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3f467f35631a7714b17a032e222d6486" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ff75bad35ed5c5cafae94c8a085bdc4d" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c86eced652a56aa8e7f5150d2cfb6b9b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "31100c9597f39be7af81bb60e95420b4" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3eaf01dee4f87fe1d415181b15736db1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a9701dc9523189802f26efa677f75fc4" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c153116303c19ff0b4e6bbd3b6fa2141" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "7540793f82d7b54c20add732b967a250" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b3d3f0efd86e700e669dff5c8cd63ef2" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "99c156c6f71195b19cc3ab709cf71426" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7e60eab35e7880d26749a34a1a126690" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "003958c672396c53a00eb4b1c571fcda" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f2db15fbc7c1a0c6a01d431a696f878f" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "9decaa81d95ba2c1d3438225650122e5" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8113090c8aac1eaac24656d7d64de3bc" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b327db6f9115c0f1575d525f63de1532" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "54b53958690cefb2962dce3e5c1e1738" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "04343d8acbc3e063259352c169026029" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "f4feee3148c2793d508a0792c2f65bad" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4c725296bb9882662c4b77e4e933558a" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "716a5a7d0d8a2094cd2774de67dc9163" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0755bafbdd4ca9f5d5887b5694064eea" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "53c4370ad96e98d5ee1f8f1939d7f29d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ee55d1c5b8731437cabe9c2763a4ecb1" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "c23219b38a7101625a714b806925524a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d994f920664693ab3823f327952bb2ee" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0a01c8ccd7f756984c45d5147f4e4191" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cbcb7cbdbe4791cd46d6825ef4b2c717" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "65645eb7aa63c9a6608f1715ed5ae9ce" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7df0b2d8edd3197ec3912ae880cd38bd" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "72c72e02add82728baf09ddf03670428" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "13b270890b9bdbe4533e25c1a1aad3d9" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "41542aee68e8b61347aa93ade1cb9999" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8ca8aceac589ba220da8bd53128db8b5" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5c6b9a85364715488acc2c929d6dabe9" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "76f787c250967b6b3f2e39f8b05b30e2" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "343c25da4db810ddc96aae66d1c7d110" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3491df9f5bff5d0cf177b4cb0b2cc5c5" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "eb17262b19a1ca174a03c35c3bbbe9cd" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "003d5e088438982fb807dc3aa7e39de9" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6fb735d1d78c21ed0d616273b4e7e88d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5f20df30663ac427a19ec660804ca642" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "462dc4abbbd399945ced04350df11fa7" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9e2b89d99f7422319989ad2c0a133b11" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "efc7e02e1a95b9e029e30f43e87c110a" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "234f5681c06495cdce599a9f7b3ff0dc" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3e6b711563aae10578e3d505c6fc6550" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "10336be5126f92911f44de6edac93e9e" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "462757377e75f1395eda0dff062fb34f" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "a5b10d9c5b1583dde6dc16bb160fb6be" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "65fcdac436531e890fb7748a5fa200ac" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3741818b502ab86a5279efa95e610a08" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6d965779f81a48f1d26e755bb8a455f4" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "3527fa623f44061e21ff20ad9017d92c" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b3e7259a52128c4ce75a58b25421ed93" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e7a6540d908411aa1a2747a9c14ad6bc" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "427aa887abb1cc6c2fdee1c7b0b2977e" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b7083ecd1c2359ad57033f7b40974482" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "24116495994366e6c176aacabb0e748b" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "5cc01f615320499ce0163dc55ec6cd9e" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e6d955f61bbff2f43562a563c287cae0" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0b1b7e976441f1fe777590ad0252c619" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "13343fa5ac0c13bb5f4311039baefedb" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5d70f9039c324df900d03e5fc31c9b54" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0ee41dd608e710a32360d415e4a6d881" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "acbd68cda1fde18d973fc5ba2828a4c9" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4ef7a99be810575d65cd9c5380ef887f" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ca73a5bc86d5fb3e7dd9a40bae30491f" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "33f646873cc46e377c40df24e123ddff" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "eb4b86d2d8caa70ad606b21321253291" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bb75d2e96dabb729f3518dcdf88669e6" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6b9d92130a9fac4b7934defd50a4144d" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "382d487146b2663ef4145e9e179030fa" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "84438f101685d861c771087c6bb1c09d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20496384 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20520960 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20545536 } ], "md5sum": "8d247c1bc85da578e68ac546fb3c4d54" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4362f796e53b334545a7e06ca3250a02" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "fcfabd5d28bd768605c1330a56251e92" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ac5b7688e6cd9c5054fa014e0c281a36" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e8cde46f1d77aab66a5bc16ce69e3114" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 } ], "md5sum": "5cb817aadad2451330e4093ab32f46ed" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4ff64b411c32056e2eb9ed5dd168982f" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d3c5fc533f5a4e755168000809ffb79e" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2910b068bca62e9aa39272892f96ad8b" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "11a2234d14c858691b362548162c84a1" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d261d86e2cb23396f69d982315775dd9" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "7e38927034dc40784bc78e20b505fe34" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "f9344253452792564f15d44f86df57ef" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "54727ba663c86fd37395c4ae1ecd4379" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1bb6df6825ea6118ba92775d82664694" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "0c822d8ab4dba8e0c117575384357dd6" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "aee00852773cfcee6955cbcd24c1cd88" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e4d4a8d19c035bc039ecb4b9e6bcc40e" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "42d71af976151fcf92d59d3814de1e8a" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cb55120f5f2d5dc97011011d7232c432" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2d570888d556d4af45c44561fbc3917e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "eb9f6de225dff4fbb6591358ec0b64a2" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cf1c3a1e4608f771f911742f8507b2a9" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c17223b35f0df60d9d5aa98a470e62a6" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "926930f461600866ab1762894826d973" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "94bb5e1885c1125bcdc5c72b1a6f6664" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "11fe2c02fd45455a04d2622b34fe291e" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "424753a253ab6cca65b6e7fddec35e44" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4d33654e9cc7a465e189dea84ea4f36c" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8ebe5b42a62ed802c6f5ff7f00a9e9fa" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3d5dc05f0a1b23364227d6279e75993e" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "51be1bec4dce28d451076c78dc730b87" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b166b588c27330f46652cdd9e8495fe0" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "305e406eaa14b5b2b965c2862187d728" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "34d6aeb5e7b276eecaa8d84bbe2f26eb" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "7fcf2ffb561a8b4be1c9a020db69ff6b" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ecac39e9ac515be80a369a06772cc7fc" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1d65ebb9d55b3fe6ec287f3179ed0976" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d5a21180e9e0b8a1ad35784ee24df906" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "febab2e39ee50386d81f89ea6a977480" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8fe0037ffa34bd9a48fd343cd6eda731" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "0dcdaa34c78b703a7a210c516bec8e19" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "bab71352af47ad4d85a9d24bdff606cc" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9351fe442aa508034ad832b77c37062a" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "f3def3a199083b307de6522c71fd5946" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "92d66e1f10bdc6baa5a2233e3920824d" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "56ae5a849adb480d319facd10be46543" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3029ba74cc454ac27681af1f5353e5b9" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f1a80fb2c1a52f96f0bb382879ada128" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d466ca20053ee4c8c55d283432c7ba4d" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f80a309e2bd2bc70194fcbe75720f726" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dbf3193649f342bdaa04918882b9fc33" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "dcbbedc1563fd83b95eac96c8ce23575" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "5faba5d8649ac761c506e85d66c0d32d" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ccbb435afca67d36d416aa44dcaad6ba" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3e086ad3f64f8ffc7ea3e7088142bba2" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4e030b385b3ec00dc3f2e89fc2030a18" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "dfc9ee50abc5ec166a346b9de5387848" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6fb3837a3e6eb2c13f68504b8dfbe8ab" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5e08c8c07e8135a2383fd10dab787fb9" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "bf0bdce343af92f76cbb840f140bfc5b" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "624d5d926be3f722dc818d14fdbf21b2" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ff5292d8e1ad8036a05d7b5aa6b97990" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f7fe3bde33dfc0a96fcbcc04403d0cd8" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "ad934d9debea7111949fd205639bb7c8" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "affe4404e96aa5b315683a79c3d70688" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f4903738a524f100ddb9e58c0700fd70" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "21b8fb6a2482ab1b0657c76e2c8efee6" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "00665bbe5181fd7181c96220db0e82a9" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "223db063686ad6fc5c8c2ebc5c98ccf5" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "b7da92d6bebb45bd640f554c06564e28" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "73ebf10924e9a5daf3f75175f33cf29a" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "8b1c9e391fd5791e282735018add4807" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5dd3bbd21c34afe7930b8ef0042c6d8d" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ba86cb1d33287706991a3c53b488bdc9" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "923c1f56b0da29c930ccbf27559517bd" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c78937109d5f92863c2ec736dd9017b5" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "64c7b2b0797639e9f07f3707411058c8" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3f3060d5cbe43bf35528f891e419c43d" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "74ec1319501dc3fda9ba06d359f271f1" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dc352f9804b8524adc5ec2e3cc9593c2" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "61b432241aa07386e85d55161cf82ba8" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "00cf865c352a00a3bb6ca1851bf18fdd" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "060541c37927af942bd9d0caa9e86502" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2f0e53debcd1ebc0c53926c0d7085ba3" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9b9aaa3d8bec1e899f8b40361b310dc1" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "261f029f19f7c955997bd668bb2a418e" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "63a81ec2b97744a4077753db0d9e8bf0" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2ba31c2983ec90b3f61dd441808c7469" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6b166ba9cc9a237fba657b2c7b6437b2" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "d30070bc8984be745fe1e7a4ca9f1924" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "01a6e6c8310a39142a847d2522545059" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "77df413790f4ccc409a182f92a7f02cd" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "e6c532e44a70adfce3a6e73c67338f9a" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "9b1bdf44cf7a1c38feedc021fe281c3f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "772c2cf292c3fc301d5b3fcbf835bd5a" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1e945a6a87b6644e0623f76d92b0a504" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3f62087f27180b4dc9538c8a1fb774b2" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "9cea60985cfbe178fff4954a08e20298" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "18108018d7bd24a53e808b2fec550960" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "aac70914ea897137d2fa726c124a3439" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8c23b66c668a1295b41e64efb10a3048" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "52e8bd31df3923324900d5af119fe115" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "1707b9086d7ac4f0f2c98ff1317699c8" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "857553700c521e790a56bd880fed6139" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "919aceb6395a0be1e4fee38d6e008a48" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4ef93060ffe17ff8e2092a8039e65dcc" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a4b08e4dee006847a40aba4a1ba809f2" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b83758de98d86b62d116fa698b3b66df" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a098018238ac76c4124ca166b8213b77" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "53bf606ee1dc9a63c442d1d8d9f3d0b9" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ef14924209e2bd46800d39ec6abd0c55" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a8b47c3703863bfdefec1f4594499404" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "be7a4385f25d162fd997c403cdc40fd3" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "53441bde0ecd726d8018cc43648b9396" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "02d37d73fa4aacf6de922a7123a70bf8" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "b1556982696275ed16c6748e33b82629" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4e616eca6906f9e8d8e5681debb27488" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7d257da63539ad78774ae28868b93ed8" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c1adecfa15110d0ea722c60fd40cb0fc" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a4ca26c75bf558f9b92a89f690fbb8d7" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 } ], "md5sum": "cc4407ab47a0f232c113771acaf2b5af" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2cd10f51851cdfe205a376129f97485b" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "b7631c9085ccb6cfc706ff8f842074de" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "01540b00acb968bacc1dfb9f3c28be8d" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e691603ccb1e10c1e683bcf4ca6ccb97" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "32d5408217c9d066f91703305721c84d" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "3eb67c842c01f06c42356464dd7e0de2" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4b55aee1d343ced8c668cf0a79f4471a" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "acee11ea677abea915cb6e25c6665065" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "928d1469417ace28160455d78ed526e9" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d04ad6651e3358dc8666894514b6e52d" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "1cb4297ed54d31192356f9a01de2e0b0" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3c5c14731a98591250f66d5743412eda" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "6c27509992e18abb940bb7b2f1a0a604" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1fc92f9a49d4ec2f9cb53984f8890ce0" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "709963456273229217af5864559a0798" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d6a03a54f675cb50c6273c4d44b1650a" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "3a4f63bf5f31ca5377cf0eb683329490" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "009375e53dbb53f501249e800e279bfa" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "14cc2dff1e5c76581d685f32393c4ca1" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5a67f5d4b2d13290fbd3394b57ead408" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4af7f4f1baba5cacfabdb0462d1d05c2" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5620db6d50e58c24609831db3a1716c8" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "c20c3063da27bdcb66659c70192d443d" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c20a7783e3ad7b29b9d267ae35de6f64" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "14784b08e0406a4dd90d97ec78254754" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "ac76ac8e4053c736abe8273b6233e0c9" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0877d1960d7296eb2b0ebf82fc961b25" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a3aa9141769735c76dd804aaf1f33992" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ce7ac8ed5ecc433ad83a969eeb6e84d6" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f80c1acb626ae5cfd89ef0c84d85564c" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3234da52392a058c344b78345f441928" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "57864eeb9a894b51395a9ddd4b827fe2" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "45d2900cbc1195c03003c995b6716eb5" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "1115796e3c678812441a081934e7cd04" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ac3d8a208f2ef33ea229a75a2dbdd572" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "034536bd290d9a7e7a63e518c2923e81" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d6b79e22e720148e868e54cf1344cd94" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b34689a26c1bd7a5e95631e5c774c617" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0b4f381498d2775a811a7966c054d97a" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "06af738d6a1cb294e39c1ee6df34dcfb" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "54a259321411ec4654cccba2f2a51a7b" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "91887312ea2fab54b7bf62ad7c0856fa" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e06a273ddd18f36b83e50c3b8f728c0a" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "da523c5fc126d87e78646dfd26418970" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0377bd8214eec5d78898d8aa7b96431b" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "daac32835a25827b1dc2b94cf31d12ef" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e856e519c845048f95c3fa06a4a6c334" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "c974dcfe449a19b5403a14b3dfe707bf" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "21bc02349316a4717105af78277b7b93" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "db7a3fd270f17e6ebd17c0127b3812b0" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4ea283a74a47c6076517761ad7f5c9f5" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "20fadb1014ffd9187a8a7a4eeb2d66d5" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "3f310e7735975b5ded000c5bb97856e2" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "834bab3d64f4ea30737f20f2e626d8eb" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "fbc244d8c810621a7d41bb77b01b66aa" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "13221c1e4bf8b573a7ccc929cbed4a17" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d671cd9a543d742d1bec3a4975e22819" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f82caf294f8a9e652467ee256fd506ae" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "6a7b6dd565f35d8a74d66dba6e13b356" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bf420778ac8b8b602e729681e1589e5f" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "82197ad27cb232abb2ea290c3dac5e73" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e7d0f5c7018427e9bb26c292996e8f48" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "8166a69bb55da25318b0947949e009e0" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "e473886f3b211a5110aee34ddc115afa" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d5c50fd3068c8784964ec6afa260c1f0" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d2b95e7f5c029f7841b68bc0d4776cbf" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "58a6629ae553760018256c7c111091eb" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f174951631e7236c99f8c118ea0d5d42" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "868c3e03508ced453979f1875b812960" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c7bc2902a50bd0a9f02d1953ece718da" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f25e35a1622398b768472c6e3f515747" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "5a74bf709a7ed249a27666cc61bfd49a" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ead732d4ca371dd9466e7058b6554b76" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b749b33fb0d3138b20f6730412e6dfd4" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5a48e68809bb5be0657d9aa95e9b1d46" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "13d4a0c64c0c51840ad581dd6ac10233" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "44cfbe6662acbc3e8e68f53278846356" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bc404f035b7da0a466b51e085fa8893d" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "aee52697eee2b5c0c5e1ab11bc0107e0" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "073c28c336043ef3e3d0b1f77218a41c" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7130245ce706455f2adbe34eaacb79b6" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20496384 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20520960 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20545536 } ], "md5sum": "42d3f1e2623b49d6cd056644d4bf0b3e" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "87b86ffea017eb2718d8eeeef54c16cb" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3d0eca6f3cd6b09faa306173df6ddd58" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "59654ccb80a6ab7f2e414ce28cf544c1" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "e6fc46cfbb0b9aacef50f9f1a99e5fc7" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d39edd05ff3e6ae41232ea11bb4d709e" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "88fbd8ac2980711cafa1bfcec606715c" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0daa4aa5cef9b06a1551658733f8664c" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 29933568, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 } ], "md5sum": "0bb2ec95623ca186d0df215f05825b05" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "83fefa2e6815fae2aea1d0d3ee0ad726" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "edbaf593cdcf39bb7119ba40a7300936" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b471ece4872f0be5491b5fd0494c2e87" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "23666157c8fa261913735ed055a83232" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "38c50117df3ecda4a8a9cc523a7ea35c" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "11a72bc703a1a6f620365bb664852d51" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d0eb98c1dd7913821bf74f089f6545cc" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d9e371b8ce3157a596013aba1f28f1f1" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ecd029edf4e97094f12a055250bc6882" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ff1094f59c6ce216d90d379811b1a600" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e0f882b74dabee2a89ced5434e0648df" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "62fe396524502781f957439c57894e2a" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "1dd38e61ef5fdfdc7c4d1654b34c6d50" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ebf41598adb99186c1947f4b0b728ded" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2569fe99c04fe9130e18bbe23aef79a7" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e791905bb36bb838f046d89327ac19a4" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1b377491ce3428ad3fd7a27af3553e2f" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c05d0fa7fe799aac07adf6bec4b0b979" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "fbd715be52dea4e23881af27c8a30c62" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5fc452efb2c2d619ca8d404eb48c8e53" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "85b59386810c19281bb7679596ef8be1" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1624e69c0b6e54237cd19bc3a0ec20aa" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7761d5cd481b452c2dbaef94b65c8fb4" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "bf3390cf9a0d76b664d314fc241caeb6" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ba4e8c66ee8db939f49ba14375cb8744" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "d84db3f181ba7717bd149a0f87cf21dd" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "45d5b9da4aabccaa85d1bb32d409bf67" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "022f697ab097e1839c6da0169127892b" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6964326b900fe6fe85314672f92deb25" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "a238ed77292dd1cd521ec99da49867aa" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ea27f97bb2a79abb9483bb7c2cf25499" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "0eaa1526d570b7f18b48014387d27de8" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "52ac23829665ce10d2b3368f9f991bc1" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "85296557367456ea9f64ef725f76bc1a" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "44a4854d8f916288bbd07483d024c099" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4392dd17eacb29323bd6980af4005431" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e0b955bbb749a55643b8441a07f9363e" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "92c7ec67b683fdb63c2e3739c1a465c1" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "326694bfc38f6824570dec5d78e80eeb" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "70ee3591942350e7cba53c142e8d90e2" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b08317b70d45296cecbb75e3581012ee" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9a8e0def9e827140abe06a8e593ef9fb" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "abe3279573728bd19358650ea8b3c4be" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2ffa52f3e60ed5db7e2bdc1e74137405" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d24ff4634d2e84131d5da9fa2b256db8" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4f4b29ebc12616ae8930f9a70a4183ce" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "a729bdd624f9d85587b14485606e10d4" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1c1e5ed28072a9ddb2b9043623040353" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "176ed30ab3738080d3c7a51b5d75e312" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6b6822d1fb917e64ed9b708f238b48da" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d0ef02cd55a8ae291341b3228f213164" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6507aedd90ea0c2633fafd7f2841099d" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "805907ef62e86e4b585833a5d8d3ff08" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9a3c86b34b9a7b4b53e16311edf127c2" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9a32e06421b9ff94af6f1735701e13f0" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8a2116f4e57da3894a89fdf09b07f740" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5118b467b8701a3f21a8bea03e1c1237" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "50bce2b12f3a0bbb6cab8c35b1ff18c7" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "c0c94f726018aa6c9fd15e051ef223d8" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9d5bc1fa964eff70bf50b3f3a1b08c20" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4585233d1863e094deb25a6e6df57e3f" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "60eb86c64c857d331e431d824a148fea" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "aa5ad11a883d1f45fcd791765255e9f6" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f4e6397024fa1948dd90d1de6835c8b3" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "ef1981209d55716aaefc63e3f79f4e01" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a33c8b3843dd19b5c78e946d06e4a188" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6623edc59aa0901cc372be193c57230e" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f500852904ffc2d81812a29cd057f17b" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "36b2747ce88d110cc345e9c0b158dd30" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ad1bf4510f494cadc215750fd420779d" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e1abbc29b6827753abd515fafe18a01a" }, { "dataPath": "params_shard_484.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "f6ece4cda44edad3bc89f7a362097a61" }, { "dataPath": "params_shard_485.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d1e765b5f88773df1594d9b671317898" }, { "dataPath": "params_shard_486.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9d1010ad0e052cb41121e40dec7d0f74" }, { "dataPath": "params_shard_487.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "02e560c4579bab87ea29637103a95552" }, { "dataPath": "params_shard_488.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "09e474074f6070f82c3efeef56c75122" }, { "dataPath": "params_shard_489.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5200e8511e7a8ad2db6b9d311748ff68" }, { "dataPath": "params_shard_490.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d1974895c176b815a737c5718699ad6c" }, { "dataPath": "params_shard_491.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ad8a68b96a7ba13dc0a4a0f0d64d1681" }, { "dataPath": "params_shard_492.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "3bba865babd9508afa923633783ee075" }, { "dataPath": "params_shard_493.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5bdcfb716391358e0c9ee75790f5a08b" }, { "dataPath": "params_shard_494.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "6ec1f24a09c18e2c2ac8d2795bd7bfbe" }, { "dataPath": "params_shard_495.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "10e21032fae28539f3ae37818094de48" }, { "dataPath": "params_shard_496.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6a48e9ec8d3c1b52e1d4afd5a7afbd93" }, { "dataPath": "params_shard_497.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "d8598ec5acfe32a9ccfb5ae4cf9f6894" }, { "dataPath": "params_shard_498.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "de36d7225306abfb8f151780117539f0" }, { "dataPath": "params_shard_499.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "1ee912955612ece112fc25933dd26d92" }, { "dataPath": "params_shard_500.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4ce05e2a91a601b821afbca1bb7e1ce2" }, { "dataPath": "params_shard_501.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1fd3418b7c2b65ce3a4a142a07d71856" }, { "dataPath": "params_shard_502.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "83c9fd98d473f658686709017c383656" }, { "dataPath": "params_shard_503.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "df018e16f260241d8d5f0c679cd03aac" }, { "dataPath": "params_shard_504.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ffaabc356443d8afbc2b98661c9a25b0" }, { "dataPath": "params_shard_505.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "9a640cd1a04bca96e7c6e156e23cb995" }, { "dataPath": "params_shard_506.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "12d1e77ea16b46122bfa769e93ddab0f" }, { "dataPath": "params_shard_507.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7361c6625843c946b23c21306c36d2ff" }, { "dataPath": "params_shard_508.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4ae759db4efefe5841faa67ae069888d" }, { "dataPath": "params_shard_509.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a786b8a645c78d0a09273f180488183" }, { "dataPath": "params_shard_510.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b2a93cccd7dd235c4ef7df37e97c6401" }, { "dataPath": "params_shard_511.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8d5ec49a74a4976507eb6409ec168a00" }, { "dataPath": "params_shard_512.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "98f3112b6a8986b45149a6d74af66c66" }, { "dataPath": "params_shard_513.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "fa15625037dfd64108aaefba29808a2e" }, { "dataPath": "params_shard_514.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1d330f85f67220a9ebaed82009129452" }, { "dataPath": "params_shard_515.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8d4624a56456b0aeca370f6c8aed3f5f" }, { "dataPath": "params_shard_516.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1405d2f453a3a989db4b600181653707" }, { "dataPath": "params_shard_517.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "9b1eda8bb9ec93378ac355828a78a25d" }, { "dataPath": "params_shard_518.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "afdb2c2f18e38230e3f9fd643f6603f6" }, { "dataPath": "params_shard_519.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3d52f49c5ab426654ba5fb3907078240" }, { "dataPath": "params_shard_520.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5851a416bfe2cbe38de2baeef8b368b1" }, { "dataPath": "params_shard_521.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "240a0686f47137d82bf67b1319790f13" }, { "dataPath": "params_shard_522.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4fc901841e23bb156ea1dd5b36c194f2" }, { "dataPath": "params_shard_523.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "538dc58a2f7f35212e784333e19dbba9" }, { "dataPath": "params_shard_524.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "28835833cd099c709de8016b833b7ba9" }, { "dataPath": "params_shard_525.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9837e28ebbf05f65af97952d9e49388e" }, { "dataPath": "params_shard_526.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7908e2ca031101a73a97e81981b73ed3" }, { "dataPath": "params_shard_527.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "b4501b6e9289dbfe916d8fc1b142f916" }, { "dataPath": "params_shard_528.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3fd0269f54cb49cf290c9cb6b8f7c9ee" }, { "dataPath": "params_shard_529.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "489de2ef9681fcb59f342ac3aa85160e" }, { "dataPath": "params_shard_530.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "d83ad5265e528888b6ba9f1d6abfbbdb" }, { "dataPath": "params_shard_531.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9db4c75d4f544b6547d39176655f94fa" }, { "dataPath": "params_shard_532.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.80.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4ac5cfb983611c54ed4e744f9efa05dc" }, { "dataPath": "params_shard_533.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.80.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b58b8a45c65074368f6363a354592483" }, { "dataPath": "params_shard_534.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.80.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "36ae69f5816a3d4bc49a9b8275cb7a8c" }, { "dataPath": "params_shard_535.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.80.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "e10a94891691bb0cbbf183c500b24dfd" }, { "dataPath": "params_shard_536.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.80.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "63f78fb654fdcff3f36dff491d6e5e7d" }, { "dataPath": "params_shard_537.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.80.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "325576f21f27ffe69e9476f6ce65c74d" }, { "dataPath": "params_shard_538.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.81.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "3bc99aba4975c5f5dbd6bcd4f252d336" }, { "dataPath": "params_shard_539.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.81.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "893b29e72071e3f434a1ca1730b37b82" }, { "dataPath": "params_shard_540.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.81.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bff490a1da7c63f79edcd831db499a84" }, { "dataPath": "params_shard_541.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.81.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7f3bb50d005bb50199155a38b3acd6fc" }, { "dataPath": "params_shard_542.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.81.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "683f1c511530b3eadbfede55e4eb87b8" }, { "dataPath": "params_shard_543.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.80.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.80.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.80.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.80.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.81.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.81.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "33f224a9aad1966b233322834d806e6c" }, { "dataPath": "params_shard_544.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.81.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2ee48bfb673475773690e573613098f6" }, { "dataPath": "params_shard_545.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.82.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b970cf7c52b388eadf3575c6d567bb84" }, { "dataPath": "params_shard_546.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.82.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5427a9df3d8ccba3899d360bc6a9e268" }, { "dataPath": "params_shard_547.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.82.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "297224685d90595ffbeba122c86c1465" }, { "dataPath": "params_shard_548.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.82.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2efa1123c74b7be329a37241cc89d480" }, { "dataPath": "params_shard_549.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.82.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8bf8215d53cd7ab87298782aff61dd94" }, { "dataPath": "params_shard_550.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.82.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "eca3a8269f33bb1bbeb07452761169a4" }, { "dataPath": "params_shard_551.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.81.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.81.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.82.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.82.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.82.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "0925a89042a0e16f1432a9ccea949f71" }, { "dataPath": "params_shard_552.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.83.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "cfa53d5294747423ba5c6b1029eb8051" }, { "dataPath": "params_shard_553.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.83.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f5b865a958a515f165b73562fbda01c0" }, { "dataPath": "params_shard_554.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.83.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9a548994b7ebec6120672f14389ccf3b" }, { "dataPath": "params_shard_555.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.83.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e7c9874981a6d42bb28b28081b1376bf" }, { "dataPath": "params_shard_556.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.83.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "41aa4dabc523c1349e340c800173e46e" }, { "dataPath": "params_shard_557.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.83.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7fdc2cb3d8083d6a5c2c97eab4aadcc2" }, { "dataPath": "params_shard_558.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.84.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6486d4a577da6d7cec2bbb9826f94d8c" }, { "dataPath": "params_shard_559.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.84.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "aa30252b6f6d6c3172dcff17a88e8dd7" }, { "dataPath": "params_shard_560.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.84.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7fa1c111c1f56699fae7d5254f61b35a" }, { "dataPath": "params_shard_561.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.84.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a8a3e1b3f312146cb54b194a65992de3" }, { "dataPath": "params_shard_562.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.84.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1d8b59fb4131a18b25abc8b78a52b53b" }, { "dataPath": "params_shard_563.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.82.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.83.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.83.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.83.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.83.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.84.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.84.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "6a11471788fad3f637f8968ba1934341" }, { "dataPath": "params_shard_564.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.84.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a70270af8f6cddf28bac66cf1a47f8e2" }, { "dataPath": "params_shard_565.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.85.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "901c43673d2e23b23e815a74338a6596" }, { "dataPath": "params_shard_566.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.85.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "06d76cf26ac9ca7dca03eb0d6e907bbb" }, { "dataPath": "params_shard_567.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.85.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6d4023ad3c09fd9c8615962eec3b6f8a" }, { "dataPath": "params_shard_568.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "279c939fdc096d330702afcb9e01568e" }, { "dataPath": "params_shard_569.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.84.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.84.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.85.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "8470f5369b9d34b3c4596325220041f4" }, { "dataPath": "params_shard_570.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.85.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "e75e122c727b850f86599dd48911ba6a" }, { "dataPath": "params_shard_571.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.86.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "c9e828cbb57acb1aab6b2e76a43bfb6a" }, { "dataPath": "params_shard_572.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.86.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b2d15a7b045c7ec399c78ea46541e102" }, { "dataPath": "params_shard_573.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.86.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "aa55a8b307162c984248223f00d7c890" }, { "dataPath": "params_shard_574.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.86.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6280e34640064062d1ec727752c9d27c" }, { "dataPath": "params_shard_575.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.86.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "baf88d69a32d352a58d7deca3f5f890d" }, { "dataPath": "params_shard_576.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.85.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.85.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.85.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.86.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.86.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "6bec1d0ce7676c8bda72a23e43f3382b" }, { "dataPath": "params_shard_577.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.86.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "55146ec682e7bc8e6e82ee1666dfd416" }, { "dataPath": "params_shard_578.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.87.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d4516f0e6fd9ec54f89f02f706c29c53" }, { "dataPath": "params_shard_579.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "89221d8bac8942984f6427ae4031140e" }, { "dataPath": "params_shard_580.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.86.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.86.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.87.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "4b700535fa4bde8d18c307e5910a0cdb" }, { "dataPath": "params_shard_581.bin", "format": "raw-shard", "nbytes": 9437184, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 } ], "md5sum": "d20394b65ffb1c6e6ec07a5a67e8268e" } ] }