{ "metadata": { "ParamSize": 405, "ParamBytes": 6889973760.0, "BitsPerParam": 3.749252192749517 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 131072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "8f05fc2acbe89db2caf5cf4dba415d8f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "lm_head.q_scale", "shape": [ 160, 131072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5da453c25b0002dfb13f0bdad6c00f82" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d921c8c0c793152e58ac7289035a44a9" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "75e8ca1f9ea76d044aafd35f031afdf7" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "58659ecd976d7c8b2c6ef7f35f569f8d" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "cd301ff72eda8055ea1597fc8effeb5d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27566080, "records": [ { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 10240 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4597760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13772800 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13783040 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 13793280 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 18380800 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27555840 } ], "md5sum": "8370f97d39eace87a18b342a1c87126f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f67607f6020a904fc27131c377bf854b" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "0a1e9a4db22631ba3f760136d7ce9120" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6d2fef8e3bc887d02265deda0df6c578" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "b5ebae6d59e9b1f7a572edd9bbe1745c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2b384254269edaaf239510c0966e0dae" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "310efbb1de1b2f3cd25775fa23846532" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "c28b7f0bf14f070ba6865f06c61762ba" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3c92449b192a54867cc7fda678dafc8c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "468f58e12fdd32f32df1a1520a8b4586" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "805be9b58f7b0a0ff952e6e80876c462" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "1b28565ef3d3c1073094fa487d19437c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3daf41438feeb0a8c7377e8c1218dd01" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9051981cb1624bbf57f3711a235c473d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "59859965fe4ac7b225e8a55e64548cc9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ed5313ec610fd1bf85cafeeb5509e064" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "4ceae60e35a07f39c1a2378767ab9c37" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1741edc11a8f2d113a9288fc74e4e1e0" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "ce9998214ef114847662703adf8cc6b5" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 131072, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "3c15656f0ded974b83ebcad68c1f4619" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 131072, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c32d4392a8865376dd946da79a7224e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "224915919956775480d2c46a4f9147fa" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3dcaf3983c5d32aa033575584981735d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25589760, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11806720 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11816960 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25579520 } ], "md5sum": "7c846df238b24ef762d94c147070e0c4" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "af72a8bef719fcdde8977f90e1a85c46" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "0f5eb183b36ac816fc8fe40aecd765d4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ad2309c2fe66f09374af98762413cb9c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "4562420b92c30ed2c430c02af5381663" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "aa93f4e3fc822ee28ff8533cb1c703aa" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6dc4c4c3afda9d4631ab3ad6e7b8ef5d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "9956ec90c4dacf62650c07ac93574cd1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2e9815f1a4a9f4d0c76db4fc9963d059" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "b7e04d71584948a76086a272bcf439aa" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "47e8c513ccf5d6ea290357cf6023f86d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "3ed34e75d066ddad3b969ff7c678f071" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b7ead68be377e2eb0f43173c370bb962" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "63db111aa404753e439911533293bd6f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "7751740c7ff84d747e155697709cf1e2" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "1490f39d0be841026c562fe66820f791" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "13ca2bd1ae8ffe7cae2ada493fffde7a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "07be7dcdf053a35775ded056bb4e21ea" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "88f06392a9d0556ef37bab4e237d316b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1dde5fc316e8d2aac0c6e7a8eb012348" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "f97b7fa257da9cb25b994cab8828465b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d4d44d0b4859566b33e7e68387794106" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "98f8ec6be416905eadfdf55f9cea123c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f99ff5ede3a27611e41960c80047e2b9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "98d033ce70369bd4d26449b543f992e7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e361cafba99cefa64f17d55974ce9b3c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "728346b8210f7e45b05695b1881dd89c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "4b68959d1e152515f6d32573ebcb4dbf" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2eae04fae4ee60650be1ce08886ec02b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "cda3fdc69876c13079a975fd83fab8e3" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b2f020e21dd91b5d3ee3e7f6ddb113e0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "5f0bd90e15e7480e2157d69ca3c83046" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3c2d44db9cdc97582f80f4aa73ad5f32" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a38af0aeed4cc817f1ee823fb5430659" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "a938439b94de4a0f6fab831e4dc47533" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "152cd414daac80d9c0fb2159bbec0ad9" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "f91d0038374b6dff284fde01bb27aa1c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f190d2360ca52573632600c030ac58ca" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "22984fc89c8cfb934f7ddf475f76cc65" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aab76e1045815019cffdd9158abc1eca" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "2ff0ddd27f26beb8575786df63edb769" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4898d4a53374a51fce3bca716dbce669" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "6b4766ffee5ae0307bf1bc7a94479038" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0b875e520f7a1d12313f35d60010b9e0" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "150f662cf03b418e7f5f4ecc2350dfb2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "25c73e7cccb6b8bbe2ee146b80a5af84" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "273aee2b052d5f09167ee4f56814a43d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "ef3531b15286cdbb1e1fec261b9b105f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aa58e7a2b7ad23c317f38d76f5736e96" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "4fbf1c3d515716302dcb1ce500e25f78" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "27e66f6cbf03137687db7b8fe197dd24" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7746874e5842febb18db06d7581e9ea5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "8678da3606f42c24df04bd2fb5c2f66c" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "28ef04b017624d35e8db01f2fda6dcd8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "133f57449ee40e20d870b52c49fddf7b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4ed686378b82a9a42d6b242b5b2fce0a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3c9bb36e383b6c2bed28dc0b70681f0f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "c2180dc9781448e1a5051c4adde03558" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "583f14213c6d5cbe99ae277acd55dbaf" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "b7e1d3fa6d060cce3535eb868bba2bc1" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4591f7a3dcdea5d3acb5e7757e447279" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "0c0d80a8e3ccaf9b7a7841848b0801d7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "9df0db8205d3bdba681a2c3a982c3834" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d812e71e3e28b710e2aaf67c6128a037" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "afafb687ead58d02eca1282b935e7f1c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d5f9ae1d417e7e5f288e9cf3223565dc" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "de1cb2bfd11c88f4e648bebc4b0c08d8" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1cb992da1f83f4e24c897b6a40dd9c9a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "d63fc711698372ead5b7ff0f1a8991e2" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "c5d92fd35acdd0a6081978be77f61cbf" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1ee73e0d9a70124648588dfb20357823" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "55bb69abfa99115089dba90de7a38489" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a873585a02e899238040edfd2a48d2aa" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "2a4c339d8dfda4ee30d52f9f269364a7" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0b425d649deb9b64cac848b3388cb015" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "33ac4ead2af90d2618ff5d45ebb1a3ef" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5f4fdbb10732778f0fa82d03cce8085c" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f22ba0d27eaf6bdafdcf9e6e9ab1b560" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "d7f4b7199d404f4349960380ae0f0613" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "7054c60f279c3308f85960abf580330b" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "fc0ba7312251dbdfffb527ee350fee7d" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f5a79d9811946555fe98767aca2bf92b" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "7f5456523cac86ad0473b4b29065b4f0" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4ace1c6cd77a395f79f138abfcd1ea55" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "7408eb9e882862eab6c93f6e1b3b5a69" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ff3efd391fea26e1fc787d226f9a7611" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "de6ece9fdfdc1bdafd55853795ae294f" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "c8e3ea8c91885b1ac9a8a9c261d27cd9" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bcbea69873f1a70db4403960d7056a51" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "1ff7795ccf2a11681e01113067ecf64f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "be19b0aa372c3ce7805358b438a7cc38" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "4abe8f46cbb7eb193ee2c9b5e5ea0e57" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "2f2b2d106b3ec590efe91c1b0a0eaace" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "69a85d04a6d78bbe207e45a58158f4b9" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ddbce554d0842de9a9eb1f6d609b5bef" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "97acd73ea289ca67e76506d574c54b3b" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "bb71377feaf69059168f1d035d45d6e8" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "9930f0f601ade069b5a05af03c152a54" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d38d78b006c0adb5d3212f17819028ba" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b64273147b5b1c5b8881b239697b94db" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "4ba23a051db87d0760080f0ee5b15c8b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "fdcb4db06a124007804062883888f97b" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "18284766b098b96d08db844d3d8fcc0b" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "0d8ea7b83bb8a752dc0ecd5278e66db1" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b5ae0cc4f242532b2b49f458086fcad8" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "cf0831196128103a8671fa043af59335" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4be086357d79dec53800ae39089f9dab" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c8cf24902a08db080187cfd2afb48faa" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "32b1f5b14c9ae3cd21f51b7a295a7d12" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1661f0f8570d42c0dd95f1726a44a707" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "a1b6a86e4100ab7115ab6c1d286253b9" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "7a1062b90052a74fc7ca1ba4e892238a" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "4ef6c096f5c485ec6bd41252679e1906" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ebefb1c79cfece7b895164bcb4f72617" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "100e6e66d805928e26768c74ddc860d1" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 11796480 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 27525120 } ], "md5sum": "78e20398b51d8fce670c5f271a9a07c2" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 11796480, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 } ], "md5sum": "6f9c65dc7ac98e87a1636890a33b2f0d" } ] }