{ "metadata": { "ParamSize": 405, "ParamBytes": 6889973760.0, "BitsPerParam": 3.749252192749517 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 131072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "798a23e3a37b8412eee85de28aff4ea2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "lm_head.q_scale", "shape": [ 160, 131072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0ad993a9fb103a16becf55e34104e5ec" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ac6a3a15b8aa79468d289db13e7fc3cf" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "99d13d70b73acdc477533105c4ecc3b3" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "280264730026d09a93a42421c1160e65" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7145985332c545fa211ced561add1aa8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27566080, "records": [ { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 10240 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4597760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13772800 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13783040 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 13793280 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 18380800 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27555840 } ], "md5sum": "4982b9d5cc1d7a6e0fdfdea65e2841b0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "daef3c83c73af7b59e2ec5a65fbdd9b7" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "f065ebd7a2d94dc3e09511539a4834b8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c8ce057f088a18b61caf21c5119c6696" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "2b27997e296ed14dddde278f408c2fd5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "decc62d084582f7d8293a71441c912d6" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "92177e6c5635a67af728879ac9ad2ee5" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "2b3c7597fe5f10207e58ff071c697f27" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "81f870463ce066dcac5c449f4989881e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "0c0399f0f53d93dfe71e206adf490650" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c3de374cc2e49461dfe3858a35537d53" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c4524d61b7c5b9fb1712f1d812796ce8" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "802471790f8990d2488fd1e67fdff389" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c4aa60d59a3e17a5556b04674eedf4d2" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "e7e63472339440642047e1a80bfd9395" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5b1c8339014a23779ecb0f9b160dc867" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "19a9a8f95c111db15d4918693e705429" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8e566e8b41a8a1b07a064631580239ed" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "191991e132a95f2067309034818201ba" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 131072, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "40ad319ba2ec8d5bc15d68585d6456fb" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 131072, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c0a6b70578fa907bf211788c218c48b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b54c76d42ba0da92876ac5e5968b0211" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "700cae1908e26c3780bf7bf8a412d407" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25589760, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11806720 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11816960 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25579520 } ], "md5sum": "bae2de1cd36ba18ae0c99b968d038613" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "c5408db5d6679e745877e43e3cc90e99" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "77646894e7249542e5aeb89d90577656" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5dc7a4ee4a3e8224dc89024862e840de" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "ef8c74249bb95b2c8b0fe9f46c2065b2" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "1f9dc875c2564a5970a250cd7e513288" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b183aeeec061b18cef944f21144ed0cf" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "f94e2e8f6fb90878d2dc381d037c398b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5b379b05d75f04471c3cccfd6a9b1eda" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "5b656630e863dfb391dbdcef44a29992" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "eef8498b7add02c5b71b642f7da18af0" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "16b15eb56fffc07a504d687451e3e46b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "c510e5afdbc5a1041520155a1e683a4a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "199c8eaa81ed460261e1a4133c4fb516" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "862b50730cd208684c0896808d592d69" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "9660c78f906ba41a179fdbc807639ae2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "f24355739972b6fce630312a14abf55c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f25767395547a11867931a6a018461fa" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "8ddcaef5f71ff6acb59f89bcaf002e38" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "07b9ca292f93d040cfa66bf2aa34acc5" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "6b772cdc3a512ba7f6120b6796e9d503" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "98c0501c7012e81dc6a7d865287b3ad0" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "42a82354b57556438b2cb0f45fda36f4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "18d939fa7b4be3397c464d0b9803feb5" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "79877a4324cd5caac86ba0577aec658c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "43dd03ec45a9edf2de1e8fd32e150cfc" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0386b7fe98d1eeaa61c2439bd9afb018" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "6239cabd6c9b59496647dfa959910004" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3229c55531bef053873cb6626bea5232" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "2c8cffde0c58f8c05e187f33c98f20b5" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7268aab5f48675c3976a68f83da1b163" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "3f65e0ef85ab8c7ad69f56ce36ec82c2" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0095e07f5ad2fbcc81c6755f33e5329b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "49143156e16f5c625fc34570ac7e0ba2" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "7c381c9733eca3770f26b03b66be1e7e" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "fe36efbc5dea33f6ee7954cb8cd68ad5" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d0f75625cbb2ce48775fb8fe4b47bb2c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bc5cdb6f367c3c41dec44fc15e5b51f1" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "12b8d69ad8e564acb7f343629d899787" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "54ddc9c414ba67cf69ad496ecd49ebc9" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "bf212138fca6aac40e13b4eeecaaf566" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "34550c55a3ebcc0dfd7ea1b022f88ed8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "ec955a0eae323acbef8366ed1831a977" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e9ab16a2d58eeeb18f43ecd23f3a3cc3" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "325f4bbf77d8b4fe4ec0bc99d646576a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "3c7919d3a363bee18ea8f4fc697ce800" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f470675f50189477efa69cfc3e8f58cd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "01d292189b335a2b07377a2c677fd8d3" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1a648e0e27ef9b37c4129f44151be8f7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "bee23057c9a3bc4890b3e83eb353aa33" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d6d80683625a45c5c6fcc523b1459eeb" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c15e8b6dec4762c47835ff1770431859" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "7d1d11bc32d740a41e5ae86c74ae442b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "1f22ec55fcef80eda60956420a35b2c1" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "e8b498ec9f3471c524c61162521762b3" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e80d13d75bf8e0049759bc7e4734df08" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "15d5845c03f9ed8abb8f505b8905be59" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "4f308a8a54f80eb9ef6d259368812efa" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "1fbdf75f975c3183a1e22ca2d6fa08b6" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "ccd08bf7146040746876289fc27e7e16" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d0a057e1e61e5b15440658da72e539f1" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "e705a50f12ff6cf4432df4f94d3fa367" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b999c2136cdb89a9cf52b9e0def723af" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "311e2af89b35d5e7951c3a7e73eaaf51" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "f7448246ae5ec2704892ef07be390379" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "013a9f6d89bd1cee706df154a867471b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "8f7ae98ab4dced4cfd6d3ad6e7ac5e8e" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b603dc89a47d2cdd61d4a43792f86a22" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "b33e4a72819cd23415a3d96ff6e143d5" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "63c87ad70c8042d4dd63d95270760c44" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "66597925d984a5a6f2d799baba7e5197" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "077bfe9ac2abada63bd0689e8bf496a7" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6313d4addc9633a7c99f00b5c8aaf450" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "c9f83884241d87847fa25301e37ceccb" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7fdab5239d0cc572cd4ffb82cb7ab252" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "f37377f0f29666b153b919f08cfda847" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "63411085cd273ca811e628ccf8969912" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7b9a47b8110fbdb7658b31c8e3a57b0b" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "5d7aee6c1946bff7173d19bcb476cb03" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5c4e7bda54236a13706537a95b8868dc" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "bfe95cf46c835966fcf88f28219de4fd" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8bbc3e0bcb9bfc34065ad8d7f782a964" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "ad215fcbba7fdf894a46e5ac3a8c9e3c" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "50c32e926c85a0ce4729c8c26f262554" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "e3d10cce39438d876125e69c9e6cbf19" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "c19854a7ec2d9955abbe59ae829564e6" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "9a3ba5545117f4a2ca1a5ffddb032b39" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "878b30ef31c3741881dfc21c8323ffdc" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "eb7ed0ab8a7f8c7bbd86ec9172f80007" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "e477876f5b76ddaa2bef1521e90870de" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0d5dabca9655f1cb1a891076b7aa5e38" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "97a4ac9556012f7f0509e94c05c84bfa" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "959a52a1020100885c986c9a5dcb6323" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "0e1df4f98a70b5f50285eca696d60f73" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2568406c64fbb8b80f0da4455b814996" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f836fe6bf03b97145d5aa018029ecd29" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "b360580587a2e8614a174d2270c6dd87" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6ee95f62208a50acfc8be58f94e1c7e9" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "46e8826a98029f033259bf55e5937b6f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "31ddd4f1a5b792066d43684b1ba702db" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "d8e190b8704ab4b5bce7607d840eb675" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a3ed41e31add549798cb69febd21f382" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ef87be9cd3235e1b7b3f4c7e287c9435" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "0810beb497cc30bbc9245e7820133f1b" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4ab7826e5187b1ddb4035db01b6d2019" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "5aab6d51ee8ae66a88a94af544f5fc92" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aa86a7e28e19a8d34329ab61831bdedd" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "ed7941698abcc25d352dd8d3041522c1" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "1e80f7ea0b3d20d3254405cc5422070b" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3b177fd8be1bdd6f64844619bec26048" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "7c7db746d5d9fe7934b9e8f84bd51867" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 1792, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e078f5204304b362f4bb5813d7e7857c" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "00265dd678857006ccc1869407a2796c" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 28672 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e5963af040f6a1ade190b2d6a862b696" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 448, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 160, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "be40921013dca4846acfeff66c3125d4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 640, 6144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 11796480 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 160, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 27525120 } ], "md5sum": "a8a4421ca19780904e24e79f537ebcfe" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 11796480, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 512, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 128, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 } ], "md5sum": "ffa409bb82679c5785c655a0e1fe5cdf" } ] }