{ "metadata": { "ParamSize": 507, "ParamBytes": 5199330304.0, "BitsPerParam": 4.50075370326778 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 458752000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 458752000, "byteOffset": 0 } ], "md5sum": "208b8da2a7b271bafdf05f6608c5b343" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 57344000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344000, "byteOffset": 0 } ], "md5sum": "4b1ecc43aa73a8194d6c5662faf07c08" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "7e65a7f12c9aebee845454df40f16f5c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28908544, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 7168 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 25697280 } ], "md5sum": "6232bc9ae9fcbb38f093e018eab52ebb" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "a8dd21d37e6756206e8857265c4c0541" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.0.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "20729fa45d8ad63cb6d76d0c9bf7ed2d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "bdb67a4c657d9eac9bcfd6b46cc8e76d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.1.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "02c3e39a5d5347fcdc5026ec091ab4a4" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "fa0287a382a02c739d00c16b12f85319" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "6f395879ca0a123f0021053a050567fd" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1461ff02c0c7e5f0ae0e5db404fdbf1f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.2.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "da9cbff49e8d852562ef00995c856f78" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "382ef02c69121260bbdb24bce8c1d820" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.3.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "a7f3125ecc200f0ab43f7aa1551bf1ab" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "fa2f8666030683e8ea11e507e0b2ef14" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "c0ef81c2053d1b6452a123fe81907fe1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c48ba06e85ed0211ddf8bcc3a4224de0" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.4.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "7153006acd003f8b98468b64dae3a95e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "9d4707f7fc018a2c27de7aa11e11f5c5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.5.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "f10f9b8927a460e3700e8c96bc9016fb" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "11517f3b4faa7a81318f9a5ea60f891e" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "9af4a276ca3a965b7d6f880753e58efe" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "234591b84e62dffe4a3d102764fd1cbc" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 31216640, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.6.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 } ], "md5sum": "12e8be3382c7d39b274fc56bd34b7885" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5e01c40e3e835395aa5a25300ed8b0f5" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31202304, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6422528 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21102592 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22937600 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30277632 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31195136 } ], "md5sum": "943d64c7efc90a997f8eb9899c67030a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "adce3e4992ee0ea1fe8b395f816879e4" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.10.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "20c2a3195ce9907c76041340f3bf426a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "8635fa6388b578cd626d7f26bbf98d4d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "dfa2cb2fb3a45e86223d877c5df99224" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c8a6a5b9b87e92e869bc0a108654dd48" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.11.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "ae16960b5a58f769bc9b4d406df89433" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6bc9ccb8d3a4d1ee9fccc8a91245c897" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.12.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "a7ba30ca52b5b30e8865df212384951e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "67a0597c374c49b2cd8a15a90054e947" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "6becc43c9c266e9076ec2cc1869bf107" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1edd55a9a0e3c18817c66abba28fcdd8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.13.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "bfe19f0b47674d940f7154a851565858" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "c137cd3e6e3812d404858777a238bb83" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.14.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "0e579c3f8c95d07708f584b3c1e2847b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "abbb71f6a58fafc6f7fcf0ef7c358181" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "1b822e0cd53d8e26e54a15c7a76d1b11" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4c2054370bf2a4b689f784185aeedb79" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.15.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "546fc2631ddda941ac302a8d35af04be" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "2b34524f37221e9c762572256b387923" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.16.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "a777b74828fb533e005355fdae2274f7" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "f23cda95d6378ea6244a10c3b754793b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "c07ec5200e9cf2c0834abaf792171a0e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "e907015fe90307523789afa14c2509b3" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.17.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "823c9768612fc0f8344c86895720961a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "0702a6c1dac786b738daa223d14613fb" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.18.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "6d40de89673913109df15d2aa96384f8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "076059d53a53cf021f01719ae80e0bec" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "62f10e4942e98c77fc507947c5e40ed7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 31216640, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.19.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 } ], "md5sum": "29bad1d2ca257d4f50d0aa4a761df717" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d6f6c3bb146f469303793a30700dab79" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "b0e37ac5a11492c878cd56eeaa44b801" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "f212fa7bd4b0a80c87fd975acaf51c00" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 31230976, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 16515072 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 23855104 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 24772608 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 24779776 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27991040 }, { "name": "model.layers.7.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27998208 }, { "name": "model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28005376 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28012544 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 28019712 } ], "md5sum": "8dcf25f02e45c258fbd464e9bb45ec2a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7bccbd65d1a8bdc878292f2bc4dbfb69" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.8.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "09863c6a94fbd26e2aa814ea10525c7a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "db0092283aefecaa0da02809b03d0be0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.9.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "71a6b174a9d234930c59d7c31b7bbc39" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "c2fa7cf1267af44844661b6aa5fca55a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "0359886989ae69899a8980e3d981733f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32141312, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.20.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6444032 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 6451200 } ], "md5sum": "f0177aeda7ff779239a6b2342c487f80" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "b546dee7e99ffea6014dfa4d00ceb6d9" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.21.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "43c4b4b3a70166ff96ba22e571fe19a1" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "edf1c8eb022a01af3e12b7de0506e00c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "2cdc319bcf9056eceb7c0829e8f14ec7" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4b4ebde6e020efa7f65a9ae29eaaf4b9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.22.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "c54cc7f3ffc3df904bd493d70f0057eb" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "8aa1c3109b1103b1d40f35842fac01a9" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.23.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "56f20978dfe5a08a29cf6884b29f42b3" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6f689a0b2f55f8a16d299134894faad1" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "de2ee10872b09973923e096e93290bf7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1d7ae0896be4401ff6334e2099fe8440" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.24.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "51d1c8858e4a5784698f941a44dd02d9" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "7cc3a1d8e5d5215479335021ff9ba611" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.25.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "bab82493643276dc66f12ed08db1524d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "d3bc9fa0dbe837d248e2f5df5ff26258" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "61de34082f8b0c483cdb621e325900ff" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d85619cd82592661687353fda029bdb7" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.26.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "102b7377cf2d4f21f5311704e2c05cba" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "808b82080ac487e604ebcef28cef92d6" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.27.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "5e377fae3dce0606b9b6fd02f6080350" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "60937200dec856a0c8373d869fb30c24" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "7f4e5304e5b7d9eb5aec5d3d9f4cf49a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "71e00c2b39c03965aaf1d267d9618ab7" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.28.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "e1c3cbdc9c4812b8d72ae13326f2be39" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "c28735682ef417f33dcc9144febd9bc7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.29.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "7b41d3d0b84962780ccfe2b32fe3c92d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "457b2d360a3110817dea88a533d043ba" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "fbc517a4ae375571011f42fb4c68460d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c78c2b51ea6a834873a562d69dcd942b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.30.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "c6a9ae18ce27b6da1fe50db9d0e4c96e" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6574e0a4b66880cd9f0ab6b53ee0c1da" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.31.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "0106fc5548befcf144444b76ed4629c4" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "d52f527ecd7ff57ea435da36a0137284" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "3a7866842aadc45f537ffd71727dd8ad" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 32119808, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 917504 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 7340032 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22020096 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23855104 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 31195136 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 32112640 } ], "md5sum": "07917efad7aae8f247a072fa26d1d5c8" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "3609f910ba861fd36160e7ceae478f6c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32141312, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3211264 }, { "name": "model.layers.32.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3218432 }, { "name": "model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3225600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3232768 }, { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 3239936 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 28930048 } ], "md5sum": "812251ab91064059c1fda630b7358ea1" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f289817e554ad98ed26a1ce86fa1cce0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.33.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "a129ed68a579368b75985d6850204b1f" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "ee1091a5658370bef5040493cec1412e" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.34.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "cea47b883964f9cb623834b33941afe5" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6765946f1eec5c2d96fff6b7cff8469a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "43369bbc74908fe30077f839205e0589" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "3209af471a3c4f75e5d3a97324d27537" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.35.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "59d9e527fcbf2b53ce75864c9d32e60d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "045ed94b0275e2ebb2e0abc7eea252e2" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.36.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "83e4e39bc4d0f46782875f225b6cfbe2" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "302f359e58b215c039f411d4b38e821d" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "6ba34fbfc0294ead668b5cbe7f0bbdb5" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f86610d02c8a87c4632018faa146aedc" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.37.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "174241c78c8320aba9c04fde25f10833" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "09086c90699eda379cac295837522b44" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.38.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "9cbfe7ed6cc423e3a96987b17a2a5e56" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "285612c1ce1e956c21ae1211162f8ca6" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "e99ca60217524174c904a767ddb89990" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "be3b080ecec9396a839dc2464e10b390" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.39.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "4eb3ffb78a5e5d0bfdc4e038f7853605" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "e8cc8360e3358e4341071ec5efd1ee7f" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.40.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "8a2f24e40658bab208b538439e72c6db" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "52e0073300fcef78538ff3c7675331e7" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "524ebb4156c6829cde2f3d4aabf470e0" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.41.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "645175301cec64b702b4bdb8cb48738e" } ] }