{ "metadata": { "ParamSize": 885, "ParamBytes": 68971290624.0, "BitsPerParam": 4.32957972211083 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "lm_head.q_weight", "shape": [ 32768, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e9d8a0475aa77d31c8783b22993dfa22" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.87.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7454db1c5781d9d08f198d17bb276715" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.87.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b5031046e1584600399a3d9a18dfa37a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.87.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1e13fe8c3d79270fefba85d13cd54023" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.87.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3023d492db0ad4be1968dec7101901b7" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32768, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "261b9846885f509b5fb69e7dea6d7502" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32768, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2b113513ec607de6e969de2a5dcb029" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b9a689ccfd737f3f787399a49c409441" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8a885b43b1bec2a1b597474cdb71edd5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "14c0b86f6d700a0fb356a3eb7d4bace8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "29cff07e946dd196a8cb6921e4534f50" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b766b17ea60e28e57437cb633c23f4f5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25288704, "records": [ { "name": "lm_head.q_scale", "shape": [ 32768, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 }, { "name": "model.layers.87.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25165824 }, { "name": "model.layers.87.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25190400 }, { "name": "model.norm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25214976 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25239552 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 25264128 } ], "md5sum": "cc68f5299e9ff9ceca17267e2f6d5c00" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c25dda0d0522b7bb0c45e3e322c61be7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1c1e6b1bd7930554c50aefe706672118" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "9323cd306bc83448086100cbf55b0a46" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d7aee1ef6bd3b9e96bc5942301d22655" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8597a4326ed4776408d392e5a2f2d1e5" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "e3ae2ce510bdde3b0bd950a617e5b1e9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7b5f43114a3984203247b4f973fa1382" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7b8645e6ac6202fee82d9fff700ba30d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3036dc171a9340ff58e203b1da36546c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "6472a3fb0c8dae0d1fbc5ae348922f29" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "55665fd776515fe28df5568fb2c74536" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c2b37f2dd5f074db3339bdfdfe3c9aa5" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "ff459a51e51ca3e7233ba9ad2d5b1bd8" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "11cc90f2fb659fb5b9a6f2086e689dd5" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "86932c4d6b3123f35d81b3c81aa7a2d0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "33d0620a4597d2e80469302dc9116d47" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "c8f63a354c747db96162f91eb68dd849" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "805adf2e6c701ccfcf55ffb52ccb5fc8" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8a2c58675271df6b87f858521ca33551" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "122a6367bb9d36254133dc428f0ac019" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "8dc962f4de262d6e53ee1e430c74a832" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "78cfd963057ee2bcce7693cdc59b281a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ffbc2e5a104024aae7272ddf5aa9d3c6" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "79567d8489b137712088544b7a92c106" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7ca8d78e668694c7430ea833a0815db7" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "229c380d779b6cb3e7e766541049b05b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "11ac0d6fbc03492b64358bd41ce698c0" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1b5232969d5d412d1be4acd39db098fc" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bebd8dabb770dfb83a1dc786be47a299" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "f13d418b47fbf1db07039d69d9ab7177" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d2e071542370a067018b855533bf21e9" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f53c233d17a12cf6cc53cfc1157db36f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b7c1018ae5fc85b60457f9ff869a4a52" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6b60b130f727d266221170ab239c37c6" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9955368e7cacd12dc912fe711d6b0f5c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "72ca5318de13727f3a26090bfcfbcea4" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b80dd618e65dd7ae94efa57e4fd7087f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "bb4a47efe60496e057e7fd1f7b411b98" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "bd7874304c5c7c4bb32d3e6d26bc73c3" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "36acef2844669cf2b3d30a7466dd6a76" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "1820f3b357d1bbcbb3ff76b4256688ef" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c045751ee47f2188a426774d4f380eb1" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "37a32879754cb671d8e9db76f03d0574" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6702df1f345469e35cdbffa70180fd0b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4b650bdb98fe440c43992650f4eccd11" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "974119dc0db596768b601fdd9ff72f65" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "161a3dbd63a88cea9cb0b01957798611" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7418758ace572ba4947c8a2ebfe76f4d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "947dfead1a4040219b269e309fd03861" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3c32e34950f9dc98e777b83437004bfb" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "473a60aaeb9fcfcbd6957b2eada17cb7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "dacd19acefc9cbb18592a5050364960b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b6911cb961f3b1a77e6d1419641f2a8f" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d524ad2e0a95c9edceb602d6ebdcc2ae" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7b5d73f6afbf3a39115f1447ba6ac51f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d9cea2c93f73552573324fbd37048778" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8a3f76193191f8d910d698c22c292e44" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "fa3e31c0856c39bf21606b1d560aae82" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "1787e2f464e18b5b0bfc6661ebb7367f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a41690b9a40b763a40cbbd938a646372" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "798171009f3f106fccd6be1028500fc7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5f391136dc445543a4ef62ccc209dfc2" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "773cb99e6657d6339ea3469effe7738d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d15ed3ed6a28e00e47ba8d46175727a3" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "81b192a92811130fb883540358566ded" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "29bd04bc0b53d76f08621eeeb1f5678d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0080e9bddb020fca1b75ae5909bf7d87" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "2de2b421c8f0d6f9d7b1f561a7f4f84e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4881d4c74f770926f5b19685628cfcd6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "83c3c1f2edce35986d5f8cba310bfac3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "21c5b3592dcce6333975fc00abd43b56" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "34312a4833ea4e5deb4d1bc59fee833c" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ef8aa961c84f069732466e7ee871cd07" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "14f7110becee0976042fcc204064ea57" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4e08307911e99e0ea80a02dd77c54e60" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "b205d2ba6a882b49e6830a0703263da6" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "39feed8d1d01c97c49cf55f458a746e5" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "611ec19120a96cb1e3c09af4b344b30c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2e37c321d8e7158942cb35a706303681" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5c42a8733e94d7c62aeb477f66b56f50" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ba6a709804af777a716b33689b58b65e" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ba76a7a960556de52d1be1b53fa7dfd1" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8e658f825fb67387a611ff863abf0f98" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0eb89df60fd90997ed1bc5f0e04bcef2" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "433b153d7f663fcdb934ff6481add821" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c58c1c62ffd7a239afe1f8648251a508" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d9e8f2497d14d5be95077a95cc8fa21c" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "82a9ba97f2c893b55351ff535e84f42a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8589907c641a320497d0977995db0c92" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "bbfae347d41c299b6be6359325333c54" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "34800ba7c995b4c0cf5f033d99f9ad21" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ff0049a88b2949485ef27c7f65ddcee8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "420f812f1ffebeb086843157c95c1fce" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2c18aee1ee46989d80a1f14b66f94643" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c24d2b1d59106f3589683122610c6b86" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "38904b4f0264025df20551b1da4b946f" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cde509ca0553c4f98e11d575c9470924" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "105322b36da6e28ab55d2c11a4c6b815" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "94681c248121e26489b4224f77efbd98" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "f84f298af1d9bffecc803fea39114218" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "36691bf17eff64bb94aaf71f8d29a760" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "dc74c9684de58b346e3b7648c43a487c" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5a36236d8ec3f2773d21cf65f7ce203a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1bcea5ee43746e1238c1efbdd09736b3" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "73c7e423c05746f114c41f7c676ef8be" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2c8e2856d084285de66c3ee031b99568" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "046bf9b3d8703ef1152ff0e642066069" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "eb3e70a0d4ecf519a7c56840adf04ab1" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d77d9350d2bfaaed823add0e4dedc9fd" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d8b74de3fb06648244cfcb27e914d44b" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "380e2986e0e3b4b94cda8f9744f71e94" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "72ae0f0816ca54f75728d65b9ba1fa7c" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "00a7197ead2a384531c65e357881e489" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "2569089334b1a7e1680cc468ded8b802" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d5028da9eeb6bc208297b948935933fc" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "1c025cd7cf076c35c8faf977f7473f50" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "17919be9ef735bb9510e24a887c0baa1" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "c8fe109af12710b0e1a0ccb0a37e8788" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "47f29370de9cda4bb8c12c9ffa03026a" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "422e19e503ae10075b09e6903a740e34" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cc81fcd7d66a28458ee5ca310fd8e1bf" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "df743fb6ec5ce29ef41fceafe5dd22ce" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b1f80579db1aba637b52da1a45037da0" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e83961b48c4d24f302151cbcac843610" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6658183f95c355c5f43412ae9f8bb57e" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "983a078aa5f2a2adb2079a5bf92d01e9" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ebcc55abf3ad8eeed94376111885f5c5" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2c41cf8e48e51d94a65faf3bd2383c1b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5272985433512bd282b5ffea8194a829" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e44ac2a8c3065ef871438fe40ede8fe2" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "0ff9a175327dc0756f4a4d21b0a9e12b" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dfa1fb74051daa25926fd404512b5b1c" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "02af26f58658dea18d4a98d5b7bb8d84" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "562b010760bbe6c96a2ec2692d996986" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f94ff09d088228dfc5ad5fd35ce7a118" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7e79554f17dcb54afbc1407d8e235b83" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e5f26c1f4bfe0c546f2c424fbc53fc4d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dc5b28aa1fcf2d81fc1de58217908fa8" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "e705c301374fb4a6dbf8b7d2f0534fc3" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "36e6efbb29b3e13ae910dbfa0bf727ce" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5bd0908342338fb42e76bf10f9f8cf13" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "5a10ab7ec62a30a7563c649ef171f810" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ebfa33f6aca410f5f4b54b77fdecedb7" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d2999a396d6bfda980cbbdf7df15b336" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "75c2a75aba9fab03bef0dbbcbd5b24a9" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b65b6d3917a9a94d34fa01808ed1fa0b" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "25b1f79ccda89d11029a82ed783c16bf" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "414032eb6e9e3546c26378c73a5a2a64" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4d064c311e996ffbdfc3543abfcd8ba0" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8ff2af4984cc34ab3242347a3b8b9146" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "707b23abee60fc02540d99eea545c42e" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dfd4c59b450a7ace9ea424a71c9ba127" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8cbca41b2b1ada1db5420e675e3cb486" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c39cf0e3db4762d6cacf6a570be74072" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ef5ca09139be4c8ecc5ea73d6b22e311" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ead906703a2325837e9588598cefebe7" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "9cf68c33156aebd2c67c52312636099a" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "da90b80329fa3d21af36bc092e2ee28d" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ed4610fa7f9ebe3a1d1cd7b47174274b" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b061b44da49bfe1e7e686c4b32ab86bc" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3528ffa4ef58c02edb89d00c387814bf" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "70e72fbed6004a105c044800469660b0" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d02918f7ce37c71f05b052e01c86b000" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "3be88f6383b7d4971cac3b113dcb4215" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e07648c254b8208449b73b27177af791" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "5ad93b77a93a5b79e10b2131031a71eb" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d31f94883e2a58f59de901d505973bfe" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "901cee16328135af9f9fea4421dace83" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "460c8885cae86ff0400976d275787111" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "9186fe54335f1f295932e549cfae8a4a" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b564ec3e07209d88f29f55ce7a58d123" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "60892277a56094b9a071353bbcbd07f2" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "59eb491371a06d90e194d29013358591" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e85c24b35d752ad07a8650325b995611" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "28889a19644b92c8a7a7e68fc1f0618c" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ecf4fc2c82f170425216c15a0adc096d" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "b69ef06ccbd98124970eadee7f9e645b" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b896f6a6d7b544e356ccc6ba48883610" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2849301cd9dc3c59ec31cdb7cf688d8c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f3fc97387c38ec3a500a4f991d05c625" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a10d7bf66904af887a1709cd31689809" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d0ec0adbadb47c80e849d4840f3458b6" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2e50f711241e7ac950b38b96f1daebde" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b7d4d2e75435291b142386b1d1c23836" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d450dfc15fbae6fd62b8ae72ebe0d9c3" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "84dd184e6d905eff4f67fe766ad1ec24" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4f028b1a28e7ef8d11dd38c3aebe4156" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cb62b651e571fc35e4f351f9acea5b1d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20496384 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20520960 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20545536 } ], "md5sum": "9b150a7e35ce81a919995a09877c3bcb" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "cb8043c21912c730593641aebd3c5aad" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ff05a1a70b807e858cee607248b0a0a3" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "17493bd2e0d66f83295d4ee8f118e809" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e42eed622ae6e94896c9c12bc4e83f62" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 } ], "md5sum": "dbadfeb5e2fe4b673f6fd245e8d7d95b" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b33ec65663128dfc59834e21aee210db" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d14ed62e8604b6b92dfbeede8880e7ce" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d009c3f521547afbc186a2d1f01c04f7" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "bd823591859345a7a6dcb944e488d78b" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d63afc059625f24f263101a631ee6e8a" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "8c6b9c67f973b8c4f33a914703c54393" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "c9529f698dfdd419551876a723a38cd9" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9d73cb2db045a754affcbd096f1c0361" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4003e086d5aceed98a6e4d3a1828f0fa" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "54ad310350a5e8029ccc4f1431912a17" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "986170e90dbd4b40815a4ab82ac4dfb0" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9bd5a22610517d1fc31f24ff3e743bb3" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "2c4368b7b87b9302203e1b9755828719" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4049a56b921b7108d55cd31dbb7bca54" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a1f8ddfa447f5e46ae5925994269e77c" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "494ff6282e45108f3f219fa1c2e5e9a4" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3464c9048bc0646337fba6b37bc0d98c" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a259a749cc0a06c9cb25fa12cc66fed" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "8efe8a82e1f0c8b4cd265c4b26aaaa9b" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b10439b46a9dbad59905af9989acff92" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "05fefa4f9c434db1b61565bf687ec4fc" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6044fd23abbc257a7fcbf8f9ccbcc352" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "5cda58752e13f7577fa41853251a0889" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "bcdaa0104bbfb469c5a8cbfb0d38fc92" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8ef6be16a788ea346fe454f5313b81c4" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "d890ea10d343f9e270adb711ab5f3bed" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "befaabdb282c40b74f2451ad027ff1c7" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c81fe1db560f096d95b226a048f4c427" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e5a660ecfe6235b943f067019744be10" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "bf4b1af44263aa39fbd1f4ff594ab5d5" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2aff7ec20fc0bbf22aa1019140a39e0a" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a77a03e4f86499de7eb56221e5805f3b" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c6a98661429f1130cccc2b2efdb942a5" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d0560da1621c41f937db9608c97acd62" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d651199d3fdd35c4159da4cb2bf4a97d" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f6b76dd0aa7d3331fe06528b68cd1ae5" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "28fa85594a67cbd36175c328004a3590" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "95f455b40e0550f4f106adcbe33489a7" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "f7f590028552776ab9c5c0fade697ed5" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3ab72be95722e3c53efef736e786e230" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7e80bf49bf25f7778727f041d0830eaa" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a1230f6bc01a9cd3d4e8fbc36215f418" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "cdfc42bd654139aac82efbf29c8c596d" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "faa35789734163158d2c2b4e27da9a05" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6d9f9c0c838cf25f90d03495ffed5dc9" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b1a3446541cb19cf38f894c9daca3ddc" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "43fa0055e0ba770222073069f37a9321" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e458d666c3c0224ee1c80b283be46da6" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f712f8b7e1f91ded117d48425c0b065c" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c165e2ea9546c65c859a9f9db8a8aac1" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e9ecbbd3d9732b3244384226ad25b54e" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "040cf9a05c37b142e93e06ad1a9e80c5" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1f1fbbef8d8c9a9acd2fdf0fd32ce5f7" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4dea3a759d13f952d5505bc142d5fca5" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d6a32f261c1718b90ecab2e1334d90a2" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "58c43decaa6b254785e7a1f827288834" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "9a9f3f8437e5fe6a251ffb83a8cbfb0c" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cf5c0e1b793d228f36e9f5256c24b948" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "7afc5c481f52de7b862e4b250f17c1a1" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cd78ff7be808244e6075cda767f25906" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "c2e57f6a29a9b1fee86bcac649ed9599" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "74f24081515747aed2f88d6be8d470b3" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "62e446714374ead7088249bd52bcee45" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d6a19c83d633cbfba9f84a9dbbc7e814" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "d3a06abb8f4f4c9e6891aa5197016e0d" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "c8aee7a4693d30caf0dc7db8db8ce13d" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "88b1d0d0bc8283e88b6c05e1b467b139" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "161b978c150e1da1ccb9061e8678c715" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f04932f5a37bdc75eac70bb8b157bfc8" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4ed17a096b615a3867ccca132449726e" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "59eaa49cbc5add053693aee4e741b39b" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "34f6fdfc5c716e4ccd3094494d71a866" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ff2637b496b7cc14616ec5410d939405" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "837ab6cb466f132ec0f6e4b7b2419a54" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "04b2dda4cac886c12cb439ccf7f8c2a3" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "fe8885eb5a992c1d89a95652aa464e3d" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "3fc9afffb7bd668ded9ab54addd9bc4f" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "df07c1219e00f8379b099004a9e8ea09" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "176e820cdffc3fa76fd811bded2921a5" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0f2f94b5117ea0d263f043c47de7a356" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d2e50d12abfdda15ca4924a151ede6ce" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bedf360079471aaa121140901d6e567f" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "1090226f44ebca11d35bffb6f2465800" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6dc2d461c673f4ff0cc3725016be2c7d" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "0d610744f9b93b93b8f7a1b418b62953" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6105d3bdc56c15e1f03fd409ef19f1cb" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "3de7b0e1546b7016319aafb5162d281f" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c0b03aec926cf6f227b01f026b8fb3fe" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "df6f2fb759ad0001de4b7f7acea071b7" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "96e81f157c0526d2038ace4b169945ae" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "5b237e74e3fe252cacc1ab8b60aecec4" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "15bdd30459d40bafd042bf00ea6bbe9b" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "d042ee96bfd6ff334c3cd332bb604c40" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "5ece41658a490637f504d8fd1d5a3917" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "29957070e92e687d8c1cfbad2bcf278b" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0542c54e2a2026406f5c65c92456269a" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "aa3f949fdf522088aecc981f614ddbc2" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "02543db0bc26f94d6017f18e71d9e0ed" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1a0459ed5811e175f0f59e91b20c1525" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a68691a2737f0f7c65914965a1f3888a" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6cabd85bf7d37165d1c932bcfbd4989d" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "9264943bf4f25c2e0ecfe9ef733d3d37" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ce4f544fff389b8232ad8799ec0275ba" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3ca23923e969f741a9c6a96c6bfbaaed" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "f4f87831ac7f2d8d6bde2e5408117ab5" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ebd5ca9a9d38be551539c878b8ed65a2" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "6a76600fb129b9e76a1126c573492706" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "771433701aba66b14efc6a1176a2197b" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c4e609e4ec2e3ddb5705aa7a55de54a4" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6a21212cbfd4e0ad45929314a0de16ca" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "50880efbae4e518cc370b18382b52a71" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "b3edc9c892308815eaedb6775afd5779" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "26500f794c32d4b6926a00dc1213a7f5" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "51a50165fac04c8f005daa66a8ca8ffa" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7bc64b21e8bf09d14c8e65fa23583154" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 } ], "md5sum": "b59057ef12fd951789f7febfa33fa567" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8f5fd537f63b672bad38ca15afcd375d" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f2ef5851f687856a3ec35786d81608af" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2eb72425e78041754328ac5f7a2a3e21" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "84dcfaf93dce5f6c040038c45d3cd40d" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "923f20a67af59b34ea2db0b7a802e5f6" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "36f8bf829a464b6be4593e9533821471" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "01fb6429ea806bfe151e01066a1ce445" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "135f350f9c60b15c386705ca8d0c86bc" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "88cf6beb14471a220ab0500e1f2c01b1" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f910e1790a1e61c302b144df12ee4373" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7d522b7e887bee77e0a23cbcaf611dad" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a6fac9eb8d31ae563cceab93d9486f42" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "9e2ce2f6276b3aad52d890aac4fe040c" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b198a982ace865d7362a61eaadcfb904" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a13a067e1ac9dff8b9bf9059a064c5cd" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "793868c3e9270b9769668777405d3613" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "b351e733bb2268fea8a55d3fec086b0f" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "886e5b28896099731c364fc3c2c45021" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "76f1a98ee0e204c23da98c259bdcb07d" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6eaff5c929a260199b45853bb127216b" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "f22d72254ff150a63d5b5c7484ac5316" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "650cd48581b896256f53fee3dc05df35" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "bb420a883141d6cfab62217f3a01cc7f" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "870a2a5c870608da58ab03385a98e49c" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8dbdbbc57b80a53de843ffec5a086c9f" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "102ca88839a5d791b777b2139ed60913" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9c8b9d9653c05e5c2354c22cedc27304" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "6ee538613c53b2589059c0a8f90d9f50" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3e9fc48d126bbca68865c0e4a9d5d5d6" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "b2462f6d4df6f846649de6305c67cd62" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "82e853ba75ab076e3f0cac7067202471" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "749a5b5a7782b5135c26873ab3a2b334" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "43fdecc3e9d6a18b550b9eba9879911c" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "59d5229a575d0720951b513c8de6eabf" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "1af5d00a2eb61ca06b49c2b1312933f6" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "57d8a3073c5df1af0d41b18417e040c2" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "510883109b8f5b6a293ae95f3b9a3e58" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b2db32c983193b81bb325d30c93e3f79" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ca590dde9ff2d5a6ab7a96c5d6955a7e" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "245991e94da3653ae954502de3daa153" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "49846092681ea8ce6a823704c808fb19" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "2c1b48c571691114e20a7de56e944e3b" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "faf0f08c03ced0ee69464f9ecb5584b2" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "88bd33f776587f192db499f151b78e77" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "549c13e77664ca16ea00071ff856fc48" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "8a2f0b7d577f79e82e1bf8f88eebf4dc" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "39c34946eafa16ef44887ff387d0e71f" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d36c9b06c610faf7f81e6b25c2668804" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "54427216e90db97801eb6473525378c8" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "2b07f9616a8073b33e4318086ebee6ac" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1cb1fdf6d227d5cf7b089d858e7f98b8" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "5c3628673a9f8ae58399f11cacc9eb8f" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "9417b0a3a3b4bd3606ec36fb0bade549" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "a0ad6df80bade02e7b6f5497ecf3e23c" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8a46e84241a09fd7e53efae4dfc7a973" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "ce5ad0ad1fc7cdd502891cda749b7d22" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "bd074c91b4be3e0f4f6cd14bfaa7ba29" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3928e0e978aa11b2517e4124155321f3" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "f480295a42672cf7dfdad3eb045a0be2" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "385bc3ed7e21e8786bd3521c725f74e9" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "02a6d3566e6a425cb204427e5c6c154c" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "13f0620a712a04ef70e84974d1604019" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "211ec6517054e366e1f9837732a6b5ec" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d1655d074e4b73a2d157023ccd811502" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "76753ce81fdcc0aa28457244e13fd5d4" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "05ab8e865cd4784cb2ecda298eb373c5" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2537e303395cc7ddfc07adf82cd12a3a" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4b55c077f6511e0405300167445fec1d" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e18e53fdf1daa1a3de60e294f3901afb" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b38c013b62c1b6a5d25dc7b5f25f211a" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "332e0e236d77793d2232db58295ba1a2" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "e17bf9399ffbe9f5914ab845eb1528ca" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0b0dc4fdf689e2f2b1487f8a42fda069" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2185b4149c82b9720bd3d70e6eb5b920" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8b48851ba00833f1c5970f3e369504e2" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "19249ce6d7e67afe14ba34a646fffb9c" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "38b106fcb4085d0a0372cc8e3b176397" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "d03d2b7fcb97016a2a46bf45b57fdd4e" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7f8def120e83a02961693ed736d1795b" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c4d85412fa84d734da85d1b8e647e061" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c04fa97c6fbe0ad53f71223351ab5a95" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20496384 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20520960 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20545536 } ], "md5sum": "7d3bf41658d6d647944e23d944fae458" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7ce274538717808d12cca61c92edf0ff" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1691e6a81bf19926a244a0c58cdfb690" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "337f27295d9161edd1a4450e045dc9d9" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c96961de38865db83c6f40f175b32cbf" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "677ea717c315c8d238b7eb8f31062b1c" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2e6c32735918f0d556167f385381541c" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "21ff8c7c28c3c69b3f887a690099e0cc" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 29933568, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 } ], "md5sum": "efcc70e72f31d8b5b549dc1e91d97481" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7be1fde9a3433d5d66f392a1e4e2be31" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "44d829a8580a0ed6e3f2a19b7c0e6f35" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c35da7cb7471807da233b1dc397fbfa9" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "dd1d913451ca9df5eed168a5d1b6d1dc" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e2d94a59566ffb79923bd9a365731c13" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "fee2dd92d5b2f2510192099f57f8263a" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "07ea6f81761607cb4aa7d3960f50c8eb" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "bbf98a6b29f0ceaf25f9d254650fc63c" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "cece6929d4a74157ed72728bb2126438" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "45b3453c6c8abc947af0ab87b1fa3cb4" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "462c5b05a31bff30976cd100f069ff2f" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "310adf9e5b6c267790a42e2292a2092f" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "e9f2c72f2586885cc8509d8b879e39b6" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5decd366ed2d37b9465d5cef2d01b81d" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2bfe0e755456bc3292ede20081abaf40" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0bbd1bdd45496df0f80635f491beaea0" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3c46ac61a0c0c2b4c53f975665ed9b13" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3c94449ce00ea65119d8ff66e587deec" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "bc2b4d86a1772446f0723e82cbbe8efb" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "41a120dca2db7dc5eabbe5435a51190c" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ae2c0d672cf1854df64ca1319091fda7" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "0674c84e5a63854862a6c973f28e692f" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "9fd35d122e5d8a44d5a14d9a537f6d2c" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "992c8ea0c26f5866c416282295a1b1a3" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b1f60b3f3eeecc4918eb7b762d1eb0b2" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "817bf2951679f49d0e62f4ece8da6602" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a8e28cfc94550d9c19cbc5a47b38562f" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "46dade3a3c9354fd4cb78d86068b9cb6" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1e50360edb92256fddf7745b68eee0af" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "f880fa77b573d4a7c487bc9d29d113e2" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "14c9aa837c8a289c1d837e62a3eaeb6d" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "71c4f0c47a022fbd6c4d6a35b4567d1f" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ae24b7ce15e362071b66c97b3bc32480" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "96e36709c72bbaa2bd5383963a031d4d" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "535430e9c7071fb13af1f35e47bcc4a3" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "340be1cb1da5d27130c950d8475c0edc" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f4fac28c66315aee59968fb46518530f" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a209bd3b418b99f10d8303904154d364" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "0b143c922803060347797dbc7a660e77" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "70d49b0a29e1c13083aab423c492d3c4" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "e97497f178c9f400f35983c97dd3bc2b" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "341580e8f7a8c9c1ff53a70decfa2e36" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "0bc31eb6da9b5a40fde660ff18a2f051" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "74c5b93a331151c31989385bdd01b7a3" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "923718d33d7aa00c4603dd20eedaad12" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7cbab0ea0863ba840fb13869128656a0" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "7f95432311a896d49b4ac395a596bdfa" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "43e2bfaa40ef1217eeb677fdc1506457" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2fc674831b15d4edae4de7a7df1e9fc3" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "65bbeba93eba386ace490ff15d38f9b2" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "18f558bbe8d23bfcf42f738bdd32e0b4" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "d09c20bfb8cec7b3bbae5ac6df032836" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "66413a3ff629c3fd8c55053a9cbf297e" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "72f2ec040bf75cc3fa53d1185f11c3d4" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "fa14670b681bb5dc24476fe14cdfccb4" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3c7457000227c37b5dfa9d40bcc4af9d" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6a721e12894bdd6e5d821e104930b7d3" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "609b73e27b43eaadacf37c3aebf3cd30" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "479964b76c0e909ea61f90145344e4af" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7053766404aeaf8e108a51bbb5c221eb" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "fa074c9592b5cad9de1657ba84f8c352" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "330f830aaefac0f83cceffc57e34e1b9" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0dfdab6740aa6c635ce27da308d24606" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "522d75ce0a9bad1151bcea3f59488476" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "a77eea3741771409324661b768b04652" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "801dce60aec231a49edefbcc710a2a79" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "eb50c43c503a30bf62c178119383ed04" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3d545860ff78342d7dd05392b131c81b" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "4ecb131ba655272005fbc0e72e462cd7" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4ce950e7790edc5e0862a767b6496f3d" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "2d5c9bf5b2480e774b3c32b4ffb0adc5" }, { "dataPath": "params_shard_484.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "ef277ecaeb288ccf0d30239dbfc14956" }, { "dataPath": "params_shard_485.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d1c73453c20bf2814431088d4e86b278" }, { "dataPath": "params_shard_486.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "014b671e0dcff407b0faa6bfefcf9753" }, { "dataPath": "params_shard_487.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f963fbb67cf309b4834a80181779b2d7" }, { "dataPath": "params_shard_488.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "117a31ebb0bc03f30bbfdd4d158aa22d" }, { "dataPath": "params_shard_489.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "1ec5396aa4f0367f050a906f1459c5a2" }, { "dataPath": "params_shard_490.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "7cc8311cfd54634d71441cd63b1c0c4b" }, { "dataPath": "params_shard_491.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "088621fd63abf6188b4b2f60f7803024" }, { "dataPath": "params_shard_492.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "2d992a6aab1c64b9c17d00cfc419198b" }, { "dataPath": "params_shard_493.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f88803ea466fb29a2ef1ed2b4ec47c94" }, { "dataPath": "params_shard_494.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "87e792888cca4145ee5d8e03043f6b47" }, { "dataPath": "params_shard_495.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "1733a415586ee0fe85dda53f19e8671e" }, { "dataPath": "params_shard_496.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3b598d161863eab6d02c4198b13b030e" }, { "dataPath": "params_shard_497.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "c5cb510f0c9a3472de38c3682ab16073" }, { "dataPath": "params_shard_498.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "17dfdc274c1f07b3ba223c3f8b5a3cdf" }, { "dataPath": "params_shard_499.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "572801e85bd353a629afa798c4b3454b" }, { "dataPath": "params_shard_500.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "58d1429a5a05d13f334e1c370c07ad67" }, { "dataPath": "params_shard_501.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "e00cdc000ffd5e3dfa44cd786ecb69d6" }, { "dataPath": "params_shard_502.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5d1e1d0a49d9143e4ca2aa545289ab83" }, { "dataPath": "params_shard_503.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d789717bcb73d657de252ee67a23db94" }, { "dataPath": "params_shard_504.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "471f32cdf829e3dd66f499a2e133dc8b" }, { "dataPath": "params_shard_505.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "4905c7aaae5674110d5f7f4dcd751aec" }, { "dataPath": "params_shard_506.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "8aadb43e23c822f1b6477092f95260d6" }, { "dataPath": "params_shard_507.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f325c01d55916d29e5a5c4ab47f5a737" }, { "dataPath": "params_shard_508.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a54a522b9c42db96f4f29fd7e64e0d06" }, { "dataPath": "params_shard_509.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d33a6c9b7bd918e4978458feb2366f35" }, { "dataPath": "params_shard_510.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "7390b8bff820b1510cfdcd272b88d1ef" }, { "dataPath": "params_shard_511.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "41612d3d68815f8d53fa21bac87b8af8" }, { "dataPath": "params_shard_512.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "255c93c201278eaeb244ba1e6cb32cae" }, { "dataPath": "params_shard_513.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "909bbbec617bcc9a99e2b084c3d59425" }, { "dataPath": "params_shard_514.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "a4cb34436f8d74828e2b40c432c0409e" }, { "dataPath": "params_shard_515.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f8c81cd1e6f5ab48804752c8cba352ef" }, { "dataPath": "params_shard_516.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cb4808d09db4e2e191a8e265543273fe" }, { "dataPath": "params_shard_517.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "2840912f1fd2637aaa031c0fbd9b8994" }, { "dataPath": "params_shard_518.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f0858df8246db19bdd3a2bf95c431f49" }, { "dataPath": "params_shard_519.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "3ea50d99bcc9fa03b9bd186f3e313ce9" }, { "dataPath": "params_shard_520.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "81884a9f4f48757e39d32662197be5b4" }, { "dataPath": "params_shard_521.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "030e811fcb2b33d3c413cc4ed1fc5f22" }, { "dataPath": "params_shard_522.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a528ea24787c48c2f86cff29988a6742" }, { "dataPath": "params_shard_523.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "0864fbcd11c12507922a9d9cf9b4e967" }, { "dataPath": "params_shard_524.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "0a2866751b58c8393ba986666a0fa80a" }, { "dataPath": "params_shard_525.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "5c659bca61e035e2587b4ca622e18ff2" }, { "dataPath": "params_shard_526.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5658711f3e6144d713dcc1ee08312d55" }, { "dataPath": "params_shard_527.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "60a220b678b5a5f28e14ea5ae3dec9a1" }, { "dataPath": "params_shard_528.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0d4d00c439db3a73bdfa9cb002be382b" }, { "dataPath": "params_shard_529.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "54b781192a5fee11a5f7f70c831dfcb2" }, { "dataPath": "params_shard_530.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "dfc1d7fbb67be0561ee196758b4a48e1" }, { "dataPath": "params_shard_531.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8609c8a5dfdbe6760f514572316c0a71" }, { "dataPath": "params_shard_532.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.80.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "fd1b913c6064ab5d73a2c71ec5e2136e" }, { "dataPath": "params_shard_533.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.80.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7f68d5f7861e8116d4d7b6a512cd4820" }, { "dataPath": "params_shard_534.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.80.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "1258a0bef98ab5de46d69b7ed14cac0e" }, { "dataPath": "params_shard_535.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.80.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "dbdf979c3a2fbf2eeab7f90d503b48ee" }, { "dataPath": "params_shard_536.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.80.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "734b86083fe3bab23cc728d9c343adf1" }, { "dataPath": "params_shard_537.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.80.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "885150898be16c2ef02abfb61c5988fc" }, { "dataPath": "params_shard_538.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.81.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "ee4575ade5ac3bf260c3c2e6b22704cc" }, { "dataPath": "params_shard_539.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.81.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d18a3fdefa00322072e3f98d9f134395" }, { "dataPath": "params_shard_540.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.81.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "f5ba90daa1236023287703b1630d189a" }, { "dataPath": "params_shard_541.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.81.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f84a0387e378e0066913d0145c6f87e4" }, { "dataPath": "params_shard_542.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.81.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b96d51ee352ef80d0fe1465ddedd4077" }, { "dataPath": "params_shard_543.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.80.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.80.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.80.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.80.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.81.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.81.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "6039656121a2720c4eecbc53a38f97a6" }, { "dataPath": "params_shard_544.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.81.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5e7a37dad5053fb4f849ddaa7fd70c9a" }, { "dataPath": "params_shard_545.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.82.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "da0f57b7efcda12ad9ee63a3305d6a5a" }, { "dataPath": "params_shard_546.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.82.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "648efc0b6dc52cdd5e88f920a0dafcb1" }, { "dataPath": "params_shard_547.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.82.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "72166c7f4c03c17bdb092158a410db9c" }, { "dataPath": "params_shard_548.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.82.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6e71c320425ceb3cb5c733723c7ca711" }, { "dataPath": "params_shard_549.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.82.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "d5a14592eba72800a121b18c594f149a" }, { "dataPath": "params_shard_550.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.82.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1b2ae8fb32184170190383a7fbccacc7" }, { "dataPath": "params_shard_551.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.81.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.81.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.82.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.82.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.82.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "7c7e9904ed7f0c9391f7efa171991a60" }, { "dataPath": "params_shard_552.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.83.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "c635c4dfee43b7d190e7c876201b9a73" }, { "dataPath": "params_shard_553.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.83.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "325941e73c30731d231382bb6da46ee6" }, { "dataPath": "params_shard_554.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.83.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7c3d09a1818c168200a3938f09263d38" }, { "dataPath": "params_shard_555.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.83.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6212f7ec93bd584866bb4d00d3d69068" }, { "dataPath": "params_shard_556.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.83.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "4d7056fcdbdef8f8e28b72addd4e5ebd" }, { "dataPath": "params_shard_557.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.83.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4877581cdd767e68460f17a2e60bea2e" }, { "dataPath": "params_shard_558.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.84.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "c259a4d2803c6266174fdb7fa20a8218" }, { "dataPath": "params_shard_559.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.84.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "0b1cda86b4725ed6fae3953efdc8be60" }, { "dataPath": "params_shard_560.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.84.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "66a868837996da86781ddc18d979c536" }, { "dataPath": "params_shard_561.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.84.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7b9f09cd6faeeececb219d10a90b2fa9" }, { "dataPath": "params_shard_562.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.84.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7501a7f59a8158627a322db9b713f3db" }, { "dataPath": "params_shard_563.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.82.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.83.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9437184 }, { "name": "model.layers.83.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20447232 }, { "name": "model.layers.83.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29884416 }, { "name": "model.layers.83.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.84.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.84.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "d2c35cc4e67d9c93e5dc5529639bdd0a" }, { "dataPath": "params_shard_564.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.84.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bfd6e354ba7b6f14d2c70315b94a2346" }, { "dataPath": "params_shard_565.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.85.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "c0a9cbd301c8b5c665b61555c7abd4d0" }, { "dataPath": "params_shard_566.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.85.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "1d46359f1f7184ce5cf1a00d10501b1e" }, { "dataPath": "params_shard_567.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.85.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a256654b400352c9626b96f707a0b303" }, { "dataPath": "params_shard_568.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8357cf7257fc327a47ad617ca25e45b6" }, { "dataPath": "params_shard_569.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.84.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.84.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.85.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "61874f4d8af022df59bbfc8d0d49ac1f" }, { "dataPath": "params_shard_570.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.85.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "21f60a538b36c0b8fe69851ca40c6a68" }, { "dataPath": "params_shard_571.bin", "format": "raw-shard", "nbytes": 176160768, "records": [ { "name": "model.layers.86.mlp.down_proj.q_weight", "shape": [ 12288, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 176160768, "byteOffset": 0 } ], "md5sum": "c63cbc7327d4985d809551fd3782fe5f" }, { "dataPath": "params_shard_572.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.86.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c141caae6ce440911d3186749e7a175d" }, { "dataPath": "params_shard_573.bin", "format": "raw-shard", "nbytes": 352321536, "records": [ { "name": "model.layers.86.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 352321536, "byteOffset": 0 } ], "md5sum": "2edeeeec5914696ede20bc700ea87a84" }, { "dataPath": "params_shard_574.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "model.layers.86.mlp.gate_up_proj.q_scale", "shape": [ 57344, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f8c3b238d9ee1f85a643f5e2d14f27ea" }, { "dataPath": "params_shard_575.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.86.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "01ab3af1561edb175172e49ecb1cfda7" }, { "dataPath": "params_shard_576.bin", "format": "raw-shard", "nbytes": 31555584, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.85.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.85.mlp.down_proj.q_scale", "shape": [ 12288, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 9461760 }, { "name": "model.layers.85.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31481856 }, { "name": "model.layers.86.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31506432 }, { "name": "model.layers.86.post_attention_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 31531008 } ], "md5sum": "144071c73263b6f528ae308f14d3067d" }, { "dataPath": "params_shard_577.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.86.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cefffe5f457b87fe666122d709a64bbf" }, { "dataPath": "params_shard_578.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.87.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "303ab7d0b102e3f80b21e63e3d509c87" }, { "dataPath": "params_shard_579.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "63a38ef398f0ec0260f0648d7ac0bad6" }, { "dataPath": "params_shard_580.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.86.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.86.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.87.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "44f652be5583addde3595a51d9a11a85" }, { "dataPath": "params_shard_581.bin", "format": "raw-shard", "nbytes": 9437184, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 } ], "md5sum": "44a3de0ed565c90d35fd53dfa8ff1ab8" } ] }