{ "metadata": { "ParamSize": 123, "ParamBytes": 10825834496.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e937b2e11737a73c8433773410127217" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9820d4566fb22a3df1144dd5a69ec2a9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "27453451f3a7644c0b0d69c120713259" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8453d2213e40e4fe25059741bc038f22" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2e42464286ec5527bb482582516025af" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25c7237d9dd39b8cf9dc22dd74e89858" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4da37612aa2935e67762390e54ae10d2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5918a3f86ffdc42da7fa41dc9ceb3f5c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c46f0443f9a29232b648b39c87096c64" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8feaa56e620d97c12acc424549c7fd72" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2e455d7fba4daa0bd2a79271b2a7377a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ec227f02386055d2ace2632fe0d81b40" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "47f36aa219dc4c49ed6bd52158e2b2f7" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9df38297188d2a42171e222ae587c1b7" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1d2d346bb6d1bc723bfafb605ba8e09a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9cd24569bfc4a61d1764f8492740adc8" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9cf43c77adbcbcb982f9a216fd3b61a6" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "58fd08562dd46f8a059806516f766e97" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "61bb85766246c8611167e3a1fd8dea43" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e75f953fbf0b15ad94f81d7e518c6ef7" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "21496883da67d545566000775c70c9ec" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6b3123c5b1aa53e2fcff15a645fbf218" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5b8275815efb016b68621153230c10a8" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79e3ba10352141535f272d1352b135ac" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5090361f5a224c97673ad73425805ddd" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "728425b4ea63dc12526d8054e57b152f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d8d0842f3180c9ca701b281a75c837d1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8fef16ecf6532754745eb9a0aa721681" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "17a2f7286a3e2205d22c0c38b849016c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4ee8be083d815e383e94accdd7b676f0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ff96989d45e2ce4d592d7f2faa249b30" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6551b7aea7c2bdb9f20a33b8834381c8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "644da24b38c91be32847e179accfe569" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c78179872b7154b5537add5137be29cb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "62445893cda6f55efd335139dd8eadf2" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ac6511a6fe5a418a26de295bceb19f3" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b933c0852d0cfb4c3658927fd80f95d1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2a181db301d6d2c8b7ed206b7edbd81c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4aac1045d80691db6eaf51cf34ba3e04" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0675836793b33db17b8b1cb97e371e9c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "667597c1dd1fdbef273584f5667f1f7f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "02164c18545c96b8fa943d3a9f0a4fe5" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6fa8ce42feedccb6a5c91f40bdccc3c6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "daa6d8d3d717ddbbc40d430323e37d3d" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "98326f483ed9ec944c73e7ae8726aef7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "535b6fcfdcdd18fffd1aeea3938230a4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f2958338b7e2f9a6482753f7cb98f5eb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e633ccf8732c712ee093f8920e609f9b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a0e2a0c72a558c74b324cca7a9110e9e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "49b9371a3c79460d5a99f53515735e32" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "731d92900483226505602be53bb73b4a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9f825f0c09ed9b0c300a7fb485331503" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3e4347288308d05a790fe02c901f795e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca5151be3fc277317e64fe2521d22d9a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2e2151f16cf9dcf4b4fe736b78708879" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5e81322a1338996066675e2cd8eec9bd" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "623eff2bf8cb5544d7e76f390fd6c498" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "603143f55b7cece63f204d7db165583a" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0fb8cb3f987dcd985c55e7a32951e824" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b5dd2420d9e43ecfd1e5870fee46bc02" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e54c4b303eb9450da6e8d0f260a3d432" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "99dd190e30d50bae591a456b5856ed63" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "695ac0ae0f2d58602ccd0d29f664c3c2" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6b745f2ce4086dd034d19b112ac1ded4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6acc1ec8e97f1433e6b705c7bfae90c6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "67d57035608ae2ce4438784d8d677826" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5b98eb8ad81f9eab3ec4323bd10e03d8" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dbb6a2e8d5c324712083fcd936252b16" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f7cb954cd06dba30faecccdb99f15ba8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a1fcdd887e2603ae1084ebd00565ee9c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d65a1d27568ae023351d27641db5da25" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ee3f4f996a7799a11fc1dc691bea217e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "64ab027aad105cfe2a92a07e39a89818" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c2eed27ce53331199b18b0bc9ccc2615" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ef2454eea2929d54578e357822aeaf9d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1ac8f90a27359fcc7a545adbc0f52c3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d33a7619353629a3ee4ead844ae58552" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "233ead72e7fceda74a9fc1e881ebd92d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.self_attn.wqkv_pack.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bdccad7d751c0580a8132880c098f45e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fe682c84680eca02f330bc6a7c03d240" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 335872, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 106496 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 114688 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 122880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 131072 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 139264 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 147456 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 155648 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 163840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 172032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 180224 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 188416 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 196608 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 204800 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 212992 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 221184 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 229376 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 237568 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 245760 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 253952 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 262144 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 270336 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 278528 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 286720 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 294912 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 303104 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 311296 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 319488 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 327680 } ], "md5sum": "95e75830965326aea19e2926c2dd6d31" } ] }