diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,6849 @@ +{ + "metadata": { + "ParamSize": 451, + "ParamBytes": 65527752704.0, + "BitsPerParam": 11.661296738129801 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "4ebab047815dc4a25bb4a12821329a31" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "6142757cb6b949f8f7251628aa53e4f3" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "91faf2badaab41bdb1a119d100ec0e11" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "f32a4192cd8fa57457ae61588672cc91" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "abdab6316e1f0240a5c48f6934b7c803" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "382ff2b7c106176ea687a2d36e53743a" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6be1bba2fa49ccb4609c3a1c92c0f894" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9d4716d700845daf9fc4f8c28d19897f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "770243e89abb4649829aa8fcc9cf0c67" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7df3208af16bc2ae8863a9eec77be716" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "91c38c8695518da71cf3e46654085bc6" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e4e948f162104e91f173a206ed0de9b3" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f26d4e6e67bf3d40c2c50ea7678a7e7f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5fee2de2dd83ff1c6a6fdc087b1840a1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b2060836044ee9f7885bebfe6e3da7a6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3c6e07eb974be53096b51603dfd06fff" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c55ddfe5d12b81d7258b59b94e0bf992" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "785c497d374e811d009dc3d319449270" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "15fbccc5ccb57d0381ba4b0e5a1c651a" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5ca40b4e619ca1a7ac932e679f5e741c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "71b37c8dad20cb0a133533477b0754b7" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a426c453fde1e3336b1eb45f289ab00f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7107cf91ed4a0af103d08b8e81ae0894" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4653f0d314336ae44d02a078098f93d8" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "000807c8c442b1b1696900ca2a5f116b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "30f96456b2b6876144335b2003d13893" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "854cf5cfe00f7158daf90ded7b8ee20d" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e2a3544e90ab15e2a0f569e90b379053" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "f1ee861a1dada0ea71da4218f039b68e" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7275fac192b4cad69e5074c69d52aba4" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "23ce9daaf4e621477a729ee020e7518e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "12b8bec623afd3341bae1faa676e02ba" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "610d70642569d95aa85f1492577ab4d7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "69ea0bfc4416eb502764acf912d8a11c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b8bce5b75c42ce1824ec9ea8057fcafa" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "9770dcf5bb59855d7fdd0ed4b27a8882" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "db41009da66906ca62c42ca9f24119bf" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "03e307e54107da0846ac526958e5665b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "74e5234f7e3c3131ef4ceb18b8623b7f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5da39242e00ec8f0c8d12754d058e72b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b91e173eeffccfd061983aa4e32251e5" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "445be9c217cfc3311c2767fba30f483a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7fd10b234863a2a3f5f5d6f55c01e2b6" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e2d5c6b716ad99228ca267bb77ff0dfe" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0bac228d75f853237b101da4e6ebb03e" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ebcb979a17b06e6170bb7b732b0aae8c" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "69319ee3bf80711c82664076292f1599" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e30d834c2f15c74ed98dd2015fd29e19" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c63feb59b8feb980e602a40284fcf693" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "1b3029721555a18fd9a0fdfb8eb1b1db" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "454d8125c44eff2155573a21fc32afb5" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "94c7b7d4cb975f169f88759ffd10c7e0" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "69580377f1181157d97e210dc9df708e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f6ad7015f508cfa1c51ba48c188b42d8" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e2904f05ca0bd62197a1f0a4c8ad221d" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3b59cb145359a5f3c1360f84c26091cc" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1469d93d763ae166c9161b9e611d736a" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "16e7595d9738fb616834867bfce50650" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d50a01c6c39712c5f5689e2e12ce56d8" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c470551d31f13b004b8df0fadf7b1082" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "546cebe8240473b9067d77b1e73f0d6a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9bb8a20a5e2c8a98860fc65324540c94" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "45fc87f148f3b4ae7b6b75586da37025" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1b4c0bf675a99c0cd0e8368815548ff3" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6ca521f51ba3d708113d817b95922aa6" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d38948c44febbb26a677b15dc8b691bb" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c5ada97fad88dd5e5f6159b02e4af9f2" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "188b8079cd8fa755f24c1126916c9fdc" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "26c1787fa658e73f3ff29e62da4ef5e3" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "05d78a9452b47c2282e3094f6808cb20" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "fc9a874e7cf6a81b1af4240b007cfa66" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "12be63e3ffbcb107d6a4321bbe471158" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "edd51b2f71391d694d08ac588fa0fe4b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "8d1ea58909ab251bdaa5e6a47e9e5e7c" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b2cbfb366f7d92fad6c5842e208b5e10" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c621d64dd47ae8906bc02ae03151cb7f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "4952d4c7d963ba963c88b67ef35fdeb9" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c18ceafb27e183c674d42854be7a6f8b" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5cfd2120d0c42e087ea7ff446ef26e76" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5c38216022eda052326434dd748b132f" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5c8a9ba9e81fc11cc4d58ec9ecf10c27" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "fcc2b896d9e6be08ec775d4d47321f31" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e729b04b35469a70f432074077221419" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e0d4177b685dd557f9ab6ac55613fcf2" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "52c07274a578a32208fdec9bf74d588f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f74d437d00f777b0b33e70da165fcd6a" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5ece0f6862e393595eeb2196e42f4069" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7114cf51bfdf05b0b92524ecb3aee10d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fac02bf0dcb7572fb443b0d256d7cc56" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7972c8c587aeb14e4fbfdc9e32016f21" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "05d5c2ca8ff615dd0981e44bed495d15" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ee72627e8c3ba622691ebc1f59dee36a" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d471b5765e36a9b3e253cfd0b6260e7a" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4bbfd95cd338662510f285ae6e846af5" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "69f4848aef2979d706382b3c3e34d429" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3c4d4dcbf6e321a0647be13366a86075" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "896a8920caaa46fdccd8b3c2952e1e3e" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "29e636180a9cadfb645c3ff141152e4c" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "68543852537255de0f59460b3b5b320a" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "cc0e894779991be6e3c1f82c31681a55" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "08321ef408d9badb99c9045c1683b9d7" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "135a27d3a9626df0a13d070a6104e36c" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d4a4158c02ad72cca6541654cc8960a7" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "47784749f7911a10c61a7d19b0bce23a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "583dc8ed6fb3b9f12eb8a157e6a31888" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "01d82109e2b053392c58c112837aa3e1" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d5c20b92279f030e665f87498574c552" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4616c54b751908ccb1ac86edaf98eb11" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b0580b4c326008dd79b4ed2a69b49eef" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "076e5da95262790a5d3f1426bfb6cb1d" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d2f6104d4972749df63e01bbc8fa4626" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1ed0af9be86b3f82693a098876ec4054" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f1731442dbf7b6836eea60e7e5341c74" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "8a3a2769015eeda747c6dab404627649" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a03f74a04f9262079f9d4a494589d4f9" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f75c890b4feee824e3c706fec9e319ce" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d5546f2cd028c025073bb0c62beb07ba" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "ab957e4cb6263347f7d3c207e3427c62" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "75f63149319fd2dee3bc4b1fa2fa9e00" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3fcc1802ee292e02a7af6e0768afc3ee" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ec6f8aa47f6fc1e95c15f411a99ff467" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "93b2e92475ecb2964c181b8c522e072d" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5c6b90cce7bcb06f02e5b66eb5d0cb4b" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "604da35f9395a3d2d46afc1de505cffc" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "58463736c560a47373eaf9600bfdb157" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7a48ef08092fc550e5bda22fde150eb1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "6f19fe3de5ac81e68742b58d8495a2d2" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f7d6a10c272a5df1b280352813835523" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3ca029383ab487142802f4042c7cf851" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7e98d663037efc91cd25ea1bda673a4a" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "45ffdc357bf418534543301eef9b3614" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6b6e399c0b6c67e00bd528b16e12b4ba" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ea1ef05775ff2fc9eb91f2c32de3a145" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f797387bcca0e89d5a12ab5e6569a9d1" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b9e9731eee6323be581ed42eb04eec4b" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1f10cadffe8744c670137d0bd8cba5c8" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "58032f77710c05aa28ebc0922a055641" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "73d25a7990b064d75d3940f8c828da7a" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ea0b36a56939d2eded2d2ece611bc8fe" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "468de7fe1d77509417133e0d49d1c042" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3d5600b51ab16792c6a937f4f641ba25" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "132779a872e73947380f72cf48fc06af" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b528191419d940cf7fe619d5088ed136" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2dd22a63aef81eb91ab5dd0e3335e202" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0d986048289c70a931cad927ffd7078c" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c2d37cb2bd7eefe6ac17b98e0c990d20" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "2192ca6c2f5cc3023848c9a6576a1247" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "869b0c193e7916866f27f663ba3e9660" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9adbee946b8f33957ea9cfaf7a53be9f" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "ec7d72a81fdde034f3b413b16de40431" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a49d10483400832264db2a92069e6e62" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2592c76625234644b3964b28dc488f97" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e7f2978264a91a83c0ff74573f61edb9" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "af23706921ecca51196c2c13c0e7c28d" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5d86f0c588d4dd7bf4a123611e937b93" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f620578b0a699d4d82592520f1f52950" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "842314b084d3dd62d928ded98968e224" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1dee23de3fa38e9aa1eedbde4a11fb46" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "59d1abc636058cb4135de1a0dbdab0ca" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3a282bceb506182c3e9ae3fc7ff44a4e" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c2cb53a6d26354ccc2c0c9773d3c3e26" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "dc27334ca214721382370a88c5a9affc" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b2be639cdca8d9d084145e185c95d7b0" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9492eba46a87dc48fae76324e919ff55" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4dc90d2c33953941d6a7fe2ec2cfd7e1" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "586efb33c244d2f305a84c0ebe6d7c32" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4eb6bf4f0a27244e3239739a29e8bf53" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "538d6c66016a52cbf34dd186dd1bbfa4" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e326d4b86bd1443ef3e68d08a3346a3e" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b32f0d441ffc3b46bd09bb3d4650800f" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ecca691dbbca4299299804138aacbe17" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ffebf087642c164f8be9aaef00589f47" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "bb48ee3a23058d83cb8b8ccbc56bba25" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "388345b13cd50c0200f13de5a43e3e9a" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e9b75d26352670a40f87247ee820dafc" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ecfd9294bd47734f2902651909c6ab09" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5464b8e45a4b216e23a8e599a6246f00" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9595dbb3f9a60cc90539eabcabdcb914" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8a63ce0ac904e74a0f732c069b763f16" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f310e2ff59128e9c169fc020668533dd" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4dbeedde5619d1fb6d11effdf24fde83" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4807437c8a9bed455068e281c4aeb899" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b1f9e05f9a849f05fdf91e05f34a85a2" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "778870337876ee649f0f02ea787a5ea4" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "64e3c421234fb6370434664d0e8ac2df" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "08cbd0bd9dc093fba6be11b6e211dd22" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d7b3d79282204719afaef5ad03fa5cc3" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7589121c42cfea25580df5cb35dbe3f5" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "fac93ee37d4966f8f7bed7baa0d77628" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "87b7da44649bc6767a3323d17c3846ff" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "60516058dd9cf879b9ca8cda82223623" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "19807242e41bd91bbf3eec7191427a84" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "39603864d378d3039e8bd56b96b9ba3c" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "19729fd254d74a6d6ebe7cccfb537607" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "32ef9c6d917e76bea36db52e89ca0e3a" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c8c55042629e486107c3f5147beab130" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7082a8710212efa0919c70c5cd85e36b" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6ce012223592455e085628da5152a287" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5dfc357b6cbfac9e9931f26a7cec3653" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c39efd7684406799a10dc3b5cf43acc9" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d590f2d9de53027aafbfbb438ff5d0de" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "67f93d0184da1a9eee0988388e56b388" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e112a3cd9c9f15178d698fa982a9212e" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8a5c44176e9d36a912e3eef33394d5f8" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "eb1f92c2f4d2df62e0e25ef202f6a71b" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "25fd1c2497046ff741da909e549a8003" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a7968fb24c9e1efb26125c3cb1157687" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c90952a99572d8ea4c966f680ce5ea3a" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3506ba9a7341df82ccebaa702e3e31b4" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "bb4cb4411f9699ba9edd42a14fe63dcc" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3d844ee9ead864f8e7ba3f7d79a2b067" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c5bd9fd77f1ee2a466538f6cccc4abb3" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0a07afe99c02acc6623314f98e0e7042" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "727ded046641d0d7291ae1f6dd7cca15" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d2a6b8e3dab3fdaa3c3b94682ad971d8" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e69b713c7ab4cb9a0804aa5d8512a944" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7ec40c8bf46499301bba07fb92ce64f6" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "458c5049fb5a98958bc2789fd8a4661b" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "846c92564124f3de0da9c1fe9c222c9a" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5a13bbc57d9fb9cedd7f8d58ef997cd3" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "08910347e75d55743bfba18270d4da5f" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "94145c2ab896c5e42bdfa716a60d20bf" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "062b832f222716190f719186bed31161" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c45f9599200903b95b612ea2930ffd79" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c4b10136d11eef3df3941315dd57eb86" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b3a7f6c44bfaf5bd11100daa3935cf91" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "734d1983c20928b5c14074faa06b0eaf" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "418af5d0f9eb4c38d241baca02f4f1a5" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c342fc790357827dfc56071aad6d0b11" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b9f762474812f9ab39c37be73882ae17" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a33eeba8c40e0d6d0d14fc9f5f3180a9" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8da40d4c433458bb42e523c8ebc0febf" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "cfbd4d64c9591a96c1f791484331d9c5" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "6501aad225757bd2858110ee07fa6d1d" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "134d4486a8a90d47c5b84628347e7cc8" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "490e63dd84e3e19fc59ef6b4fea67900" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "82d1aafa5fd2c2cde3816dc1cf158b5d" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "81eaa978116ae4062ab46264a84fca05" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b3e348383eb3dc470f594bd0f7ef9164" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d14fe54e17c9c10414b9f1e9bd1f525e" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f308449b6c35588f8f4bf69d05f7d4cf" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1ce849d2b66bb5fbf05b17f37836041c" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "988d3abb58e781a668859bcd8be3e0c6" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "848c8c3f55ca6846f5c19919b2d3c73e" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4011187c2b3c9702ec28b7c22d500325" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1602243caa912687bd3a27f9ebfea73e" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1ba75f1772670be818f3941927a26f74" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ee8d0b6f2a0c66c627f33e747692139d" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b4bf9f0a00431e29f0a0959bf57047e6" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4ac1f798741e90add03065eb1803b681" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ef70bcd2d50ae806e11b4f1852e0fa41" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5afd078c2166291b9e066a8240c50b60" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "78a19235c7baf9cac6a8da2cb2937a4f" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0d78ce4c346fc113a819b37d2eafe4d3" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "cfacd9675e9fae450c86cdb490ed189f" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e1eb88ea9c0dc29197fd966114235f10" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "193101b6973c37967f9be0ced8cce1a6" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ecfeee3d0547a155fe21b45f886697f4" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 2238464, + "records": [ + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10240 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20480 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30720 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 40960 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 51200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 65536 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 75776 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 86016 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 100352 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 110592 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 120832 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 135168 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 149504 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 159744 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 169984 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 184320 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 194560 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 204800 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 219136 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 229376 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 239616 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 253952 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 268288 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 278528 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 288768 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 299008 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 309248 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 323584 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 333824 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 344064 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 354304 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 364544 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 378880 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 389120 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 399360 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 413696 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 423936 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 434176 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 448512 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 458752 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 468992 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 483328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 497664 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 507904 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 518144 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 528384 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 538624 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 552960 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 563200 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 573440 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 587776 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 598016 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 608256 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 622592 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 632832 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 643072 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 657408 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 671744 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 681984 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 692224 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 702464 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 712704 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 727040 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 737280 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 747520 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 761856 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 772096 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 782336 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 796672 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 806912 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 817152 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 831488 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 845824 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 856064 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 866304 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 876544 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 886784 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 901120 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 911360 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 921600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 935936 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 946176 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 956416 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 970752 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 980992 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 991232 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1005568 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1019904 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1030144 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1040384 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1050624 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1060864 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1075200 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1085440 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1095680 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1110016 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1120256 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1130496 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1144832 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1155072 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1165312 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1179648 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1193984 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1204224 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1214464 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1224704 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1234944 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1249280 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1259520 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1269760 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1284096 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1294336 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1304576 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1318912 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1329152 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1339392 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1353728 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1368064 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1378304 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1388544 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1398784 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1409024 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1423360 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1433600 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1443840 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1458176 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1468416 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1478656 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1492992 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1503232 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1513472 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1527808 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1542144 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1552384 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1562624 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1572864 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1583104 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1597440 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1607680 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1617920 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1632256 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1642496 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1652736 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1667072 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1677312 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1687552 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1701888 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1716224 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1726464 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1736704 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1746944 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1757184 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1771520 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1781760 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1792000 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1806336 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1816576 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1826816 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1841152 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1851392 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1861632 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1875968 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1890304 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1900544 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1910784 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1921024 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1931264 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1945600 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1955840 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1966080 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1980416 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1990656 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2000896 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2015232 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2025472 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2035712 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2050048 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2064384 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2074624 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2084864 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2095104 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2105344 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2119680 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2129920 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2140160 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2154496 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2164736 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2174976 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2189312 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2199552 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2209792 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2224128 + } + ], + "md5sum": "b0f5a80d026b3517cfd2a7df2fe6b1bf" + } + ] +} \ No newline at end of file