diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5687 @@ +{ + "metadata": { + "ParamSize": 405, + "ParamBytes": 7322019840.0, + "BitsPerParam": 4.500366420537488 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 81920000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920000, + "byteOffset": 0 + } + ], + "md5sum": "11c4c18242821232fe70c418f076b5e6" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e873f9e872ef0287a951905e319dcafe" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f018d4e95eddbcd4083a2c485f3a8584" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4f794f3a8c22468bb8fea20e55695078" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ba2521ed5711a8ed7770a2b2ddff26d7" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 32890880, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240000, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10240000 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 10250240 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14673920 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14684160 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14694400 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19118080 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 27965440 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 27975680 + } + ], + "md5sum": "5cd5e574f2f8abf36631f78b3384d080" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c715e703262d99ce18fd4ec202c1a548" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d5c476265eb219ca9d64c8cced335754" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "73851efa0e30a31fb0ae9638e2edd1e3" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "4227ed956d2ad94ab8b2335c462bfdfe" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "aa433d3cb5f238b538c71ccd0d7b237d" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "91a1ba573682a43b676fec8cacc01657" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d75cccbdbcbeea9c5712407ef390c939" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "e12de843e15473c5614531ad6216c7a8" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1875d013b3fa17ae95460e5645923601" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0561bed48f35980d552cf17b6959d6f0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d5e095f7cfa17d7cf70f405cf5982116" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "d6cbbfedb2878392abbac8c1355c60af" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "673af13f09ca6826698afdd11108c9cf" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ad10defad39195d1504f2215d23f5a99" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "685d5c5af4348e710520b76dfe8b7045" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "beb50063ee8d5ad21f272e2bc70032ce" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d1ec673c49a358922326deb8584ee801" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fcf135c56b61d908c874d398ebd29747" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b1c05b19363520d032fd86ee3494f0f5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "e47c0d69851ca6562388aa25d99282ca" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ba6a02352aee0d69c71e1825afad179c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c78447cbded4d1f81ae06916a9fa174e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4f51ab18f91bcd95d21dc0bb07af75cc" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "79496e6ce54d8b586dfd5e4e74da7daf" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "01b7ddf460e1f401946bc877becc3e2a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3f3f436b5bc2f0f12d7734f12cf2d873" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0d8211b83a46c07d228735a8a9f56254" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "b40b0d128d8fdb5c9de2e3d89cf37644" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8ba990b887f604ff0cd08a4bf51f818a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "538acc35591047147a2370641231a545" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ec406edcb3b2ee22e3ea91ddaee402a3" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "34f078c6aeecb45eff3c44f294dd32e2" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 81920000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920000, + "byteOffset": 0 + } + ], + "md5sum": "484b3ae65b19a3fc50c0b32e23e6c3e2" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3e8c2472ee1e172ad1506bd07a16be2e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "abfa690a4a8aad3b0185ebecf26480b0" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29429760, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240000, + "byteOffset": 14755840 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24995840 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 25006080 + } + ], + "md5sum": "1feda04fbb9a7051786231656fc10c78" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a0ac964e315dfa35f75416c456f566c2" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6108591d6880489c422a3de8b95a11a4" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1eafce35d0f5881febb6c91c1e14dd64" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "003ad6052e6c651e12a5ffb6c2182214" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f24fdcf6d84fbafdf278615a778808a2" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2d1cdaab330cc3178b306311d38f6da7" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4eefe40174939cacc6b7fb9d78c75061" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "6f692210c8e919ac570e20f4511ea0fc" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9b666409f38656616908ba0c952a3f7d" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "87e29ec6d950af496e1c175462f2a5da" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6cc7600c42e018c1495871fd2c9e5635" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "e86a116cf358dcfad169a874299b9cb2" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "fb56c60482da01595b2e8b4aded2b8ff" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8e60ca17603bc91c4f34153a60e1acdd" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8ab4771a23f0ca021e7b8f14a0d5c640" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "8e2db16bf17a7e59f60e56032b0b0f4c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2a52195f2befef564b483813d3c55ee4" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8eeed7d5e3972e4c8455b02e7c2eb95f" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b6fd7d086fe8d8b7e1ed79d040a88894" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "21bdf9c97d700ef2b654079dda1e7175" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "eb1296edd01a163df5136ce4d52a5fd0" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bc6475b3b9e4f47e5eda2ec720e8f100" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "15482e3476984409e7336693a91bd713" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "0486173eb13cb4389225e48221d258a6" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7a2dbc6446a06fa8d2671f506064b504" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "018c910c0c7640241cfa13edaa5f1549" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5e0a6d71d71212f3f6c271476e89a125" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 33443840, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 28518400 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 33433600 + } + ], + "md5sum": "44d5c2ac76b0e6c74f3b9b2c641d386d" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3d159dee29ebcc9c216a24de92a3e40a" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "bb8bbb4d63d4a2b78cacab159061d2d8" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9ee8c90f30733befbf42a7c3ef41041f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "9bf3cea8bb6139965f9e2c5819a7e787" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9284a9e01320b0737c827c0b6e5c2243" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "781fe2b5727b4b8e38c9d41356e2936f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "629cf324d02d923d5e1063dca62c774c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "cdc7cc906c70aeeb4f331d3325f432c1" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "97918726320756d7a65b1309a70b8afd" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "97319700537c80cf7a57e8b149d348f2" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1335ba06cbea7ef2905abdddb5714f11" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "bb5f2c291cb27b6e1f58c7bedadba24d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dea335597546208cd1f3aeef9d4c3f7b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a998ff6b96b7e183897b5e5965cb388c" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "46767bdfcc49705108a43106de43fea4" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "30213df8776f6f429b282a1b32f59bc3" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ea1ca71ed1859932f598f48140780ad7" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a66c8ce6da10257f68b5826f54abbb18" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "93bb2b06631087011ee0aef38a285e50" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "c8ed3065a5481093d2984f1806f24591" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "66359b54e7b2915c372492bd60d4ce95" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "15f7ff3aa54c7c9df4981dd8af32e22a" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "59ac3241728f4e5a249b847408526867" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "a8e027955a9fb6b2e90132cd17958a29" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a7e013c992d1deecd947ff301c2108f1" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9f86dd21f3df04231849c1e7fcfd8b75" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "448427b91bbbd4a9981f3947f50a8be7" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "d68d660caac0a4c9a73283f0d6e590ff" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "64f5491d8cf52bc54b290817027880c6" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "919085395b9fbafcbc8ee2049825e79c" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d80ee2bce647d267ea5ee2863d23db2c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "30c45def09519508ae581c2eb2981683" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a47d93512b4a684c86434501e406e456" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1aea6de60637a951b36f4a026d989ee1" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d46770fcc09773e69ed3f56016fc5b4e" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 32460800, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26388480 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28037120 + } + ], + "md5sum": "ff6884ef3353bc8186956101262f0ee3" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "75993a9b5154c3b00380fa959bb91bc6" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6a4faf3826cf6f33f9bef8da6e838109" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7e68440c7dfb00c6cedbc1bf81ecd50c" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "2af2b8ad96267f3beedefec18fcef5c7" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cc714cfb3be1aeb288575011dde8db8d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b8b348683b0df385a58724542afa52d9" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "926b1db5b363a5f1d5722e074fe8cb2f" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "b81e464e5ecf5dae3629df14427d4247" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8329f8d9f44c37666071e13c954e4a16" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "16b98f2321277a01aae0e6beb9f53884" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "78e28f621aca2f013ae5a4f13eb6714a" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "0f8362b537018187cd8eba10f1d5e682" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c8802ec40ca8a6a44ab77c3feafc257c" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1add3984d77e4fdc86432ea713062fc6" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0309ab4372b46166e60551ab4b42b6cd" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "c4ae037206a58e688b20753c6032e4ac" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c690b98ec8245973e749aacf767c0414" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "185d46889914d94b4c993d4426b8602b" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "42e1df990176011f90bcf1abde5d1716" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "47234cfb02770695ae70ec537c4c9822" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "20f6b2b3fc0d88a1332202634ffa485b" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d64393429113ce4e352b4f293e5d18a4" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a6ef9a4625d5b8a58fe92346236d959b" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "3f663df2ccb787735c08daeb7db2c30d" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e29899f763ad36c25006c5d332c9ff46" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f897d19d769c7179084bcf50421b6074" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "097b98fbe52c72a034f63b39373a4c0f" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "430c1e6f3fbb5adf73234259103c4aa6" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "aebc96ee6e4b3a8467078f8c7a8a2a30" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bf1512f28df82363cd3f0d75ee10cd41" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "62571e6923fbc82dd032b6e77563a797" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "316c023ade6d8ca2235b49c49eb8599c" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "902892cc74298c29c1c65b103e82e453" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "590beb3bf92784c79af3f39821465c78" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b02d9cefbb3841ec2c37339c0777213c" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "968ef1c887de955c791f81982269a4ab" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9f38eab7601fd14bf5e39161afb10219" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "df56740b6e701bce145ae62827d90d7c" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "185f16f27a2eef5d9710c289bbc42d02" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "db8fabd12540096627dbfa9edbd7bd56" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "aaa71d53162f3f69ddb52a6a43c63963" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2850654425587bb1f0206c3bfad7ee3f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9fc30f4fa911756a5a7563286a294308" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "f6535ab4a7993a24a8c4d47a7440e5d0" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e14f1b034e88499a615ea6aed566ed6b" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "074181d6d95db0b759fbb0454dec5f26" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c8dec693eaaa392f8695b939e7c272ea" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "198942ad81e4602146bd68227ae53417" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a2bf8391fb02603e07712bc34e7b7171" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "dac820a12862b0b8ef31391d182d9f86" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ac2b242d2b8f4c6409330d9c4ec4006a" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28518400 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28528640 + } + ], + "md5sum": "fe5b8d6c9948c61d01d5995b0a38bac6" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a374635d2b179488ac62b82de6a5bc91" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "90b569b3b28d44ff1464201a079abaf5" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 28518400, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26880000 + } + ], + "md5sum": "99583837e41b0531a90c9c33d0517636" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5f0a0b8016f8464d5bda3658c0cc8af3" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 28508160, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8847360 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13762560 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 26869760 + } + ], + "md5sum": "48798efc4753b3480be9d38572ee6f59" + } + ] +} \ No newline at end of file