diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,7191 @@ +{ + "metadata": { + "ParamSize": 485, + "ParamBytes": 11172753408.0, + "BitsPerParam": 4.500345077304432 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "788d81e300fabbbf467614fd0ba2a305" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e0c456e9bb38cf8bb575cfda83174384" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 24391680, + "records": [ + { + "name": "model.layers.0.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + }, + { + "name": "model.layers.0.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18874368 + }, + { + "name": "model.layers.0.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21233664 + }, + { + "name": "model.layers.0.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24379392 + } + ], + "md5sum": "0b9a427ace169eebc90b414c1a6bf86a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.0.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "814abafec8850b24b030cf60e240b81f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a4382e95625fe8e2d624d7d0fca83d0b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bb1a85ba60912369b2fcb66dfef2946c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6b61f339e8da1e466a9dbd77e4dc8ac6" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.1.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.1.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.1.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "72845fdbf8a499af08cad8001a8e1bd3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.1.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7810173d7120b07ef0e8990190f471de" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.10.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bf9ddcd1f943e7b5a384adb2b4e7a4d1" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5dbf70d2f34e944571aa86b327a7e138" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "01e68be7a7476620dae10e378ca5598d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.10.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.10.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.10.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "d04d59fe6cd6f05275099ad6dfd15832" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.10.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "463cb59112166bb8dcc0acb727e36ec7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.11.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b9a3958d0fcb1a861577b0de8f5a7635" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "98a1f6d5e31a5a4c0c1fed05c227e8b8" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.2.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f17930756f2a96cf7c7b237c6fcc74b0" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "61234313e3eea089d648ccdc8ef92a38" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5447a04bf2ce5145ffe9fca878f5ed5e" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.11.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.11.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.2.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24391680 + }, + { + "name": "model.layers.2.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 26750976 + }, + { + "name": "model.layers.2.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 29896704 + } + ], + "md5sum": "82ff93d45269a7ef8ea04f92b0933569" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.2.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "688e476ec3cceb956cf4814b97fe9c06" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "30607a64013851dbee29e42a960e1337" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "de4f0776093a7ee7dc831c6cc14f053b" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d8006c5b503bdb9e2bfeafd790cd3f8c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.3.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.3.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.3.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "5b5e7f63fd631d6c0b2e296ef657cd8e" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.3.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4e647c7d869d9c4dc79b97bb42e505db" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "48f21619307c6930378bd6ac1261dddb" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "de2f18040458d245bf0a75063772ec4c" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "168a45ef539525f88941486809a0dcfc" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.4.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.4.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.4.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "35a8a361d3ccf25d93821f41263928dc" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.4.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bf342b7ac866a50639f560cce9bf74c1" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "999a8cc876caec771e28507eaf7a4c49" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b007e931775cc3784e60fb27d7ad48fa" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f299a35ee8e670c76333b2a0b54e62d8" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.5.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.5.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.5.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "418c9b6d64a1923ef7fb9066b8f0ad26" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.5.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cb83fc2cf4384d75092d80e7a819b842" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.6.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b07bcfe1457d502aefc6177aab7d9e47" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0e2a7d08127187b1345ef41dd668d8e2" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "22a4a05a26cb47d26f74ca00fdc131cd" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.6.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.6.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.6.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "6923bb30ee4ee6e27aff8b9551827414" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.6.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7fa8849a77e0bbeec4f1357c2e9b190d" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2edd46f838494b378bfe3bd69a3f1bcb" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0304255b102a53a932a02dca3974d9ca" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ced45f35dabcf4736a1877e7797c1b86" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.7.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.7.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.7.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "826166645b6b181efd4f66fafb247b91" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.7.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3c28306e2a396658a668ea5ef8936d43" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a97ce0947f2317202a12c6c7142a5d9f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a7fe930dd6b24e42a5f5a407fdbac46f" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5199854de656453245d8d325b8b511f8" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.8.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.8.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.8.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "9282675a6e2375e535cdfcc1cb899bca" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.8.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e1287497887b62ffb4da0ac144a39092" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "160355833ae9286d2e95292fafe630d9" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "369d9288bfde5585e889f8df8e54b4ce" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d9d5ae489b9af8301283051deaf4397b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.9.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.9.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.9.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "84edf7e3be7986862946553931f3b230" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.9.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "df732be09aa8e0ba807ebbb594b76a19" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 284295168, + "records": [ + { + "name": "model.tok_embeddings.q_weight", + "shape": [ + 92544, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 284295168, + "byteOffset": 0 + } + ], + "md5sum": "ab233af5e8d88310dcbfe63a58406b11" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35536896, + "records": [ + { + "name": "model.tok_embeddings.q_scale", + "shape": [ + 92544, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35536896, + "byteOffset": 0 + } + ], + "md5sum": "cba46a350bd42a85b572dd137604898d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "408ef2b16829d872e04a0174e9450637" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.11.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4912f9150baf6203c8953caeea377daf" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 31481856, + "records": [ + { + "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.11.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18886656 + }, + { + "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18898944 + } + ], + "md5sum": "7676224fdd71c4282528f219ab739026" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4e9c10a26f04b80f2c30e691d83847f1" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0e7ca9a654461d74849afc8ae609b613" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 30695424, + "records": [ + { + "name": "model.layers.11.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.11.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.12.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 6303744 + }, + { + "name": "model.layers.12.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 25178112 + }, + { + "name": "model.layers.12.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 27537408 + }, + { + "name": "model.layers.12.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 30683136 + } + ], + "md5sum": "0d44068cb2a5092facb534277d2f37d7" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.12.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c47b16f163277e4aa7d7c8a0f9b4149c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e80bcbf1018d915f799330717333fc04" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2f47f430c142c7cc70cebaea0bd5da56" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d9ac7cd257765de805ffa0368bba6710" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.13.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.13.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.13.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "5fa36ac603491c176dcdd471198b4835" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.13.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7186ec956cc3bec9150cbf22fb7eedbf" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d9c36346bfd193b0d77dcb5af0034105" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "65a4f5414d63d7b54a9139cc0a8e0f04" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f55a35c749e47c6de7ad8a4b7bf7d762" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.14.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.14.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.14.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "dd8bd4227056d5accc87911f24c67b9a" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.14.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d9203b9cae964d9f8be7d07c67efbe92" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.15.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5378f6dc7d89e606ec62a7222d5ec614" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "842e5a968282cf786e56b468e0a5fa55" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "45cf165b697c49c33b09662a4aff06b7" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.15.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.15.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.15.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "1594737da0418efbcb377ad9dd013b92" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.15.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5c5a521f1a2b7a95f0c4bd71e7cf22ce" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1539f5fef482c39d873c144373cef2c2" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4d540443905abd975e67a6edc7fc1a7d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "909218a672a405fae6ab7347b01170b8" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.16.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.16.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.16.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "efc8c366b2b9128ef636853b74e405c7" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.16.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9050056e405fd0aac06e7d127b359e79" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d150b3ee45ffbdb45dcdc11a09fade59" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e1e99756446ed4c729c87d80321ef547" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0ef4e48afc1e01e6d74be98f4e90723f" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.17.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.17.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.17.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "636705874b77e7763fe28df67efd0f5f" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.17.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f04ba0fdf653102ccc563327006d9854" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f1d5ae9f35a5316de560dc79827493bb" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "10dcca1e18bb55c3e0da35997d2f4560" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "febe3ac670d406bc9461dc65e3492cc5" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.18.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.18.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.18.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "22d9531d0c13381a3c6fb94bf9a66f17" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.18.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0c64f29f2963cbbcab51b1f594778115" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0e49a4f14dd019e671d48cd625e997b2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9eaf42892006ebdfd3c689435bc46bcc" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ae55677a719c43f6e7dca7698b9214b1" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.19.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.19.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.19.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "0e16d1ee54210369741ba1e0ace784a2" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.19.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f7df21c9efea021b9c1426b0a0cb4cd1" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aaf0c3c6765eb4d2eb2dd784f44804ef" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "587b7719c66c5c6dbc7a486f0fb01d58" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "30ccb97b8beab47e295b6f2a64f2e503" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.20.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.20.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.20.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "8fccd014290f8973decc5193d17c42fb" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.20.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7946db1e1b8eb5b13b50643fe95a3ed7" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d3b83745249a3669d6b3bc9dbb65eedf" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dfb688920fc7872eedca5c7bdf4c40af" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "de6d3b8f85bcf3452fafd8e1aa23d8cf" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.21.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.21.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.21.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "9b39edd33c1d37081035fb67ef8724e9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.21.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9d248a99db4f2130d6b82d660956542a" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.22.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f113a79d940070533b11f8c6c3432a9c" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bc6019de7328bb2c17ef2dda7ba29f1d" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "796832896fa4034619bba8651a7140dc" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.22.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.22.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.22.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "42090e254798b25e3b15a2ac21ef1dfb" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.22.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "46d115205b31cb10801d127b1b475c9d" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e60014487e686977f6f4b60bb3eafaf9" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0cc79913db1453245e695d2937d1a68a" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9c6fdbd66a9d3db15664d2f469dbf4b8" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.23.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.23.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.23.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "e654bb1ff1459843806297865b849c7c" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.23.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "44ca26c27cbb95c9f15b9a1814357b20" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.24.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8fad8853a8a67a8d9600e505e0b65c3c" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "755c7641f7a63cf3574ffe524f4d27a5" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5195cebf0e5535109c7a604df5a83fb2" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.24.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.24.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.24.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "ffd52981890aea444dacdf3122aa6ceb" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.24.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8cf9aa54c191323ef23e7be06b2a607f" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4f4816aa40a160376e6f600245b91ad0" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f5769ff9c5800688585b74926c7658a4" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d880feada73d8e7074bddcc4e93b83a8" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.24.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.25.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.25.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.25.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "117306bdd7555f3b84c180d3f3730b78" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.25.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "61d23882beec4f92f0f1d1303bd20712" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.26.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5942f667e9acf7e6bcdd178cb70d4341" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2cc7207924f8fbe21d91b6ef70a32c93" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e61668544ff049ac27dde98d5c2aa53b" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.25.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.26.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.26.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.26.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "3511e929ddfbbda3893fc90885d04808" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.26.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1359d818e700c8ee5d4db207d093038b" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f8c82025efcd9f65fa1ef078e84a81a9" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "84ddecdf026dc6a4fbe761055bd8e5a6" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d1a277d470dba3772e5cd2f15ce48d24" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.26.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.27.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.27.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.27.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "027c951400c10167bf1d2c6fee1dff2e" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.27.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a90941354d647edba205c6bc19f73aab" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.28.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0b217b721f78dbcc2c0ed67e11b5ecc6" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f8b7ca92d42d321192ac439785b262a9" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a9f9e2a247f2c46b1e22152c14115739" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.27.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.28.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.28.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.28.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "d315d1f8fccaca486dfed76c5d57fc03" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.28.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d43f591d51ae436d5c84493be6fde88e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d81a488a05c54f786447746a060f457e" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d90a2cb4aa583b5ae3d6650586cfb83b" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b5ad2fbcf8043e286e2b9facf9eb621c" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.28.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.28.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.29.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.29.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.29.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "de796b075da05fae904950737a66157e" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.29.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1fedb9949e7017f2d6f66200766186bb" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8a9ef1a9451bff16e1a05db67d9db6ec" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "53924d077311077590489bb8874a7f06" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "64839de53fc8b9e6620f2f689816d5ef" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.29.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.29.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.29.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.30.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.30.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.30.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "36e9b27256e744bfc84c0548f4a184ee" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.30.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f60e6ebcba0d4a457d007181f17bc61a" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f4df70e3ffc803325c102ab198e83199" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f26ade8b2d98a7e6b4f9c2d77540f9e0" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0bdb58b3d2878a9f5ac66fa2ff54d800" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.30.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.31.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.31.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.31.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "d457f95ae120e131d624d01048fbd94b" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.31.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c59a016ca2abe0c77be693b87c7f19e1" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.32.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "00797a9b4abb97450b1f659ba3e640cb" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.32.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d431fe7954faed3fa5a53b9bfaed7d81" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.32.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a15f4da7ce1bb9dac56d41279bb1fe4b" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.31.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.32.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.32.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.32.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "ee27f50abd9addc0812e50383553233a" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.32.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0a34a699d88cd5548ebd77ec62966438" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.33.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "db44f4a9b9e2ab638ec65efc594519e5" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.33.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "705a8b30866dae02295314488bade993" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.33.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9624d44f1f941fd794d90f57299a1134" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.32.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.32.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.32.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.33.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.33.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.33.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "a7f74aee4a7ffcb1d42f91199d900233" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.33.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b76a008f070b5b6073af5d52b62dd6dc" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.34.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "11ca1ead68552e92ae71663a185ffdb9" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.34.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e58a794753d76bbee3e156c2acb335f5" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.34.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6f9d40acddb46ecb813ba1974e99a957" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.33.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.33.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.33.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.34.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.34.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.34.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "4e3948417fcadca00daee1ef8d86a407" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.34.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bd6efc023e5903f521661612102e73e5" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.35.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2d3bffa5e675349677247b511c524a7b" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.35.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4ffd056f47b501f89feea3c86555ffa1" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.35.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f60b10efccb721e569d26476e2d2107d" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.34.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.34.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.34.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.35.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.35.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.35.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "4d3a810211a1ebb1a15b8c77415c58d9" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.35.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "401a9fcb9cab734ff723c1db25513700" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.36.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "54da9c2cbc42600ad4de11b80f17bb63" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.36.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b1bd29d111483237ebbdab19cd7c9d25" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.36.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7227baf6d2a38d0b5161fa8e85a93c4d" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 24391680, + "records": [ + { + "name": "model.layers.35.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.35.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.35.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.36.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.36.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + } + ], + "md5sum": "bb596961facb8b7784da9917e66db8b0" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.36.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8bfbbecbd950176c7040d2c48a6d2051" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.37.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a16e1a0b07c5a5d6ad9b9c55702a4244" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.37.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "07527ecb52ed7c0af58e598aacb5358c" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.37.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3c701ae2d5e237cd829ff85e7b7d44e7" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 24416256, + "records": [ + { + "name": "model.layers.36.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.36.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 12582912 + }, + { + "name": "model.layers.36.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12595200 + }, + { + "name": "model.layers.36.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18886656 + }, + { + "name": "model.layers.37.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18898944 + }, + { + "name": "model.layers.37.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21258240 + }, + { + "name": "model.layers.37.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24403968 + } + ], + "md5sum": "8cb99f88872b88a9b3a39a7e95f66cd0" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.37.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2d0bae44911205c57050eff8bfc51632" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.38.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "988a4a104c7e1087a5c73e0d9b249b80" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.38.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "599a287f382c8bad688c8a0abbb3e3db" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.38.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "679de3f319cf74a4e995a76e045c355a" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.37.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.37.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.37.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.38.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.38.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.38.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "f319c92a77244cf7d195e419db85ad09" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.38.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2c79a96051eff105651fe838cb13b46d" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.39.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d6fcfcade8f5c1fa505465262535837a" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.39.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dbac3f04a7d080983663455bf9dfea07" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.39.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4ea0118ce9a0bcac7f8d4cc4678d87d2" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.38.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.38.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.38.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.39.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.39.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.39.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "ab9c367a5be60979d6455f7d739ccc72" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.39.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ae34276998b1dec7eb315fbe60fd93e3" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.40.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "028d2075c70e57c67b94c1fdaf40a018" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.40.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8c5a1b2d6d2ef3b5868491faa4cde2a8" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.40.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a62ca58403d902f2331aad61d6c5b225" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.39.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.39.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.39.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.40.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.40.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.40.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "3f3ffcd238206f7d65c93ff911113393" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.40.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "178bfcc5079185832cd02c90e9c7e817" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.41.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "452092c35aaaebc4915ca1c804a6125a" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.41.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0ce72fc965b3c8fe070438eaf7c92297" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.41.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ebbe6de22d586ca0d7cb52138f718eec" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.40.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.40.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.40.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.41.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.41.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.41.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "ef3a52d4f20701f6e494961b838c5d83" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.41.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "958ace8f8e98b84dd5b82add0bb4d884" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.42.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "74706fd2b8529eb2efd14e6bbd84d185" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.42.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "148fe4bc78ba939b4f06ed6b4bf57038" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.42.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "83d980df3aa595ec94625daf8e3151d1" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.41.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.41.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.41.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.42.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.42.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.42.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "a747a37d2e908c6416dd6d7c9e62fe7b" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.42.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f410e9172b0b60d9c30f50c9e0482c36" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.43.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bcb4715a281057361afb867fed27164d" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.43.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "71c196d5872c079d9b4ad163fd1ae2c3" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.43.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9a94b6decb5a1c3abe053ca30ef2bce4" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.42.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.42.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.42.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.43.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.43.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.43.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "c343def9951a0b1dd5ecf45067e0b9ba" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.43.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8d2a5801c50041174319c176e3fbb91d" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.44.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "96b2adf641b8980a57941b7aecc63086" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.44.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "864922ee5506a66c6a5b5a22fba55b5e" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.44.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f7c0bc42ac27c4fd450935cf57b133d2" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.43.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.43.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.43.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.44.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.44.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.44.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "eed8426b93d326d5d4a7fda8c7100b25" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.44.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6cd38fe34268b7ac3f343250b34d73de" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.45.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "62f3039af40c6544027403242aea0205" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.45.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "067f7b0af02e58d8116738141fa2e9b4" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.45.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "54cb96da37c9ac462fd62bc3435fa112" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.44.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.44.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.44.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.45.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.45.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.45.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "f28bef0b827697ec7dc93a7aec38d697" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.45.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8c73e0f89743394ba00f81a307ba5f7f" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.46.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5f4a2b9e9515336ebdc0cc5e72964a24" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.46.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "50ba7f4b0a1fae3eeef351d3bc196983" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.46.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e0c93661f45b260190e73830aa004ff9" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.45.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.45.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.45.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.46.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.46.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.46.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "6ce9363076eafb4ad6b415c514d26624" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.46.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "52071f5359d9e0745b1aa3fa78a0cf04" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.47.attention.wo.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "89c2388180558790ef086f86b436c264" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.47.attention.wqkv.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5fa014c07079263e092294cd4dfcec12" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.47.feed_forward.gate_up_proj.q_weight", + "shape": [ + 32768, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d93e34391aa38c70089da6e1d7b7befe" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 24403968, + "records": [ + { + "name": "model.layers.46.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.46.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.46.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.layers.47.attention.wo.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "model.layers.47.attention.wqkv.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21245952 + }, + { + "name": "model.layers.47.attention_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24391680 + } + ], + "md5sum": "5ea98ac9a26b4407fb8dbdc7baee9d75" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.47.feed_forward.w2.q_weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2baaf4862c5baab297498668eef903e2" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 284295168, + "records": [ + { + "name": "output.q_weight", + "shape": [ + 92544, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 284295168, + "byteOffset": 0 + } + ], + "md5sum": "6c7a08698502a151860e20ac7a12fb3d" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 35536896, + "records": [ + { + "name": "output.q_scale", + "shape": [ + 92544, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35536896, + "byteOffset": 0 + } + ], + "md5sum": "717e73dfb96047fc5b9204cea4ba25d1" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 18898944, + "records": [ + { + "name": "model.layers.47.feed_forward.gate_up_proj.q_scale", + "shape": [ + 32768, + 192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.47.feed_forward.w2.q_scale", + "shape": [ + 6144, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 12582912 + }, + { + "name": "model.layers.47.ffn_norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "model.norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18886656 + } + ], + "md5sum": "e15925ccd75f8d95b3bf951d35834ad4" + } + ] +} \ No newline at end of file