{ "metadata": { "ParamSize": 485, "ParamBytes": 11172753408.0, "BitsPerParam": 4.500345077304432 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "788d81e300fabbbf467614fd0ba2a305" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e0c456e9bb38cf8bb575cfda83174384" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.0.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 }, { "name": "model.layers.0.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18874368 }, { "name": "model.layers.0.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21233664 }, { "name": "model.layers.0.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24379392 } ], "md5sum": "0b9a427ace169eebc90b414c1a6bf86a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "814abafec8850b24b030cf60e240b81f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a4382e95625fe8e2d624d7d0fca83d0b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb1a85ba60912369b2fcb66dfef2946c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6b61f339e8da1e466a9dbd77e4dc8ac6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.1.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.1.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "72845fdbf8a499af08cad8001a8e1bd3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7810173d7120b07ef0e8990190f471de" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bf9ddcd1f943e7b5a384adb2b4e7a4d1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5dbf70d2f34e944571aa86b327a7e138" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "01e68be7a7476620dae10e378ca5598d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.10.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.10.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "d04d59fe6cd6f05275099ad6dfd15832" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "463cb59112166bb8dcc0acb727e36ec7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b9a3958d0fcb1a861577b0de8f5a7635" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "98a1f6d5e31a5a4c0c1fed05c227e8b8" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f17930756f2a96cf7c7b237c6fcc74b0" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "61234313e3eea089d648ccdc8ef92a38" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5447a04bf2ce5145ffe9fca878f5ed5e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.11.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.11.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.2.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 24391680 }, { "name": "model.layers.2.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26750976 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 29896704 } ], "md5sum": "82ff93d45269a7ef8ea04f92b0933569" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "688e476ec3cceb956cf4814b97fe9c06" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "30607a64013851dbee29e42a960e1337" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "de4f0776093a7ee7dc831c6cc14f053b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d8006c5b503bdb9e2bfeafd790cd3f8c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.3.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.3.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "5b5e7f63fd631d6c0b2e296ef657cd8e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4e647c7d869d9c4dc79b97bb42e505db" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "48f21619307c6930378bd6ac1261dddb" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "de2f18040458d245bf0a75063772ec4c" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "168a45ef539525f88941486809a0dcfc" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.4.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.4.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "35a8a361d3ccf25d93821f41263928dc" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bf342b7ac866a50639f560cce9bf74c1" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "999a8cc876caec771e28507eaf7a4c49" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b007e931775cc3784e60fb27d7ad48fa" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f299a35ee8e670c76333b2a0b54e62d8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.5.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.5.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "418c9b6d64a1923ef7fb9066b8f0ad26" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cb83fc2cf4384d75092d80e7a819b842" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b07bcfe1457d502aefc6177aab7d9e47" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0e2a7d08127187b1345ef41dd668d8e2" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "22a4a05a26cb47d26f74ca00fdc131cd" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.6.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.6.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "6923bb30ee4ee6e27aff8b9551827414" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7fa8849a77e0bbeec4f1357c2e9b190d" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2edd46f838494b378bfe3bd69a3f1bcb" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0304255b102a53a932a02dca3974d9ca" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ced45f35dabcf4736a1877e7797c1b86" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.7.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.7.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "826166645b6b181efd4f66fafb247b91" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3c28306e2a396658a668ea5ef8936d43" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a97ce0947f2317202a12c6c7142a5d9f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a7fe930dd6b24e42a5f5a407fdbac46f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5199854de656453245d8d325b8b511f8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.8.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.8.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "9282675a6e2375e535cdfcc1cb899bca" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e1287497887b62ffb4da0ac144a39092" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "160355833ae9286d2e95292fafe630d9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "369d9288bfde5585e889f8df8e54b4ce" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d9d5ae489b9af8301283051deaf4397b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.9.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.9.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "84edf7e3be7986862946553931f3b230" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "df732be09aa8e0ba807ebbb594b76a19" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 284295168, "records": [ { "name": "model.tok_embeddings.q_weight", "shape": [ 92544, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 284295168, "byteOffset": 0 } ], "md5sum": "ab233af5e8d88310dcbfe63a58406b11" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 35536896, "records": [ { "name": "model.tok_embeddings.q_scale", "shape": [ 92544, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35536896, "byteOffset": 0 } ], "md5sum": "cba46a350bd42a85b572dd137604898d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "408ef2b16829d872e04a0174e9450637" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4912f9150baf6203c8953caeea377daf" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31481856, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18898944 } ], "md5sum": "7676224fdd71c4282528f219ab739026" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4e9c10a26f04b80f2c30e691d83847f1" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0e7ca9a654461d74849afc8ae609b613" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 30695424, "records": [ { "name": "model.layers.11.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.12.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 6303744 }, { "name": "model.layers.12.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 25178112 }, { "name": "model.layers.12.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 27537408 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30683136 } ], "md5sum": "0d44068cb2a5092facb534277d2f37d7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c47b16f163277e4aa7d7c8a0f9b4149c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e80bcbf1018d915f799330717333fc04" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2f47f430c142c7cc70cebaea0bd5da56" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d9ac7cd257765de805ffa0368bba6710" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.13.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.13.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "5fa36ac603491c176dcdd471198b4835" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7186ec956cc3bec9150cbf22fb7eedbf" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d9c36346bfd193b0d77dcb5af0034105" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "65a4f5414d63d7b54a9139cc0a8e0f04" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f55a35c749e47c6de7ad8a4b7bf7d762" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.14.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.14.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "dd8bd4227056d5accc87911f24c67b9a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d9203b9cae964d9f8be7d07c67efbe92" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5378f6dc7d89e606ec62a7222d5ec614" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "842e5a968282cf786e56b468e0a5fa55" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "45cf165b697c49c33b09662a4aff06b7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.15.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.15.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "1594737da0418efbcb377ad9dd013b92" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5c5a521f1a2b7a95f0c4bd71e7cf22ce" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1539f5fef482c39d873c144373cef2c2" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4d540443905abd975e67a6edc7fc1a7d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "909218a672a405fae6ab7347b01170b8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.16.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.16.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "efc8c366b2b9128ef636853b74e405c7" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9050056e405fd0aac06e7d127b359e79" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d150b3ee45ffbdb45dcdc11a09fade59" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e1e99756446ed4c729c87d80321ef547" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0ef4e48afc1e01e6d74be98f4e90723f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.17.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.17.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "636705874b77e7763fe28df67efd0f5f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f04ba0fdf653102ccc563327006d9854" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f1d5ae9f35a5316de560dc79827493bb" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "10dcca1e18bb55c3e0da35997d2f4560" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "febe3ac670d406bc9461dc65e3492cc5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.18.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.18.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "22d9531d0c13381a3c6fb94bf9a66f17" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0c64f29f2963cbbcab51b1f594778115" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0e49a4f14dd019e671d48cd625e997b2" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9eaf42892006ebdfd3c689435bc46bcc" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ae55677a719c43f6e7dca7698b9214b1" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.19.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.19.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "0e16d1ee54210369741ba1e0ace784a2" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f7df21c9efea021b9c1426b0a0cb4cd1" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aaf0c3c6765eb4d2eb2dd784f44804ef" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "587b7719c66c5c6dbc7a486f0fb01d58" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "30ccb97b8beab47e295b6f2a64f2e503" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.20.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.20.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "8fccd014290f8973decc5193d17c42fb" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7946db1e1b8eb5b13b50643fe95a3ed7" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d3b83745249a3669d6b3bc9dbb65eedf" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dfb688920fc7872eedca5c7bdf4c40af" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "de6d3b8f85bcf3452fafd8e1aa23d8cf" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.21.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.21.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "9b39edd33c1d37081035fb67ef8724e9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9d248a99db4f2130d6b82d660956542a" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f113a79d940070533b11f8c6c3432a9c" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bc6019de7328bb2c17ef2dda7ba29f1d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "796832896fa4034619bba8651a7140dc" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.22.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.22.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "42090e254798b25e3b15a2ac21ef1dfb" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "46d115205b31cb10801d127b1b475c9d" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e60014487e686977f6f4b60bb3eafaf9" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0cc79913db1453245e695d2937d1a68a" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9c6fdbd66a9d3db15664d2f469dbf4b8" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.23.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.23.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "e654bb1ff1459843806297865b849c7c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "44ca26c27cbb95c9f15b9a1814357b20" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8fad8853a8a67a8d9600e505e0b65c3c" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "755c7641f7a63cf3574ffe524f4d27a5" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5195cebf0e5535109c7a604df5a83fb2" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.24.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.24.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "ffd52981890aea444dacdf3122aa6ceb" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8cf9aa54c191323ef23e7be06b2a607f" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4f4816aa40a160376e6f600245b91ad0" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f5769ff9c5800688585b74926c7658a4" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d880feada73d8e7074bddcc4e93b83a8" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.25.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.25.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "117306bdd7555f3b84c180d3f3730b78" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "61d23882beec4f92f0f1d1303bd20712" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5942f667e9acf7e6bcdd178cb70d4341" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2cc7207924f8fbe21d91b6ef70a32c93" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e61668544ff049ac27dde98d5c2aa53b" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.26.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.26.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "3511e929ddfbbda3893fc90885d04808" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1359d818e700c8ee5d4db207d093038b" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f8c82025efcd9f65fa1ef078e84a81a9" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "84ddecdf026dc6a4fbe761055bd8e5a6" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d1a277d470dba3772e5cd2f15ce48d24" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.27.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.27.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "027c951400c10167bf1d2c6fee1dff2e" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a90941354d647edba205c6bc19f73aab" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0b217b721f78dbcc2c0ed67e11b5ecc6" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f8b7ca92d42d321192ac439785b262a9" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a9f9e2a247f2c46b1e22152c14115739" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.28.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.28.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "d315d1f8fccaca486dfed76c5d57fc03" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d43f591d51ae436d5c84493be6fde88e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d81a488a05c54f786447746a060f457e" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d90a2cb4aa583b5ae3d6650586cfb83b" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b5ad2fbcf8043e286e2b9facf9eb621c" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.29.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.29.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "de796b075da05fae904950737a66157e" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1fedb9949e7017f2d6f66200766186bb" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8a9ef1a9451bff16e1a05db67d9db6ec" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53924d077311077590489bb8874a7f06" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "64839de53fc8b9e6620f2f689816d5ef" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.30.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.30.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "36e9b27256e744bfc84c0548f4a184ee" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f60e6ebcba0d4a457d007181f17bc61a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f4df70e3ffc803325c102ab198e83199" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f26ade8b2d98a7e6b4f9c2d77540f9e0" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0bdb58b3d2878a9f5ac66fa2ff54d800" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.31.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.31.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "d457f95ae120e131d624d01048fbd94b" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c59a016ca2abe0c77be693b87c7f19e1" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "00797a9b4abb97450b1f659ba3e640cb" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d431fe7954faed3fa5a53b9bfaed7d81" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a15f4da7ce1bb9dac56d41279bb1fe4b" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.32.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.32.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.32.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "ee27f50abd9addc0812e50383553233a" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0a34a699d88cd5548ebd77ec62966438" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "db44f4a9b9e2ab638ec65efc594519e5" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "705a8b30866dae02295314488bade993" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9624d44f1f941fd794d90f57299a1134" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.32.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.32.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.33.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.33.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.33.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a7f74aee4a7ffcb1d42f91199d900233" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b76a008f070b5b6073af5d52b62dd6dc" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "11ca1ead68552e92ae71663a185ffdb9" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e58a794753d76bbee3e156c2acb335f5" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6f9d40acddb46ecb813ba1974e99a957" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.33.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.33.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.34.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.34.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.34.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "4e3948417fcadca00daee1ef8d86a407" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bd6efc023e5903f521661612102e73e5" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2d3bffa5e675349677247b511c524a7b" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4ffd056f47b501f89feea3c86555ffa1" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f60b10efccb721e569d26476e2d2107d" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.34.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.34.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.35.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.35.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.35.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "4d3a810211a1ebb1a15b8c77415c58d9" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "401a9fcb9cab734ff723c1db25513700" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "54da9c2cbc42600ad4de11b80f17bb63" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b1bd29d111483237ebbdab19cd7c9d25" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7227baf6d2a38d0b5161fa8e85a93c4d" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.35.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.35.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.36.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.36.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "bb596961facb8b7784da9917e66db8b0" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8bfbbecbd950176c7040d2c48a6d2051" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a16e1a0b07c5a5d6ad9b9c55702a4244" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.37.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "07527ecb52ed7c0af58e598aacb5358c" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3c701ae2d5e237cd829ff85e7b7d44e7" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.36.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.36.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.36.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.37.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.37.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.37.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "8cb99f88872b88a9b3a39a7e95f66cd0" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2d0bae44911205c57050eff8bfc51632" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "988a4a104c7e1087a5c73e0d9b249b80" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.38.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "599a287f382c8bad688c8a0abbb3e3db" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "679de3f319cf74a4e995a76e045c355a" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.37.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.37.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.38.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.38.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.38.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "f319c92a77244cf7d195e419db85ad09" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2c79a96051eff105651fe838cb13b46d" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d6fcfcade8f5c1fa505465262535837a" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.39.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dbac3f04a7d080983663455bf9dfea07" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4ea0118ce9a0bcac7f8d4cc4678d87d2" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.38.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.38.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.39.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.39.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.39.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "ab9c367a5be60979d6455f7d739ccc72" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ae34276998b1dec7eb315fbe60fd93e3" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "028d2075c70e57c67b94c1fdaf40a018" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.40.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8c5a1b2d6d2ef3b5868491faa4cde2a8" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a62ca58403d902f2331aad61d6c5b225" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.39.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.39.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.40.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.40.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.40.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "3f3ffcd238206f7d65c93ff911113393" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "178bfcc5079185832cd02c90e9c7e817" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "452092c35aaaebc4915ca1c804a6125a" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.41.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0ce72fc965b3c8fe070438eaf7c92297" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ebbe6de22d586ca0d7cb52138f718eec" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.40.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.40.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.40.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.41.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.41.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.41.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "ef3a52d4f20701f6e494961b838c5d83" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "958ace8f8e98b84dd5b82add0bb4d884" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "74706fd2b8529eb2efd14e6bbd84d185" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.42.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "148fe4bc78ba939b4f06ed6b4bf57038" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "83d980df3aa595ec94625daf8e3151d1" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.41.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.41.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.41.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.42.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.42.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.42.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a747a37d2e908c6416dd6d7c9e62fe7b" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f410e9172b0b60d9c30f50c9e0482c36" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bcb4715a281057361afb867fed27164d" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.43.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "71c196d5872c079d9b4ad163fd1ae2c3" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9a94b6decb5a1c3abe053ca30ef2bce4" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.42.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.42.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.42.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.43.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.43.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.43.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "c343def9951a0b1dd5ecf45067e0b9ba" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8d2a5801c50041174319c176e3fbb91d" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "96b2adf641b8980a57941b7aecc63086" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.44.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "864922ee5506a66c6a5b5a22fba55b5e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f7c0bc42ac27c4fd450935cf57b133d2" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.43.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.43.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.43.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.44.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.44.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.44.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "eed8426b93d326d5d4a7fda8c7100b25" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6cd38fe34268b7ac3f343250b34d73de" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "62f3039af40c6544027403242aea0205" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.45.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "067f7b0af02e58d8116738141fa2e9b4" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "54cb96da37c9ac462fd62bc3435fa112" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.44.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.44.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.44.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.45.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.45.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.45.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "f28bef0b827697ec7dc93a7aec38d697" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8c73e0f89743394ba00f81a307ba5f7f" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5f4a2b9e9515336ebdc0cc5e72964a24" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.46.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "50ba7f4b0a1fae3eeef351d3bc196983" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e0c93661f45b260190e73830aa004ff9" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.45.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.45.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.45.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.46.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.46.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.46.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "6ce9363076eafb4ad6b415c514d26624" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "52071f5359d9e0745b1aa3fa78a0cf04" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "89c2388180558790ef086f86b436c264" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.47.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5fa014c07079263e092294cd4dfcec12" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d93e34391aa38c70089da6e1d7b7befe" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.46.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.46.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.46.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.47.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.47.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.47.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "5ea98ac9a26b4407fb8dbdc7baee9d75" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2baaf4862c5baab297498668eef903e2" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 284295168, "records": [ { "name": "output.q_weight", "shape": [ 92544, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 284295168, "byteOffset": 0 } ], "md5sum": "6c7a08698502a151860e20ac7a12fb3d" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 35536896, "records": [ { "name": "output.q_scale", "shape": [ 92544, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35536896, "byteOffset": 0 } ], "md5sum": "717e73dfb96047fc5b9204cea4ba25d1" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 18898944, "records": [ { "name": "model.layers.47.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.47.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.47.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18886656 } ], "md5sum": "e15925ccd75f8d95b3bf951d35834ad4" } ] }