internlm2_5-20b-q4f16_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
da5bf0b verified
raw
history blame
240 kB
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 11172753408.0,
"BitsPerParam": 4.500345077304432
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "788d81e300fabbbf467614fd0ba2a305"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e0c456e9bb38cf8bb575cfda83174384"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.0.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18874368
},
{
"name": "model.layers.0.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21233664
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24379392
}
],
"md5sum": "0b9a427ace169eebc90b414c1a6bf86a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "814abafec8850b24b030cf60e240b81f"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.1.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a4382e95625fe8e2d624d7d0fca83d0b"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bb1a85ba60912369b2fcb66dfef2946c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6b61f339e8da1e466a9dbd77e4dc8ac6"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.1.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.1.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "72845fdbf8a499af08cad8001a8e1bd3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7810173d7120b07ef0e8990190f471de"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.10.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bf9ddcd1f943e7b5a384adb2b4e7a4d1"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5dbf70d2f34e944571aa86b327a7e138"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "01e68be7a7476620dae10e378ca5598d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.10.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.10.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "d04d59fe6cd6f05275099ad6dfd15832"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "463cb59112166bb8dcc0acb727e36ec7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.11.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b9a3958d0fcb1a861577b0de8f5a7635"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "98a1f6d5e31a5a4c0c1fed05c227e8b8"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.2.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f17930756f2a96cf7c7b237c6fcc74b0"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "61234313e3eea089d648ccdc8ef92a38"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5447a04bf2ce5145ffe9fca878f5ed5e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29908992,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.11.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.11.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.2.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24391680
},
{
"name": "model.layers.2.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26750976
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 29896704
}
],
"md5sum": "82ff93d45269a7ef8ea04f92b0933569"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "688e476ec3cceb956cf4814b97fe9c06"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.3.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "30607a64013851dbee29e42a960e1337"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "de4f0776093a7ee7dc831c6cc14f053b"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d8006c5b503bdb9e2bfeafd790cd3f8c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.3.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.3.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "5b5e7f63fd631d6c0b2e296ef657cd8e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4e647c7d869d9c4dc79b97bb42e505db"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.4.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "48f21619307c6930378bd6ac1261dddb"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "de2f18040458d245bf0a75063772ec4c"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "168a45ef539525f88941486809a0dcfc"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.4.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.4.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "35a8a361d3ccf25d93821f41263928dc"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bf342b7ac866a50639f560cce9bf74c1"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.5.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "999a8cc876caec771e28507eaf7a4c49"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b007e931775cc3784e60fb27d7ad48fa"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f299a35ee8e670c76333b2a0b54e62d8"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.5.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.5.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "418c9b6d64a1923ef7fb9066b8f0ad26"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cb83fc2cf4384d75092d80e7a819b842"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.6.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b07bcfe1457d502aefc6177aab7d9e47"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0e2a7d08127187b1345ef41dd668d8e2"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "22a4a05a26cb47d26f74ca00fdc131cd"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.6.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.6.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "6923bb30ee4ee6e27aff8b9551827414"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7fa8849a77e0bbeec4f1357c2e9b190d"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.7.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2edd46f838494b378bfe3bd69a3f1bcb"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0304255b102a53a932a02dca3974d9ca"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ced45f35dabcf4736a1877e7797c1b86"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.7.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.7.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "826166645b6b181efd4f66fafb247b91"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3c28306e2a396658a668ea5ef8936d43"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.8.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a97ce0947f2317202a12c6c7142a5d9f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a7fe930dd6b24e42a5f5a407fdbac46f"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5199854de656453245d8d325b8b511f8"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.8.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.8.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "9282675a6e2375e535cdfcc1cb899bca"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e1287497887b62ffb4da0ac144a39092"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.9.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "160355833ae9286d2e95292fafe630d9"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "369d9288bfde5585e889f8df8e54b4ce"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d9d5ae489b9af8301283051deaf4397b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.9.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.9.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "84edf7e3be7986862946553931f3b230"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "df732be09aa8e0ba807ebbb594b76a19"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "model.tok_embeddings.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "ab233af5e8d88310dcbfe63a58406b11"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "model.tok_embeddings.q_scale",
"shape": [
92544,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "cba46a350bd42a85b572dd137604898d"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "408ef2b16829d872e04a0174e9450637"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4912f9150baf6203c8953caeea377daf"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31481856,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18898944
}
],
"md5sum": "7676224fdd71c4282528f219ab739026"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4e9c10a26f04b80f2c30e691d83847f1"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0e7ca9a654461d74849afc8ae609b613"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30695424,
"records": [
{
"name": "model.layers.11.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.12.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 6303744
},
{
"name": "model.layers.12.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 25178112
},
{
"name": "model.layers.12.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 27537408
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30683136
}
],
"md5sum": "0d44068cb2a5092facb534277d2f37d7"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c47b16f163277e4aa7d7c8a0f9b4149c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.13.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e80bcbf1018d915f799330717333fc04"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2f47f430c142c7cc70cebaea0bd5da56"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d9ac7cd257765de805ffa0368bba6710"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.13.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.13.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "5fa36ac603491c176dcdd471198b4835"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7186ec956cc3bec9150cbf22fb7eedbf"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.14.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d9c36346bfd193b0d77dcb5af0034105"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "65a4f5414d63d7b54a9139cc0a8e0f04"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f55a35c749e47c6de7ad8a4b7bf7d762"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.14.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.14.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "dd8bd4227056d5accc87911f24c67b9a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d9203b9cae964d9f8be7d07c67efbe92"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.15.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5378f6dc7d89e606ec62a7222d5ec614"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "842e5a968282cf786e56b468e0a5fa55"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "45cf165b697c49c33b09662a4aff06b7"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.15.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.15.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "1594737da0418efbcb377ad9dd013b92"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5c5a521f1a2b7a95f0c4bd71e7cf22ce"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.16.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1539f5fef482c39d873c144373cef2c2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4d540443905abd975e67a6edc7fc1a7d"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "909218a672a405fae6ab7347b01170b8"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.16.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.16.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "efc8c366b2b9128ef636853b74e405c7"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9050056e405fd0aac06e7d127b359e79"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.17.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d150b3ee45ffbdb45dcdc11a09fade59"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e1e99756446ed4c729c87d80321ef547"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0ef4e48afc1e01e6d74be98f4e90723f"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.17.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.17.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "636705874b77e7763fe28df67efd0f5f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f04ba0fdf653102ccc563327006d9854"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.18.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f1d5ae9f35a5316de560dc79827493bb"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "10dcca1e18bb55c3e0da35997d2f4560"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "febe3ac670d406bc9461dc65e3492cc5"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.18.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.18.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "22d9531d0c13381a3c6fb94bf9a66f17"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0c64f29f2963cbbcab51b1f594778115"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.19.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0e49a4f14dd019e671d48cd625e997b2"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9eaf42892006ebdfd3c689435bc46bcc"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ae55677a719c43f6e7dca7698b9214b1"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.19.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.19.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "0e16d1ee54210369741ba1e0ace784a2"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f7df21c9efea021b9c1426b0a0cb4cd1"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.20.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "aaf0c3c6765eb4d2eb2dd784f44804ef"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "587b7719c66c5c6dbc7a486f0fb01d58"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "30ccb97b8beab47e295b6f2a64f2e503"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.20.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.20.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "8fccd014290f8973decc5193d17c42fb"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7946db1e1b8eb5b13b50643fe95a3ed7"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.21.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d3b83745249a3669d6b3bc9dbb65eedf"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dfb688920fc7872eedca5c7bdf4c40af"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "de6d3b8f85bcf3452fafd8e1aa23d8cf"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.21.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.21.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "9b39edd33c1d37081035fb67ef8724e9"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9d248a99db4f2130d6b82d660956542a"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.22.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f113a79d940070533b11f8c6c3432a9c"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bc6019de7328bb2c17ef2dda7ba29f1d"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "796832896fa4034619bba8651a7140dc"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.22.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.22.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "42090e254798b25e3b15a2ac21ef1dfb"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "46d115205b31cb10801d127b1b475c9d"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.23.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e60014487e686977f6f4b60bb3eafaf9"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0cc79913db1453245e695d2937d1a68a"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9c6fdbd66a9d3db15664d2f469dbf4b8"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.23.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.23.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e654bb1ff1459843806297865b849c7c"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "44ca26c27cbb95c9f15b9a1814357b20"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.24.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8fad8853a8a67a8d9600e505e0b65c3c"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "755c7641f7a63cf3574ffe524f4d27a5"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5195cebf0e5535109c7a604df5a83fb2"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.24.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.24.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.24.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "ffd52981890aea444dacdf3122aa6ceb"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8cf9aa54c191323ef23e7be06b2a607f"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.25.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4f4816aa40a160376e6f600245b91ad0"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f5769ff9c5800688585b74926c7658a4"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d880feada73d8e7074bddcc4e93b83a8"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.24.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.25.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.25.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.25.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "117306bdd7555f3b84c180d3f3730b78"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "61d23882beec4f92f0f1d1303bd20712"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.26.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5942f667e9acf7e6bcdd178cb70d4341"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2cc7207924f8fbe21d91b6ef70a32c93"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e61668544ff049ac27dde98d5c2aa53b"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.25.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.26.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.26.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.26.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "3511e929ddfbbda3893fc90885d04808"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1359d818e700c8ee5d4db207d093038b"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.27.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f8c82025efcd9f65fa1ef078e84a81a9"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "84ddecdf026dc6a4fbe761055bd8e5a6"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d1a277d470dba3772e5cd2f15ce48d24"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.26.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.27.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.27.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.27.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "027c951400c10167bf1d2c6fee1dff2e"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a90941354d647edba205c6bc19f73aab"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.28.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0b217b721f78dbcc2c0ed67e11b5ecc6"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f8b7ca92d42d321192ac439785b262a9"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a9f9e2a247f2c46b1e22152c14115739"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.27.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.28.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.28.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.28.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "d315d1f8fccaca486dfed76c5d57fc03"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d43f591d51ae436d5c84493be6fde88e"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.29.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d81a488a05c54f786447746a060f457e"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d90a2cb4aa583b5ae3d6650586cfb83b"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b5ad2fbcf8043e286e2b9facf9eb621c"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.28.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.29.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.29.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.29.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "de796b075da05fae904950737a66157e"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1fedb9949e7017f2d6f66200766186bb"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.30.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8a9ef1a9451bff16e1a05db67d9db6ec"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "53924d077311077590489bb8874a7f06"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "64839de53fc8b9e6620f2f689816d5ef"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.29.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.30.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.30.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.30.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "36e9b27256e744bfc84c0548f4a184ee"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f60e6ebcba0d4a457d007181f17bc61a"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.31.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f4df70e3ffc803325c102ab198e83199"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f26ade8b2d98a7e6b4f9c2d77540f9e0"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0bdb58b3d2878a9f5ac66fa2ff54d800"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.30.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.31.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.31.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.31.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "d457f95ae120e131d624d01048fbd94b"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c59a016ca2abe0c77be693b87c7f19e1"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.32.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "00797a9b4abb97450b1f659ba3e640cb"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d431fe7954faed3fa5a53b9bfaed7d81"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a15f4da7ce1bb9dac56d41279bb1fe4b"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.31.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.32.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.32.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.32.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "ee27f50abd9addc0812e50383553233a"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.32.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0a34a699d88cd5548ebd77ec62966438"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.33.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "db44f4a9b9e2ab638ec65efc594519e5"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "705a8b30866dae02295314488bade993"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9624d44f1f941fd794d90f57299a1134"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.32.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.33.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.33.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.33.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a7f74aee4a7ffcb1d42f91199d900233"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.33.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b76a008f070b5b6073af5d52b62dd6dc"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.34.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "11ca1ead68552e92ae71663a185ffdb9"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e58a794753d76bbee3e156c2acb335f5"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6f9d40acddb46ecb813ba1974e99a957"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.33.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.34.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.34.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.34.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "4e3948417fcadca00daee1ef8d86a407"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.34.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bd6efc023e5903f521661612102e73e5"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.35.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2d3bffa5e675349677247b511c524a7b"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4ffd056f47b501f89feea3c86555ffa1"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f60b10efccb721e569d26476e2d2107d"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.34.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.35.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.35.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.35.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "4d3a810211a1ebb1a15b8c77415c58d9"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.35.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "401a9fcb9cab734ff723c1db25513700"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.36.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "54da9c2cbc42600ad4de11b80f17bb63"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.36.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b1bd29d111483237ebbdab19cd7c9d25"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7227baf6d2a38d0b5161fa8e85a93c4d"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.35.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.36.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.36.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "bb596961facb8b7784da9917e66db8b0"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.36.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8bfbbecbd950176c7040d2c48a6d2051"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.37.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a16e1a0b07c5a5d6ad9b9c55702a4244"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.37.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "07527ecb52ed7c0af58e598aacb5358c"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3c701ae2d5e237cd829ff85e7b7d44e7"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.36.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.36.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.36.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.37.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.37.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.37.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "8cb99f88872b88a9b3a39a7e95f66cd0"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.37.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2d0bae44911205c57050eff8bfc51632"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.38.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "988a4a104c7e1087a5c73e0d9b249b80"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.38.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "599a287f382c8bad688c8a0abbb3e3db"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "679de3f319cf74a4e995a76e045c355a"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.37.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.37.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.38.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.38.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.38.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "f319c92a77244cf7d195e419db85ad09"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.38.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2c79a96051eff105651fe838cb13b46d"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.39.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d6fcfcade8f5c1fa505465262535837a"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.39.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dbac3f04a7d080983663455bf9dfea07"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4ea0118ce9a0bcac7f8d4cc4678d87d2"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.38.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.38.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.39.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.39.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.39.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "ab9c367a5be60979d6455f7d739ccc72"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.39.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ae34276998b1dec7eb315fbe60fd93e3"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.40.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "028d2075c70e57c67b94c1fdaf40a018"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.40.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8c5a1b2d6d2ef3b5868491faa4cde2a8"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a62ca58403d902f2331aad61d6c5b225"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.39.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.40.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.40.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.40.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "3f3ffcd238206f7d65c93ff911113393"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.40.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "178bfcc5079185832cd02c90e9c7e817"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.41.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "452092c35aaaebc4915ca1c804a6125a"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.41.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0ce72fc965b3c8fe070438eaf7c92297"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ebbe6de22d586ca0d7cb52138f718eec"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.40.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.40.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.41.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.41.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.41.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "ef3a52d4f20701f6e494961b838c5d83"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.41.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "958ace8f8e98b84dd5b82add0bb4d884"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.42.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "74706fd2b8529eb2efd14e6bbd84d185"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.42.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "148fe4bc78ba939b4f06ed6b4bf57038"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "83d980df3aa595ec94625daf8e3151d1"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.41.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.41.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.42.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.42.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.42.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a747a37d2e908c6416dd6d7c9e62fe7b"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.42.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f410e9172b0b60d9c30f50c9e0482c36"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.43.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bcb4715a281057361afb867fed27164d"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.43.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "71c196d5872c079d9b4ad163fd1ae2c3"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9a94b6decb5a1c3abe053ca30ef2bce4"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.42.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.42.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.43.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.43.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.43.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "c343def9951a0b1dd5ecf45067e0b9ba"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.43.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8d2a5801c50041174319c176e3fbb91d"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.44.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "96b2adf641b8980a57941b7aecc63086"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.44.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "864922ee5506a66c6a5b5a22fba55b5e"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f7c0bc42ac27c4fd450935cf57b133d2"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.43.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.43.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.44.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.44.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.44.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "eed8426b93d326d5d4a7fda8c7100b25"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.44.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6cd38fe34268b7ac3f343250b34d73de"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.45.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "62f3039af40c6544027403242aea0205"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.45.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "067f7b0af02e58d8116738141fa2e9b4"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "54cb96da37c9ac462fd62bc3435fa112"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.44.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.44.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.45.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.45.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.45.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "f28bef0b827697ec7dc93a7aec38d697"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.45.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8c73e0f89743394ba00f81a307ba5f7f"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.46.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5f4a2b9e9515336ebdc0cc5e72964a24"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.46.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "50ba7f4b0a1fae3eeef351d3bc196983"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e0c93661f45b260190e73830aa004ff9"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.45.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.45.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.46.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.46.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.46.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "6ce9363076eafb4ad6b415c514d26624"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.46.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "52071f5359d9e0745b1aa3fa78a0cf04"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.47.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "89c2388180558790ef086f86b436c264"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.47.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5fa014c07079263e092294cd4dfcec12"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d93e34391aa38c70089da6e1d7b7befe"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.46.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.46.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.47.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.47.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.47.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "5ea98ac9a26b4407fb8dbdc7baee9d75"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.47.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2baaf4862c5baab297498668eef903e2"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "output.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "6c7a08698502a151860e20ac7a12fb3d"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "output.q_scale",
"shape": [
92544,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "717e73dfb96047fc5b9204cea4ba25d1"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 18898944,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.47.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.47.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
}
],
"md5sum": "e15925ccd75f8d95b3bf951d35834ad4"
}
]
}