SmolLM-1.7B-Instruct-q0f16-MLC / ndarray-cache.json
CharlieFRuan's picture
Upload folder using huggingface_hub
8e8b9aa verified
{
"metadata": {
"ParamSize": 146,
"ParamBytes": 3422752768.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 201326592,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
49152,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 201326592,
"byteOffset": 0
}
],
"md5sum": "a259e04dd5ac68366d0270853fc6baa0"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "142b00822051f07c35c0297306732f48"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "4cdf051c6d954220db78f14789eb9243"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 8192
}
],
"md5sum": "8d623f48a653accb77cfae436fd0df5a"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "15a17acdb21ca5266f0b35542c25fc0c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "361e19c8e4a80770ff6ccd63c47704e9"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e418d11051dacbf65518c0f728c11f77"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a51763f48f8e20769858c3bce34eeb7e"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f14cb56efe79d85c98c9f8c6df8f4e3e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "049eba8a73a15f6d39a296e687d24d1d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e187eb9be4ae5793e175f7920abc0f4c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "403c4b426ffd4f9eddf429e7298d4f36"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "02e6eda657d12ff2f02d34a36b4910ba"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "1ec7a96eeb0c2c8b32aa68c370851e5d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6694a4f83cdaa11619ef14ea5ddb3e94"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d55465755e112eddf6194b5cbe941f30"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7ee58e03244ec98675592a619a4712f7"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "da0a9f97d7bdf6a0ddddc378bdcb0c42"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "3abff689ea70caf218a0cc0fcf672c1e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d55edaa50a35290566eedc5fc1f401e4"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2edf76daafe562f5c10616058c58a47b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "8fec62b169f638c3d9ae042f194f0fd0"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "00e69a33d1c557ede0d69183450a1707"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "d27d3c72e5678c5621bf5770804a0b5c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7050ab00c6e2924acca7d91fdd19ab5b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "55429b972e8de6f6cb68ea14012bd15f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "251e12116f3f4c8480a710cdf57264a3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f9a6c1dffd0b9c3b98fe4d1848460207"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "04e9237f609321497557047d90ab9316"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9f7ea2e608b0696573f4ca3f5898cb5f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bab64fd3d4db995a0564c7eb7509dcf4"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d4b45409b45a338c10c2e1d9e4c13907"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9c6714e8cad4a4764f6904a65046bca8"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "627446641a00cb8a056fa4f107f5ee33"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3fee3063f6956bfef4c59018d149eafc"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "009bf55dfe453da73fbe39cd694b27fa"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fc29b456c44817fc0c8c15384e56fc43"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "04937f32d4e969f6787ef0bf56aac323"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f35cbbdeaf8f04e22bd45d0e1821ba4a"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6bc15385fe9911a15daa5dcab12c5318"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "985436f845616fa88353fbbab318599a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "e04d5ed0f18bda06740e1750730a8563"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8654458158b0b0b7dcc3b9a501bd516b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "cc8b44dfb4291d38f618e690a03b0dd4"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5da44e3580810651e3ca86453a66289e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "0679620c0b95a61079dfe779a9b32b03"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cfb852b4681a0714865aa9927d0d4cbb"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bbf70e6104bfae17a95995790f4b58de"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "df7431c02ed05db25c4dc0772dc0cc7d"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3c8f1c16e84e0c212f02926265b69338"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "23bac0243265691f27163e18b6b8b9de"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f45ea17e274b1457fe96d16b9fb7a843"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0a8fc03151241e115d9f1ea41d4a5cdc"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "514579ed33a0c7548c4745e8b15efca2"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "13d802b61d0ef424539a18b470d12d8d"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "4513e8c8ad8878fe714635aa20d21644"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "44a34baaf11c41475ea721ff2e3826d0"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "85777dca1c0a19051e360d42d8f4c2c1"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "7168a60708c4305d3749dee0db5b0874"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e8ffa4a7cd803a596c562114d97c1113"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cda4047891966d559763d59795e089b4"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "15c29db6b563475d715a448cf9abb852"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b27665bb70d6dbc97ee3946b5f67a974"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "8258de8c45a1904dd5c144ddacf94932"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "603d5a6e825c0e1dd7238bcf1d6d5a9a"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d0677587e19d32f120bbb24b7e1c3346"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "08d590fd7aaf12ceebb89b32a3901da9"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b56db862d598ec7f29eed7cd9918cf6f"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d7680d7619191bd632fa3b80c6c2000c"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "310d44e7f1fa0b05a1861f7bf54ad1ba"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4414a7bc6d47cdf2a762f1bf6b45704d"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "fc407cd06f2c4aee544ad2f3cad072d8"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f039e34665f461602cc853154a8c20d4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25186304
}
],
"md5sum": "7e64f6ee1bdaac20d6e8992df1310660"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "32d627f9928b05bd30fd6bd859ef5b68"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "1b59151fcaf44c9995af9880d8a15d96"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "101037c77c03764fcaf1a4f1bbd2a13c"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "577f019a0505d834c070922505c011b5"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "2f880bdd28b6419920d338d464bee211"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6f1878dece9d888a3ebe83fa83fba105"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 25186304,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16785408
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16789504
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16793600
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25182208
}
],
"md5sum": "978338a8729a5504c555f701ef532be4"
}
]
}