gpt2-medium-q0f16-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
e6a454e
raw
history blame
111 kB
{
"metadata": {
"ParamSize": 293,
"ParamBytes": 812572672.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 102926336,
"records": [
{
"name": "lm_head.weight",
"shape": [
50257,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102926336,
"byteOffset": 0
}
],
"md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 102926336,
"records": [
{
"name": "transformer.wte.weight",
"shape": [
50257,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102926336,
"byteOffset": 0
}
],
"md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 27293696,
"records": [
{
"name": "transformer.wpe.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2097152
},
{
"name": "transformer.h.0.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2099200
},
{
"name": "transformer.h.0.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2101248
},
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 8392704
},
{
"name": "transformer.h.0.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 8398848
},
{
"name": "transformer.h.0.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10496000
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10498048
},
{
"name": "transformer.h.0.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10500096
},
{
"name": "transformer.h.0.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10502144
},
{
"name": "transformer.h.0.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18890752
},
{
"name": "transformer.h.0.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18898944
},
{
"name": "transformer.h.0.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27287552
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27289600
},
{
"name": "transformer.h.1.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27291648
}
],
"md5sum": "112a664a1110ab79703ac38f87b1fe8f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31490048,
"records": [
{
"name": "transformer.h.1.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 6291456
},
{
"name": "transformer.h.1.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6297600
},
{
"name": "transformer.h.1.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8394752
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8396800
},
{
"name": "transformer.h.1.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8398848
},
{
"name": "transformer.h.1.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8400896
},
{
"name": "transformer.h.1.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16789504
},
{
"name": "transformer.h.1.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16797696
},
{
"name": "transformer.h.1.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.2.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
},
{
"name": "transformer.h.2.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25192448
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31483904
}
],
"md5sum": "c8425e929b9cf76cb3643623b218aae3"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27295744,
"records": [
{
"name": "transformer.h.2.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2097152
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2099200
},
{
"name": "transformer.h.2.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2101248
},
{
"name": "transformer.h.2.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2103296
},
{
"name": "transformer.h.2.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10491904
},
{
"name": "transformer.h.2.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10500096
},
{
"name": "transformer.h.2.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18888704
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18890752
},
{
"name": "transformer.h.3.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18892800
},
{
"name": "transformer.h.3.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 18894848
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25186304
},
{
"name": "transformer.h.3.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 25192448
},
{
"name": "transformer.h.3.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27289600
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27291648
},
{
"name": "transformer.h.3.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27293696
}
],
"md5sum": "116731931ee1771b82ec10524a88cf3c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.3.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.3.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.3.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.3.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.4.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.4.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.4.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.4.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.4.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "fb5c75c0b282fef20b386c538b4c7c3c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.4.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.4.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.4.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.4.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.5.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.5.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.5.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.5.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.5.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "ea4bcbd38e010438180664d3501567da"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.5.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.5.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.5.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.5.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.6.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.6.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.6.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.6.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.6.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "ef7889578a63dab6c66b4c070159d182"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.6.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.6.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.6.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.6.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.7.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.7.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.7.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.7.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.7.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "7d3b89d1dccc117d0b7ad85fc5edd64c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.7.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.7.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.7.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.7.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.8.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.8.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.8.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.8.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.8.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "52a567337e48aae8179a77c111464d1d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.8.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.8.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.8.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.8.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.9.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.9.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.9.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.9.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.9.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "ef47f0009672c223059f783172e0f815"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.9.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.9.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.9.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.9.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.10.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.10.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.10.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.10.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.10.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "fdf1efa1d84efed4dc338323e5693d04"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.10.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.10.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.10.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.11.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.11.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.11.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.11.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.11.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "7926ce1b2972cc186b5d2c1470acfd37"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.11.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.11.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.11.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.11.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.12.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.12.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.12.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.12.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.12.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.12.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.12.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.12.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "3b05c244b0d40fb843b1de4fffbf15dd"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.12.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.12.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.12.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.12.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.13.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.13.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.13.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.13.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.13.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.13.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.13.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.13.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "bcb2f7f911485a1452aa000e2e2c0e9b"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.13.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.13.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.13.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.13.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.14.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.14.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.14.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.14.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.14.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.14.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.14.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.14.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "4722a67d2e51a955ef61a5d03c3bcf94"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.14.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.14.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.14.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.14.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.15.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.15.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.15.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.15.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.15.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.15.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.15.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.15.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "35ff9c86676c975dbf63b9912ab81f33"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.15.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.15.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.15.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.15.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.16.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.16.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.16.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.16.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.16.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.16.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.16.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.16.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "01df1b9923d63e9d0f9a560804782440"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.16.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.16.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.16.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.16.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.17.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.17.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.17.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.17.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.17.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.17.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.17.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.17.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "92f106c3b756a5a5e926e43fdb5741f1"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.17.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.17.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.17.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.17.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.18.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.18.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.18.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.18.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.18.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.18.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.18.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.18.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "983dbeeab7f0141aa067be28562ea043"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.18.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.18.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.18.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.18.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.19.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.19.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.19.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.19.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.19.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.19.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.19.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.19.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "1057439dfb17bfcc20ba72d11a21434f"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.19.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.19.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.19.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.19.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.20.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.20.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.20.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.20.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.20.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.20.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.20.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.20.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "e45f78377a8fdd0c1f43506acd580d6e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.20.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.20.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.20.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.20.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.21.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.21.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.21.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.21.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.21.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.21.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.21.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.21.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "ba1df963ea81fc211f28da7fee9a9165"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.21.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.21.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.21.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.21.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.22.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.22.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.22.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.22.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.22.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.22.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.22.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.22.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "e25144d1411e5b0a29c5c8d9c35dd029"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25192448,
"records": [
{
"name": "transformer.h.22.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.22.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.22.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.22.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.h.23.ln_1.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.h.23.ln_1.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
},
{
"name": "transformer.h.23.attn.c_attn.weight",
"shape": [
3072,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16791552
},
{
"name": "transformer.h.23.attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23083008
},
{
"name": "transformer.h.23.attn.c_proj.weight",
"shape": [
1024,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23089152
},
{
"name": "transformer.h.23.attn.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25186304
},
{
"name": "transformer.h.23.ln_2.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25188352
},
{
"name": "transformer.h.23.ln_2.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25190400
}
],
"md5sum": "9c68c3d6b3ce48e1f4b5bebd16de3f13"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 16791552,
"records": [
{
"name": "transformer.h.23.mlp.c_fc.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.23.mlp.c_fc.bias",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "transformer.h.23.mlp.c_proj.weight",
"shape": [
1024,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8396800
},
{
"name": "transformer.h.23.mlp.c_proj.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16785408
},
{
"name": "transformer.ln_f.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16787456
},
{
"name": "transformer.ln_f.bias",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16789504
}
],
"md5sum": "5529160aa774a49a742e440f2113dbd1"
}
]
}