|
{ |
|
"metadata": { |
|
"ParamSize": 293, |
|
"ParamBytes": 812572672.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 102926336, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
50257, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102926336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 102926336, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.weight", |
|
"shape": [ |
|
50257, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102926336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27293696, |
|
"records": [ |
|
{ |
|
"name": "transformer.wpe.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 2097152 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 2099200 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 2101248 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 8398848 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 10496000 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 10498048 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 10500096 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10502144 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27287552 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27289600 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27291648 |
|
} |
|
], |
|
"md5sum": "112a664a1110ab79703ac38f87b1fe8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31490048, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 6291456 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 6297600 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 8394752 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 8398848 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8400896 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16797696 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 25192448 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 31483904 |
|
} |
|
], |
|
"md5sum": "c8425e929b9cf76cb3643623b218aae3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27295744, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 2097152 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 2099200 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 2101248 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 2103296 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10491904 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10500096 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 18888704 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 18892800 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 25192448 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27289600 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27291648 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 27293696 |
|
} |
|
], |
|
"md5sum": "116731931ee1771b82ec10524a88cf3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "fb5c75c0b282fef20b386c538b4c7c3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "ea4bcbd38e010438180664d3501567da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "ef7889578a63dab6c66b4c070159d182" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "7d3b89d1dccc117d0b7ad85fc5edd64c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "52a567337e48aae8179a77c111464d1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "ef47f0009672c223059f783172e0f815" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "fdf1efa1d84efed4dc338323e5693d04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "7926ce1b2972cc186b5d2c1470acfd37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "3b05c244b0d40fb843b1de4fffbf15dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "bcb2f7f911485a1452aa000e2e2c0e9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "4722a67d2e51a955ef61a5d03c3bcf94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "35ff9c86676c975dbf63b9912ab81f33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "01df1b9923d63e9d0f9a560804782440" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "92f106c3b756a5a5e926e43fdb5741f1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "983dbeeab7f0141aa067be28562ea043" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "1057439dfb17bfcc20ba72d11a21434f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "e45f78377a8fdd0c1f43506acd580d6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "ba1df963ea81fc211f28da7fee9a9165" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "e25144d1411e5b0a29c5c8d9c35dd029" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25192448, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 16791552 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23083008 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 23089152 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25186304 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25188352 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 25190400 |
|
} |
|
], |
|
"md5sum": "9c68c3d6b3ce48e1f4b5bebd16de3f13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16791552, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.weight", |
|
"shape": [ |
|
4096, |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.bias", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.weight", |
|
"shape": [ |
|
1024, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8396800 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16785408 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.weight", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16787456 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.bias", |
|
"shape": [ |
|
1024 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2048, |
|
"byteOffset": 16789504 |
|
} |
|
], |
|
"md5sum": "5529160aa774a49a742e440f2113dbd1" |
|
} |
|
] |
|
} |