diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,2951 @@ +{ + "metadata": { + "ParamSize": 269, + "ParamBytes": 283132928.0, + "BitsPerParam": 3.655862583030465 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 63205376, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 63205376, + "byteOffset": 0 + } + ], + "md5sum": "e7b49b4c6ba0344356fbdcdec46233be" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 63205376, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 63205376, + "byteOffset": 0 + } + ], + "md5sum": "e7b49b4c6ba0344356fbdcdec46233be" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33418240, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7900672, + "byteOffset": 0 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7900672, + "byteOffset": 7900672 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15801344 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 15803392 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 16966656 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 17112064 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 19454976 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19747840 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19749888 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 19756032 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 21033984 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 21193728 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 21619712 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21672960 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 21675008 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 22838272 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 22983680 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 25326592 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25619456 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25621504 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 25627648 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 26905600 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 27065344 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 27491328 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27544576 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 27546624 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 28709888 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 28855296 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 31198208 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31491072 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31493120 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 31499264 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 32777216 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 32936960 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 33362944 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33416192 + } + ], + "md5sum": "e4b6c5f9d9a8eb8813e4d4f88846ba33" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33310720, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 1163264 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 1308672 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 3651584 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3944448 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3946496 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 3952640 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 5230592 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 5390336 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 5816320 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5869568 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 5871616 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 7034880 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 7180288 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 9523200 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9816064 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9818112 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 9824256 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 11102208 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11261952 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 11687936 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11741184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 11743232 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 12906496 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 13051904 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 15394816 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15687680 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15689728 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 15695872 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 16973824 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 17133568 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 17559552 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17612800 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 17614848 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 18778112 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 18923520 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 21266432 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21559296 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21561344 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 21567488 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 22845440 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 23005184 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 23431168 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23484416 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 23486464 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 24649728 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 24795136 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 27138048 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27430912 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 27432960 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 27439104 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 28717056 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 28876800 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 29302784 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29356032 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 29358080 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 30521344 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 30666752 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 33009664 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33302528 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33304576 + } + ], + "md5sum": "4fb729f6a7bc624aa0f7422ac38cfcef" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32585728, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 1277952 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 1437696 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 1863680 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1916928 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 1918976 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 3082240 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 3227648 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 5570560 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5863424 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 5865472 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 5871616 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 7149568 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 7309312 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 7735296 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7788544 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 7790592 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 8953856 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 9099264 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 11442176 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11735040 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 11737088 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 11743232 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13021184 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 13180928 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 13606912 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 13660160 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 13662208 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 14825472 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 14970880 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 17313792 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17606656 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17608704 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 17614848 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 18892800 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 19052544 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 19478528 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19531776 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 19533824 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 20697088 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 20842496 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 23185408 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23478272 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23480320 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 23486464 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 24764416 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 24924160 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 25350144 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25403392 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 25405440 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 26568704 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 26714112 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 29057024 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29349888 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 29351936 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 29358080 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 30636032 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 30795776 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 31221760 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31275008 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 31277056 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 32440320 + } + ], + "md5sum": "8572c24763eba76e31a9e9087aedc3bd" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33439744, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 2342912 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2635776 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2637824 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 2643968 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 3921920 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 4081664 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 4507648 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4560896 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 4562944 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 5726208 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 5871616 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 8214528 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8507392 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 8509440 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 8515584 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 9793536 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 9953280 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 10379264 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10432512 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 10434560 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 11597824 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 11743232 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 14086144 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14379008 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14381056 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 14387200 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 15665152 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 15824896 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 16250880 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16304128 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 16306176 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 17469440 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 17614848 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 19957760 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20250624 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 20252672 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 20258816 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 21536768 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 21696512 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 22122496 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22175744 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 22177792 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 23341056 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 23486464 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 25829376 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26122240 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26124288 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 26130432 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 27408384 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 27568128 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 27994112 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28047360 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 28049408 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 29212672 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 29358080 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 31700992 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31993856 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31995904 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 32002048 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 33280000 + } + ], + "md5sum": "c3e18cf55248dd1d744e5c930cfd7468" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 23967744, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 425984 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 479232 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 481280 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 1644544 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 1789952 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 4132864 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4425728 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 4427776 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 4433920 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 5711872 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 5871616 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 6297600 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 6350848 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 6352896 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 7516160 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 7661568 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 10004480 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10297344 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 10299392 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 10305536 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 11583488 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11743232 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 12169216 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12222464 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 12224512 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 13387776 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 13533184 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 15876096 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16168960 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 16171008 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 16177152 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 17455104 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 17614848 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 18040832 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18094080 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1024, + 284 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1163264, + "byteOffset": 18096128 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1024, + 71 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 145408, + "byteOffset": 19259392 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2342912, + "byteOffset": 19404800 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 292864, + "byteOffset": 21747712 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22040576 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 22042624 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22048768 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23326720 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 23486464 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 26 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 53248, + "byteOffset": 23912448 + }, + { + "name": "model.norm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23965696 + } + ], + "md5sum": "21d7d4b461aa432cc38af98cc018f736" + } + ] +} \ No newline at end of file