{ "metadata": { "ParamSize": 165, "ParamBytes": 45547008.0, "BitsPerParam": 3.652428290625048 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33464832, "records": [ { "name": "lm_head.q_weight", "shape": [ 52, 50304 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10463232, "byteOffset": 0 }, { "name": "lm_head.q_scale", "shape": [ 13, 50304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1307904, "byteOffset": 10463232 }, { "name": "model.embed_tokens.q_weight", "shape": [ 50304, 52 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10463232, "byteOffset": 11771136 }, { "name": "model.embed_tokens.q_scale", "shape": [ 50304, 13 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1307904, "byteOffset": 22234368 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 23542272 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23543296 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 23805440 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 23838208 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 24370688 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 24437248 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 24438272 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 24757760 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 24797696 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 24904192 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 24917504 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 24918528 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25180672 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 25213440 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 25745920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 25812480 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 25813504 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 26132992 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 26172928 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 26279424 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 26292736 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 26293760 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 26555904 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 26588672 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 27121152 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 27187712 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 27188736 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 27508224 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 27548160 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 27654656 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 27667968 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 27668992 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27931136 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 27963904 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 28496384 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28562944 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 28563968 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 28883456 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 28923392 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 29029888 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29043200 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 29044224 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 29306368 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 29339136 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 29871616 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29938176 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 29939200 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 30258688 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 30298624 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 30405120 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 30418432 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30419456 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 30681600 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 30714368 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 31246848 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 31313408 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 31314432 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 31633920 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 31673856 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 31780352 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 31793664 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 31794688 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 32056832 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 32089600 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 32622080 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 32688640 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 32689664 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 33009152 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 33049088 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 33155584 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 33168896 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 33169920 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 33432064 } ], "md5sum": "938eddcccf9773ab32a164863d26a907" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 12082176, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 532480 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 599040 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 600064 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 919552 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 959488 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 1065984 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 1079296 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 1080320 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 1342464 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 1375232 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 1907712 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 1974272 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 1975296 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 2294784 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 2334720 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 2441216 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 2454528 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 2455552 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 2717696 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 2750464 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 3282944 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3349504 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 3350528 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 3670016 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 3709952 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 3816448 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3829760 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 3830784 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 4092928 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 4125696 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 4658176 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 4724736 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 4725760 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 5045248 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 5085184 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 5191680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 5204992 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5206016 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 5468160 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 5500928 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 6033408 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 6099968 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 6100992 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 6420480 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 6460416 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 6566912 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 6580224 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 6581248 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 6843392 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 6876160 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 7408640 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 7475200 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 7476224 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 7795712 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 7835648 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 7942144 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 7955456 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7956480 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 8218624 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 8251392 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 8783872 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 8850432 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 8851456 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 9170944 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 9210880 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 9317376 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 9330688 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9331712 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 9593856 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 9626624 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 10159104 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 10225664 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 10226688 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 10546176 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 10586112 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 10692608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 10705920 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 128, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 10706944 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 32, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 10969088 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 52, 2560 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 11001856 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 13, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 11534336 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 11600896 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 52, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 11601920 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 13, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 11921408 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 52, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 11961344 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 13, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 12067840 }, { "name": "model.norm.weight", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 12081152 } ], "md5sum": "0a36288a5e64434e1a495fc10ceb53db" } ] }