{
  "metadata": {
    "ParamSize": 165,
    "ParamBytes": 45547008.0,
    "BitsPerParam": 3.652428290625048
  },
  "records": [
    {
      "dataPath": "params_shard_0.bin",
      "format": "raw-shard",
      "nbytes": 33464832,
      "records": [
        {"name": "lm_head.q_weight", "shape": [52, 50304], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10463232, "byteOffset": 0},
        {"name": "lm_head.q_scale", "shape": [13, 50304], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1307904, "byteOffset": 10463232},
        {"name": "model.embed_tokens.q_weight", "shape": [50304, 52], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10463232, "byteOffset": 11771136},
        {"name": "model.embed_tokens.q_scale", "shape": [50304, 13], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1307904, "byteOffset": 22234368},
        {"name": "model.layers.0.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 23542272},
        {"name": "model.layers.0.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23543296},
        {"name": "model.layers.0.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 23805440},
        {"name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 23838208},
        {"name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 24370688},
        {"name": "model.layers.0.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 24437248},
        {"name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 24438272},
        {"name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 24757760},
        {"name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 24797696},
        {"name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 24904192},
        {"name": "model.layers.1.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 24917504},
        {"name": "model.layers.1.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 24918528},
        {"name": "model.layers.1.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25180672},
        {"name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 25213440},
        {"name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 25745920},
        {"name": "model.layers.1.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 25812480},
        {"name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 25813504},
        {"name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 26132992},
        {"name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 26172928},
        {"name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 26279424},
        {"name": "model.layers.10.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 26292736},
        {"name": "model.layers.10.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 26293760},
        {"name": "model.layers.10.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 26555904},
        {"name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 26588672},
        {"name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 27121152},
        {"name": "model.layers.10.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 27187712},
        {"name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 27188736},
        {"name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 27508224},
        {"name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 27548160},
        {"name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 27654656},
        {"name": "model.layers.11.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 27667968},
        {"name": "model.layers.11.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 27668992},
        {"name": "model.layers.11.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27931136},
        {"name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 27963904},
        {"name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 28496384},
        {"name": "model.layers.11.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28562944},
        {"name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 28563968},
        {"name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 28883456},
        {"name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 28923392},
        {"name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 29029888},
        {"name": "model.layers.12.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29043200},
        {"name": "model.layers.12.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 29044224},
        {"name": "model.layers.12.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 29306368},
        {"name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 29339136},
        {"name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 29871616},
        {"name": "model.layers.12.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29938176},
        {"name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 29939200},
        {"name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 30258688},
        {"name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 30298624},
        {"name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 30405120},
        {"name": "model.layers.13.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 30418432},
        {"name": "model.layers.13.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30419456},
        {"name": "model.layers.13.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 30681600},
        {"name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 30714368},
        {"name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 31246848},
        {"name": "model.layers.13.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 31313408},
        {"name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 31314432},
        {"name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 31633920},
        {"name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 31673856},
        {"name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 31780352},
        {"name": "model.layers.14.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 31793664},
        {"name": "model.layers.14.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 31794688},
        {"name": "model.layers.14.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 32056832},
        {"name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 32089600},
        {"name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 32622080},
        {"name": "model.layers.14.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 32688640},
        {"name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 32689664},
        {"name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 33009152},
        {"name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 33049088},
        {"name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 33155584},
        {"name": "model.layers.15.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 33168896},
        {"name": "model.layers.15.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 33169920},
        {"name": "model.layers.15.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 33432064}
      ],
      "md5sum": "938eddcccf9773ab32a164863d26a907"
    },
    {
      "dataPath": "params_shard_1.bin",
      "format": "raw-shard",
      "nbytes": 12082176,
      "records": [
        {"name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 0},
        {"name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 532480},
        {"name": "model.layers.15.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 599040},
        {"name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 600064},
        {"name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 919552},
        {"name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 959488},
        {"name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 1065984},
        {"name": "model.layers.2.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 1079296},
        {"name": "model.layers.2.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 1080320},
        {"name": "model.layers.2.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 1342464},
        {"name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 1375232},
        {"name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 1907712},
        {"name": "model.layers.2.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 1974272},
        {"name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 1975296},
        {"name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 2294784},
        {"name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 2334720},
        {"name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 2441216},
        {"name": "model.layers.3.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 2454528},
        {"name": "model.layers.3.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 2455552},
        {"name": "model.layers.3.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 2717696},
        {"name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 2750464},
        {"name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 3282944},
        {"name": "model.layers.3.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3349504},
        {"name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 3350528},
        {"name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 3670016},
        {"name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 3709952},
        {"name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 3816448},
        {"name": "model.layers.4.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3829760},
        {"name": "model.layers.4.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 3830784},
        {"name": "model.layers.4.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 4092928},
        {"name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 4125696},
        {"name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 4658176},
        {"name": "model.layers.4.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 4724736},
        {"name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 4725760},
        {"name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 5045248},
        {"name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 5085184},
        {"name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 5191680},
        {"name": "model.layers.5.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 5204992},
        {"name": "model.layers.5.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5206016},
        {"name": "model.layers.5.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 5468160},
        {"name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 5500928},
        {"name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 6033408},
        {"name": "model.layers.5.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 6099968},
        {"name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 6100992},
        {"name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 6420480},
        {"name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 6460416},
        {"name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 6566912},
        {"name": "model.layers.6.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 6580224},
        {"name": "model.layers.6.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 6581248},
        {"name": "model.layers.6.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 6843392},
        {"name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 6876160},
        {"name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 7408640},
        {"name": "model.layers.6.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 7475200},
        {"name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 7476224},
        {"name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 7795712},
        {"name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 7835648},
        {"name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 7942144},
        {"name": "model.layers.7.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 7955456},
        {"name": "model.layers.7.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7956480},
        {"name": "model.layers.7.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 8218624},
        {"name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 8251392},
        {"name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 8783872},
        {"name": "model.layers.7.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 8850432},
        {"name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 8851456},
        {"name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 9170944},
        {"name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 9210880},
        {"name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 9317376},
        {"name": "model.layers.8.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 9330688},
        {"name": "model.layers.8.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9331712},
        {"name": "model.layers.8.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 9593856},
        {"name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 9626624},
        {"name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 10159104},
        {"name": "model.layers.8.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 10225664},
        {"name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 10226688},
        {"name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 10546176},
        {"name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 10586112},
        {"name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 10692608},
        {"name": "model.layers.9.input_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 10705920},
        {"name": "model.layers.9.mlp.down_proj.q_weight", "shape": [128, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 10706944},
        {"name": "model.layers.9.mlp.down_proj.q_scale", "shape": [32, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 10969088},
        {"name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [52, 2560], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 532480, "byteOffset": 11001856},
        {"name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [13, 2560], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 66560, "byteOffset": 11534336},
        {"name": "model.layers.9.post_attention_layernorm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 11600896},
        {"name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [52, 1536], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 11601920},
        {"name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [13, 1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39936, "byteOffset": 11921408},
        {"name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [52, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106496, "byteOffset": 11961344},
        {"name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [13, 512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13312, "byteOffset": 12067840},
        {"name": "model.norm.weight", "shape": [512], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 12081152}
      ],
      "md5sum": "0a36288a5e64434e1a495fc10ceb53db"
    }
  ]
}