imi2's picture
Upload 3 files
7917d36 verified
raw
history blame
62.9 kB
{
"metadata": {
"ParamSize": 165,
"ParamBytes": 45547008.0,
"BitsPerParam": 3.652428290625048
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 33464832,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
52,
50304
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10463232,
"byteOffset": 0
},
{
"name": "lm_head.q_scale",
"shape": [
13,
50304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1307904,
"byteOffset": 10463232
},
{
"name": "model.embed_tokens.q_weight",
"shape": [
50304,
52
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10463232,
"byteOffset": 11771136
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
50304,
13
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1307904,
"byteOffset": 22234368
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 23542272
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 23543296
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 23805440
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 23838208
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 24370688
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 24437248
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 24438272
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 24757760
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 24797696
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 24904192
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 24917504
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 24918528
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25180672
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 25213440
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 25745920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 25812480
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 25813504
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 26132992
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 26172928
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 26279424
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26292736
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 26293760
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 26555904
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 26588672
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 27121152
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 27187712
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 27188736
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 27508224
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 27548160
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 27654656
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 27667968
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 27668992
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27931136
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 27963904
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 28496384
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 28562944
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 28563968
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 28883456
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 28923392
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 29029888
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 29043200
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 29044224
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 29306368
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 29339136
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 29871616
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 29938176
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 29939200
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 30258688
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 30298624
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 30405120
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 30418432
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 30419456
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 30681600
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 30714368
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 31246848
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 31313408
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 31314432
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 31633920
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 31673856
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 31780352
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 31793664
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 31794688
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 32056832
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 32089600
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 32622080
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 32688640
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 32689664
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 33009152
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 33049088
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 33155584
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 33168896
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 33169920
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 33432064
}
],
"md5sum": "938eddcccf9773ab32a164863d26a907"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 12082176,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 532480
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 599040
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 600064
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 919552
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 959488
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 1065984
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 1079296
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 1080320
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 1342464
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 1375232
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 1907712
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 1974272
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 1975296
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 2294784
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 2334720
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 2441216
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 2454528
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2455552
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 2717696
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 2750464
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 3282944
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 3349504
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 3350528
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 3670016
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 3709952
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 3816448
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 3829760
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 3830784
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 4092928
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 4125696
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 4658176
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 4724736
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 4725760
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 5045248
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 5085184
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 5191680
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 5204992
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5206016
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 5468160
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 5500928
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 6033408
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 6099968
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 6100992
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 6420480
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 6460416
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 6566912
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 6580224
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 6581248
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 6843392
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 6876160
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 7408640
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 7475200
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 7476224
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 7795712
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 7835648
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 7942144
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 7955456
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7956480
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 8218624
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 8251392
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 8783872
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 8850432
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 8851456
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 9170944
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 9210880
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 9317376
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 9330688
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 9331712
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 9593856
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 9626624
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 10159104
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 10225664
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 10226688
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 10546176
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 10586112
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 10692608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 10705920
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 10706944
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
32,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 10969088
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
52,
2560
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 532480,
"byteOffset": 11001856
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
13,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 66560,
"byteOffset": 11534336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 11600896
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
52,
1536
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 319488,
"byteOffset": 11601920
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
13,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39936,
"byteOffset": 11921408
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
52,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 106496,
"byteOffset": 11961344
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
13,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13312,
"byteOffset": 12067840
},
{
"name": "model.norm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 12081152
}
],
"md5sum": "0a36288a5e64434e1a495fc10ceb53db"
}
]
}