aikitoria's picture
Upload folder using huggingface_hub
b6f4770 verified
raw
history blame
76.5 kB
{
"quant_algo": "MIXED_PRECISION",
"kv_cache_quant_algo": "FP8",
"quantized_layers": {
"transformer.layers.0.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.0.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.0.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.0.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.0.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.1.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.1.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.1.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.1.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.1.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.2.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.2.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.2.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.2.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.2.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.3.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.3.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.3.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.3.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.3.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.4.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.4.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.4.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.4.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.4.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.5.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.5.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.5.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.5.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.5.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.6.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.6.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.6.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.6.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.6.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.7.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.7.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.7.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.7.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.7.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.8.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.8.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.8.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.8.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.8.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.9.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.9.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.9.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.9.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.9.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.10.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.10.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.10.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.10.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.10.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.11.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.11.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.11.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.11.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.11.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.12.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.12.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.12.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.12.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.12.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.13.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.13.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.13.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.13.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.13.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.14.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.14.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.14.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.14.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.14.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.15.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.15.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.15.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.15.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.15.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.16.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.16.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.16.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.16.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.16.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.17.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.17.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.17.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.17.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.17.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.18.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.18.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.18.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.18.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.18.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.19.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.19.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.19.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.19.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.19.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.20.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.20.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.20.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.20.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.20.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.21.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.21.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.21.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.21.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.21.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.22.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.22.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.22.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.22.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.22.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.23.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.23.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.23.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.23.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.23.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.24.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.24.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.24.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.24.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.24.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.25.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.25.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.25.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.25.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.25.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.26.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.26.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.26.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.26.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.26.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.27.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.27.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.27.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.27.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.27.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.28.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.28.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.28.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.28.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.28.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.29.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.29.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.29.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.29.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.29.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.30.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.30.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.30.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.30.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.30.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.31.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.31.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.31.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.31.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.31.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.32.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.32.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.32.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.32.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.32.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.33.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.33.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.33.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.33.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.33.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.34.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.34.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.34.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.34.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.34.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.35.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.35.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.35.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.35.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.35.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.36.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.36.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.36.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.36.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.36.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.37.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.37.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.37.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.37.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.37.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.38.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.38.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.38.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.38.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.38.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.39.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.39.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.39.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.39.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.39.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.40.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.40.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.40.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.40.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.40.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.41.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.41.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.41.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.41.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.41.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.42.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.42.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.42.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.42.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.42.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.43.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.43.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.43.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.43.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.43.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.44.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.44.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.44.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.44.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.44.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.45.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.45.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.45.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.45.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.45.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.46.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.46.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.46.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.46.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.46.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.47.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.47.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.47.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.47.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.47.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.48.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.48.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.48.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.48.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.48.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.49.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.49.attention.dense": {
"quant_algo": "FP8"
},
"transformer.layers.49.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.49.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.49.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.50.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.50.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.50.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.50.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.50.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.51.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.51.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.51.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.51.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.51.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.52.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.52.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.52.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.52.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.52.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.53.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.53.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.53.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.53.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.53.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.54.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.54.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.54.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.54.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.54.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.55.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.55.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.55.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.55.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.55.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.56.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.56.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.56.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.56.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.56.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.57.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.57.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.57.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.57.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.57.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.58.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.58.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.58.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.58.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.58.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.59.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.59.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.59.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.59.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.59.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.60.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.60.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.60.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.60.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.60.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.61.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.61.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.61.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.61.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.61.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.62.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.62.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.62.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.62.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.62.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.63.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.63.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.63.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.63.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.63.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.64.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.64.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.64.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.64.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.64.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.65.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.65.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.65.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.65.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.65.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.66.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.66.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.66.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.66.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.66.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.67.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.67.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.67.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.67.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.67.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.68.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.68.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.68.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.68.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.68.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.69.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.69.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.69.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.69.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.69.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.70.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.70.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.70.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.70.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.70.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.71.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.71.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.71.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.71.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.71.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.72.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.72.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.72.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.72.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.72.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.73.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.73.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.73.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.73.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.73.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.74.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.74.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.74.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.74.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.74.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.75.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.75.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.75.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.75.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.75.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.76.attention.qkv": {
"quant_algo": "FP8"
},
"transformer.layers.76.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.76.mlp.fc": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.76.mlp.gate": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.76.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.77.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.77.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.77.mlp.fc": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.77.mlp.gate": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.77.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.78.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.78.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.78.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.78.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.78.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.79.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.79.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.79.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.79.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.79.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.80.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.80.attention.dense": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.80.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.80.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.80.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.81.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.81.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.81.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.81.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.81.mlp.proj": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.82.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.82.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.82.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.82.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.82.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.83.attention.qkv": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.83.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.83.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.83.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.83.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.84.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.84.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.84.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.84.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.84.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.85.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.85.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.85.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.85.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.85.mlp.proj": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.86.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.86.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.86.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.86.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.86.mlp.proj": {
"quant_algo": "FP8"
},
"transformer.layers.87.attention.qkv": {
"quant_algo": "W4A8_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.87.attention.dense": {
"quant_algo": "W4A16_AWQ",
"group_size": 128,
"has_zero_point": false,
"pre_quant_scale": true
},
"transformer.layers.87.mlp.fc": {
"quant_algo": "FP8"
},
"transformer.layers.87.mlp.gate": {
"quant_algo": "FP8"
},
"transformer.layers.87.mlp.proj": {
"quant_algo": "FP8"
}
}
}