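The block below is the raw per-layer config of an int4 AWQ (block size 128, float16) llama-style decoder export; the "_np:..." strings appear to reference arrays stored in a companion weights archive rather than inline values. As a minimal, hypothetical sketch of how such a file could be inspected, the following assumes it has been saved locally as config.json (the actual filename and the companion archive layout are assumptions, not stated in the export itself):

import json
from collections import Counter

CONFIG_PATH = "config.json"  # assumed filename for the JSON export shown below

with open(CONFIG_PATH, "r") as f:
    # Python's json module accepts the non-standard -Infinity literal that this
    # export uses for "rotary_dim", mapping it to float("-inf").
    config = json.load(f)

print("quantization   :", config["quantization"])      # e.g. int4_awq
print("awq_block_size :", config["awq_block_size"])    # e.g. 128
print("dtype / vocab  :", config["dtype"], "/", config["vocab_size"])
print("decoder layers :", len(config["layers"]))

# Tally which fields carry "_np:" tensor references; in this export the AWQ
# linears populate weights_scaling_factor and prequant_scaling_factor while
# leaving activation_scaling_factor and output_scaling_factor null.
ref_fields = Counter()

def walk(node):
    if isinstance(node, dict):
        for key, value in node.items():
            if isinstance(value, str) and value.startswith("_np:"):
                ref_fields[key] += 1
            else:
                walk(value)
    elif isinstance(node, list):
        for item in node:
            walk(item)

walk(config)
print("tensor references by field:", dict(ref_fields))

The raw export follows.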
{"version": 0.4, "quantization": "int4_awq", "awq_block_size": 128, "dtype": "float16", "vocab_size": 32000, "rank": 0, "tensor_parallel": 1, "vocab_embedding": {"weight": "_np:vocab_embedding:weight"}, "positional_embedding": null, "layers": [{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:0:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:0:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:0:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:0:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:0:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:0:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:0:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:0:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:0:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:1:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:1:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:1:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:1:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:1:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:1:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:1:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:1:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:1:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:1:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:2:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:2:attention:qkv:v:weight", 
"bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:2:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:2:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:2:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:2:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:2:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:2:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:2:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:3:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:3:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:3:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:3:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:3:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:3:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:3:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:3:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:3:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:4:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:4:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:4:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:4:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:4:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:4:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": 
"_np:layers:4:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:4:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:4:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:4:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:5:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:5:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:5:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:5:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:5:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:5:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:5:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:5:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:5:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, 
"use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:6:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:6:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:6:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:6:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:6:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:6:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:6:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:6:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:6:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:7:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, 
"k": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:7:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:7:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:7:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:7:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:7:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:7:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:7:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:7:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:8:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:8:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:8:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:8:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:8:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:8:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:8:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:8:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:8:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:8:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:9:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:9:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:9:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:9:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": 
{"linear_type": "column", "weight": "_np:layers:9:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:9:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:9:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:9:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:9:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:10:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:10:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:10:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:10:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:10:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:10:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:gate:prequant_scaling_factor", 
"output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:10:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:10:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:10:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:11:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:11:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:11:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:11:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:11:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:11:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:11:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:11:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:11:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, 
{"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:12:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:12:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:12:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:12:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:12:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:12:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:12:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:12:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:12:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:13:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": 
"_np:layers:13:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:13:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:13:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:13:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:13:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:13:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:13:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:13:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:13:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:14:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:14:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:14:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:14:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:14:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:14:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:14:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:14:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:14:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:14:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:15:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:15:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:15:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:15:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:15:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:15:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:15:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:15:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:15:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:16:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:16:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:16:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:16:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:16:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:16:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:16:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:16:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:16:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:16:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:17:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:17:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:17:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:17:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:17:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:17:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:17:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:17:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:17:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:18:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:18:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:18:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:18:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:18:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:18:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:18:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:18:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:18:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:19:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:19:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:19:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:19:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:19:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:19:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:19:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:19:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:19:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:19:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:20:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:20:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:20:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:20:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:20:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:20:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:20:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:20:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:20:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:20:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:21:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:21:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:21:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:21:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:21:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:21:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:21:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:21:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:21:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:22:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:22:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:22:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:22:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:22:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:22:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:22:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:22:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:22:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:22:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:23:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:23:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:23:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:23:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:23:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:23:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:23:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:23:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:23:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:24:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:24:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:24:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:24:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:24:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:24:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:24:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:24:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:24:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:25:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:25:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:25:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:25:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:25:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:25:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:25:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:25:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:25:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:25:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:26:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:26:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:26:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:26:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:26:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:26:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:26:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:26:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:26:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:26:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:27:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:27:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:27:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:27:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 
1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:27:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:27:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:27:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:27:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:27:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:28:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:28:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:28:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:28:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:28:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:28:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": 
"_np:layers:28:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:28:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:28:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:28:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:29:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:29:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:29:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:29:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:29:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:29:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:29:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:29:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:29:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": 
false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:30:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:30:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:30:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:30:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:30:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:30:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:30:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:30:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:30:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}, {"decoder_type": "llama", "input_layernorm": {"weight": "_np:layers:31:input_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp_layernorm": null, "attention": {"qkv": {"q": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:q:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:q:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:q:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "k": {"linear_type": 
"column", "weight": "_np:layers:31:attention:qkv:k:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:k:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:k:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "v": {"linear_type": "column", "weight": "_np:layers:31:attention:qkv:v:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:qkv:v:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:qkv:v:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}}, "dense": {"linear_type": "row", "weight": "_np:layers:31:attention:dense:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:attention:dense:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:attention:dense:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "rotary_dim": -Infinity}, "post_layernorm": {"weight": "_np:layers:31:post_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "mlp": {"fc": {"linear_type": "column", "weight": "_np:layers:31:mlp:fc:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:fc:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:fc:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "gate": {"linear_type": "column", "weight": "_np:layers:31:mlp:gate:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:gate:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:gate:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "proj": {"linear_type": "row", "weight": "_np:layers:31:mlp:proj:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:layers:31:mlp:proj:weights_scaling_factor", "prequant_scaling_factor": "_np:layers:31:mlp:proj:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "hidden_act": "silu"}, "num_attention_heads": 32, "num_kv_heads": 8, "max_position_embeddings": 32768, "rotary_pct": 0, "use_alibi": false, "new_decoder_architecture": false, "parallel_attention": false}], "final_layernorm": {"weight": "_np:final_layernorm:weight", "bias": null, "layernorm_type": "rms", "eps": 1e-05}, "lm_head": {"linear_type": "column", "weight": "_np:lm_head:weight", "bias": null, "activation_scaling_factor": null, "weights_scaling_factor": "_np:lm_head:weights_scaling_factor", "prequant_scaling_factor": "_np:lm_head:prequant_scaling_factor", "output_scaling_factor": null, "awq_block_size": 128}, "share_embedding_table": false}