{ "producer": { "name": "modelopt", "version": "0.19.0" }, "architecture": "LlamaForCausalLM", "dtype": "bfloat16", "logits_dtype": "float32", "num_hidden_layers": 88, "num_attention_heads": 96, "num_key_value_heads": 8, "hidden_size": 12288, "norm_epsilon": 1e-05, "vocab_size": 32768, "max_position_embeddings": 131072, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "quantization": { "quant_algo": "MIXED_PRECISION", "kv_cache_quant_algo": "FP8" }, "mapping": { "world_size": 4, "tp_size": 4, "pp_size": 1 }, "head_size": 128, "intermediate_size": 28672, "position_embedding_type": "rope_gpt_neox", "share_embedding_table": false, "residual_mlp": false, "bias": false, "rotary_pct": 1.0, "rank": 3, "decoder": "llama", "rmsnorm": true, "lm_head_bias": false, "rotary_base": 1000000.0, "model_type": "llama" }