{ "architectures": [ "Qwen2ForCausalLM" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_darwinlm.DarwinLMConfig", "AutoModelForCausalLM": "modeling_darwinlm.Qwen2ForCausalLM" }, "bos_token_id": 151643, "dim_each_mlp": { "0.mlp.down_proj": 6912, "1.mlp.down_proj": 1376, "10.mlp.down_proj": 4160, "11.mlp.down_proj": 5536, "12.mlp.down_proj": 6912, "13.mlp.down_proj": 6912, "14.mlp.down_proj": 6912, "15.mlp.down_proj": 8288, "16.mlp.down_proj": 6912, "17.mlp.down_proj": 9664, "18.mlp.down_proj": 4160, "19.mlp.down_proj": 6912, "2.mlp.down_proj": 2752, "20.mlp.down_proj": 6912, "21.mlp.down_proj": 5536, "22.mlp.down_proj": 5536, "23.mlp.down_proj": 6912, "24.mlp.down_proj": 4160, "25.mlp.down_proj": 8288, "26.mlp.down_proj": 9664, "27.mlp.down_proj": 6912, "28.mlp.down_proj": 8288, "29.mlp.down_proj": 6912, "3.mlp.down_proj": 2752, "30.mlp.down_proj": 5536, "31.mlp.down_proj": 8288, "32.mlp.down_proj": 6912, "33.mlp.down_proj": 6912, "34.mlp.down_proj": 9664, "35.mlp.down_proj": 9664, "36.mlp.down_proj": 9664, "37.mlp.down_proj": 6912, "38.mlp.down_proj": 8288, "39.mlp.down_proj": 6912, "4.mlp.down_proj": 5536, "40.mlp.down_proj": 9664, "41.mlp.down_proj": 6912, "42.mlp.down_proj": 9664, "43.mlp.down_proj": 6912, "44.mlp.down_proj": 8288, "45.mlp.down_proj": 8288, "46.mlp.down_proj": 9664, "47.mlp.down_proj": 8288, "5.mlp.down_proj": 6912, "6.mlp.down_proj": 5536, "7.mlp.down_proj": 4160, "8.mlp.down_proj": 6912, "9.mlp.down_proj": 6912 }, "eos_token_id": 151645, "heads_each_attn": { "0.self_attn.o_proj": [ 5, 7, 9, 21, 24, 25, 28, 32, 33, 34, 36, 37 ], "1.self_attn.o_proj": [ 1, 3, 10, 11, 12, 13, 14, 20, 21, 22, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "10.self_attn.o_proj": [ 0, 4, 7, 8, 12, 14, 15, 16, 17, 18, 20, 22, 23, 25, 28, 30, 33, 35, 38, 39 ], "11.self_attn.o_proj": [ 0, 12, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 33, 34, 37, 38, 39 ], "12.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 25, 28, 29, 30, 31, 33, 35, 36, 37, 38, 39 ], "13.self_attn.o_proj": [ 1, 4, 5, 7, 15, 17, 19, 23, 26, 27, 28, 29, 35, 36, 37, 38 ], "14.self_attn.o_proj": [ 0, 1, 3, 5, 6, 8, 9, 10, 11, 12, 14, 15, 17, 19, 21, 22, 24, 25, 28, 29, 31, 32, 33, 35, 36, 37, 38, 39 ], "15.self_attn.o_proj": [ 2, 4, 5, 6, 9, 11, 17, 21, 22, 23, 26, 30, 31, 32, 34, 35 ], "16.self_attn.o_proj": [ 1, 3, 4, 7, 9, 12, 18, 19, 20, 21, 22, 23, 25, 28, 30, 31, 35, 36, 38, 39 ], "17.self_attn.o_proj": [ 1, 3, 37, 39 ], "18.self_attn.o_proj": [ 1, 6, 7, 9, 11, 12, 13, 14, 19, 22, 23, 30, 34, 35, 37, 39 ], "19.self_attn.o_proj": [ 0, 1, 4, 6, 7, 8, 11, 12, 15, 16, 18, 20, 21, 22, 23, 25, 26, 28, 29, 30, 31, 32, 33, 34 ], "2.self_attn.o_proj": [ 0, 1, 2, 3, 5, 6, 7, 8, 9, 12, 15, 21, 23, 25, 26, 29, 30, 31, 32, 33, 34, 35, 37, 38 ], "20.self_attn.o_proj": [ 7, 9, 13, 16, 17, 18, 19, 20, 21, 22, 24, 30, 31, 35, 37, 38 ], "21.self_attn.o_proj": [ 0, 2, 4, 5, 6, 9, 10, 11, 13, 20, 21, 22, 26, 27, 30, 33, 34, 35, 36, 39 ], "22.self_attn.o_proj": [ 1, 3, 5, 6, 8, 11, 12, 13, 14, 16, 18, 19, 20, 22, 26, 27, 28, 32, 35, 37 ], "23.self_attn.o_proj": [ 0, 1, 11, 12, 15, 16, 18, 19, 21, 22, 27, 28, 33, 35, 36, 39 ], "24.self_attn.o_proj": [ 1, 2, 5, 6, 7, 10, 12, 14, 16, 18, 20, 22, 23, 25, 26, 27, 29, 30, 31, 32, 33, 35, 37, 38 ], "25.self_attn.o_proj": [ 0, 1, 2, 3, 16, 27, 28, 39 ], "26.self_attn.o_proj": [ 0, 1, 2, 3, 6, 7, 8, 10, 11, 12, 16, 17, 18, 20, 21, 22, 25, 26, 27, 31, 33, 34, 35, 38 ], "27.self_attn.o_proj": [ 0, 2, 4, 6, 8, 11, 12, 13, 14, 22, 23, 25, 26, 27, 28, 31, 32, 33, 37, 38 ], "28.self_attn.o_proj": [ 0, 2, 3, 5, 7, 8, 10, 14, 17, 18, 19, 20, 22, 25, 26, 29, 30, 31, 33, 34, 36, 37, 38, 39 ], "29.self_attn.o_proj": [ 13, 25, 29, 34 ], "3.self_attn.o_proj": [ 0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 36, 37 ], "30.self_attn.o_proj": [ 0, 2, 4, 6, 11, 14, 16, 18, 20, 21, 23, 25, 28, 29, 31, 39 ], "31.self_attn.o_proj": [ 2, 3, 4, 5, 8, 12, 13, 15, 17, 19, 20, 21, 26, 28, 30, 31, 33, 36, 37, 38 ], "32.self_attn.o_proj": [ 1, 8, 11, 12, 14, 16, 19, 29, 30, 31, 32, 36 ], "33.self_attn.o_proj": [ 0, 3, 4, 5, 8, 12, 17, 22, 23, 27, 29, 30, 31, 34, 35, 38 ], "34.self_attn.o_proj": [ 1, 3, 4, 6, 7, 8, 9, 10, 13, 15, 20, 21, 23, 25, 27, 29, 31, 32, 36, 37 ], "35.self_attn.o_proj": [ 0, 1, 2, 5, 6, 9, 12, 13, 14, 16, 17, 19, 21, 22, 27, 28, 29, 31, 32, 33, 34, 35, 36, 39 ], "36.self_attn.o_proj": [ 0, 3, 4, 5, 9, 13, 14, 15, 16, 17, 18, 20, 22, 24, 25, 26, 28, 29, 32, 34, 36, 37, 38, 39 ], "37.self_attn.o_proj": [ 0, 1, 5, 11, 12, 13, 15, 19, 21, 24, 25, 27, 29, 30, 31, 35 ], "38.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 9, 14, 15, 18, 19, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 37, 38 ], "39.self_attn.o_proj": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "4.self_attn.o_proj": [ 1, 2, 4, 6, 7, 10, 11, 12, 13, 14, 15, 17, 18, 21, 22, 30, 32, 33, 34, 37 ], "40.self_attn.o_proj": [ 0, 3, 13, 14, 15, 17, 25, 26, 27, 29, 31, 33, 34, 36, 38, 39 ], "41.self_attn.o_proj": [ 1, 10, 12, 13, 15, 19, 20, 21, 23, 26, 27, 29, 32, 33, 34, 39 ], "42.self_attn.o_proj": [ 0, 2, 3, 4, 5, 6, 9, 10, 16, 19, 20, 22, 26, 27, 32, 33, 34, 35, 36, 38 ], "43.self_attn.o_proj": [ 3, 6, 7, 8, 9, 10, 12, 13, 14, 17, 18, 19, 20, 24, 31, 38 ], "44.self_attn.o_proj": [ 0, 2, 6, 9, 10, 11, 14, 20, 22, 23, 25, 26, 27, 28, 31, 32, 34, 36, 37, 38 ], "45.self_attn.o_proj": [ 1, 4, 5, 6, 7, 8, 11, 18, 21, 22, 23, 24, 25, 28, 32, 33, 34, 35, 36, 39 ], "46.self_attn.o_proj": [ 0, 1, 3, 4, 5, 6, 9, 20, 21, 22, 23, 25, 26, 27, 29, 31, 33, 34, 35, 37 ], "47.self_attn.o_proj": [ 0, 2, 3, 5, 6, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 22, 25, 26, 27, 29, 33, 35, 36, 38 ], "5.self_attn.o_proj": [ 0, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 39 ], "6.self_attn.o_proj": [ 0, 1, 2, 4, 5, 6, 7, 9, 11, 15, 16, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 32, 33, 34, 35, 38, 39 ], "7.self_attn.o_proj": [ 1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 16, 19, 20, 21, 22, 23, 34, 35, 36, 38 ], "8.self_attn.o_proj": [ 1, 2, 3, 4, 5, 8, 10, 11, 12, 14, 15, 17, 19, 21, 23, 25, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39 ], "9.self_attn.o_proj": [ 8, 10, 11, 13, 14, 15, 17, 18, 22, 25, 26, 28, 29, 30, 32, 39 ] }, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "kv_ignore": true, "max_position_embeddings": 32768, "max_window_layers": 70, "model_type": "darwinlm", "num_attention_heads": 40, "num_hidden_layers": 48, "num_key_value_heads": 8, "rms_norm_eps": 1e-06, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.45.0.dev0", "use_cache": true, "use_sliding_window": false, "vocab_size": 152064 }