derek33125 committed
Commit f9a917e
1 parent: dd59a31

Update config.json

Files changed (1)
  1. config.json +18 -23
config.json CHANGED
@@ -1,50 +1,45 @@
 {
   "_name_or_path": "THUDM/glm-4-9b-chat",
-  "add_bias_linear": false,
-  "add_qkv_bias": true,
-  "apply_query_key_layer_scaling": true,
-  "apply_residual_connection_post_layernorm": false,
+  "model_type": "chatglm",
   "architectures": [
-    "ChatGLMForConditionalGeneration"
+    "ChatGLMModel"
   ],
-  "attention_dropout": 0.0,
-  "attention_softmax_in_fp32": true,
   "auto_map": {
     "AutoConfig": "configuration_chatglm.ChatGLMConfig",
     "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForCausalLM": "THUDM/glm-4-9b-chat--modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSeq2SeqLM": "THUDM/glm-4-9b-chat--modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSequenceClassification": "THUDM/glm-4-9b-chat--modeling_chatglm.ChatGLMForSequenceClassification"
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
   },
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "attn_implementation": "sdpa",
   "bias_dropout_fusion": true,
-  "classifier_dropout": null,
-  "eos_token_id": [
-    151329,
-    151336,
-    151338
-  ],
   "ffn_hidden_size": 13696,
   "fp32_residual_connection": false,
   "hidden_dropout": 0.0,
   "hidden_size": 4096,
   "kv_channels": 128,
   "layernorm_epsilon": 1.5625e-07,
-  "model_type": "chatglm",
   "multi_query_attention": true,
   "multi_query_group_num": 2,
   "num_attention_heads": 32,
   "num_hidden_layers": 40,
   "num_layers": 40,
+  "rope_ratio": 500,
   "original_rope": true,
-  "pad_token_id": 151329,
   "padded_vocab_size": 151552,
   "post_layer_norm": true,
   "rmsnorm": true,
-  "rope_ratio": 500,
   "seq_length": 131072,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.44.2",
   "use_cache": true,
-  "vocab_size": 151552
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.0",
+  "tie_word_embeddings": false,
+  "eos_token_id": [151329, 151336, 151338],
+  "pad_token_id": 151329
 }
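
For context, the auto_map block in this config is what the transformers Auto* classes resolve to when the repository is loaded with trust_remote_code=True; after this commit those entries point at the repo-local modeling_chatglm module instead of the "THUDM/glm-4-9b-chat--"-prefixed references. A minimal, hypothetical loading sketch (the repo id below is a placeholder, not taken from this commit):

# Hypothetical usage sketch, not part of the commit: the repo id is a
# placeholder for wherever this config.json is published.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "your-namespace/glm-4-9b-chat-finetune"  # placeholder (assumption)

# trust_remote_code=True is needed because auto_map routes the Auto* classes to
# the custom configuration_chatglm / modeling_chatglm files shipped in the repo.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    config=config,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in the config
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)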