anrilombard committed on
Commit
5ad64e1
·
verified ·
1 Parent(s): 0b8fa40

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +19 -49
config.json CHANGED
@@ -1,58 +1,28 @@
1
  {
2
- "_name_or_path": "./result_and_evaluation/SSM_20M_little_dropout",
3
- "architectures": [
4
- "MambaModel"
5
- ],
6
- "attn_cfg": {},
7
- "attn_layer_idx": [],
8
- "bos_token_id": 1,
9
- "conv_kernel": 4,
10
- "d_intermediate": 0,
11
- "d_model": 512,
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 2,
14
- "expand": 2,
15
- "fused_add_norm": true,
16
- "hidden_act": "silu",
17
- "hidden_size": 768,
18
- "id2label": {
19
- "0": "LABEL_0"
20
- },
21
- "initializer_range": 0.1,
22
- "intermediate_size": 1536,
23
- "label2id": {
24
- "LABEL_0": 0
25
- },
26
- "layer_norm_epsilon": 1e-05,
27
  "model_type": "mamba",
 
 
28
  "n_layer": 6,
29
- "num_hidden_layers": 32,
30
- "pad_token_id": 3,
31
- "pad_vocab_size_multiple": 8,
32
- "rescale_prenorm_residual": false,
33
- "residual_in_fp32": true,
34
- "rms_norm": true,
35
  "ssm_cfg": {
36
  "layer": "Mamba2"
37
  },
38
- "state_size": 16,
39
- "summary_activation": "tanh",
40
- "summary_first_dropout": 0.4,
41
- "summary_hidden_size": 128,
42
- "summary_proj_to_labels": true,
 
 
 
 
 
 
 
43
  "summary_type": "cls_index",
44
  "summary_use_proj": true,
45
- "tie_embeddings": true,
46
- "time_step_floor": 0.0001,
47
- "time_step_init_scheme": "random",
48
- "time_step_max": 0.1,
49
- "time_step_min": 0.001,
50
- "time_step_rank": 48,
51
- "time_step_scale": 1.0,
52
- "torch_dtype": "float32",
53
- "transformers_version": "4.40.2",
54
- "use_bias": false,
55
- "use_cache": true,
56
- "use_conv_bias": true,
57
- "vocab_size": 1880
58
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "model_type": "mamba",
3
+ "d_model": 512,
4
+ "d_intermediate": 0,
5
  "n_layer": 6,
6
+ "vocab_size": 1880,
 
 
 
 
 
7
  "ssm_cfg": {
8
  "layer": "Mamba2"
9
  },
10
+ "attn_layer_idx": [],
11
+ "attn_cfg": {},
12
+ "rms_norm": true,
13
+ "residual_in_fp32": true,
14
+ "fused_add_norm": true,
15
+ "pad_vocab_size_multiple": 8,
16
+ "tie_embeddings": true,
17
+ "dropout_rate": 0.1,
18
+ "eos_token_id": 2,
19
+ "bos_token_id": 1,
20
+ "pad_token_id": 3,
21
+ "num_labels": 1,
22
  "summary_type": "cls_index",
23
  "summary_use_proj": true,
24
+ "summary_activation": "tanh",
25
+ "summary_proj_to_labels": true,
26
+ "summary_first_dropout": 0.4,
27
+ "summary_hidden_size": 128
 
 
 
 
 
 
 
 
 
28
  }