Upload GPTRefactForCausalLM

#3
by svakhreev - opened
config.json CHANGED
@@ -8,23 +8,24 @@
8
  "AutoConfig": "configuration_gpt_refact.GPTRefactConfig",
9
  "AutoModelForCausalLM": "modeling_gpt_refact.GPTRefactForCausalLM"
10
  },
11
- "bos_token_id": 0,
12
  "do_sample": true,
13
  "embd_pdrop": 0.1,
14
  "eos_token_id": 0,
15
  "initializer_range": 0.02,
16
  "layer_norm_epsilon": 1e-05,
17
  "model_type": "gpt_refact",
 
18
  "n_embd": 2048,
19
  "n_head": 32,
20
  "n_inner": null,
21
  "n_layer": 32,
22
- "n_positions": 1024,
23
  "resid_pdrop": 0.1,
24
  "scale_attention_softmax_in_fp32": false,
25
  "scale_attn_weights": true,
26
  "torch_dtype": "float32",
27
- "transformers_version": "4.29.2",
28
  "use_cache": true,
29
  "vocab_size": 49216
30
  }
 
8
  "AutoConfig": "configuration_gpt_refact.GPTRefactConfig",
9
  "AutoModelForCausalLM": "modeling_gpt_refact.GPTRefactForCausalLM"
10
  },
11
+ "bos_token_id": -1,
12
  "do_sample": true,
13
  "embd_pdrop": 0.1,
14
  "eos_token_id": 0,
15
  "initializer_range": 0.02,
16
  "layer_norm_epsilon": 1e-05,
17
  "model_type": "gpt_refact",
18
+ "multi_query": true,
19
  "n_embd": 2048,
20
  "n_head": 32,
21
  "n_inner": null,
22
  "n_layer": 32,
23
+ "n_positions": 4096,
24
  "resid_pdrop": 0.1,
25
  "scale_attention_softmax_in_fp32": false,
26
  "scale_attn_weights": true,
27
  "torch_dtype": "float32",
28
+ "transformers_version": "4.31.0",
29
  "use_cache": true,
30
  "vocab_size": 49216
31
  }
configuration_gpt_refact.py CHANGED
@@ -30,8 +30,10 @@ class GPTRefactConfig(PretrainedConfig):
30
  initializer_range=0.02,
31
  scale_attn_weights=True,
32
  use_cache=True,
33
- bos_token_id=0,
34
  eos_token_id=0,
 
 
35
  attention_softmax_in_fp32=False,
36
  scale_attention_softmax_in_fp32=False,
37
  **kwargs,
@@ -55,4 +57,6 @@ class GPTRefactConfig(PretrainedConfig):
55
  self.bos_token_id = bos_token_id
56
  self.eos_token_id = eos_token_id
57
 
58
- super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
 
 
 
30
  initializer_range=0.02,
31
  scale_attn_weights=True,
32
  use_cache=True,
33
+ bos_token_id=-1,
34
  eos_token_id=0,
35
+ max_position_embeddings: int = 2048,
36
+ multi_query: bool = True,
37
  attention_softmax_in_fp32=False,
38
  scale_attention_softmax_in_fp32=False,
39
  **kwargs,
 
57
  self.bos_token_id = bos_token_id
58
  self.eos_token_id = eos_token_id
59
 
60
+ self.multi_query = multi_query
61
+ self.max_position_embeddings = max_position_embeddings
62
+ super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 0,
4
  "do_sample": true,
5
  "eos_token_id": 0,
6
- "transformers_version": "4.29.2"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": -1,
4
  "do_sample": true,
5
  "eos_token_id": 0,
6
+ "transformers_version": "4.31.0"
7
  }
modeling_gpt_refact.py CHANGED
@@ -341,7 +341,7 @@ class GPTRefactModel(GPTRefactPreTrainedModel):
341
  super().__init__(config)
342
  self.embed_dim = config.hidden_size
343
  self.num_heads = config.num_attention_heads
344
-
345
  self.wte = nn.Embedding(config.vocab_size, self.embed_dim)
346
 
347
  self.h = nn.ModuleList([GPTRefactBlock(config, layer_idx=i) for i in range(config.num_hidden_layers)])
 
341
  super().__init__(config)
342
  self.embed_dim = config.hidden_size
343
  self.num_heads = config.num_attention_heads
344
+ self.multi_query = config.multi_query
345
  self.wte = nn.Embedding(config.vocab_size, self.embed_dim)
346
 
347
  self.h = nn.ModuleList([GPTRefactBlock(config, layer_idx=i) for i in range(config.num_hidden_layers)])
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0376542ae03467045dccef1614b92c6fe1f8cedc1882b16304de0179cf2e9e53
3
- size 6343465669
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2369c7e2228204ac8e0bc39c048d1e6349ce5f1bab8005a60bde0f0aa26ca73
3
+ size 6343461637