tomvoelker committed on
Commit
b7df336
·
verified ·
1 Parent(s): 77bd4b7

Training in progress, step 10000

Browse files
config.json CHANGED
@@ -3,7 +3,7 @@
3
  "EncoderDecoderModel"
4
  ],
5
  "decoder": {
6
- "_name_or_path": "gpt2-medium",
7
  "activation_function": "gelu_new",
8
  "add_cross_attention": true,
9
  "architectures": [
@@ -43,12 +43,11 @@
43
  "min_length": 0,
44
  "model_type": "gpt2",
45
  "n_ctx": 1024,
46
- "n_embd": 1024,
47
- "n_head": 16,
48
  "n_inner": null,
49
- "n_layer": 24,
50
  "n_positions": 1024,
51
- "n_special": 0,
52
  "no_repeat_ngram_size": 0,
53
  "num_beam_groups": 1,
54
  "num_beams": 1,
@@ -57,7 +56,6 @@
57
  "output_hidden_states": false,
58
  "output_scores": false,
59
  "pad_token_id": null,
60
- "predict_special_tokens": true,
61
  "prefix": null,
62
  "problem_type": null,
63
  "pruned_heads": {},
@@ -99,7 +97,7 @@
99
  "decoder_start_token_id": 50256,
100
  "early_stopping": true,
101
  "encoder": {
102
- "_name_or_path": "gpt2-medium",
103
  "activation_function": "gelu_new",
104
  "add_cross_attention": false,
105
  "architectures": [
@@ -139,12 +137,11 @@
139
  "min_length": 0,
140
  "model_type": "gpt2",
141
  "n_ctx": 1024,
142
- "n_embd": 1024,
143
- "n_head": 16,
144
  "n_inner": null,
145
- "n_layer": 24,
146
  "n_positions": 1024,
147
- "n_special": 0,
148
  "no_repeat_ngram_size": 0,
149
  "num_beam_groups": 1,
150
  "num_beams": 1,
@@ -153,7 +150,6 @@
153
  "output_hidden_states": false,
154
  "output_scores": false,
155
  "pad_token_id": null,
156
- "predict_special_tokens": true,
157
  "prefix": null,
158
  "problem_type": null,
159
  "pruned_heads": {},
 
3
  "EncoderDecoderModel"
4
  ],
5
  "decoder": {
6
+ "_name_or_path": "gpt2",
7
  "activation_function": "gelu_new",
8
  "add_cross_attention": true,
9
  "architectures": [
 
43
  "min_length": 0,
44
  "model_type": "gpt2",
45
  "n_ctx": 1024,
46
+ "n_embd": 768,
47
+ "n_head": 12,
48
  "n_inner": null,
49
+ "n_layer": 12,
50
  "n_positions": 1024,
 
51
  "no_repeat_ngram_size": 0,
52
  "num_beam_groups": 1,
53
  "num_beams": 1,
 
56
  "output_hidden_states": false,
57
  "output_scores": false,
58
  "pad_token_id": null,
 
59
  "prefix": null,
60
  "problem_type": null,
61
  "pruned_heads": {},
 
97
  "decoder_start_token_id": 50256,
98
  "early_stopping": true,
99
  "encoder": {
100
+ "_name_or_path": "gpt2",
101
  "activation_function": "gelu_new",
102
  "add_cross_attention": false,
103
  "architectures": [
 
137
  "min_length": 0,
138
  "model_type": "gpt2",
139
  "n_ctx": 1024,
140
+ "n_embd": 768,
141
+ "n_head": 12,
142
  "n_inner": null,
143
+ "n_layer": 12,
144
  "n_positions": 1024,
 
145
  "no_repeat_ngram_size": 0,
146
  "num_beam_groups": 1,
147
  "num_beams": 1,
 
150
  "output_hidden_states": false,
151
  "output_scores": false,
152
  "pad_token_id": null,
 
153
  "prefix": null,
154
  "problem_type": null,
155
  "pruned_heads": {},
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:288b077c28664ad5927da68b425eb9443783dd3cfcce26158bba6c8250a019ce
3
- size 3241914192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:156baf145c6c3138bb884a0697f63f3ed8eaf7c80e9aac30595fccf2f0835c85
3
+ size 1109028072
runs/Mar12_19-24-38_jn020/events.out.tfevents.1741807628.jn020.398992.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80517c41d1357f5739a2d8930bf5f45eed02cc213e2d7a48b85117681a820ef8
3
- size 17830
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d765591789b9f7b6e25f78c782cdc7a40931653af3e449479d1299a1b753e63b
3
+ size 18726
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4ee502e259362ba17c64127370d3ac7405bbaafc9a77a5bc47fea3fcee1a99a
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ec21e70451935fe7ccf4f25e474b7a4ce1a008a140f12928eefecbc9a40e33
3
  size 5432