hsuvaskakoty commited on
Commit
f7417b6
1 Parent(s): d07a955

Upload 15 files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/lustre/projects/cardiff/bart/bart-base",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "add_bias_logits": false,
@@ -69,7 +69,7 @@
69
  }
70
  },
71
  "torch_dtype": "float32",
72
- "transformers_version": "4.26.0.dev0",
73
  "use_cache": true,
74
  "vocab_size": 50269
75
  }
 
1
  {
2
+ "_name_or_path": "facebook/bart-base",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "add_bias_logits": false,
 
69
  }
70
  },
71
  "torch_dtype": "float32",
72
+ "transformers_version": "4.26.1",
73
  "use_cache": true,
74
  "vocab_size": 50269
75
  }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "decoder_start_token_id": 2,
5
+ "early_stopping": true,
6
+ "eos_token_id": 2,
7
+ "forced_bos_token_id": 0,
8
+ "forced_eos_token_id": 2,
9
+ "no_repeat_ngram_size": 3,
10
+ "num_beams": 4,
11
+ "pad_token_id": 1,
12
+ "transformers_version": "4.26.1"
13
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:831134c2202a73897debd659329db91c6bcdce2a187b9af3d0382bef9355d82c
3
+ size 1115539909
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b04438e11c883d15e15c41e3e911503c102541d6137ccce6333da627cdc3d1
3
  size 557983517
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3437249cda3a802a24c3440dbf572b9af6927e09e118f652efca8edd2dcf9a77
3
  size 557983517
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098e1d40526cacd3740caab46d2b7ba1af48c6d4b2f48a79e80bed7577755a7d
3
+ size 14575
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa1a21c5a9c0d61520eb2233b2e5d0b3a2d290e2966820d403c972921e8aa71
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d6d1bcdccb678167f0a57d9e8c5214e64f91d31d24c2a46723c1904e4bb722
3
+ size 627
tokenizer_config.json CHANGED
@@ -5,8 +5,8 @@
5
  "eos_token": "</s>",
6
  "errors": "replace",
7
  "mask_token": "<mask>",
8
- "model_max_length": 1000000000000000019884624838656,
9
- "name_or_path": "/lustre/projects/cardiff/bart/bart-base",
10
  "pad_token": "<pad>",
11
  "sep_token": "</s>",
12
  "special_tokens_map_file": null,
 
5
  "eos_token": "</s>",
6
  "errors": "replace",
7
  "mask_token": "<mask>",
8
+ "model_max_length": 1024,
9
+ "name_or_path": "facebook/bart-base",
10
  "pad_token": "<pad>",
11
  "sep_token": "</s>",
12
  "special_tokens_map_file": null,
trainer_state.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.4608,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.99,
12
+ "learning_rate": 4.800000000000001e-06,
13
+ "loss": 10.7159,
14
+ "step": 97
15
+ },
16
+ {
17
+ "epoch": 0.99,
18
+ "eval_loss": 7.062511920928955,
19
+ "eval_runtime": 16.5468,
20
+ "eval_samples_per_second": 604.347,
21
+ "eval_steps_per_second": 37.772,
22
+ "step": 97
23
+ },
24
+ {
25
+ "epoch": 1.99,
26
+ "learning_rate": 9.65e-06,
27
+ "loss": 5.0371,
28
+ "step": 194
29
+ },
30
+ {
31
+ "epoch": 1.99,
32
+ "eval_loss": 2.8384995460510254,
33
+ "eval_runtime": 16.0338,
34
+ "eval_samples_per_second": 623.684,
35
+ "eval_steps_per_second": 38.98,
36
+ "step": 194
37
+ },
38
+ {
39
+ "epoch": 2.99,
40
+ "learning_rate": 1.4449999999999999e-05,
41
+ "loss": 2.4865,
42
+ "step": 291
43
+ },
44
+ {
45
+ "epoch": 2.99,
46
+ "eval_loss": 1.1602892875671387,
47
+ "eval_runtime": 16.2241,
48
+ "eval_samples_per_second": 616.367,
49
+ "eval_steps_per_second": 38.523,
50
+ "step": 291
51
+ },
52
+ {
53
+ "epoch": 3.99,
54
+ "learning_rate": 1.93e-05,
55
+ "loss": 1.0806,
56
+ "step": 388
57
+ },
58
+ {
59
+ "epoch": 3.99,
60
+ "eval_loss": 0.7614782452583313,
61
+ "eval_runtime": 16.2665,
62
+ "eval_samples_per_second": 614.761,
63
+ "eval_steps_per_second": 38.423,
64
+ "step": 388
65
+ },
66
+ {
67
+ "epoch": 4.99,
68
+ "learning_rate": 2.415e-05,
69
+ "loss": 0.8412,
70
+ "step": 485
71
+ },
72
+ {
73
+ "epoch": 4.99,
74
+ "eval_loss": 0.726601243019104,
75
+ "eval_runtime": 16.7852,
76
+ "eval_samples_per_second": 595.764,
77
+ "eval_steps_per_second": 37.235,
78
+ "step": 485
79
+ },
80
+ {
81
+ "epoch": 5.99,
82
+ "learning_rate": 2.9e-05,
83
+ "loss": 0.802,
84
+ "step": 582
85
+ },
86
+ {
87
+ "epoch": 5.99,
88
+ "eval_loss": 0.7105833292007446,
89
+ "eval_runtime": 16.763,
90
+ "eval_samples_per_second": 596.553,
91
+ "eval_steps_per_second": 37.285,
92
+ "step": 582
93
+ },
94
+ {
95
+ "epoch": 6.99,
96
+ "learning_rate": 3.385e-05,
97
+ "loss": 0.7776,
98
+ "step": 679
99
+ },
100
+ {
101
+ "epoch": 6.99,
102
+ "eval_loss": 0.6986653804779053,
103
+ "eval_runtime": 16.7875,
104
+ "eval_samples_per_second": 595.68,
105
+ "eval_steps_per_second": 37.23,
106
+ "step": 679
107
+ },
108
+ {
109
+ "epoch": 7.99,
110
+ "learning_rate": 3.8700000000000006e-05,
111
+ "loss": 0.7568,
112
+ "step": 776
113
+ },
114
+ {
115
+ "epoch": 7.99,
116
+ "eval_loss": 0.687833309173584,
117
+ "eval_runtime": 16.4861,
118
+ "eval_samples_per_second": 606.571,
119
+ "eval_steps_per_second": 37.911,
120
+ "step": 776
121
+ },
122
+ {
123
+ "epoch": 8.99,
124
+ "learning_rate": 4.355e-05,
125
+ "loss": 0.7382,
126
+ "step": 873
127
+ },
128
+ {
129
+ "epoch": 8.99,
130
+ "eval_loss": 0.6793721914291382,
131
+ "eval_runtime": 16.6577,
132
+ "eval_samples_per_second": 600.323,
133
+ "eval_steps_per_second": 37.52,
134
+ "step": 873
135
+ },
136
+ {
137
+ "epoch": 9.99,
138
+ "learning_rate": 4.8400000000000004e-05,
139
+ "loss": 0.7202,
140
+ "step": 970
141
+ },
142
+ {
143
+ "epoch": 9.99,
144
+ "eval_loss": 0.6695303320884705,
145
+ "eval_runtime": 16.3181,
146
+ "eval_samples_per_second": 612.817,
147
+ "eval_steps_per_second": 38.301,
148
+ "step": 970
149
+ },
150
+ {
151
+ "epoch": 10.99,
152
+ "learning_rate": 4.6542553191489364e-05,
153
+ "loss": 0.7022,
154
+ "step": 1067
155
+ },
156
+ {
157
+ "epoch": 10.99,
158
+ "eval_loss": 0.6605609059333801,
159
+ "eval_runtime": 16.748,
160
+ "eval_samples_per_second": 597.086,
161
+ "eval_steps_per_second": 37.318,
162
+ "step": 1067
163
+ },
164
+ {
165
+ "epoch": 11.99,
166
+ "learning_rate": 4.138297872340426e-05,
167
+ "loss": 0.6844,
168
+ "step": 1164
169
+ },
170
+ {
171
+ "epoch": 11.99,
172
+ "eval_loss": 0.6539720892906189,
173
+ "eval_runtime": 16.7037,
174
+ "eval_samples_per_second": 598.671,
175
+ "eval_steps_per_second": 37.417,
176
+ "step": 1164
177
+ },
178
+ {
179
+ "epoch": 12.99,
180
+ "learning_rate": 3.622340425531915e-05,
181
+ "loss": 0.669,
182
+ "step": 1261
183
+ },
184
+ {
185
+ "epoch": 12.99,
186
+ "eval_loss": 0.6471053957939148,
187
+ "eval_runtime": 16.5172,
188
+ "eval_samples_per_second": 605.43,
189
+ "eval_steps_per_second": 37.839,
190
+ "step": 1261
191
+ },
192
+ {
193
+ "epoch": 13.99,
194
+ "learning_rate": 3.1063829787234046e-05,
195
+ "loss": 0.6562,
196
+ "step": 1358
197
+ },
198
+ {
199
+ "epoch": 13.99,
200
+ "eval_loss": 0.6417160034179688,
201
+ "eval_runtime": 16.0822,
202
+ "eval_samples_per_second": 621.804,
203
+ "eval_steps_per_second": 38.863,
204
+ "step": 1358
205
+ },
206
+ {
207
+ "epoch": 14.99,
208
+ "learning_rate": 2.590425531914894e-05,
209
+ "loss": 0.6453,
210
+ "step": 1455
211
+ },
212
+ {
213
+ "epoch": 14.99,
214
+ "eval_loss": 0.6380994915962219,
215
+ "eval_runtime": 16.4132,
216
+ "eval_samples_per_second": 609.265,
217
+ "eval_steps_per_second": 38.079,
218
+ "step": 1455
219
+ }
220
+ ],
221
+ "max_steps": 1940,
222
+ "num_train_epochs": 20,
223
+ "total_flos": 5.89188339597312e+16,
224
+ "trial_name": null,
225
+ "trial_params": null
226
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:207f7859517470b9d297e0ed63cd5354a4f56b93e0d4b4ac80e96a3e0f517bc4
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1a8fc4f5a85f1317b5de75f286f84c7011e5753082c8d97b2bc1ecea1cf191
3
  size 3643