Prasann15479 commited on
Commit
b0d389d
·
verified ·
1 Parent(s): 921cfff

Upload 12 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "architectures": [
4
+ "DebertaV2ForMultipleChoice"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4096,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 1024,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.33.1",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f3a9520e6ea139162eaac1cfb3aabcc1cbd12aba9c9f6225cb361006c83863c
3
+ size 617320389
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b4689a2a1b1f3a1b0456d1362de82eb814c802004fa164275cf3565e1672b
3
+ size 1740387701
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b0e5ba20b05bd7d2250c478388b604d376884dd33fc0a9241635a5db65c8ff
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f603b3addff29dbecae32d0ae6971e16dd9bf0828145ba9e4f197635c7d613
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "split_by_punct": false,
13
+ "tokenizer_class": "DebertaV2Tokenizer",
14
+ "unk_token": "[UNK]",
15
+ "vocab_type": "spm"
16
+ }
trainer_state.json ADDED
@@ -0,0 +1,1159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9083333333333333,
3
+ "best_model_checkpoint": "deverta_60k_fillna_val100/checkpoint-7600",
4
+ "epoch": 1.9984551415846303,
5
+ "eval_steps": 100,
6
+ "global_step": 7600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1.717791411042945e-06,
14
+ "loss": 1.615,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "eval_loss": 1.6094433069229126,
20
+ "eval_map@3": 0.36999999999999994,
21
+ "eval_runtime": 3.9269,
22
+ "eval_samples_per_second": 25.465,
23
+ "eval_steps_per_second": 25.465,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.05,
28
+ "learning_rate": 3.470639789658195e-06,
29
+ "loss": 1.615,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 0.05,
34
+ "eval_loss": 1.6090234518051147,
35
+ "eval_map@3": 0.5433333333333333,
36
+ "eval_runtime": 3.9569,
37
+ "eval_samples_per_second": 25.272,
38
+ "eval_steps_per_second": 25.272,
39
+ "step": 200
40
+ },
41
+ {
42
+ "epoch": 0.08,
43
+ "learning_rate": 5.2234881682734454e-06,
44
+ "loss": 1.6106,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.08,
49
+ "eval_loss": 1.6112011671066284,
50
+ "eval_map@3": 0.6250000000000001,
51
+ "eval_runtime": 4.2374,
52
+ "eval_samples_per_second": 23.6,
53
+ "eval_steps_per_second": 23.6,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 0.11,
58
+ "learning_rate": 6.976336546888695e-06,
59
+ "loss": 1.4954,
60
+ "step": 400
61
+ },
62
+ {
63
+ "epoch": 0.11,
64
+ "eval_loss": 1.2830108404159546,
65
+ "eval_map@3": 0.755,
66
+ "eval_runtime": 4.1043,
67
+ "eval_samples_per_second": 24.364,
68
+ "eval_steps_per_second": 24.364,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 0.13,
73
+ "learning_rate": 8.729184925503945e-06,
74
+ "loss": 1.0519,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 0.13,
79
+ "eval_loss": 0.8515128493309021,
80
+ "eval_map@3": 0.8183333333333335,
81
+ "eval_runtime": 4.0929,
82
+ "eval_samples_per_second": 24.432,
83
+ "eval_steps_per_second": 24.432,
84
+ "step": 500
85
+ },
86
+ {
87
+ "epoch": 0.16,
88
+ "learning_rate": 1.0482033304119195e-05,
89
+ "loss": 0.8846,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 0.16,
94
+ "eval_loss": 0.7408291101455688,
95
+ "eval_map@3": 0.8366666666666667,
96
+ "eval_runtime": 4.1086,
97
+ "eval_samples_per_second": 24.339,
98
+ "eval_steps_per_second": 24.339,
99
+ "step": 600
100
+ },
101
+ {
102
+ "epoch": 0.18,
103
+ "learning_rate": 1.2234881682734446e-05,
104
+ "loss": 0.8651,
105
+ "step": 700
106
+ },
107
+ {
108
+ "epoch": 0.18,
109
+ "eval_loss": 0.7649513483047485,
110
+ "eval_map@3": 0.8633333333333334,
111
+ "eval_runtime": 4.0899,
112
+ "eval_samples_per_second": 24.45,
113
+ "eval_steps_per_second": 24.45,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 0.21,
118
+ "learning_rate": 1.3987730061349694e-05,
119
+ "loss": 0.8166,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 0.21,
124
+ "eval_loss": 0.7141894698143005,
125
+ "eval_map@3": 0.8583333333333334,
126
+ "eval_runtime": 4.1369,
127
+ "eval_samples_per_second": 24.173,
128
+ "eval_steps_per_second": 24.173,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 0.24,
133
+ "learning_rate": 1.5740578439964945e-05,
134
+ "loss": 0.8283,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.24,
139
+ "eval_loss": 0.6661826968193054,
140
+ "eval_map@3": 0.845,
141
+ "eval_runtime": 4.1543,
142
+ "eval_samples_per_second": 24.071,
143
+ "eval_steps_per_second": 24.071,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 0.26,
148
+ "learning_rate": 1.7493426818580194e-05,
149
+ "loss": 0.7593,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 0.26,
154
+ "eval_loss": 0.6564372181892395,
155
+ "eval_map@3": 0.8483333333333334,
156
+ "eval_runtime": 4.076,
157
+ "eval_samples_per_second": 24.534,
158
+ "eval_steps_per_second": 24.534,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 0.29,
163
+ "learning_rate": 1.9246275197195444e-05,
164
+ "loss": 0.7401,
165
+ "step": 1100
166
+ },
167
+ {
168
+ "epoch": 0.29,
169
+ "eval_loss": 0.6686395406723022,
170
+ "eval_map@3": 0.85,
171
+ "eval_runtime": 4.0963,
172
+ "eval_samples_per_second": 24.412,
173
+ "eval_steps_per_second": 24.412,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 0.32,
178
+ "learning_rate": 1.9998531353530498e-05,
179
+ "loss": 0.7791,
180
+ "step": 1200
181
+ },
182
+ {
183
+ "epoch": 0.32,
184
+ "eval_loss": 0.608403742313385,
185
+ "eval_map@3": 0.875,
186
+ "eval_runtime": 4.1272,
187
+ "eval_samples_per_second": 24.229,
188
+ "eval_steps_per_second": 24.229,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 0.34,
193
+ "learning_rate": 1.998860488949288e-05,
194
+ "loss": 0.7868,
195
+ "step": 1300
196
+ },
197
+ {
198
+ "epoch": 0.34,
199
+ "eval_loss": 0.6702239513397217,
200
+ "eval_map@3": 0.87,
201
+ "eval_runtime": 4.0746,
202
+ "eval_samples_per_second": 24.542,
203
+ "eval_steps_per_second": 24.542,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 0.37,
208
+ "learning_rate": 1.9969323231985812e-05,
209
+ "loss": 0.7567,
210
+ "step": 1400
211
+ },
212
+ {
213
+ "epoch": 0.37,
214
+ "eval_loss": 0.6087381839752197,
215
+ "eval_map@3": 0.8683333333333334,
216
+ "eval_runtime": 3.8341,
217
+ "eval_samples_per_second": 26.081,
218
+ "eval_steps_per_second": 26.081,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 0.39,
223
+ "learning_rate": 1.994070443995129e-05,
224
+ "loss": 0.7488,
225
+ "step": 1500
226
+ },
227
+ {
228
+ "epoch": 0.39,
229
+ "eval_loss": 0.6629943251609802,
230
+ "eval_map@3": 0.8616666666666667,
231
+ "eval_runtime": 4.0681,
232
+ "eval_samples_per_second": 24.581,
233
+ "eval_steps_per_second": 24.581,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 0.42,
238
+ "learning_rate": 1.990277531736638e-05,
239
+ "loss": 0.7833,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 0.42,
244
+ "eval_loss": 0.592115581035614,
245
+ "eval_map@3": 0.8566666666666667,
246
+ "eval_runtime": 4.0797,
247
+ "eval_samples_per_second": 24.512,
248
+ "eval_steps_per_second": 24.512,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 0.45,
253
+ "learning_rate": 1.9855571388138954e-05,
254
+ "loss": 0.7706,
255
+ "step": 1700
256
+ },
257
+ {
258
+ "epoch": 0.45,
259
+ "eval_loss": 0.6199390888214111,
260
+ "eval_map@3": 0.8733333333333333,
261
+ "eval_runtime": 3.9265,
262
+ "eval_samples_per_second": 25.468,
263
+ "eval_steps_per_second": 25.468,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 0.47,
268
+ "learning_rate": 1.979913686283649e-05,
269
+ "loss": 0.7422,
270
+ "step": 1800
271
+ },
272
+ {
273
+ "epoch": 0.47,
274
+ "eval_loss": 0.6019750237464905,
275
+ "eval_map@3": 0.8833333333333334,
276
+ "eval_runtime": 3.9986,
277
+ "eval_samples_per_second": 25.009,
278
+ "eval_steps_per_second": 25.009,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 0.5,
283
+ "learning_rate": 1.9733524597279037e-05,
284
+ "loss": 0.7129,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 0.5,
289
+ "eval_loss": 0.5683853626251221,
290
+ "eval_map@3": 0.8766666666666666,
291
+ "eval_runtime": 4.0628,
292
+ "eval_samples_per_second": 24.613,
293
+ "eval_steps_per_second": 24.613,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 0.53,
298
+ "learning_rate": 1.965879604303516e-05,
299
+ "loss": 0.753,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 0.53,
304
+ "eval_loss": 0.536411464214325,
305
+ "eval_map@3": 0.8733333333333333,
306
+ "eval_runtime": 3.989,
307
+ "eval_samples_per_second": 25.069,
308
+ "eval_steps_per_second": 25.069,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 0.55,
313
+ "learning_rate": 1.957502118986723e-05,
314
+ "loss": 0.7057,
315
+ "step": 2100
316
+ },
317
+ {
318
+ "epoch": 0.55,
319
+ "eval_loss": 0.5491364002227783,
320
+ "eval_map@3": 0.8583333333333333,
321
+ "eval_runtime": 3.9154,
322
+ "eval_samples_per_second": 25.54,
323
+ "eval_steps_per_second": 25.54,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 0.58,
328
+ "learning_rate": 1.9482278500179953e-05,
329
+ "loss": 0.73,
330
+ "step": 2200
331
+ },
332
+ {
333
+ "epoch": 0.58,
334
+ "eval_loss": 0.5584819316864014,
335
+ "eval_map@3": 0.8766666666666667,
336
+ "eval_runtime": 3.9164,
337
+ "eval_samples_per_second": 25.534,
338
+ "eval_steps_per_second": 25.534,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 0.6,
343
+ "learning_rate": 1.9380654835533523e-05,
344
+ "loss": 0.6898,
345
+ "step": 2300
346
+ },
347
+ {
348
+ "epoch": 0.6,
349
+ "eval_loss": 0.5588586330413818,
350
+ "eval_map@3": 0.88,
351
+ "eval_runtime": 3.8846,
352
+ "eval_samples_per_second": 25.743,
353
+ "eval_steps_per_second": 25.743,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 0.63,
358
+ "learning_rate": 1.9270245375290263e-05,
359
+ "loss": 0.6842,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 0.63,
364
+ "eval_loss": 0.573777437210083,
365
+ "eval_map@3": 0.8683333333333334,
366
+ "eval_runtime": 4.0374,
367
+ "eval_samples_per_second": 24.768,
368
+ "eval_steps_per_second": 24.768,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 0.66,
373
+ "learning_rate": 1.9151153527470895e-05,
374
+ "loss": 0.728,
375
+ "step": 2500
376
+ },
377
+ {
378
+ "epoch": 0.66,
379
+ "eval_loss": 0.5937029123306274,
380
+ "eval_map@3": 0.8866666666666666,
381
+ "eval_runtime": 3.9109,
382
+ "eval_samples_per_second": 25.57,
383
+ "eval_steps_per_second": 25.57,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 0.68,
388
+ "learning_rate": 1.902349083190399e-05,
389
+ "loss": 0.7047,
390
+ "step": 2600
391
+ },
392
+ {
393
+ "epoch": 0.68,
394
+ "eval_loss": 0.5620361566543579,
395
+ "eval_map@3": 0.885,
396
+ "eval_runtime": 3.9171,
397
+ "eval_samples_per_second": 25.529,
398
+ "eval_steps_per_second": 25.529,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 0.71,
403
+ "learning_rate": 1.888737685575924e-05,
404
+ "loss": 0.7554,
405
+ "step": 2700
406
+ },
407
+ {
408
+ "epoch": 0.71,
409
+ "eval_loss": 0.5551219582557678,
410
+ "eval_map@3": 0.8883333333333333,
411
+ "eval_runtime": 3.9451,
412
+ "eval_samples_per_second": 25.348,
413
+ "eval_steps_per_second": 25.348,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 0.74,
418
+ "learning_rate": 1.874293908156247e-05,
419
+ "loss": 0.7252,
420
+ "step": 2800
421
+ },
422
+ {
423
+ "epoch": 0.74,
424
+ "eval_loss": 0.5565034747123718,
425
+ "eval_map@3": 0.8983333333333331,
426
+ "eval_runtime": 4.1016,
427
+ "eval_samples_per_second": 24.381,
428
+ "eval_steps_per_second": 24.381,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 0.76,
433
+ "learning_rate": 1.8590312787797196e-05,
434
+ "loss": 0.7245,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 0.76,
439
+ "eval_loss": 0.5758660435676575,
440
+ "eval_map@3": 0.8816666666666667,
441
+ "eval_runtime": 3.9367,
442
+ "eval_samples_per_second": 25.402,
443
+ "eval_steps_per_second": 25.402,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 0.79,
448
+ "learning_rate": 1.8429640922204614e-05,
449
+ "loss": 0.725,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 0.79,
454
+ "eval_loss": 0.5699021816253662,
455
+ "eval_map@3": 0.8883333333333333,
456
+ "eval_runtime": 3.8274,
457
+ "eval_samples_per_second": 26.128,
458
+ "eval_steps_per_second": 26.128,
459
+ "step": 3000
460
+ },
461
+ {
462
+ "epoch": 0.82,
463
+ "learning_rate": 1.8261073967900676e-05,
464
+ "loss": 0.6983,
465
+ "step": 3100
466
+ },
467
+ {
468
+ "epoch": 0.82,
469
+ "eval_loss": 0.5696993470191956,
470
+ "eval_map@3": 0.8883333333333333,
471
+ "eval_runtime": 3.8632,
472
+ "eval_samples_per_second": 25.885,
473
+ "eval_steps_per_second": 25.885,
474
+ "step": 3100
475
+ },
476
+ {
477
+ "epoch": 0.84,
478
+ "learning_rate": 1.80847698024356e-05,
479
+ "loss": 0.7055,
480
+ "step": 3200
481
+ },
482
+ {
483
+ "epoch": 0.84,
484
+ "eval_loss": 0.5589382648468018,
485
+ "eval_map@3": 0.8933333333333333,
486
+ "eval_runtime": 4.205,
487
+ "eval_samples_per_second": 23.781,
488
+ "eval_steps_per_second": 23.781,
489
+ "step": 3200
490
+ },
491
+ {
492
+ "epoch": 0.87,
493
+ "learning_rate": 1.790089354992788e-05,
494
+ "loss": 0.7091,
495
+ "step": 3300
496
+ },
497
+ {
498
+ "epoch": 0.87,
499
+ "eval_loss": 0.5517598986625671,
500
+ "eval_map@3": 0.8883333333333333,
501
+ "eval_runtime": 4.2776,
502
+ "eval_samples_per_second": 23.378,
503
+ "eval_steps_per_second": 23.378,
504
+ "step": 3300
505
+ },
506
+ {
507
+ "epoch": 0.89,
508
+ "learning_rate": 1.7711566234443824e-05,
509
+ "loss": 0.7128,
510
+ "step": 3400
511
+ },
512
+ {
513
+ "epoch": 0.89,
514
+ "eval_loss": 0.5628377795219421,
515
+ "eval_map@3": 0.8866666666666667,
516
+ "eval_runtime": 4.1647,
517
+ "eval_samples_per_second": 24.011,
518
+ "eval_steps_per_second": 24.011,
519
+ "step": 3400
520
+ },
521
+ {
522
+ "epoch": 0.92,
523
+ "learning_rate": 1.751314068469163e-05,
524
+ "loss": 0.7465,
525
+ "step": 3500
526
+ },
527
+ {
528
+ "epoch": 0.92,
529
+ "eval_loss": 0.5919340252876282,
530
+ "eval_map@3": 0.8833333333333333,
531
+ "eval_runtime": 4.0418,
532
+ "eval_samples_per_second": 24.742,
533
+ "eval_steps_per_second": 24.742,
534
+ "step": 3500
535
+ },
536
+ {
537
+ "epoch": 0.95,
538
+ "learning_rate": 1.730767842806733e-05,
539
+ "loss": 0.708,
540
+ "step": 3600
541
+ },
542
+ {
543
+ "epoch": 0.95,
544
+ "eval_loss": 0.554470419883728,
545
+ "eval_map@3": 0.895,
546
+ "eval_runtime": 4.0703,
547
+ "eval_samples_per_second": 24.568,
548
+ "eval_steps_per_second": 24.568,
549
+ "step": 3600
550
+ },
551
+ {
552
+ "epoch": 0.97,
553
+ "learning_rate": 1.709537189776684e-05,
554
+ "loss": 0.7017,
555
+ "step": 3700
556
+ },
557
+ {
558
+ "epoch": 0.97,
559
+ "eval_loss": 0.5175462365150452,
560
+ "eval_map@3": 0.895,
561
+ "eval_runtime": 4.1999,
562
+ "eval_samples_per_second": 23.81,
563
+ "eval_steps_per_second": 23.81,
564
+ "step": 3700
565
+ },
566
+ {
567
+ "epoch": 1.0,
568
+ "learning_rate": 1.6876419937241033e-05,
569
+ "loss": 0.7,
570
+ "step": 3800
571
+ },
572
+ {
573
+ "epoch": 1.0,
574
+ "eval_loss": 0.5037916302680969,
575
+ "eval_map@3": 0.8916666666666667,
576
+ "eval_runtime": 4.1774,
577
+ "eval_samples_per_second": 23.938,
578
+ "eval_steps_per_second": 23.938,
579
+ "step": 3800
580
+ },
581
+ {
582
+ "epoch": 1.03,
583
+ "learning_rate": 1.665102761396169e-05,
584
+ "loss": 0.5923,
585
+ "step": 3900
586
+ },
587
+ {
588
+ "epoch": 1.03,
589
+ "eval_loss": 0.48426902294158936,
590
+ "eval_map@3": 0.8916666666666667,
591
+ "eval_runtime": 4.139,
592
+ "eval_samples_per_second": 24.16,
593
+ "eval_steps_per_second": 24.16,
594
+ "step": 3900
595
+ },
596
+ {
597
+ "epoch": 1.05,
598
+ "learning_rate": 1.641940602735802e-05,
599
+ "loss": 0.594,
600
+ "step": 4000
601
+ },
602
+ {
603
+ "epoch": 1.05,
604
+ "eval_loss": 0.48445701599121094,
605
+ "eval_map@3": 0.895,
606
+ "eval_runtime": 4.2445,
607
+ "eval_samples_per_second": 23.56,
608
+ "eval_steps_per_second": 23.56,
609
+ "step": 4000
610
+ },
611
+ {
612
+ "epoch": 1.08,
613
+ "learning_rate": 1.6181772111103775e-05,
614
+ "loss": 0.6075,
615
+ "step": 4100
616
+ },
617
+ {
618
+ "epoch": 1.08,
619
+ "eval_loss": 0.5103535652160645,
620
+ "eval_map@3": 0.8899999999999999,
621
+ "eval_runtime": 4.059,
622
+ "eval_samples_per_second": 24.636,
623
+ "eval_steps_per_second": 24.636,
624
+ "step": 4100
625
+ },
626
+ {
627
+ "epoch": 1.1,
628
+ "learning_rate": 1.5938348429940095e-05,
629
+ "loss": 0.5226,
630
+ "step": 4200
631
+ },
632
+ {
633
+ "epoch": 1.1,
634
+ "eval_loss": 0.5009714961051941,
635
+ "eval_map@3": 0.8883333333333333,
636
+ "eval_runtime": 4.0879,
637
+ "eval_samples_per_second": 24.463,
638
+ "eval_steps_per_second": 24.463,
639
+ "step": 4200
640
+ },
641
+ {
642
+ "epoch": 1.13,
643
+ "learning_rate": 1.568936297122432e-05,
644
+ "loss": 0.5886,
645
+ "step": 4300
646
+ },
647
+ {
648
+ "epoch": 1.13,
649
+ "eval_loss": 0.49200114607810974,
650
+ "eval_map@3": 0.9033333333333333,
651
+ "eval_runtime": 4.1867,
652
+ "eval_samples_per_second": 23.885,
653
+ "eval_steps_per_second": 23.885,
654
+ "step": 4300
655
+ },
656
+ {
657
+ "epoch": 1.16,
658
+ "learning_rate": 1.543504893140011e-05,
659
+ "loss": 0.5619,
660
+ "step": 4400
661
+ },
662
+ {
663
+ "epoch": 1.16,
664
+ "eval_loss": 0.49913597106933594,
665
+ "eval_map@3": 0.89,
666
+ "eval_runtime": 4.0492,
667
+ "eval_samples_per_second": 24.696,
668
+ "eval_steps_per_second": 24.696,
669
+ "step": 4400
670
+ },
671
+ {
672
+ "epoch": 1.18,
673
+ "learning_rate": 1.517564449758878e-05,
674
+ "loss": 0.6017,
675
+ "step": 4500
676
+ },
677
+ {
678
+ "epoch": 1.18,
679
+ "eval_loss": 0.4905971586704254,
680
+ "eval_map@3": 0.9033333333333333,
681
+ "eval_runtime": 4.0275,
682
+ "eval_samples_per_second": 24.83,
683
+ "eval_steps_per_second": 24.83,
684
+ "step": 4500
685
+ },
686
+ {
687
+ "epoch": 1.21,
688
+ "learning_rate": 1.4911392624506427e-05,
689
+ "loss": 0.534,
690
+ "step": 4600
691
+ },
692
+ {
693
+ "epoch": 1.21,
694
+ "eval_loss": 0.4901256263256073,
695
+ "eval_map@3": 0.89,
696
+ "eval_runtime": 5.205,
697
+ "eval_samples_per_second": 19.212,
698
+ "eval_steps_per_second": 19.212,
699
+ "step": 4600
700
+ },
701
+ {
702
+ "epoch": 1.24,
703
+ "learning_rate": 1.4642540806915802e-05,
704
+ "loss": 0.5914,
705
+ "step": 4700
706
+ },
707
+ {
708
+ "epoch": 1.24,
709
+ "eval_loss": 0.4857093095779419,
710
+ "eval_map@3": 0.8916666666666666,
711
+ "eval_runtime": 4.357,
712
+ "eval_samples_per_second": 22.952,
713
+ "eval_steps_per_second": 22.952,
714
+ "step": 4700
715
+ },
716
+ {
717
+ "epoch": 1.26,
718
+ "learning_rate": 1.4369340847826037e-05,
719
+ "loss": 0.6155,
720
+ "step": 4800
721
+ },
722
+ {
723
+ "epoch": 1.26,
724
+ "eval_loss": 0.49262702465057373,
725
+ "eval_map@3": 0.8849999999999999,
726
+ "eval_runtime": 4.3241,
727
+ "eval_samples_per_second": 23.126,
728
+ "eval_steps_per_second": 23.126,
729
+ "step": 4800
730
+ },
731
+ {
732
+ "epoch": 1.29,
733
+ "learning_rate": 1.4092048622657309e-05,
734
+ "loss": 0.5779,
735
+ "step": 4900
736
+ },
737
+ {
738
+ "epoch": 1.29,
739
+ "eval_loss": 0.4796687960624695,
740
+ "eval_map@3": 0.8849999999999999,
741
+ "eval_runtime": 4.3677,
742
+ "eval_samples_per_second": 22.895,
743
+ "eval_steps_per_second": 22.895,
744
+ "step": 4900
745
+ },
746
+ {
747
+ "epoch": 1.31,
748
+ "learning_rate": 1.3810923839591368e-05,
749
+ "loss": 0.6144,
750
+ "step": 5000
751
+ },
752
+ {
753
+ "epoch": 1.31,
754
+ "eval_loss": 0.4864266514778137,
755
+ "eval_map@3": 0.8916666666666666,
756
+ "eval_runtime": 4.1788,
757
+ "eval_samples_per_second": 23.93,
758
+ "eval_steps_per_second": 23.93,
759
+ "step": 5000
760
+ },
761
+ {
762
+ "epoch": 1.34,
763
+ "learning_rate": 1.3526229796332322e-05,
764
+ "loss": 0.5532,
765
+ "step": 5100
766
+ },
767
+ {
768
+ "epoch": 1.34,
769
+ "eval_loss": 0.5059311389923096,
770
+ "eval_map@3": 0.9033333333333334,
771
+ "eval_runtime": 4.1314,
772
+ "eval_samples_per_second": 24.205,
773
+ "eval_steps_per_second": 24.205,
774
+ "step": 5100
775
+ },
776
+ {
777
+ "epoch": 1.37,
778
+ "learning_rate": 1.3238233133505538e-05,
779
+ "loss": 0.5238,
780
+ "step": 5200
781
+ },
782
+ {
783
+ "epoch": 1.37,
784
+ "eval_loss": 0.5035010576248169,
785
+ "eval_map@3": 0.8933333333333334,
786
+ "eval_runtime": 4.0704,
787
+ "eval_samples_per_second": 24.568,
788
+ "eval_steps_per_second": 24.568,
789
+ "step": 5200
790
+ },
791
+ {
792
+ "epoch": 1.39,
793
+ "learning_rate": 1.2947203584925607e-05,
794
+ "loss": 0.5993,
795
+ "step": 5300
796
+ },
797
+ {
798
+ "epoch": 1.39,
799
+ "eval_loss": 0.5209221839904785,
800
+ "eval_map@3": 0.8983333333333334,
801
+ "eval_runtime": 4.0192,
802
+ "eval_samples_per_second": 24.881,
803
+ "eval_steps_per_second": 24.881,
804
+ "step": 5300
805
+ },
806
+ {
807
+ "epoch": 1.42,
808
+ "learning_rate": 1.2653413724967272e-05,
809
+ "loss": 0.5443,
810
+ "step": 5400
811
+ },
812
+ {
813
+ "epoch": 1.42,
814
+ "eval_loss": 0.5166578888893127,
815
+ "eval_map@3": 0.8883333333333334,
816
+ "eval_runtime": 4.0162,
817
+ "eval_samples_per_second": 24.899,
818
+ "eval_steps_per_second": 24.899,
819
+ "step": 5400
820
+ },
821
+ {
822
+ "epoch": 1.45,
823
+ "learning_rate": 1.2357138713275916e-05,
824
+ "loss": 0.5564,
825
+ "step": 5500
826
+ },
827
+ {
828
+ "epoch": 1.45,
829
+ "eval_loss": 0.50209641456604,
830
+ "eval_map@3": 0.8883333333333334,
831
+ "eval_runtime": 4.0769,
832
+ "eval_samples_per_second": 24.529,
833
+ "eval_steps_per_second": 24.529,
834
+ "step": 5500
835
+ },
836
+ {
837
+ "epoch": 1.47,
838
+ "learning_rate": 1.2058656037056733e-05,
839
+ "loss": 0.5962,
840
+ "step": 5600
841
+ },
842
+ {
843
+ "epoch": 1.47,
844
+ "eval_loss": 0.5111860632896423,
845
+ "eval_map@3": 0.8933333333333334,
846
+ "eval_runtime": 4.1245,
847
+ "eval_samples_per_second": 24.245,
848
+ "eval_steps_per_second": 24.245,
849
+ "step": 5600
850
+ },
851
+ {
852
+ "epoch": 1.5,
853
+ "learning_rate": 1.1758245251183917e-05,
854
+ "loss": 0.5824,
855
+ "step": 5700
856
+ },
857
+ {
858
+ "epoch": 1.5,
859
+ "eval_loss": 0.5007131695747375,
860
+ "eval_map@3": 0.8966666666666666,
861
+ "eval_runtime": 4.0577,
862
+ "eval_samples_per_second": 24.644,
863
+ "eval_steps_per_second": 24.644,
864
+ "step": 5700
865
+ },
866
+ {
867
+ "epoch": 1.53,
868
+ "learning_rate": 1.1456187716373334e-05,
869
+ "loss": 0.5327,
870
+ "step": 5800
871
+ },
872
+ {
873
+ "epoch": 1.53,
874
+ "eval_loss": 0.5062887072563171,
875
+ "eval_map@3": 0.8883333333333334,
876
+ "eval_runtime": 3.9467,
877
+ "eval_samples_per_second": 25.337,
878
+ "eval_steps_per_second": 25.337,
879
+ "step": 5800
880
+ },
881
+ {
882
+ "epoch": 1.55,
883
+ "learning_rate": 1.1152766335663825e-05,
884
+ "loss": 0.5649,
885
+ "step": 5900
886
+ },
887
+ {
888
+ "epoch": 1.55,
889
+ "eval_loss": 0.5016913414001465,
890
+ "eval_map@3": 0.8883333333333334,
891
+ "eval_runtime": 4.1523,
892
+ "eval_samples_per_second": 24.083,
893
+ "eval_steps_per_second": 24.083,
894
+ "step": 5900
895
+ },
896
+ {
897
+ "epoch": 1.58,
898
+ "learning_rate": 1.0848265289454028e-05,
899
+ "loss": 0.5717,
900
+ "step": 6000
901
+ },
902
+ {
903
+ "epoch": 1.58,
904
+ "eval_loss": 0.4974103271961212,
905
+ "eval_map@3": 0.9033333333333334,
906
+ "eval_runtime": 4.0909,
907
+ "eval_samples_per_second": 24.444,
908
+ "eval_steps_per_second": 24.444,
909
+ "step": 6000
910
+ },
911
+ {
912
+ "epoch": 1.6,
913
+ "learning_rate": 1.054296976934281e-05,
914
+ "loss": 0.5737,
915
+ "step": 6100
916
+ },
917
+ {
918
+ "epoch": 1.6,
919
+ "eval_loss": 0.49186939001083374,
920
+ "eval_map@3": 0.8933333333333334,
921
+ "eval_runtime": 4.2135,
922
+ "eval_samples_per_second": 23.733,
923
+ "eval_steps_per_second": 23.733,
924
+ "step": 6100
925
+ },
926
+ {
927
+ "epoch": 1.63,
928
+ "learning_rate": 1.0237165711022661e-05,
929
+ "loss": 0.5351,
930
+ "step": 6200
931
+ },
932
+ {
933
+ "epoch": 1.63,
934
+ "eval_loss": 0.4999094009399414,
935
+ "eval_map@3": 0.895,
936
+ "eval_runtime": 3.9962,
937
+ "eval_samples_per_second": 25.024,
938
+ "eval_steps_per_second": 25.024,
939
+ "step": 6200
940
+ },
941
+ {
942
+ "epoch": 1.66,
943
+ "learning_rate": 9.934199946768965e-06,
944
+ "loss": 0.5341,
945
+ "step": 6300
946
+ },
947
+ {
948
+ "epoch": 1.66,
949
+ "eval_loss": 0.4991361200809479,
950
+ "eval_map@3": 0.9016666666666667,
951
+ "eval_runtime": 4.1686,
952
+ "eval_samples_per_second": 23.989,
953
+ "eval_steps_per_second": 23.989,
954
+ "step": 6300
955
+ },
956
+ {
957
+ "epoch": 1.68,
958
+ "learning_rate": 9.628236192287725e-06,
959
+ "loss": 0.5501,
960
+ "step": 6400
961
+ },
962
+ {
963
+ "epoch": 1.68,
964
+ "eval_loss": 0.5340888500213623,
965
+ "eval_map@3": 0.8866666666666667,
966
+ "eval_runtime": 4.1621,
967
+ "eval_samples_per_second": 24.027,
968
+ "eval_steps_per_second": 24.027,
969
+ "step": 6400
970
+ },
971
+ {
972
+ "epoch": 1.71,
973
+ "learning_rate": 9.32262062680631e-06,
974
+ "loss": 0.5552,
975
+ "step": 6500
976
+ },
977
+ {
978
+ "epoch": 1.71,
979
+ "eval_loss": 0.4900314211845398,
980
+ "eval_map@3": 0.9016666666666667,
981
+ "eval_runtime": 4.0101,
982
+ "eval_samples_per_second": 24.937,
983
+ "eval_steps_per_second": 24.937,
984
+ "step": 6500
985
+ },
986
+ {
987
+ "epoch": 1.74,
988
+ "learning_rate": 9.017639485767398e-06,
989
+ "loss": 0.5362,
990
+ "step": 6600
991
+ },
992
+ {
993
+ "epoch": 1.74,
994
+ "eval_loss": 0.5052759647369385,
995
+ "eval_map@3": 0.905,
996
+ "eval_runtime": 4.083,
997
+ "eval_samples_per_second": 24.492,
998
+ "eval_steps_per_second": 24.492,
999
+ "step": 6600
1000
+ },
1001
+ {
1002
+ "epoch": 1.76,
1003
+ "learning_rate": 8.713578410420254e-06,
1004
+ "loss": 0.5474,
1005
+ "step": 6700
1006
+ },
1007
+ {
1008
+ "epoch": 1.76,
1009
+ "eval_loss": 0.5121599435806274,
1010
+ "eval_map@3": 0.905,
1011
+ "eval_runtime": 3.9444,
1012
+ "eval_samples_per_second": 25.352,
1013
+ "eval_steps_per_second": 25.352,
1014
+ "step": 6700
1015
+ },
1016
+ {
1017
+ "epoch": 1.79,
1018
+ "learning_rate": 8.41072218029293e-06,
1019
+ "loss": 0.5134,
1020
+ "step": 6800
1021
+ },
1022
+ {
1023
+ "epoch": 1.79,
1024
+ "eval_loss": 0.510312020778656,
1025
+ "eval_map@3": 0.8983333333333334,
1026
+ "eval_runtime": 4.0574,
1027
+ "eval_samples_per_second": 24.647,
1028
+ "eval_steps_per_second": 24.647,
1029
+ "step": 6800
1030
+ },
1031
+ {
1032
+ "epoch": 1.81,
1033
+ "learning_rate": 8.1093544464716e-06,
1034
+ "loss": 0.5454,
1035
+ "step": 6900
1036
+ },
1037
+ {
1038
+ "epoch": 1.81,
1039
+ "eval_loss": 0.5062999725341797,
1040
+ "eval_map@3": 0.9,
1041
+ "eval_runtime": 4.0245,
1042
+ "eval_samples_per_second": 24.848,
1043
+ "eval_steps_per_second": 24.848,
1044
+ "step": 6900
1045
+ },
1046
+ {
1047
+ "epoch": 1.84,
1048
+ "learning_rate": 7.809757465936754e-06,
1049
+ "loss": 0.5726,
1050
+ "step": 7000
1051
+ },
1052
+ {
1053
+ "epoch": 1.84,
1054
+ "eval_loss": 0.5135546326637268,
1055
+ "eval_map@3": 0.905,
1056
+ "eval_runtime": 4.053,
1057
+ "eval_samples_per_second": 24.673,
1058
+ "eval_steps_per_second": 24.673,
1059
+ "step": 7000
1060
+ },
1061
+ {
1062
+ "epoch": 1.87,
1063
+ "learning_rate": 7.512211837205075e-06,
1064
+ "loss": 0.5173,
1065
+ "step": 7100
1066
+ },
1067
+ {
1068
+ "epoch": 1.87,
1069
+ "eval_loss": 0.49537014961242676,
1070
+ "eval_map@3": 0.9,
1071
+ "eval_runtime": 4.1604,
1072
+ "eval_samples_per_second": 24.036,
1073
+ "eval_steps_per_second": 24.036,
1074
+ "step": 7100
1075
+ },
1076
+ {
1077
+ "epoch": 1.89,
1078
+ "learning_rate": 7.216996237524646e-06,
1079
+ "loss": 0.5118,
1080
+ "step": 7200
1081
+ },
1082
+ {
1083
+ "epoch": 1.89,
1084
+ "eval_loss": 0.5108374357223511,
1085
+ "eval_map@3": 0.9066666666666667,
1086
+ "eval_runtime": 4.0629,
1087
+ "eval_samples_per_second": 24.613,
1088
+ "eval_steps_per_second": 24.613,
1089
+ "step": 7200
1090
+ },
1091
+ {
1092
+ "epoch": 1.92,
1093
+ "learning_rate": 6.924387161869565e-06,
1094
+ "loss": 0.5372,
1095
+ "step": 7300
1096
+ },
1097
+ {
1098
+ "epoch": 1.92,
1099
+ "eval_loss": 0.5062543749809265,
1100
+ "eval_map@3": 0.8866666666666666,
1101
+ "eval_runtime": 3.9525,
1102
+ "eval_samples_per_second": 25.3,
1103
+ "eval_steps_per_second": 25.3,
1104
+ "step": 7300
1105
+ },
1106
+ {
1107
+ "epoch": 1.95,
1108
+ "learning_rate": 6.634658663978463e-06,
1109
+ "loss": 0.4689,
1110
+ "step": 7400
1111
+ },
1112
+ {
1113
+ "epoch": 1.95,
1114
+ "eval_loss": 0.5014258623123169,
1115
+ "eval_map@3": 0.8933333333333333,
1116
+ "eval_runtime": 4.084,
1117
+ "eval_samples_per_second": 24.486,
1118
+ "eval_steps_per_second": 24.486,
1119
+ "step": 7400
1120
+ },
1121
+ {
1122
+ "epoch": 1.97,
1123
+ "learning_rate": 6.348082099679445e-06,
1124
+ "loss": 0.5096,
1125
+ "step": 7500
1126
+ },
1127
+ {
1128
+ "epoch": 1.97,
1129
+ "eval_loss": 0.5079813003540039,
1130
+ "eval_map@3": 0.905,
1131
+ "eval_runtime": 4.207,
1132
+ "eval_samples_per_second": 23.77,
1133
+ "eval_steps_per_second": 23.77,
1134
+ "step": 7500
1135
+ },
1136
+ {
1137
+ "epoch": 2.0,
1138
+ "learning_rate": 6.0649258727418535e-06,
1139
+ "loss": 0.5272,
1140
+ "step": 7600
1141
+ },
1142
+ {
1143
+ "epoch": 2.0,
1144
+ "eval_loss": 0.4955463409423828,
1145
+ "eval_map@3": 0.9083333333333333,
1146
+ "eval_runtime": 4.0938,
1147
+ "eval_samples_per_second": 24.427,
1148
+ "eval_steps_per_second": 24.427,
1149
+ "step": 7600
1150
+ }
1151
+ ],
1152
+ "logging_steps": 100,
1153
+ "max_steps": 11406,
1154
+ "num_train_epochs": 3,
1155
+ "save_steps": 100,
1156
+ "total_flos": 3.996618693693601e+17,
1157
+ "trial_name": null,
1158
+ "trial_params": null
1159
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a678ce749c60f5e44929d4d02123f240667b09ccba261e1c5f64b07ae9cbe9
3
+ size 4027