minimario committed
Commit 1ef15eb · 1 Parent(s): 84408cd

add rankers

config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "models_partial_t5/checkpoint-8000",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
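
The config above describes a 12-layer RoBERTa-style sequence classifier used as a ranker. A minimal sketch of loading it with the transformers library, assuming the files from this commit are checked out locally (the directory path is illustrative, not part of the commit):

# Sketch: load the ranker config and weights with transformers.
# "." is a placeholder for wherever config.json and pytorch_model.bin live.
from transformers import AutoConfig, AutoModelForSequenceClassification

checkpoint_dir = "."
config = AutoConfig.from_pretrained(checkpoint_dir)   # reads config.json
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir, config=config)
print(model.config.architectures)  # ['RobertaForSequenceClassification']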
partial-coderanker-t5/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "models_partial_t5/checkpoint-8000",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
partial-coderanker-t5/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:510c3eb3cc160b18106c0f6a20224a8b5d85cc214f1cb5a4fd7a081f66b3ec84
+ size 498657517
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:510c3eb3cc160b18106c0f6a20224a8b5d85cc214f1cb5a4fd7a081f66b3ec84
+ size 498657517
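
The two pytorch_model.bin entries above are Git LFS pointer files: the diff only records the sha256 and size (~499 MB), not the weights themselves. A sketch of fetching the real binary with huggingface_hub; the repo id below is a placeholder, not taken from this commit:

# Sketch: resolve a Git LFS pointer by downloading the actual file from the Hub.
# "user/partial-coderanker" is a hypothetical repo id for illustration only.
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="user/partial-coderanker",
    filename="partial-coderanker-t5/pytorch_model.bin",
)
print(weights_path)  # local cache path of the downloaded checkpoint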
ranker_5/checkpoint-200/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "microsoft/codebert-base",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
ranker_5/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_5/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5ca57ee890a3958d1e5940842b68070d78e768db14802189867a5b849a7f049
+ size 997295045
ranker_5/checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17553744bfff03e4ec2773f5017be5fe6e96e765ee6a40d86f34c6ee5e5dbec7
+ size 498662069
ranker_5/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a91eea1dffdd54f976e0f1205ab82a6b45037f0062e18e5cdba1d77041522ef
+ size 15597
ranker_5/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2ee5e037f2ff613ef67e0ad201c15c247a7446512fc7e4f4386db3464f7e9af
+ size 627
ranker_5/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
ranker_5/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "microsoft/codebert-base",
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "special_tokens_map_file": "/om2/user/gua/.cache/huggingface/hub/models--microsoft--codebert-base/snapshots/3b0952feddeffad0063f274080e3c23d75e7eb39/special_tokens_map.json",
+   "tokenizer_class": "RobertaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "use_fast": true
+ }
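
The tokenizer files mirror microsoft/codebert-base (RobertaTokenizer, model_max_length 512). A sketch of using this checkpoint as a ranker over a (problem, candidate-code) pair; encoding the two texts as a sequence pair and treating logit index 1 as the "good candidate" class are assumptions for illustration, not documented in this commit:

# Sketch: score one candidate with the ranker checkpoint in ranker_5/checkpoint-200.
# The pairing scheme and label order are assumptions, not taken from this commit.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

ckpt = "ranker_5/checkpoint-200"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
model.eval()

problem = "Return the sum of a list of integers."
candidate = "def solve(xs):\n    return sum(xs)"

inputs = tokenizer(problem, candidate, truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
score = torch.softmax(logits, dim=-1)[0, 1].item()  # assumed "good" class
print(f"ranker score: {score:.3f}")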
ranker_5/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,269 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.19184652278177458,
+   "global_step": 200,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     { "epoch": 0.0, "learning_rate": 9.995201535508638e-05, "loss": 0.6593, "step": 5 },
+     { "epoch": 0.01, "learning_rate": 9.990403071017275e-05, "loss": 0.5495, "step": 10 },
+     { "epoch": 0.01, "learning_rate": 9.985604606525912e-05, "loss": 0.507, "step": 15 },
+     { "epoch": 0.02, "learning_rate": 9.980806142034548e-05, "loss": 0.4512, "step": 20 },
+     { "epoch": 0.02, "learning_rate": 9.976007677543187e-05, "loss": 0.417, "step": 25 },
+     { "epoch": 0.03, "learning_rate": 9.971209213051824e-05, "loss": 0.4129, "step": 30 },
+     { "epoch": 0.03, "learning_rate": 9.96641074856046e-05, "loss": 0.4053, "step": 35 },
+     { "epoch": 0.04, "learning_rate": 9.961612284069098e-05, "loss": 0.3658, "step": 40 },
+     { "epoch": 0.04, "learning_rate": 9.956813819577736e-05, "loss": 0.3779, "step": 45 },
+     { "epoch": 0.05, "learning_rate": 9.952015355086372e-05, "loss": 0.3791, "step": 50 },
+     { "epoch": 0.05, "learning_rate": 9.94721689059501e-05, "loss": 0.3492, "step": 55 },
+     { "epoch": 0.06, "learning_rate": 9.942418426103647e-05, "loss": 0.341, "step": 60 },
+     { "epoch": 0.06, "learning_rate": 9.937619961612284e-05, "loss": 0.3388, "step": 65 },
+     { "epoch": 0.07, "learning_rate": 9.932821497120922e-05, "loss": 0.3597, "step": 70 },
+     { "epoch": 0.07, "learning_rate": 9.928023032629559e-05, "loss": 0.3356, "step": 75 },
+     { "epoch": 0.08, "learning_rate": 9.923224568138196e-05, "loss": 0.3317, "step": 80 },
+     { "epoch": 0.08, "learning_rate": 9.918426103646834e-05, "loss": 0.3424, "step": 85 },
+     { "epoch": 0.09, "learning_rate": 9.913627639155471e-05, "loss": 0.333, "step": 90 },
+     { "epoch": 0.09, "learning_rate": 9.908829174664108e-05, "loss": 0.3145, "step": 95 },
+     { "epoch": 0.1, "learning_rate": 9.904030710172744e-05, "loss": 0.3213, "step": 100 },
+     { "epoch": 0.1, "learning_rate": 9.899232245681382e-05, "loss": 0.3139, "step": 105 },
+     { "epoch": 0.11, "learning_rate": 9.89443378119002e-05, "loss": 0.3155, "step": 110 },
+     { "epoch": 0.11, "learning_rate": 9.889635316698656e-05, "loss": 0.3219, "step": 115 },
+     { "epoch": 0.12, "learning_rate": 9.884836852207294e-05, "loss": 0.3049, "step": 120 },
+     { "epoch": 0.12, "learning_rate": 9.880038387715931e-05, "loss": 0.3171, "step": 125 },
+     { "epoch": 0.12, "learning_rate": 9.875239923224568e-05, "loss": 0.2836, "step": 130 },
+     { "epoch": 0.13, "learning_rate": 9.870441458733206e-05, "loss": 0.3102, "step": 135 },
+     { "epoch": 0.13, "learning_rate": 9.865642994241843e-05, "loss": 0.2853, "step": 140 },
+     { "epoch": 0.14, "learning_rate": 9.86084452975048e-05, "loss": 0.3273, "step": 145 },
+     { "epoch": 0.14, "learning_rate": 9.856046065259118e-05, "loss": 0.3093, "step": 150 },
+     { "epoch": 0.15, "learning_rate": 9.851247600767755e-05, "loss": 0.3112, "step": 155 },
+     { "epoch": 0.15, "learning_rate": 9.846449136276392e-05, "loss": 0.308, "step": 160 },
+     { "epoch": 0.16, "learning_rate": 9.841650671785028e-05, "loss": 0.3217, "step": 165 },
+     { "epoch": 0.16, "learning_rate": 9.836852207293667e-05, "loss": 0.2773, "step": 170 },
+     { "epoch": 0.17, "learning_rate": 9.832053742802304e-05, "loss": 0.2932, "step": 175 },
+     { "epoch": 0.17, "learning_rate": 9.82725527831094e-05, "loss": 0.3055, "step": 180 },
+     { "epoch": 0.18, "learning_rate": 9.822456813819578e-05, "loss": 0.282, "step": 185 },
+     { "epoch": 0.18, "learning_rate": 9.817658349328216e-05, "loss": 0.2682, "step": 190 },
+     { "epoch": 0.19, "learning_rate": 9.812859884836852e-05, "loss": 0.2913, "step": 195 },
+     { "epoch": 0.19, "learning_rate": 9.80806142034549e-05, "loss": 0.2813, "step": 200 },
+     {
+       "epoch": 0.19,
+       "eval_accuracy": 0.6656578015326164,
+       "eval_accuracy_sklearn": 0.6656578015326164,
+       "eval_f1": 0.6489045171760888,
+       "eval_loss": 0.7392101883888245,
+       "eval_precision": 0.809106239460371,
+       "eval_recall": 0.5416572589749379,
+       "eval_runtime": 84.7791,
+       "eval_samples_per_second": 183.17,
+       "eval_steps_per_second": 11.453,
+       "step": 200
+     }
+   ],
+   "max_steps": 10420,
+   "num_train_epochs": 10,
+   "total_flos": 2.6942572068864e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
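
trainer_state.json is plain JSON, so the loss curve and the step-200 eval metrics can be pulled out without transformers. A small sketch, assuming the file is read from the same relative path as in this commit:

# Sketch: extract training losses and eval metrics from trainer_state.json.
import json

with open("ranker_5/checkpoint-200/trainer_state.json") as f:
    state = json.load(f)

losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [e for e in state["log_history"] if "eval_loss" in e]

print(losses[-1])            # (200, 0.2813)
print(evals[-1]["eval_f1"])  # 0.6489045171760888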
ranker_5/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8afcd45054b4f042d2c517636d041f304a64f0acd72b54cb7054399d66b952a3
+ size 3451
ranker_5/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_6/checkpoint-200/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "microsoft/codebert-base",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
ranker_6/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_6/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddefc015de30964a411b6b2b79e4d96111e9cea1c3781bbbdf8dc9142c2a0eaa
+ size 997295045
ranker_6/checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bf0930cffd75a4c72acd82d1ec517c0c91a058e8f95f675e1168ac461ce2920
+ size 498662069
ranker_6/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a91eea1dffdd54f976e0f1205ab82a6b45037f0062e18e5cdba1d77041522ef
+ size 15597
ranker_6/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f232c6145f58cfb2b4586a000a440995d052431f92826c9d2659b20bea73b50
+ size 627
ranker_6/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
ranker_6/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "microsoft/codebert-base",
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "special_tokens_map_file": "/om2/user/gua/.cache/huggingface/hub/models--microsoft--codebert-base/snapshots/3b0952feddeffad0063f274080e3c23d75e7eb39/special_tokens_map.json",
+   "tokenizer_class": "RobertaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "use_fast": true
+ }
ranker_6/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,269 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.23873470605789315,
+   "global_step": 200,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     { "epoch": 0.01, "learning_rate": 9.994026284348866e-05, "loss": 0.7347, "step": 5 },
+     { "epoch": 0.01, "learning_rate": 9.98805256869773e-05, "loss": 0.6862, "step": 10 },
+     { "epoch": 0.02, "learning_rate": 9.982078853046596e-05, "loss": 0.6505, "step": 15 },
+     { "epoch": 0.02, "learning_rate": 9.97610513739546e-05, "loss": 0.6177, "step": 20 },
+     { "epoch": 0.03, "learning_rate": 9.970131421744326e-05, "loss": 0.5992, "step": 25 },
+     { "epoch": 0.04, "learning_rate": 9.96415770609319e-05, "loss": 0.5671, "step": 30 },
+     { "epoch": 0.04, "learning_rate": 9.958183990442056e-05, "loss": 0.5407, "step": 35 },
+     { "epoch": 0.05, "learning_rate": 9.952210274790921e-05, "loss": 0.5186, "step": 40 },
+     { "epoch": 0.05, "learning_rate": 9.946236559139786e-05, "loss": 0.5068, "step": 45 },
+     { "epoch": 0.06, "learning_rate": 9.940262843488651e-05, "loss": 0.5162, "step": 50 },
+     { "epoch": 0.07, "learning_rate": 9.934289127837514e-05, "loss": 0.527, "step": 55 },
+     { "epoch": 0.07, "learning_rate": 9.928315412186381e-05, "loss": 0.4849, "step": 60 },
+     { "epoch": 0.08, "learning_rate": 9.922341696535246e-05, "loss": 0.477, "step": 65 },
+     { "epoch": 0.08, "learning_rate": 9.916367980884111e-05, "loss": 0.4702, "step": 70 },
+     { "epoch": 0.09, "learning_rate": 9.910394265232975e-05, "loss": 0.4504, "step": 75 },
+     { "epoch": 0.1, "learning_rate": 9.90442054958184e-05, "loss": 0.4422, "step": 80 },
+     { "epoch": 0.1, "learning_rate": 9.898446833930706e-05, "loss": 0.4383, "step": 85 },
+     { "epoch": 0.11, "learning_rate": 9.892473118279571e-05, "loss": 0.4356, "step": 90 },
+     { "epoch": 0.11, "learning_rate": 9.886499402628435e-05, "loss": 0.4446, "step": 95 },
+     { "epoch": 0.12, "learning_rate": 9.8805256869773e-05, "loss": 0.4251, "step": 100 },
+     { "epoch": 0.13, "learning_rate": 9.874551971326166e-05, "loss": 0.4201, "step": 105 },
+     { "epoch": 0.13, "learning_rate": 9.868578255675031e-05, "loss": 0.4389, "step": 110 },
+     { "epoch": 0.14, "learning_rate": 9.862604540023895e-05, "loss": 0.4319, "step": 115 },
+     { "epoch": 0.14, "learning_rate": 9.85663082437276e-05, "loss": 0.455, "step": 120 },
+     { "epoch": 0.15, "learning_rate": 9.850657108721625e-05, "loss": 0.4297, "step": 125 },
+     { "epoch": 0.16, "learning_rate": 9.844683393070491e-05, "loss": 0.4383, "step": 130 },
+     { "epoch": 0.16, "learning_rate": 9.838709677419355e-05, "loss": 0.4658, "step": 135 },
+     { "epoch": 0.17, "learning_rate": 9.83273596176822e-05, "loss": 0.4499, "step": 140 },
+     { "epoch": 0.17, "learning_rate": 9.826762246117085e-05, "loss": 0.41, "step": 145 },
+     { "epoch": 0.18, "learning_rate": 9.820788530465951e-05, "loss": 0.3866, "step": 150 },
+     { "epoch": 0.19, "learning_rate": 9.814814814814815e-05, "loss": 0.3897, "step": 155 },
+     { "epoch": 0.19, "learning_rate": 9.80884109916368e-05, "loss": 0.4053, "step": 160 },
+     { "epoch": 0.2, "learning_rate": 9.802867383512545e-05, "loss": 0.4022, "step": 165 },
+     { "epoch": 0.2, "learning_rate": 9.79689366786141e-05, "loss": 0.4074, "step": 170 },
+     { "epoch": 0.21, "learning_rate": 9.790919952210275e-05, "loss": 0.4, "step": 175 },
+     { "epoch": 0.21, "learning_rate": 9.78494623655914e-05, "loss": 0.4282, "step": 180 },
+     { "epoch": 0.22, "learning_rate": 9.778972520908005e-05, "loss": 0.4028, "step": 185 },
+     { "epoch": 0.23, "learning_rate": 9.77299880525687e-05, "loss": 0.3937, "step": 190 },
+     { "epoch": 0.23, "learning_rate": 9.767025089605735e-05, "loss": 0.3665, "step": 195 },
+     { "epoch": 0.24, "learning_rate": 9.7610513739546e-05, "loss": 0.3743, "step": 200 },
+     {
+       "epoch": 0.24,
+       "eval_accuracy": 0.7354377926759424,
+       "eval_accuracy_sklearn": 0.7354377926759424,
+       "eval_f1": 0.7483577402505849,
+       "eval_loss": 0.5406955480575562,
+       "eval_precision": 0.8060886373049841,
+       "eval_recall": 0.6983434005143233,
+       "eval_runtime": 162.5908,
+       "eval_samples_per_second": 182.563,
+       "eval_steps_per_second": 11.415,
+       "step": 200
+     }
+   ],
+   "max_steps": 8370,
+   "num_train_epochs": 10,
+   "total_flos": 2.6942572068864e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
ranker_6/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:afe9172185ff2f8d76489141d37a4aee15a69f345332203636a0331020f1caf0
+ size 3451
ranker_6/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff