Plasmoxy commited on
Commit
bb9a768
·
verified ·
1 Parent(s): 07a9be1

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. added_tokens.json +3 -0
  2. checkpoint-1492/added_tokens.json +3 -0
  3. checkpoint-1492/config.json +60 -0
  4. checkpoint-1492/model.safetensors +3 -0
  5. checkpoint-1492/optimizer.pt +3 -0
  6. checkpoint-1492/rng_state.pth +3 -0
  7. checkpoint-1492/scheduler.pt +3 -0
  8. checkpoint-1492/special_tokens_map.json +15 -0
  9. checkpoint-1492/spm.model +3 -0
  10. checkpoint-1492/tokenizer.json +0 -0
  11. checkpoint-1492/tokenizer_config.json +58 -0
  12. checkpoint-1492/trainer_state.json +58 -0
  13. checkpoint-1492/training_args.bin +3 -0
  14. checkpoint-2984/added_tokens.json +3 -0
  15. checkpoint-2984/config.json +60 -0
  16. checkpoint-2984/model.safetensors +3 -0
  17. checkpoint-2984/optimizer.pt +3 -0
  18. checkpoint-2984/rng_state.pth +3 -0
  19. checkpoint-2984/scheduler.pt +3 -0
  20. checkpoint-2984/special_tokens_map.json +15 -0
  21. checkpoint-2984/spm.model +3 -0
  22. checkpoint-2984/tokenizer.json +0 -0
  23. checkpoint-2984/tokenizer_config.json +58 -0
  24. checkpoint-2984/trainer_state.json +90 -0
  25. checkpoint-2984/training_args.bin +3 -0
  26. checkpoint-4476/added_tokens.json +3 -0
  27. checkpoint-4476/config.json +60 -0
  28. checkpoint-4476/model.safetensors +3 -0
  29. checkpoint-4476/optimizer.pt +3 -0
  30. checkpoint-4476/rng_state.pth +3 -0
  31. checkpoint-4476/scheduler.pt +3 -0
  32. checkpoint-4476/special_tokens_map.json +15 -0
  33. checkpoint-4476/spm.model +3 -0
  34. checkpoint-4476/tokenizer.json +0 -0
  35. checkpoint-4476/tokenizer_config.json +58 -0
  36. checkpoint-4476/trainer_state.json +122 -0
  37. checkpoint-4476/training_args.bin +3 -0
  38. checkpoint-5968/added_tokens.json +3 -0
  39. checkpoint-5968/config.json +60 -0
  40. checkpoint-5968/model.safetensors +3 -0
  41. checkpoint-5968/optimizer.pt +3 -0
  42. checkpoint-5968/rng_state.pth +3 -0
  43. checkpoint-5968/scheduler.pt +3 -0
  44. checkpoint-5968/special_tokens_map.json +15 -0
  45. checkpoint-5968/spm.model +3 -0
  46. checkpoint-5968/tokenizer.json +0 -0
  47. checkpoint-5968/tokenizer_config.json +58 -0
  48. checkpoint-5968/trainer_state.json +154 -0
  49. checkpoint-5968/training_args.bin +3 -0
  50. checkpoint-7460/added_tokens.json +3 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-1492/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-1492/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6",
18
+ "7": "LABEL_7",
19
+ "8": "LABEL_8",
20
+ "9": "LABEL_9"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5,
31
+ "LABEL_6": 6,
32
+ "LABEL_7": 7,
33
+ "LABEL_8": 8,
34
+ "LABEL_9": 9
35
+ },
36
+ "layer_norm_eps": 1e-07,
37
+ "max_position_embeddings": 512,
38
+ "max_relative_positions": -1,
39
+ "model_type": "deberta-v2",
40
+ "norm_rel_ebd": "layer_norm",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 6,
43
+ "pad_token_id": 0,
44
+ "pooler_dropout": 0,
45
+ "pooler_hidden_act": "gelu",
46
+ "pooler_hidden_size": 768,
47
+ "pos_att_type": [
48
+ "p2c",
49
+ "c2p"
50
+ ],
51
+ "position_biased_input": false,
52
+ "position_buckets": 256,
53
+ "problem_type": "multi_label_classification",
54
+ "relative_attention": true,
55
+ "share_att_key": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.45.2",
58
+ "type_vocab_size": 0,
59
+ "vocab_size": 128100
60
+ }
checkpoint-1492/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927c57b08b9cf5b47ba63fb475ee871491315e6f4920ce3201b71a4f5d033af3
3
+ size 567623168
checkpoint-1492/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781e8898257f4862c94423e61e0c805476e3cd1e8f9160967c2ad9548a7a2675
3
+ size 348248442
checkpoint-1492/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09cf12694bacb4423bbdf595452f73bf98cb80697c95ffd567f145196359bd3
3
+ size 14244
checkpoint-1492/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54803337e0f674234e98979d81842325f32e7ff16f81c39173e85faf234be543
3
+ size 1064
checkpoint-1492/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-1492/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-1492/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1492/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-1492/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.34355366230010986,
3
+ "best_model_checkpoint": "out/deberta-v3-small-bkl-multi/checkpoint-1492",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1492,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.3351206434316354,
13
+ "grad_norm": 1.3047796487808228,
14
+ "learning_rate": 1.865951742627346e-05,
15
+ "loss": 0.4032,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.6702412868632708,
20
+ "grad_norm": 0.7828705310821533,
21
+ "learning_rate": 1.731903485254692e-05,
22
+ "loss": 0.3503,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.2743503772003353,
28
+ "eval_f1": 0.7807241322816955,
29
+ "eval_loss": 0.34355366230010986,
30
+ "eval_roc_auc": 0.8276824995087615,
31
+ "eval_runtime": 20.9327,
32
+ "eval_samples_per_second": 569.923,
33
+ "eval_steps_per_second": 8.933,
34
+ "step": 1492
35
+ }
36
+ ],
37
+ "logging_steps": 500,
38
+ "max_steps": 7460,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 5,
41
+ "save_steps": 500,
42
+ "stateful_callbacks": {
43
+ "TrainerControl": {
44
+ "args": {
45
+ "should_epoch_stop": false,
46
+ "should_evaluate": false,
47
+ "should_log": false,
48
+ "should_save": true,
49
+ "should_training_stop": false
50
+ },
51
+ "attributes": {}
52
+ }
53
+ },
54
+ "total_flos": 2469534208642800.0,
55
+ "train_batch_size": 64,
56
+ "trial_name": null,
57
+ "trial_params": null
58
+ }
checkpoint-1492/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19c3d98db4c53395f5f0a6d8d40c72eabb1f4e33e60306d839111edd328b17c
3
+ size 5240
checkpoint-2984/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-2984/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6",
18
+ "7": "LABEL_7",
19
+ "8": "LABEL_8",
20
+ "9": "LABEL_9"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5,
31
+ "LABEL_6": 6,
32
+ "LABEL_7": 7,
33
+ "LABEL_8": 8,
34
+ "LABEL_9": 9
35
+ },
36
+ "layer_norm_eps": 1e-07,
37
+ "max_position_embeddings": 512,
38
+ "max_relative_positions": -1,
39
+ "model_type": "deberta-v2",
40
+ "norm_rel_ebd": "layer_norm",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 6,
43
+ "pad_token_id": 0,
44
+ "pooler_dropout": 0,
45
+ "pooler_hidden_act": "gelu",
46
+ "pooler_hidden_size": 768,
47
+ "pos_att_type": [
48
+ "p2c",
49
+ "c2p"
50
+ ],
51
+ "position_biased_input": false,
52
+ "position_buckets": 256,
53
+ "problem_type": "multi_label_classification",
54
+ "relative_attention": true,
55
+ "share_att_key": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.45.2",
58
+ "type_vocab_size": 0,
59
+ "vocab_size": 128100
60
+ }
checkpoint-2984/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72b3c15361ae501241a64c97ec13fc1865c6503e5ca696077b906aa42d79126
3
+ size 567623168
checkpoint-2984/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6c9a3f1c0d9e50003741d0d52ed66b0044ef6bb843737b20e7665c9eab5a51
3
+ size 348248442
checkpoint-2984/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a055979ed102d268b340f9b62f291971793e75f46bca733e73bbbd8fc1dcc29d
3
+ size 14244
checkpoint-2984/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1fb7c9570d0bad23dec261e04dcc4519aa1e68a085025ef8b72c8df73ebafd
3
+ size 1064
checkpoint-2984/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-2984/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-2984/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2984/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-2984/trainer_state.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3326815962791443,
3
+ "best_model_checkpoint": "out/deberta-v3-small-bkl-multi/checkpoint-2984",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2984,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.3351206434316354,
13
+ "grad_norm": 1.3047796487808228,
14
+ "learning_rate": 1.865951742627346e-05,
15
+ "loss": 0.4032,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.6702412868632708,
20
+ "grad_norm": 0.7828705310821533,
21
+ "learning_rate": 1.731903485254692e-05,
22
+ "loss": 0.3503,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.2743503772003353,
28
+ "eval_f1": 0.7807241322816955,
29
+ "eval_loss": 0.34355366230010986,
30
+ "eval_roc_auc": 0.8276824995087615,
31
+ "eval_runtime": 20.9327,
32
+ "eval_samples_per_second": 569.923,
33
+ "eval_steps_per_second": 8.933,
34
+ "step": 1492
35
+ },
36
+ {
37
+ "epoch": 1.0053619302949062,
38
+ "grad_norm": 1.0840567350387573,
39
+ "learning_rate": 1.5978552278820375e-05,
40
+ "loss": 0.3432,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 1.3404825737265416,
45
+ "grad_norm": 0.586756706237793,
46
+ "learning_rate": 1.4638069705093835e-05,
47
+ "loss": 0.3322,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 1.675603217158177,
52
+ "grad_norm": 0.6265957951545715,
53
+ "learning_rate": 1.3297587131367293e-05,
54
+ "loss": 0.3313,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 2.0,
59
+ "eval_accuracy": 0.28491198658843253,
60
+ "eval_f1": 0.7920844448532793,
61
+ "eval_loss": 0.3326815962791443,
62
+ "eval_roc_auc": 0.8374262996694112,
63
+ "eval_runtime": 20.987,
64
+ "eval_samples_per_second": 568.447,
65
+ "eval_steps_per_second": 8.91,
66
+ "step": 2984
67
+ }
68
+ ],
69
+ "logging_steps": 500,
70
+ "max_steps": 7460,
71
+ "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 5,
73
+ "save_steps": 500,
74
+ "stateful_callbacks": {
75
+ "TrainerControl": {
76
+ "args": {
77
+ "should_epoch_stop": false,
78
+ "should_evaluate": false,
79
+ "should_log": false,
80
+ "should_save": true,
81
+ "should_training_stop": false
82
+ },
83
+ "attributes": {}
84
+ }
85
+ },
86
+ "total_flos": 4939068417285600.0,
87
+ "train_batch_size": 64,
88
+ "trial_name": null,
89
+ "trial_params": null
90
+ }
checkpoint-2984/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19c3d98db4c53395f5f0a6d8d40c72eabb1f4e33e60306d839111edd328b17c
3
+ size 5240
checkpoint-4476/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-4476/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6",
18
+ "7": "LABEL_7",
19
+ "8": "LABEL_8",
20
+ "9": "LABEL_9"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5,
31
+ "LABEL_6": 6,
32
+ "LABEL_7": 7,
33
+ "LABEL_8": 8,
34
+ "LABEL_9": 9
35
+ },
36
+ "layer_norm_eps": 1e-07,
37
+ "max_position_embeddings": 512,
38
+ "max_relative_positions": -1,
39
+ "model_type": "deberta-v2",
40
+ "norm_rel_ebd": "layer_norm",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 6,
43
+ "pad_token_id": 0,
44
+ "pooler_dropout": 0,
45
+ "pooler_hidden_act": "gelu",
46
+ "pooler_hidden_size": 768,
47
+ "pos_att_type": [
48
+ "p2c",
49
+ "c2p"
50
+ ],
51
+ "position_biased_input": false,
52
+ "position_buckets": 256,
53
+ "problem_type": "multi_label_classification",
54
+ "relative_attention": true,
55
+ "share_att_key": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.45.2",
58
+ "type_vocab_size": 0,
59
+ "vocab_size": 128100
60
+ }
checkpoint-4476/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a3e107d8f5dca555756fa40a7b59f9d23b97cb5201d48a9ca2cd9db77f4437
3
+ size 567623168
checkpoint-4476/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf95a23de11df469b5fb18212d5321fab15fe27b22be26d52a6688f5471d775e
3
+ size 348248442
checkpoint-4476/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9b13e86bcba94394bf12ff8abfa598710fc9c50d1845fe4f6e792d8df6573f7
3
+ size 14244
checkpoint-4476/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40ae8b796a630c2abc66d5298a9df375245f8f20f45a5830b71a71d3d3decc8
3
+ size 1064
checkpoint-4476/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-4476/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-4476/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4476/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-4476/trainer_state.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3326460123062134,
3
+ "best_model_checkpoint": "out/deberta-v3-small-bkl-multi/checkpoint-4476",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4476,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.3351206434316354,
13
+ "grad_norm": 1.3047796487808228,
14
+ "learning_rate": 1.865951742627346e-05,
15
+ "loss": 0.4032,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.6702412868632708,
20
+ "grad_norm": 0.7828705310821533,
21
+ "learning_rate": 1.731903485254692e-05,
22
+ "loss": 0.3503,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.2743503772003353,
28
+ "eval_f1": 0.7807241322816955,
29
+ "eval_loss": 0.34355366230010986,
30
+ "eval_roc_auc": 0.8276824995087615,
31
+ "eval_runtime": 20.9327,
32
+ "eval_samples_per_second": 569.923,
33
+ "eval_steps_per_second": 8.933,
34
+ "step": 1492
35
+ },
36
+ {
37
+ "epoch": 1.0053619302949062,
38
+ "grad_norm": 1.0840567350387573,
39
+ "learning_rate": 1.5978552278820375e-05,
40
+ "loss": 0.3432,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 1.3404825737265416,
45
+ "grad_norm": 0.586756706237793,
46
+ "learning_rate": 1.4638069705093835e-05,
47
+ "loss": 0.3322,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 1.675603217158177,
52
+ "grad_norm": 0.6265957951545715,
53
+ "learning_rate": 1.3297587131367293e-05,
54
+ "loss": 0.3313,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 2.0,
59
+ "eval_accuracy": 0.28491198658843253,
60
+ "eval_f1": 0.7920844448532793,
61
+ "eval_loss": 0.3326815962791443,
62
+ "eval_roc_auc": 0.8374262996694112,
63
+ "eval_runtime": 20.987,
64
+ "eval_samples_per_second": 568.447,
65
+ "eval_steps_per_second": 8.91,
66
+ "step": 2984
67
+ },
68
+ {
69
+ "epoch": 2.0107238605898123,
70
+ "grad_norm": 0.7209903001785278,
71
+ "learning_rate": 1.1957104557640751e-05,
72
+ "loss": 0.3266,
73
+ "step": 3000
74
+ },
75
+ {
76
+ "epoch": 2.3458445040214477,
77
+ "grad_norm": 0.6551246643066406,
78
+ "learning_rate": 1.061662198391421e-05,
79
+ "loss": 0.3191,
80
+ "step": 3500
81
+ },
82
+ {
83
+ "epoch": 2.680965147453083,
84
+ "grad_norm": 0.9030967354774475,
85
+ "learning_rate": 9.276139410187667e-06,
86
+ "loss": 0.3199,
87
+ "step": 4000
88
+ },
89
+ {
90
+ "epoch": 3.0,
91
+ "eval_accuracy": 0.28499580888516346,
92
+ "eval_f1": 0.7939965223757665,
93
+ "eval_loss": 0.3326460123062134,
94
+ "eval_roc_auc": 0.8392196917667687,
95
+ "eval_runtime": 21.0081,
96
+ "eval_samples_per_second": 567.876,
97
+ "eval_steps_per_second": 8.901,
98
+ "step": 4476
99
+ }
100
+ ],
101
+ "logging_steps": 500,
102
+ "max_steps": 7460,
103
+ "num_input_tokens_seen": 0,
104
+ "num_train_epochs": 5,
105
+ "save_steps": 500,
106
+ "stateful_callbacks": {
107
+ "TrainerControl": {
108
+ "args": {
109
+ "should_epoch_stop": false,
110
+ "should_evaluate": false,
111
+ "should_log": false,
112
+ "should_save": true,
113
+ "should_training_stop": false
114
+ },
115
+ "attributes": {}
116
+ }
117
+ },
118
+ "total_flos": 7408602625928400.0,
119
+ "train_batch_size": 64,
120
+ "trial_name": null,
121
+ "trial_params": null
122
+ }
checkpoint-4476/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19c3d98db4c53395f5f0a6d8d40c72eabb1f4e33e60306d839111edd328b17c
3
+ size 5240
checkpoint-5968/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-5968/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6",
18
+ "7": "LABEL_7",
19
+ "8": "LABEL_8",
20
+ "9": "LABEL_9"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5,
31
+ "LABEL_6": 6,
32
+ "LABEL_7": 7,
33
+ "LABEL_8": 8,
34
+ "LABEL_9": 9
35
+ },
36
+ "layer_norm_eps": 1e-07,
37
+ "max_position_embeddings": 512,
38
+ "max_relative_positions": -1,
39
+ "model_type": "deberta-v2",
40
+ "norm_rel_ebd": "layer_norm",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 6,
43
+ "pad_token_id": 0,
44
+ "pooler_dropout": 0,
45
+ "pooler_hidden_act": "gelu",
46
+ "pooler_hidden_size": 768,
47
+ "pos_att_type": [
48
+ "p2c",
49
+ "c2p"
50
+ ],
51
+ "position_biased_input": false,
52
+ "position_buckets": 256,
53
+ "problem_type": "multi_label_classification",
54
+ "relative_attention": true,
55
+ "share_att_key": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.45.2",
58
+ "type_vocab_size": 0,
59
+ "vocab_size": 128100
60
+ }
checkpoint-5968/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510969a2a4d29d663348c7e910b7b5c62294f1f99e6b9d52c17fbe5dc90f7fe2
3
+ size 567623168
checkpoint-5968/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a37ba2e604535b7c3d643f509cba82a1ee88825d03e3aa8d4b9a0183e2c47b8
3
+ size 348248442
checkpoint-5968/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67e6c5945060618e21500affb098aac20a5128da9453fe56f55615834c573147
3
+ size 14244
checkpoint-5968/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad143173d23379f505622af31cae7e9b3adb721d50005d8e012fc6b41880d7b
3
+ size 1064
checkpoint-5968/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-5968/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-5968/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-5968/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-5968/trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3326460123062134,
3
+ "best_model_checkpoint": "out/deberta-v3-small-bkl-multi/checkpoint-4476",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5968,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.3351206434316354,
13
+ "grad_norm": 1.3047796487808228,
14
+ "learning_rate": 1.865951742627346e-05,
15
+ "loss": 0.4032,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.6702412868632708,
20
+ "grad_norm": 0.7828705310821533,
21
+ "learning_rate": 1.731903485254692e-05,
22
+ "loss": 0.3503,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.2743503772003353,
28
+ "eval_f1": 0.7807241322816955,
29
+ "eval_loss": 0.34355366230010986,
30
+ "eval_roc_auc": 0.8276824995087615,
31
+ "eval_runtime": 20.9327,
32
+ "eval_samples_per_second": 569.923,
33
+ "eval_steps_per_second": 8.933,
34
+ "step": 1492
35
+ },
36
+ {
37
+ "epoch": 1.0053619302949062,
38
+ "grad_norm": 1.0840567350387573,
39
+ "learning_rate": 1.5978552278820375e-05,
40
+ "loss": 0.3432,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 1.3404825737265416,
45
+ "grad_norm": 0.586756706237793,
46
+ "learning_rate": 1.4638069705093835e-05,
47
+ "loss": 0.3322,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 1.675603217158177,
52
+ "grad_norm": 0.6265957951545715,
53
+ "learning_rate": 1.3297587131367293e-05,
54
+ "loss": 0.3313,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 2.0,
59
+ "eval_accuracy": 0.28491198658843253,
60
+ "eval_f1": 0.7920844448532793,
61
+ "eval_loss": 0.3326815962791443,
62
+ "eval_roc_auc": 0.8374262996694112,
63
+ "eval_runtime": 20.987,
64
+ "eval_samples_per_second": 568.447,
65
+ "eval_steps_per_second": 8.91,
66
+ "step": 2984
67
+ },
68
+ {
69
+ "epoch": 2.0107238605898123,
70
+ "grad_norm": 0.7209903001785278,
71
+ "learning_rate": 1.1957104557640751e-05,
72
+ "loss": 0.3266,
73
+ "step": 3000
74
+ },
75
+ {
76
+ "epoch": 2.3458445040214477,
77
+ "grad_norm": 0.6551246643066406,
78
+ "learning_rate": 1.061662198391421e-05,
79
+ "loss": 0.3191,
80
+ "step": 3500
81
+ },
82
+ {
83
+ "epoch": 2.680965147453083,
84
+ "grad_norm": 0.9030967354774475,
85
+ "learning_rate": 9.276139410187667e-06,
86
+ "loss": 0.3199,
87
+ "step": 4000
88
+ },
89
+ {
90
+ "epoch": 3.0,
91
+ "eval_accuracy": 0.28499580888516346,
92
+ "eval_f1": 0.7939965223757665,
93
+ "eval_loss": 0.3326460123062134,
94
+ "eval_roc_auc": 0.8392196917667687,
95
+ "eval_runtime": 21.0081,
96
+ "eval_samples_per_second": 567.876,
97
+ "eval_steps_per_second": 8.901,
98
+ "step": 4476
99
+ },
100
+ {
101
+ "epoch": 3.0160857908847185,
102
+ "grad_norm": 1.185642123222351,
103
+ "learning_rate": 7.935656836461126e-06,
104
+ "loss": 0.3181,
105
+ "step": 4500
106
+ },
107
+ {
108
+ "epoch": 3.351206434316354,
109
+ "grad_norm": 0.6712960004806519,
110
+ "learning_rate": 6.595174262734585e-06,
111
+ "loss": 0.3117,
112
+ "step": 5000
113
+ },
114
+ {
115
+ "epoch": 3.6863270777479893,
116
+ "grad_norm": 0.9039814472198486,
117
+ "learning_rate": 5.254691689008043e-06,
118
+ "loss": 0.3103,
119
+ "step": 5500
120
+ },
121
+ {
122
+ "epoch": 4.0,
123
+ "eval_accuracy": 0.28767812238055324,
124
+ "eval_f1": 0.7908335467051512,
125
+ "eval_loss": 0.33351966738700867,
126
+ "eval_roc_auc": 0.8358048132071307,
127
+ "eval_runtime": 21.0091,
128
+ "eval_samples_per_second": 567.849,
129
+ "eval_steps_per_second": 8.901,
130
+ "step": 5968
131
+ }
132
+ ],
133
+ "logging_steps": 500,
134
+ "max_steps": 7460,
135
+ "num_input_tokens_seen": 0,
136
+ "num_train_epochs": 5,
137
+ "save_steps": 500,
138
+ "stateful_callbacks": {
139
+ "TrainerControl": {
140
+ "args": {
141
+ "should_epoch_stop": false,
142
+ "should_evaluate": false,
143
+ "should_log": false,
144
+ "should_save": true,
145
+ "should_training_stop": false
146
+ },
147
+ "attributes": {}
148
+ }
149
+ },
150
+ "total_flos": 9878136834571200.0,
151
+ "train_batch_size": 64,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
checkpoint-5968/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19c3d98db4c53395f5f0a6d8d40c72eabb1f4e33e60306d839111edd328b17c
3
+ size 5240
checkpoint-7460/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }