hZzy committed on
Commit
2573912
·
verified ·
1 Parent(s): 502a542

Model save

Browse files
README.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
+ tags:
5
+ - trl
6
+ - expo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: qwen2.5-0.5b-expo-DPO-ES-1000
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/eygej0eq)
17
+ # qwen2.5-0.5b-expo-DPO-ES-1000
18
+
19
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 6014.5977
22
+ - Logps: -88.1664
23
+ - Logits: -0.1249
24
+ - Objective: 6010.9189
25
+ - Dpo Loss: 3144.7844
26
+ - Regularize: 6010.9189
27
+ - Ranking Simple: 0.5217
28
+ - Ranking Idealized: 0.5212
29
+ - Ranking Idealized Expo: 0.5212
30
+ - Wo Beta: 14.2338
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 5e-06
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - distributed_type: multi-GPU
54
+ - num_devices: 3
55
+ - gradient_accumulation_steps: 12
56
+ - total_train_batch_size: 144
57
+ - total_eval_batch_size: 12
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: cosine
60
+ - lr_scheduler_warmup_ratio: 0.1
61
+ - num_epochs: 5
62
+
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Dpo Loss | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize | Wo Beta |
66
+ |:-------------:|:------:|:----:|:---------:|:-------:|:--------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|:-------:|
67
+ | 171.6127 | 0.1417 | 50 | 308.1877 | -1.4575 | -90.7970 | 309.7356 | 308.1877 | 0.5212 | 0.5212 | 0.5254 | 308.1877 | 7.7047 |
68
+ | 582.3149 | 0.2834 | 100 | 708.8133 | -1.3877 | -88.8040 | 733.8202 | 708.8133 | 0.5212 | 0.5212 | 0.5285 | 708.8133 | 7.4775 |
69
+ | 1002.3808 | 0.4251 | 150 | 1245.7263 | -1.3138 | -83.6606 | 1283.0697 | 1245.7263 | 0.5212 | 0.5212 | 0.5311 | 1245.7263 | 7.3632 |
70
+ | 1199.7266 | 0.5668 | 200 | 1471.0287 | -1.2584 | -79.8249 | 1530.3123 | 1471.0287 | 0.5212 | 0.5212 | 0.5347 | 1471.0287 | 7.2330 |
71
+ | 1311.3106 | 0.7085 | 250 | 1842.3601 | -1.1799 | -78.5750 | 1873.1123 | 1842.3601 | 0.5212 | 0.5212 | 0.5347 | 1842.3601 | 7.2046 |
72
+ | 1216.5524 | 0.8503 | 300 | 1949.1084 | -1.0463 | -80.6875 | 2001.6104 | 1949.1084 | 0.5212 | 0.5212 | 0.5326 | 1949.1084 | 6.9438 |
73
+ | 1157.2415 | 0.9920 | 350 | 1956.4012 | -0.8782 | -79.7493 | 2064.3220 | 1956.4012 | 0.5212 | 0.5212 | 0.5440 | 1956.4012 | 7.0169 |
74
+ | 721.9005 | 1.1337 | 400 | 2228.8811 | -0.5703 | -80.2022 | 2276.4189 | 2228.8811 | 0.5212 | 0.5212 | 0.5404 | 2228.8811 | 7.2480 |
75
+ | 779.6797 | 1.2754 | 450 | 2016.3281 | -0.7091 | -78.4054 | 2069.4939 | 2016.3281 | 0.5212 | 0.5212 | 0.5367 | 2016.3281 | 6.8242 |
76
+ | 788.48 | 1.4171 | 500 | 2044.0745 | -0.6659 | -81.9827 | 2120.1182 | 2044.0745 | 0.5212 | 0.5212 | 0.5342 | 2044.0745 | 6.8667 |
77
+ | 684.4246 | 1.5588 | 550 | 2053.8372 | -0.6751 | -81.6376 | 2148.1580 | 2053.8372 | 0.5212 | 0.5212 | 0.5342 | 2053.8372 | 6.7901 |
78
+ | 708.5259 | 1.7005 | 600 | 2071.1946 | -0.5595 | -79.4462 | 2179.6001 | 2071.1946 | 0.5212 | 0.5212 | 0.5362 | 2071.1946 | 6.6511 |
79
+ | 690.9902 | 1.8422 | 650 | 2158.4885 | -0.5552 | -80.5108 | 2241.3740 | 2158.4885 | 0.5212 | 0.5212 | 0.5414 | 2158.4885 | 6.7740 |
80
+ | 617.6108 | 1.9839 | 700 | 2132.2517 | -0.5079 | -80.3825 | 2230.5115 | 2132.2517 | 0.5212 | 0.5212 | 0.5404 | 2132.2517 | 6.7954 |
81
+ | 343.0455 | 2.1256 | 750 | 2123.3604 | -0.5398 | -81.3539 | 2199.7175 | 2123.3604 | 0.5212 | 0.5212 | 0.5430 | 2123.3604 | 6.7578 |
82
+ | 311.7518 | 2.2674 | 800 | 2038.6656 | -0.5497 | -80.2739 | 2139.7871 | 2038.6656 | 0.5212 | 0.5212 | 0.5378 | 2038.6656 | 6.6768 |
83
+ | 315.5968 | 2.4091 | 850 | 2184.7112 | -0.5282 | -83.3843 | 2249.3201 | 2184.7112 | 0.5212 | 0.5212 | 0.5404 | 2184.7112 | 6.8072 |
84
+ | 6263.3387 | 2.5555 | 900 | 3261.5381 | -0.1035 | -90.6956 | 6365.9199 | 6410.7383 | 0.5212 | 0.5212 | 0.5248 | 6410.7383 | 14.3919 |
85
+ | 4964.9731 | 2.6972 | 950 | 3203.6243 | -0.1541 | -88.8726 | 6126.6899 | 6172.9790 | 0.5212 | 0.5212 | 0.5259 | 6172.9790 | 14.2868 |
86
+ | 4278.7487 | 2.8389 | 1000 | 3153.0500 | -0.0946 | -87.8890 | 6092.5342 | 6127.7734 | 0.5212 | 0.5212 | 0.5243 | 6127.7734 | 14.2488 |
87
+ | 3830.4475 | 2.9806 | 1050 | 3127.4956 | -0.1365 | -86.9566 | 6040.4917 | 6063.3457 | 0.5212 | 0.5212 | 0.5233 | 6063.3457 | 14.0310 |
88
+ | 3079.7456 | 3.1223 | 1100 | 3144.7844 | -0.1249 | -88.1664 | 6014.5977 | 6010.9189 | 0.5212 | 0.5212 | 0.5217 | 6010.9189 | 14.2338 |
89
+
90
+
91
+ ### Framework versions
92
+
93
+ - Transformers 4.42.0
94
+ - Pytorch 2.3.0+cu121
95
+ - Datasets 2.19.1
96
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.122342938119981,
3
+ "total_flos": 0.0,
4
+ "train_loss": 1018.9660795454546,
5
+ "train_runtime": 6860.7986,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 37.023,
8
+ "train_steps_per_second": 0.257
9
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151644,
3
+ "eos_token_id": 151645,
4
+ "max_new_tokens": 2048,
5
+ "pad_token_id": 151645,
6
+ "transformers_version": "4.42.0"
7
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1adec51cd81fdf81d1b17b1769c55f476a06dedf7a843b4703fe505dda2658
3
  size 1975192208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538f5225c8ef7b6a48194701dd9cb305ed09178be0f54e336e1fcc55f02af1cb
3
  size 1975192208
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.122342938119981,
3
+ "total_flos": 0.0,
4
+ "train_loss": 1018.9660795454546,
5
+ "train_runtime": 6860.7986,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 37.023,
8
+ "train_steps_per_second": 0.257
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,793 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.651080131530762,
3
+ "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-1000/checkpoint-600",
4
+ "epoch": 3.122342938119981,
5
+ "eval_steps": 50,
6
+ "global_step": 1100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "dpo_loss": 0.6931471824645996,
13
+ "epoch": 0.002834199338686821,
14
+ "grad_norm": 184425.11607762048,
15
+ "learning_rate": 2.840909090909091e-08,
16
+ "logits": -1.359458565711975,
17
+ "logps": -84.69721221923828,
18
+ "loss": 0.6931,
19
+ "objective": 0.6931471824645996,
20
+ "ranking_idealized": 0.5833333134651184,
21
+ "ranking_idealized_expo": 0.5833333134651184,
22
+ "ranking_simple": 0.5833333134651184,
23
+ "regularize": 0.6931471824645996,
24
+ "step": 1,
25
+ "wo_beta": 5.271125316619873
26
+ },
27
+ {
28
+ "dpo_loss": 164.56210327148438,
29
+ "epoch": 0.14170996693434104,
30
+ "grad_norm": 220470.08226005477,
31
+ "learning_rate": 1.4204545454545458e-06,
32
+ "logits": -1.4481998682022095,
33
+ "logps": -84.27718353271484,
34
+ "loss": 171.6127,
35
+ "objective": 164.56210327148438,
36
+ "ranking_idealized": 0.5216836929321289,
37
+ "ranking_idealized_expo": 0.5216836929321289,
38
+ "ranking_simple": 0.521258533000946,
39
+ "regularize": 164.56210327148438,
40
+ "step": 50,
41
+ "wo_beta": 7.096639156341553
42
+ },
43
+ {
44
+ "epoch": 0.14170996693434104,
45
+ "eval_dpo_loss": 308.1877136230469,
46
+ "eval_logits": -1.4575201272964478,
47
+ "eval_logps": -90.79700469970703,
48
+ "eval_loss": 309.735595703125,
49
+ "eval_objective": 308.1877136230469,
50
+ "eval_ranking_idealized": 0.5212215185165405,
51
+ "eval_ranking_idealized_expo": 0.5212215185165405,
52
+ "eval_ranking_simple": 0.5253623127937317,
53
+ "eval_regularize": 308.1877136230469,
54
+ "eval_runtime": 308.9992,
55
+ "eval_samples_per_second": 18.738,
56
+ "eval_steps_per_second": 1.563,
57
+ "eval_wo_beta": 7.70471715927124,
58
+ "step": 50
59
+ },
60
+ {
61
+ "dpo_loss": 567.9904174804688,
62
+ "epoch": 0.2834199338686821,
63
+ "grad_norm": 197852.1649626301,
64
+ "learning_rate": 2.8409090909090916e-06,
65
+ "logits": -1.4372496604919434,
66
+ "logps": -82.37940216064453,
67
+ "loss": 582.3149,
68
+ "objective": 567.9904174804688,
69
+ "ranking_idealized": 0.5137500166893005,
70
+ "ranking_idealized_expo": 0.5137500166893005,
71
+ "ranking_simple": 0.5429166555404663,
72
+ "regularize": 567.9904174804688,
73
+ "step": 100,
74
+ "wo_beta": 6.490437030792236
75
+ },
76
+ {
77
+ "epoch": 0.2834199338686821,
78
+ "eval_dpo_loss": 708.8132934570312,
79
+ "eval_logits": -1.3877463340759277,
80
+ "eval_logps": -88.8039779663086,
81
+ "eval_loss": 733.8201904296875,
82
+ "eval_objective": 708.8132934570312,
83
+ "eval_ranking_idealized": 0.5212215185165405,
84
+ "eval_ranking_idealized_expo": 0.5212215185165405,
85
+ "eval_ranking_simple": 0.5284678936004639,
86
+ "eval_regularize": 708.8132934570312,
87
+ "eval_runtime": 308.1307,
88
+ "eval_samples_per_second": 18.791,
89
+ "eval_steps_per_second": 1.568,
90
+ "eval_wo_beta": 7.477492332458496,
91
+ "step": 100
92
+ },
93
+ {
94
+ "dpo_loss": 988.060302734375,
95
+ "epoch": 0.42512990080302315,
96
+ "grad_norm": 157694.24240229145,
97
+ "learning_rate": 4.2613636363636365e-06,
98
+ "logits": -1.3607189655303955,
99
+ "logps": -79.60610961914062,
100
+ "loss": 1002.3808,
101
+ "objective": 988.060302734375,
102
+ "ranking_idealized": 0.527916669845581,
103
+ "ranking_idealized_expo": 0.527916669845581,
104
+ "ranking_simple": 0.5629166960716248,
105
+ "regularize": 988.060302734375,
106
+ "step": 150,
107
+ "wo_beta": 6.3590989112854
108
+ },
109
+ {
110
+ "epoch": 0.42512990080302315,
111
+ "eval_dpo_loss": 1245.726318359375,
112
+ "eval_logits": -1.3138219118118286,
113
+ "eval_logps": -83.66063690185547,
114
+ "eval_loss": 1283.0697021484375,
115
+ "eval_objective": 1245.726318359375,
116
+ "eval_ranking_idealized": 0.5212215185165405,
117
+ "eval_ranking_idealized_expo": 0.5212215185165405,
118
+ "eval_ranking_simple": 0.5310559272766113,
119
+ "eval_regularize": 1245.726318359375,
120
+ "eval_runtime": 308.3631,
121
+ "eval_samples_per_second": 18.777,
122
+ "eval_steps_per_second": 1.566,
123
+ "eval_wo_beta": 7.363152503967285,
124
+ "step": 150
125
+ },
126
+ {
127
+ "dpo_loss": 1228.03515625,
128
+ "epoch": 0.5668398677373642,
129
+ "grad_norm": 135260.80669895583,
130
+ "learning_rate": 4.997168347957521e-06,
131
+ "logits": -1.2575763463974,
132
+ "logps": -76.39550018310547,
133
+ "loss": 1199.7266,
134
+ "objective": 1228.03515625,
135
+ "ranking_idealized": 0.51541668176651,
136
+ "ranking_idealized_expo": 0.51541668176651,
137
+ "ranking_simple": 0.5520833134651184,
138
+ "regularize": 1228.03515625,
139
+ "step": 200,
140
+ "wo_beta": 6.235730171203613
141
+ },
142
+ {
143
+ "epoch": 0.5668398677373642,
144
+ "eval_dpo_loss": 1471.0286865234375,
145
+ "eval_logits": -1.2583951950073242,
146
+ "eval_logps": -79.82491302490234,
147
+ "eval_loss": 1530.312255859375,
148
+ "eval_objective": 1471.0286865234375,
149
+ "eval_ranking_idealized": 0.5212215185165405,
150
+ "eval_ranking_idealized_expo": 0.5212215185165405,
151
+ "eval_ranking_simple": 0.534679114818573,
152
+ "eval_regularize": 1471.0286865234375,
153
+ "eval_runtime": 311.5857,
154
+ "eval_samples_per_second": 18.582,
155
+ "eval_steps_per_second": 1.55,
156
+ "eval_wo_beta": 7.23297643661499,
157
+ "step": 200
158
+ },
159
+ {
160
+ "dpo_loss": 1277.0172119140625,
161
+ "epoch": 0.7085498346717053,
162
+ "grad_norm": 126264.1949530397,
163
+ "learning_rate": 4.973122855144066e-06,
164
+ "logits": -1.0750112533569336,
165
+ "logps": -75.37919616699219,
166
+ "loss": 1311.3106,
167
+ "objective": 1277.0172119140625,
168
+ "ranking_idealized": 0.5162500143051147,
169
+ "ranking_idealized_expo": 0.5162500143051147,
170
+ "ranking_simple": 0.5754166841506958,
171
+ "regularize": 1277.0172119140625,
172
+ "step": 250,
173
+ "wo_beta": 6.227660179138184
174
+ },
175
+ {
176
+ "epoch": 0.7085498346717053,
177
+ "eval_dpo_loss": 1842.360107421875,
178
+ "eval_logits": -1.1798591613769531,
179
+ "eval_logps": -78.57501220703125,
180
+ "eval_loss": 1873.1123046875,
181
+ "eval_objective": 1842.360107421875,
182
+ "eval_ranking_idealized": 0.5212215185165405,
183
+ "eval_ranking_idealized_expo": 0.5212215185165405,
184
+ "eval_ranking_simple": 0.534679114818573,
185
+ "eval_regularize": 1842.360107421875,
186
+ "eval_runtime": 308.1159,
187
+ "eval_samples_per_second": 18.792,
188
+ "eval_steps_per_second": 1.568,
189
+ "eval_wo_beta": 7.204605579376221,
190
+ "step": 250
191
+ },
192
+ {
193
+ "dpo_loss": 1138.1712646484375,
194
+ "epoch": 0.8502598016060463,
195
+ "grad_norm": 91406.60871461965,
196
+ "learning_rate": 4.924776641419513e-06,
197
+ "logits": -1.083847999572754,
198
+ "logps": -76.23174285888672,
199
+ "loss": 1216.5524,
200
+ "objective": 1138.1712646484375,
201
+ "ranking_idealized": 0.4950000047683716,
202
+ "ranking_idealized_expo": 0.4950000047683716,
203
+ "ranking_simple": 0.5774999856948853,
204
+ "regularize": 1138.1712646484375,
205
+ "step": 300,
206
+ "wo_beta": 6.088484287261963
207
+ },
208
+ {
209
+ "epoch": 0.8502598016060463,
210
+ "eval_dpo_loss": 1949.1083984375,
211
+ "eval_logits": -1.0463101863861084,
212
+ "eval_logps": -80.68748474121094,
213
+ "eval_loss": 2001.6103515625,
214
+ "eval_objective": 1949.1083984375,
215
+ "eval_ranking_idealized": 0.5212215185165405,
216
+ "eval_ranking_idealized_expo": 0.5212215185165405,
217
+ "eval_ranking_simple": 0.532608687877655,
218
+ "eval_regularize": 1949.1083984375,
219
+ "eval_runtime": 308.2132,
220
+ "eval_samples_per_second": 18.786,
221
+ "eval_steps_per_second": 1.567,
222
+ "eval_wo_beta": 6.943759918212891,
223
+ "step": 300
224
+ },
225
+ {
226
+ "dpo_loss": 1116.613525390625,
227
+ "epoch": 0.9919697685403873,
228
+ "grad_norm": 107657.35203851353,
229
+ "learning_rate": 4.8526047530778175e-06,
230
+ "logits": -0.8322110176086426,
231
+ "logps": -76.44075775146484,
232
+ "loss": 1157.2415,
233
+ "objective": 1116.613525390625,
234
+ "ranking_idealized": 0.5254166722297668,
235
+ "ranking_idealized_expo": 0.5254166722297668,
236
+ "ranking_simple": 0.6041666865348816,
237
+ "regularize": 1116.613525390625,
238
+ "step": 350,
239
+ "wo_beta": 5.487993240356445
240
+ },
241
+ {
242
+ "epoch": 0.9919697685403873,
243
+ "eval_dpo_loss": 1956.4012451171875,
244
+ "eval_logits": -0.8782438635826111,
245
+ "eval_logps": -79.74929809570312,
246
+ "eval_loss": 2064.322021484375,
247
+ "eval_objective": 1956.4012451171875,
248
+ "eval_ranking_idealized": 0.5212215185165405,
249
+ "eval_ranking_idealized_expo": 0.5212215185165405,
250
+ "eval_ranking_simple": 0.5439958572387695,
251
+ "eval_regularize": 1956.4012451171875,
252
+ "eval_runtime": 308.4172,
253
+ "eval_samples_per_second": 18.773,
254
+ "eval_steps_per_second": 1.566,
255
+ "eval_wo_beta": 7.016916751861572,
256
+ "step": 350
257
+ },
258
+ {
259
+ "dpo_loss": 730.6525268554688,
260
+ "epoch": 1.1336797354747283,
261
+ "grad_norm": 85954.8176907515,
262
+ "learning_rate": 4.757316345716554e-06,
263
+ "logits": -0.6696025133132935,
264
+ "logps": -74.4837875366211,
265
+ "loss": 721.9005,
266
+ "objective": 730.6525268554688,
267
+ "ranking_idealized": 0.5320833325386047,
268
+ "ranking_idealized_expo": 0.5320833325386047,
269
+ "ranking_simple": 0.6329166889190674,
270
+ "regularize": 730.6525268554688,
271
+ "step": 400,
272
+ "wo_beta": 4.743136882781982
273
+ },
274
+ {
275
+ "epoch": 1.1336797354747283,
276
+ "eval_dpo_loss": 2228.881103515625,
277
+ "eval_logits": -0.5703257918357849,
278
+ "eval_logps": -80.20221710205078,
279
+ "eval_loss": 2276.4189453125,
280
+ "eval_objective": 2228.881103515625,
281
+ "eval_ranking_idealized": 0.5212215185165405,
282
+ "eval_ranking_idealized_expo": 0.5212215185165405,
283
+ "eval_ranking_simple": 0.5403726696968079,
284
+ "eval_regularize": 2228.881103515625,
285
+ "eval_runtime": 308.9371,
286
+ "eval_samples_per_second": 18.742,
287
+ "eval_steps_per_second": 1.563,
288
+ "eval_wo_beta": 7.247954845428467,
289
+ "step": 400
290
+ },
291
+ {
292
+ "dpo_loss": 807.7144775390625,
293
+ "epoch": 1.2753897024090695,
294
+ "grad_norm": 87253.50953833942,
295
+ "learning_rate": 4.639847716126855e-06,
296
+ "logits": -0.5064049959182739,
297
+ "logps": -74.17053985595703,
298
+ "loss": 779.6797,
299
+ "objective": 807.7144775390625,
300
+ "ranking_idealized": 0.5191666483879089,
301
+ "ranking_idealized_expo": 0.5191666483879089,
302
+ "ranking_simple": 0.6362500190734863,
303
+ "regularize": 807.7144775390625,
304
+ "step": 450,
305
+ "wo_beta": 5.3411478996276855
306
+ },
307
+ {
308
+ "epoch": 1.2753897024090695,
309
+ "eval_dpo_loss": 2016.328125,
310
+ "eval_logits": -0.709108293056488,
311
+ "eval_logps": -78.40538787841797,
312
+ "eval_loss": 2069.493896484375,
313
+ "eval_objective": 2016.328125,
314
+ "eval_ranking_idealized": 0.5212215185165405,
315
+ "eval_ranking_idealized_expo": 0.5212215185165405,
316
+ "eval_ranking_simple": 0.5367494821548462,
317
+ "eval_regularize": 2016.328125,
318
+ "eval_runtime": 308.0644,
319
+ "eval_samples_per_second": 18.795,
320
+ "eval_steps_per_second": 1.568,
321
+ "eval_wo_beta": 6.824249744415283,
322
+ "step": 450
323
+ },
324
+ {
325
+ "dpo_loss": 769.7364501953125,
326
+ "epoch": 1.4170996693434104,
327
+ "grad_norm": 88303.87563999402,
328
+ "learning_rate": 4.501353102310901e-06,
329
+ "logits": -0.6304615139961243,
330
+ "logps": -75.90121459960938,
331
+ "loss": 788.48,
332
+ "objective": 769.7364501953125,
333
+ "ranking_idealized": 0.4970833361148834,
334
+ "ranking_idealized_expo": 0.4970833361148834,
335
+ "ranking_simple": 0.6183333396911621,
336
+ "regularize": 769.7364501953125,
337
+ "step": 500,
338
+ "wo_beta": 5.177425861358643
339
+ },
340
+ {
341
+ "epoch": 1.4170996693434104,
342
+ "eval_dpo_loss": 2044.074462890625,
343
+ "eval_logits": -0.6658820509910583,
344
+ "eval_logps": -81.98267364501953,
345
+ "eval_loss": 2120.1181640625,
346
+ "eval_objective": 2044.074462890625,
347
+ "eval_ranking_idealized": 0.5212215185165405,
348
+ "eval_ranking_idealized_expo": 0.5212215185165405,
349
+ "eval_ranking_simple": 0.5341615080833435,
350
+ "eval_regularize": 2044.074462890625,
351
+ "eval_runtime": 308.3125,
352
+ "eval_samples_per_second": 18.78,
353
+ "eval_steps_per_second": 1.567,
354
+ "eval_wo_beta": 6.866703033447266,
355
+ "step": 500
356
+ },
357
+ {
358
+ "dpo_loss": 711.3790893554688,
359
+ "epoch": 1.5588096362777515,
360
+ "grad_norm": 81196.09184670768,
361
+ "learning_rate": 4.34319334202531e-06,
362
+ "logits": -0.6250764727592468,
363
+ "logps": -76.77790832519531,
364
+ "loss": 684.4246,
365
+ "objective": 711.3790893554688,
366
+ "ranking_idealized": 0.5104166865348816,
367
+ "ranking_idealized_expo": 0.5104166865348816,
368
+ "ranking_simple": 0.6399999856948853,
369
+ "regularize": 711.3790893554688,
370
+ "step": 550,
371
+ "wo_beta": 4.714800834655762
372
+ },
373
+ {
374
+ "epoch": 1.5588096362777515,
375
+ "eval_dpo_loss": 2053.837158203125,
376
+ "eval_logits": -0.6751150488853455,
377
+ "eval_logps": -81.63760375976562,
378
+ "eval_loss": 2148.157958984375,
379
+ "eval_objective": 2053.837158203125,
380
+ "eval_ranking_idealized": 0.5212215185165405,
381
+ "eval_ranking_idealized_expo": 0.5212215185165405,
382
+ "eval_ranking_simple": 0.5341615080833435,
383
+ "eval_regularize": 2053.837158203125,
384
+ "eval_runtime": 310.078,
385
+ "eval_samples_per_second": 18.673,
386
+ "eval_steps_per_second": 1.558,
387
+ "eval_wo_beta": 6.790140151977539,
388
+ "step": 550
389
+ },
390
+ {
391
+ "dpo_loss": 692.1633911132812,
392
+ "epoch": 1.7005196032120926,
393
+ "grad_norm": 81536.59548809296,
394
+ "learning_rate": 4.16692250129073e-06,
395
+ "logits": -0.5091446042060852,
396
+ "logps": -76.25757598876953,
397
+ "loss": 708.5259,
398
+ "objective": 692.1633911132812,
399
+ "ranking_idealized": 0.5149999856948853,
400
+ "ranking_idealized_expo": 0.5149999856948853,
401
+ "ranking_simple": 0.628333330154419,
402
+ "regularize": 692.1633911132812,
403
+ "step": 600,
404
+ "wo_beta": 4.881653785705566
405
+ },
406
+ {
407
+ "epoch": 1.7005196032120926,
408
+ "eval_dpo_loss": 2071.194580078125,
409
+ "eval_logits": -0.5595079660415649,
410
+ "eval_logps": -79.44622039794922,
411
+ "eval_loss": 2179.60009765625,
412
+ "eval_objective": 2071.194580078125,
413
+ "eval_ranking_idealized": 0.5212215185165405,
414
+ "eval_ranking_idealized_expo": 0.5212215185165405,
415
+ "eval_ranking_simple": 0.5362318754196167,
416
+ "eval_regularize": 2071.194580078125,
417
+ "eval_runtime": 310.8858,
418
+ "eval_samples_per_second": 18.624,
419
+ "eval_steps_per_second": 1.554,
420
+ "eval_wo_beta": 6.651080131530762,
421
+ "step": 600
422
+ },
423
+ {
424
+ "dpo_loss": 673.4611206054688,
425
+ "epoch": 1.8422295701464337,
426
+ "grad_norm": 86567.62941958332,
427
+ "learning_rate": 3.974272604254906e-06,
428
+ "logits": -0.48710063099861145,
429
+ "logps": -76.06396484375,
430
+ "loss": 690.9902,
431
+ "objective": 673.4611206054688,
432
+ "ranking_idealized": 0.527916669845581,
433
+ "ranking_idealized_expo": 0.527916669845581,
434
+ "ranking_simple": 0.6387500166893005,
435
+ "regularize": 673.4611206054688,
436
+ "step": 650,
437
+ "wo_beta": 5.110637187957764
438
+ },
439
+ {
440
+ "epoch": 1.8422295701464337,
441
+ "eval_dpo_loss": 2158.488525390625,
442
+ "eval_logits": -0.5552332997322083,
443
+ "eval_logps": -80.51081085205078,
444
+ "eval_loss": 2241.3740234375,
445
+ "eval_objective": 2158.488525390625,
446
+ "eval_ranking_idealized": 0.5212215185165405,
447
+ "eval_ranking_idealized_expo": 0.5212215185165405,
448
+ "eval_ranking_simple": 0.5414078831672668,
449
+ "eval_regularize": 2158.488525390625,
450
+ "eval_runtime": 308.5347,
451
+ "eval_samples_per_second": 18.766,
452
+ "eval_steps_per_second": 1.565,
453
+ "eval_wo_beta": 6.7740159034729,
454
+ "step": 650
455
+ },
456
+ {
457
+ "dpo_loss": 586.7876586914062,
458
+ "epoch": 1.9839395370807746,
459
+ "grad_norm": 79003.551065143,
460
+ "learning_rate": 3.767136614452458e-06,
461
+ "logits": -0.3871801495552063,
462
+ "logps": -76.267578125,
463
+ "loss": 617.6108,
464
+ "objective": 586.7876586914062,
465
+ "ranking_idealized": 0.5108333230018616,
466
+ "ranking_idealized_expo": 0.5108333230018616,
467
+ "ranking_simple": 0.6416666507720947,
468
+ "regularize": 586.7876586914062,
469
+ "step": 700,
470
+ "wo_beta": 4.860604286193848
471
+ },
472
+ {
473
+ "epoch": 1.9839395370807746,
474
+ "eval_dpo_loss": 2132.251708984375,
475
+ "eval_logits": -0.5078864097595215,
476
+ "eval_logps": -80.38253784179688,
477
+ "eval_loss": 2230.511474609375,
478
+ "eval_objective": 2132.251708984375,
479
+ "eval_ranking_idealized": 0.5212215185165405,
480
+ "eval_ranking_idealized_expo": 0.5212215185165405,
481
+ "eval_ranking_simple": 0.5403726696968079,
482
+ "eval_regularize": 2132.251708984375,
483
+ "eval_runtime": 309.2277,
484
+ "eval_samples_per_second": 18.724,
485
+ "eval_steps_per_second": 1.562,
486
+ "eval_wo_beta": 6.795376777648926,
487
+ "step": 700
488
+ },
489
+ {
490
+ "dpo_loss": 347.239990234375,
491
+ "epoch": 2.1256495040151155,
492
+ "grad_norm": 48670.94258805941,
493
+ "learning_rate": 3.547549834686222e-06,
494
+ "logits": -0.3425801694393158,
495
+ "logps": -76.91646575927734,
496
+ "loss": 343.0455,
497
+ "objective": 347.239990234375,
498
+ "ranking_idealized": 0.5112500190734863,
499
+ "ranking_idealized_expo": 0.5112500190734863,
500
+ "ranking_simple": 0.6691666841506958,
501
+ "regularize": 347.239990234375,
502
+ "step": 750,
503
+ "wo_beta": 4.557406902313232
504
+ },
505
+ {
506
+ "epoch": 2.1256495040151155,
507
+ "eval_dpo_loss": 2123.3603515625,
508
+ "eval_logits": -0.5397638082504272,
509
+ "eval_logps": -81.35387420654297,
510
+ "eval_loss": 2199.717529296875,
511
+ "eval_objective": 2123.3603515625,
512
+ "eval_ranking_idealized": 0.5212215185165405,
513
+ "eval_ranking_idealized_expo": 0.5212215185165405,
514
+ "eval_ranking_simple": 0.5429606437683105,
515
+ "eval_regularize": 2123.3603515625,
516
+ "eval_runtime": 308.3199,
517
+ "eval_samples_per_second": 18.779,
518
+ "eval_steps_per_second": 1.567,
519
+ "eval_wo_beta": 6.757833003997803,
520
+ "step": 750
521
+ },
522
+ {
523
+ "dpo_loss": 317.3254089355469,
524
+ "epoch": 2.2673594709494567,
525
+ "grad_norm": 42462.56905768485,
526
+ "learning_rate": 3.3176699082935546e-06,
527
+ "logits": -0.47370415925979614,
528
+ "logps": -76.58859252929688,
529
+ "loss": 311.7518,
530
+ "objective": 317.3254089355469,
531
+ "ranking_idealized": 0.512499988079071,
532
+ "ranking_idealized_expo": 0.512499988079071,
533
+ "ranking_simple": 0.6679166555404663,
534
+ "regularize": 317.3254089355469,
535
+ "step": 800,
536
+ "wo_beta": 4.156112194061279
537
+ },
538
+ {
539
+ "epoch": 2.2673594709494567,
540
+ "eval_dpo_loss": 2038.6656494140625,
541
+ "eval_logits": -0.5496692657470703,
542
+ "eval_logps": -80.27388763427734,
543
+ "eval_loss": 2139.787109375,
544
+ "eval_objective": 2038.6656494140625,
545
+ "eval_ranking_idealized": 0.5212215185165405,
546
+ "eval_ranking_idealized_expo": 0.5212215185165405,
547
+ "eval_ranking_simple": 0.5377846956253052,
548
+ "eval_regularize": 2038.6656494140625,
549
+ "eval_runtime": 308.3672,
550
+ "eval_samples_per_second": 18.776,
551
+ "eval_steps_per_second": 1.566,
552
+ "eval_wo_beta": 6.676809310913086,
553
+ "step": 800
554
+ },
555
+ {
556
+ "dpo_loss": 342.0184020996094,
557
+ "epoch": 2.409069437883798,
558
+ "grad_norm": 50353.55073679767,
559
+ "learning_rate": 3.0797556183036582e-06,
560
+ "logits": -0.47750580310821533,
561
+ "logps": -78.6015853881836,
562
+ "loss": 315.5968,
563
+ "objective": 342.0184020996094,
564
+ "ranking_idealized": 0.5133333206176758,
565
+ "ranking_idealized_expo": 0.5133333206176758,
566
+ "ranking_simple": 0.6566666960716248,
567
+ "regularize": 342.0184020996094,
568
+ "step": 850,
569
+ "wo_beta": 4.543591499328613
570
+ },
571
+ {
572
+ "epoch": 2.409069437883798,
573
+ "eval_dpo_loss": 2184.711181640625,
574
+ "eval_logits": -0.5281842350959778,
575
+ "eval_logps": -83.38427734375,
576
+ "eval_loss": 2249.320068359375,
577
+ "eval_objective": 2184.711181640625,
578
+ "eval_ranking_idealized": 0.5212215185165405,
579
+ "eval_ranking_idealized_expo": 0.5212215185165405,
580
+ "eval_ranking_simple": 0.5403726696968079,
581
+ "eval_regularize": 2184.711181640625,
582
+ "eval_runtime": 308.9348,
583
+ "eval_samples_per_second": 18.742,
584
+ "eval_steps_per_second": 1.563,
585
+ "eval_wo_beta": 6.807182312011719,
586
+ "step": 850
587
+ },
588
+ {
589
+ "dpo_loss": 1037.1005859375,
590
+ "epoch": 2.555503070382617,
591
+ "grad_norm": 148584.23094187575,
592
+ "learning_rate": 2.8361446928038298e-06,
593
+ "logits": -0.11181627959012985,
594
+ "logps": -87.09500885009766,
595
+ "loss": 6263.3387,
596
+ "objective": 6087.2216796875,
597
+ "ranking_idealized": 0.5183333158493042,
598
+ "ranking_idealized_expo": 0.5179166793823242,
599
+ "ranking_simple": 0.5941666960716248,
600
+ "regularize": 6087.2216796875,
601
+ "step": 900,
602
+ "wo_beta": 15.719075202941895
603
+ },
604
+ {
605
+ "epoch": 2.555503070382617,
606
+ "eval_dpo_loss": 3261.5380859375,
607
+ "eval_logits": -0.10349252820014954,
608
+ "eval_logps": -90.69556427001953,
609
+ "eval_loss": 6365.919921875,
610
+ "eval_objective": 6410.73828125,
611
+ "eval_ranking_idealized": 0.5212215185165405,
612
+ "eval_ranking_idealized_expo": 0.5212215185165405,
613
+ "eval_ranking_simple": 0.5248447060585022,
614
+ "eval_regularize": 6410.73828125,
615
+ "eval_runtime": 309.2116,
616
+ "eval_samples_per_second": 18.725,
617
+ "eval_steps_per_second": 1.562,
618
+ "eval_wo_beta": 14.391923904418945,
619
+ "step": 900
620
+ },
621
+ {
622
+ "dpo_loss": 1062.182861328125,
623
+ "epoch": 2.697213037316958,
624
+ "grad_norm": 142709.83683573626,
625
+ "learning_rate": 2.5892308345974517e-06,
626
+ "logits": -0.11892472952604294,
627
+ "logps": -86.6119613647461,
628
+ "loss": 4964.9731,
629
+ "objective": 4921.0400390625,
630
+ "ranking_idealized": 0.5045833587646484,
631
+ "ranking_idealized_expo": 0.5045833587646484,
632
+ "ranking_simple": 0.574999988079071,
633
+ "regularize": 4921.0400390625,
634
+ "step": 950,
635
+ "wo_beta": 14.98097038269043
636
+ },
637
+ {
638
+ "epoch": 2.697213037316958,
639
+ "eval_dpo_loss": 3203.624267578125,
640
+ "eval_logits": -0.15411485731601715,
641
+ "eval_logps": -88.87255096435547,
642
+ "eval_loss": 6126.68994140625,
643
+ "eval_objective": 6172.97900390625,
644
+ "eval_ranking_idealized": 0.5212215185165405,
645
+ "eval_ranking_idealized_expo": 0.5212215185165405,
646
+ "eval_ranking_simple": 0.5258799195289612,
647
+ "eval_regularize": 6172.97900390625,
648
+ "eval_runtime": 308.5864,
649
+ "eval_samples_per_second": 18.763,
650
+ "eval_steps_per_second": 1.565,
651
+ "eval_wo_beta": 14.286750793457031,
652
+ "step": 950
653
+ },
654
+ {
655
+ "dpo_loss": 1058.12353515625,
656
+ "epoch": 2.838923004251299,
657
+ "grad_norm": 141320.36114480646,
658
+ "learning_rate": 2.341440200858589e-06,
659
+ "logits": -0.06014474108815193,
660
+ "logps": -84.71208190917969,
661
+ "loss": 4278.7487,
662
+ "objective": 4290.322265625,
663
+ "ranking_idealized": 0.5112500190734863,
664
+ "ranking_idealized_expo": 0.5112500190734863,
665
+ "ranking_simple": 0.559583306312561,
666
+ "regularize": 4290.322265625,
667
+ "step": 1000,
668
+ "wo_beta": 14.737447738647461
669
+ },
670
+ {
671
+ "epoch": 2.838923004251299,
672
+ "eval_dpo_loss": 3153.050048828125,
673
+ "eval_logits": -0.094576895236969,
674
+ "eval_logps": -87.88904571533203,
675
+ "eval_loss": 6092.5341796875,
676
+ "eval_objective": 6127.7734375,
677
+ "eval_ranking_idealized": 0.5212215185165405,
678
+ "eval_ranking_idealized_expo": 0.5212215185165405,
679
+ "eval_ranking_simple": 0.5243270993232727,
680
+ "eval_regularize": 6127.7734375,
681
+ "eval_runtime": 308.7636,
682
+ "eval_samples_per_second": 18.752,
683
+ "eval_steps_per_second": 1.564,
684
+ "eval_wo_beta": 14.248814582824707,
685
+ "step": 1000
686
+ },
687
+ {
688
+ "dpo_loss": 969.5340576171875,
689
+ "epoch": 2.9806329711856403,
690
+ "grad_norm": 145217.5279628283,
691
+ "learning_rate": 2.0952075638923656e-06,
692
+ "logits": -0.07964936643838882,
693
+ "logps": -84.64329528808594,
694
+ "loss": 3830.4475,
695
+ "objective": 3857.745361328125,
696
+ "ranking_idealized": 0.5174999833106995,
697
+ "ranking_idealized_expo": 0.5174999833106995,
698
+ "ranking_simple": 0.5695833563804626,
699
+ "regularize": 3857.745361328125,
700
+ "step": 1050,
701
+ "wo_beta": 15.0871000289917
702
+ },
703
+ {
704
+ "epoch": 2.9806329711856403,
705
+ "eval_dpo_loss": 3127.49560546875,
706
+ "eval_logits": -0.13649722933769226,
707
+ "eval_logps": -86.9566421508789,
708
+ "eval_loss": 6040.49169921875,
709
+ "eval_objective": 6063.345703125,
710
+ "eval_ranking_idealized": 0.5212215185165405,
711
+ "eval_ranking_idealized_expo": 0.5212215185165405,
712
+ "eval_ranking_simple": 0.5232919454574585,
713
+ "eval_regularize": 6063.345703125,
714
+ "eval_runtime": 308.411,
715
+ "eval_samples_per_second": 18.774,
716
+ "eval_steps_per_second": 1.566,
717
+ "eval_wo_beta": 14.031007766723633,
718
+ "step": 1050
719
+ },
720
+ {
721
+ "dpo_loss": 784.6331787109375,
722
+ "epoch": 3.122342938119981,
723
+ "grad_norm": 148785.0779157092,
724
+ "learning_rate": 1.852952387243698e-06,
725
+ "logits": -0.06228747218847275,
726
+ "logps": -85.15433502197266,
727
+ "loss": 3079.7456,
728
+ "objective": 3018.007080078125,
729
+ "ranking_idealized": 0.528333306312561,
730
+ "ranking_idealized_expo": 0.528333306312561,
731
+ "ranking_simple": 0.559166669845581,
732
+ "regularize": 3018.007080078125,
733
+ "step": 1100,
734
+ "wo_beta": 15.30273151397705
735
+ },
736
+ {
737
+ "epoch": 3.122342938119981,
738
+ "eval_dpo_loss": 3144.784423828125,
739
+ "eval_logits": -0.12487590312957764,
740
+ "eval_logps": -88.1663589477539,
741
+ "eval_loss": 6014.59765625,
742
+ "eval_objective": 6010.9189453125,
743
+ "eval_ranking_idealized": 0.5212215185165405,
744
+ "eval_ranking_idealized_expo": 0.5212215185165405,
745
+ "eval_ranking_simple": 0.52173912525177,
746
+ "eval_regularize": 6010.9189453125,
747
+ "eval_runtime": 308.9136,
748
+ "eval_samples_per_second": 18.743,
749
+ "eval_steps_per_second": 1.564,
750
+ "eval_wo_beta": 14.233796119689941,
751
+ "step": 1100
752
+ },
753
+ {
754
+ "epoch": 3.122342938119981,
755
+ "step": 1100,
756
+ "total_flos": 0.0,
757
+ "train_loss": 1018.9660795454546,
758
+ "train_runtime": 6860.7986,
759
+ "train_samples_per_second": 37.023,
760
+ "train_steps_per_second": 0.257
761
+ }
762
+ ],
763
+ "logging_steps": 50,
764
+ "max_steps": 1760,
765
+ "num_input_tokens_seen": 0,
766
+ "num_train_epochs": 5,
767
+ "save_steps": 50,
768
+ "stateful_callbacks": {
769
+ "EarlyStoppingCallback": {
770
+ "args": {
771
+ "early_stopping_patience": 5,
772
+ "early_stopping_threshold": 0.0
773
+ },
774
+ "attributes": {
775
+ "early_stopping_patience_counter": 0
776
+ }
777
+ },
778
+ "TrainerControl": {
779
+ "args": {
780
+ "should_epoch_stop": false,
781
+ "should_evaluate": false,
782
+ "should_log": false,
783
+ "should_save": true,
784
+ "should_training_stop": true
785
+ },
786
+ "attributes": {}
787
+ }
788
+ },
789
+ "total_flos": 0.0,
790
+ "train_batch_size": 4,
791
+ "trial_name": null,
792
+ "trial_params": null
793
+ }