hZzy committed
Commit 04e1e01
1 Parent(s): 52aec79

Model save
README.md ADDED
@@ -0,0 +1,90 @@
+ ---
+ license: apache-2.0
+ base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
+ tags:
+ - trl
+ - expo
+ - generated_from_trainer
+ model-index:
+ - name: qwen2.5-0.5b-expo-DPO-ES-10
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/8420vo52)
+ # qwen2.5-0.5b-expo-DPO-ES-10
+
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 21.3539
+ - Logps: -79.7115
+ - Logits: -0.5475
+ - Objective: 20.4532
+ - Dpo Loss: 20.4532
+ - Regularize: 20.4532
+ - Ranking Simple: 0.5362
+ - Ranking Idealized: 0.5212
+ - Ranking Idealized Expo: 0.5212
+ - Wo Beta: 6.6867
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 3
+ - gradient_accumulation_steps: 12
+ - total_train_batch_size: 144
+ - total_eval_batch_size: 12
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 5
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Logps | Logits | Objective | Dpo Loss | Regularize | Ranking Simple | Ranking Idealized | Ranking Idealized Expo | Wo Beta |
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:-------:|:---------:|:--------:|:----------:|:--------------:|:-----------------:|:----------------------:|:-------:|
+ | 2.0094 | 0.1417 | 50 | 3.1068 | -90.6242 | -1.4592 | 3.0980 | 3.0980 | 3.0980 | 0.5259 | 0.5212 | 0.5212 | 7.7179 |
+ | 5.9165 | 0.2834 | 100 | 7.1487 | -82.8335 | -1.4642 | 7.1399 | 7.1399 | 7.1399 | 0.5300 | 0.5212 | 0.5212 | 7.4498 |
+ | 9.9617 | 0.4251 | 150 | 11.8998 | -83.0745 | -1.3437 | 11.3536 | 11.3536 | 11.3536 | 0.5305 | 0.5212 | 0.5212 | 7.2609 |
+ | 12.4724 | 0.5668 | 200 | 17.0987 | -79.9360 | -1.3880 | 16.0617 | 16.0617 | 16.0617 | 0.5300 | 0.5212 | 0.5212 | 7.2290 |
+ | 13.2936 | 0.7085 | 250 | 18.5309 | -77.3150 | -1.3641 | 17.7971 | 17.7971 | 17.7971 | 0.5342 | 0.5212 | 0.5212 | 7.2078 |
+ | 11.5204 | 0.8503 | 300 | 19.4344 | -76.9798 | -0.9941 | 18.7017 | 18.7017 | 18.7017 | 0.5357 | 0.5212 | 0.5212 | 7.0136 |
+ | 11.3717 | 0.9920 | 350 | 20.3959 | -76.1623 | -1.0426 | 19.0398 | 19.0398 | 19.0398 | 0.5409 | 0.5212 | 0.5212 | 7.0261 |
+ | 7.0971 | 1.1337 | 400 | 21.9279 | -76.1458 | -0.6236 | 21.6902 | 21.6902 | 21.6902 | 0.5388 | 0.5212 | 0.5212 | 7.1227 |
+ | 7.5725 | 1.2754 | 450 | 20.9480 | -76.3924 | -0.8352 | 20.3853 | 20.3853 | 20.3853 | 0.5373 | 0.5212 | 0.5212 | 6.8500 |
+ | 7.6466 | 1.4171 | 500 | 20.9821 | -80.7806 | -0.7483 | 20.2651 | 20.2651 | 20.2651 | 0.5326 | 0.5212 | 0.5212 | 6.8824 |
+ | 6.9565 | 1.5588 | 550 | 21.3506 | -80.2051 | -0.6148 | 20.5661 | 20.5661 | 20.5661 | 0.5383 | 0.5212 | 0.5212 | 6.6513 |
+ | 6.7183 | 1.7005 | 600 | 21.1265 | -78.5344 | -0.6067 | 20.0027 | 20.0027 | 20.0027 | 0.5367 | 0.5212 | 0.5212 | 6.6768 |
+ | 6.9931 | 1.8422 | 650 | 22.2083 | -77.6509 | -0.5872 | 21.4455 | 21.4455 | 21.4455 | 0.5383 | 0.5212 | 0.5212 | 6.8190 |
+ | 6.1685 | 1.9839 | 700 | 22.3607 | -77.1493 | -0.5436 | 21.5512 | 21.5512 | 21.5512 | 0.5404 | 0.5212 | 0.5212 | 6.7299 |
+ | 3.4811 | 2.1256 | 750 | 21.8349 | -78.9312 | -0.7313 | 21.1379 | 21.1379 | 21.1379 | 0.5424 | 0.5212 | 0.5212 | 6.8213 |
+ | 3.3995 | 2.2674 | 800 | 21.3539 | -79.7115 | -0.5475 | 20.4532 | 20.4532 | 20.4532 | 0.5362 | 0.5212 | 0.5212 | 6.6867 |
+
+
+ ### Framework versions
+
+ - Transformers 4.42.0
+ - Pytorch 2.3.0+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
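
For reference, the hyperparameters listed in the README above map roughly onto the following `transformers` `TrainingArguments`. This is only a minimal sketch: the run itself presumably used a TRL-style DPO trainer (the card is tagged `trl`/`expo` and reports a DPO loss), and details such as the output path and the mixed-precision setting are assumptions, not taken from the card.

```python
from transformers import TrainingArguments

# Sketch of the listed hyperparameters only; the DPO-specific trainer and its
# extra options (beta, dataset columns, etc.) are not documented in this card.
args = TrainingArguments(
    output_dir="qwen2.5-0.5b-expo-DPO-ES-10",  # hypothetical output path
    learning_rate=5e-6,
    per_device_train_batch_size=4,    # "train_batch_size: 4" (per device)
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=12,
    num_train_epochs=5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    bf16=True,                        # assumption; precision is not listed above
)
```

The effective batch sizes in the card follow directly from these values: 4 per device × 3 devices × 12 accumulation steps = 144 for training, and 4 × 3 = 12 for evaluation.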
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 2.2673594709494567,
+   "total_flos": 0.0,
+   "train_loss": 7.659533626437187,
+   "train_runtime": 21973.9049,
+   "train_samples": 50802,
+   "train_samples_per_second": 11.56,
+   "train_steps_per_second": 0.08
+ }
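
The throughput figures above appear to be computed against the configured run length (5 epochs, 1,760 optimizer steps per `trainer_state.json` below) rather than the ~2.27 epochs actually completed before early stopping. A quick cross-check, using only numbers that appear in this commit:

```python
# Values from all_results.json and trainer_state.json in this commit.
train_samples = 50802
num_train_epochs = 5          # configured epochs, not the ~2.27 actually run
max_steps = 1760
train_runtime = 21973.9049    # seconds

print(train_samples * num_train_epochs / train_runtime)  # ~11.56 train_samples_per_second
print(max_steps / train_runtime)                          # ~0.08  train_steps_per_second
```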
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token_id": 151644,
+   "eos_token_id": 151645,
+   "max_new_tokens": 2048,
+   "pad_token_id": 151645,
+   "transformers_version": "4.42.0"
+ }
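
Assuming the checkpoint is published under the repo id implied by the model name, the settings in `generation_config.json` are picked up automatically by `from_pretrained`, so a plain `generate()` call defaults to the listed BOS/EOS/pad ids and `max_new_tokens=2048`. A minimal usage sketch (the repo id and the prompt are assumptions):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "hZzy/qwen2.5-0.5b-expo-DPO-ES-10"  # assumed from the model name above

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

# generation_config.json is loaded alongside the model, so generate() uses its defaults.
inputs = tokenizer("Write a short news headline about renewable energy.", return_tensors="pt")
output_ids = model.generate(**inputs)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```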
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:44c83eb7c514ba46addc0e0de66d0084a998b275070f4c85f90f3cf4193a4a9a
+ oid sha256:9de1680114e84356068a7b1e6938567a46728c700a4677ea6c5d0c0258a70537
  size 1975192208
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 2.2673594709494567,
+   "total_flos": 0.0,
+   "train_loss": 7.659533626437187,
+   "train_runtime": 21973.9049,
+   "train_samples": 50802,
+   "train_samples_per_second": 11.56,
+   "train_steps_per_second": 0.08
+ }
trainer_state.json ADDED
@@ -0,0 +1,595 @@
+ {
+   "best_metric": 6.651296138763428,
+   "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-10/checkpoint-550",
+   "epoch": 2.2673594709494567,
+   "eval_steps": 50,
+   "global_step": 800,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "dpo_loss": 0.6931471824645996,
+       "epoch": 0.002834199338686821,
+       "grad_norm": 1844.2532039401294,
+       "learning_rate": 2.840909090909091e-08,
+       "logits": -1.359458565711975,
+       "logps": -84.69721221923828,
+       "loss": 0.6931,
+       "objective": 0.6931471824645996,
+       "ranking_idealized": 0.5833333134651184,
+       "ranking_idealized_expo": 0.5833333134651184,
+       "ranking_simple": 0.5833333134651184,
+       "regularize": 0.6931471824645996,
+       "step": 1,
+       "wo_beta": 5.271125316619873
+     },
+     {
+       "dpo_loss": 1.9794068336486816,
+       "epoch": 0.14170996693434104,
+       "grad_norm": 1879.680280823908,
+       "learning_rate": 1.4204545454545458e-06,
+       "logits": -1.4480701684951782,
+       "logps": -84.59326934814453,
+       "loss": 2.0094,
+       "objective": 1.9794068336486816,
+       "ranking_idealized": 0.5225340127944946,
+       "ranking_idealized_expo": 0.5216836929321289,
+       "ranking_simple": 0.5250850319862366,
+       "regularize": 1.9794068336486816,
+       "step": 50,
+       "wo_beta": 7.08821439743042
+     },
+     {
+       "epoch": 0.14170996693434104,
+       "eval_dpo_loss": 3.0980334281921387,
+       "eval_logits": -1.4591896533966064,
+       "eval_logps": -90.62417602539062,
+       "eval_loss": 3.106841564178467,
+       "eval_objective": 3.0980334281921387,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5258799195289612,
+       "eval_regularize": 3.0980334281921387,
+       "eval_runtime": 308.1993,
+       "eval_samples_per_second": 18.787,
+       "eval_steps_per_second": 1.567,
+       "eval_wo_beta": 7.7179274559021,
+       "step": 50
+     },
+     {
+       "dpo_loss": 6.002392768859863,
+       "epoch": 0.2834199338686821,
+       "grad_norm": 1776.9003571892035,
+       "learning_rate": 2.8409090909090916e-06,
+       "logits": -1.4501550197601318,
+       "logps": -81.94203186035156,
+       "loss": 5.9165,
+       "objective": 6.002392768859863,
+       "ranking_idealized": 0.5141666531562805,
+       "ranking_idealized_expo": 0.5137500166893005,
+       "ranking_simple": 0.5425000190734863,
+       "regularize": 6.002392768859863,
+       "step": 100,
+       "wo_beta": 6.498049736022949
+     },
+     {
+       "epoch": 0.2834199338686821,
+       "eval_dpo_loss": 7.139862060546875,
+       "eval_logits": -1.4642183780670166,
+       "eval_logps": -82.83346557617188,
+       "eval_loss": 7.14874792098999,
+       "eval_objective": 7.139862060546875,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5300207138061523,
+       "eval_regularize": 7.139862060546875,
+       "eval_runtime": 309.8222,
+       "eval_samples_per_second": 18.688,
+       "eval_steps_per_second": 1.559,
+       "eval_wo_beta": 7.4498443603515625,
+       "step": 100
+     },
+     {
+       "dpo_loss": 10.150534629821777,
+       "epoch": 0.42512990080302315,
+       "grad_norm": 1372.4824531102197,
+       "learning_rate": 4.2613636363636365e-06,
+       "logits": -1.4184441566467285,
+       "logps": -73.7444076538086,
+       "loss": 9.9617,
+       "objective": 10.150534629821777,
+       "ranking_idealized": 0.5287500023841858,
+       "ranking_idealized_expo": 0.527916669845581,
+       "ranking_simple": 0.5641666650772095,
+       "regularize": 10.150534629821777,
+       "step": 150,
+       "wo_beta": 6.36607027053833
+     },
+     {
+       "epoch": 0.42512990080302315,
+       "eval_dpo_loss": 11.353630065917969,
+       "eval_logits": -1.3437175750732422,
+       "eval_logps": -83.07452392578125,
+       "eval_loss": 11.899770736694336,
+       "eval_objective": 11.353630065917969,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5305383205413818,
+       "eval_regularize": 11.353630065917969,
+       "eval_runtime": 307.9181,
+       "eval_samples_per_second": 18.804,
+       "eval_steps_per_second": 1.569,
+       "eval_wo_beta": 7.260918140411377,
+       "step": 150
+     },
+     {
+       "dpo_loss": 12.522791862487793,
+       "epoch": 0.5668398677373642,
+       "grad_norm": 1378.4122138720427,
+       "learning_rate": 4.997168347957521e-06,
+       "logits": -1.3766180276870728,
+       "logps": -77.5620346069336,
+       "loss": 12.4724,
+       "objective": 12.522791862487793,
+       "ranking_idealized": 0.51583331823349,
+       "ranking_idealized_expo": 0.51541668176651,
+       "ranking_simple": 0.5550000071525574,
+       "regularize": 12.522791862487793,
+       "step": 200,
+       "wo_beta": 6.352013111114502
+     },
+     {
+       "epoch": 0.5668398677373642,
+       "eval_dpo_loss": 16.061721801757812,
+       "eval_logits": -1.387966513633728,
+       "eval_logps": -79.93595886230469,
+       "eval_loss": 17.09868049621582,
+       "eval_objective": 16.061721801757812,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5300207138061523,
+       "eval_regularize": 16.061721801757812,
+       "eval_runtime": 307.8039,
+       "eval_samples_per_second": 18.811,
+       "eval_steps_per_second": 1.569,
+       "eval_wo_beta": 7.228997230529785,
+       "step": 200
+     },
+     {
+       "dpo_loss": 13.093570709228516,
+       "epoch": 0.7085498346717053,
+       "grad_norm": 1274.583157442186,
+       "learning_rate": 4.973122855144066e-06,
+       "logits": -1.294631004333496,
+       "logps": -76.13822174072266,
+       "loss": 13.2936,
+       "objective": 13.093570709228516,
+       "ranking_idealized": 0.5166666507720947,
+       "ranking_idealized_expo": 0.5162500143051147,
+       "ranking_simple": 0.5824999809265137,
+       "regularize": 13.093570709228516,
+       "step": 250,
+       "wo_beta": 6.279551982879639
+     },
+     {
+       "epoch": 0.7085498346717053,
+       "eval_dpo_loss": 17.797138214111328,
+       "eval_logits": -1.3640648126602173,
+       "eval_logps": -77.31498718261719,
+       "eval_loss": 18.5308780670166,
+       "eval_objective": 17.797138214111328,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5341615080833435,
+       "eval_regularize": 17.797138214111328,
+       "eval_runtime": 313.1016,
+       "eval_samples_per_second": 18.492,
+       "eval_steps_per_second": 1.543,
+       "eval_wo_beta": 7.207766532897949,
+       "step": 250
+     },
+     {
+       "dpo_loss": 10.712362289428711,
+       "epoch": 0.8502598016060463,
+       "grad_norm": 982.3462926804266,
+       "learning_rate": 4.924776641419513e-06,
+       "logits": -1.090299129486084,
+       "logps": -70.98873138427734,
+       "loss": 11.5204,
+       "objective": 10.712362289428711,
+       "ranking_idealized": 0.4962500035762787,
+       "ranking_idealized_expo": 0.4950000047683716,
+       "ranking_simple": 0.5679166913032532,
+       "regularize": 10.712362289428711,
+       "step": 300,
+       "wo_beta": 6.134185314178467
+     },
+     {
+       "epoch": 0.8502598016060463,
+       "eval_dpo_loss": 18.701662063598633,
+       "eval_logits": -0.9941285848617554,
+       "eval_logps": -76.97978210449219,
+       "eval_loss": 19.434432983398438,
+       "eval_objective": 18.701662063598633,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5357142686843872,
+       "eval_regularize": 18.701662063598633,
+       "eval_runtime": 307.6602,
+       "eval_samples_per_second": 18.819,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 7.013552188873291,
+       "step": 300
+     },
+     {
+       "dpo_loss": 10.92597484588623,
+       "epoch": 0.9919697685403873,
+       "grad_norm": 938.8397527375307,
+       "learning_rate": 4.8526047530778175e-06,
+       "logits": -0.9006206393241882,
+       "logps": -72.82616424560547,
+       "loss": 11.3717,
+       "objective": 10.92597484588623,
+       "ranking_idealized": 0.5262500047683716,
+       "ranking_idealized_expo": 0.5254166722297668,
+       "ranking_simple": 0.6033333539962769,
+       "regularize": 10.92597484588623,
+       "step": 350,
+       "wo_beta": 5.362515449523926
+     },
+     {
+       "epoch": 0.9919697685403873,
+       "eval_dpo_loss": 19.039833068847656,
+       "eval_logits": -1.0426429510116577,
+       "eval_logps": -76.1622543334961,
+       "eval_loss": 20.39594841003418,
+       "eval_objective": 19.039833068847656,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5408902764320374,
+       "eval_regularize": 19.039833068847656,
+       "eval_runtime": 307.6502,
+       "eval_samples_per_second": 18.82,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 7.0260910987854,
+       "step": 350
+     },
+     {
+       "dpo_loss": 6.708657264709473,
+       "epoch": 1.1336797354747283,
+       "grad_norm": 902.5480798954853,
+       "learning_rate": 4.757316345716554e-06,
+       "logits": -0.7562137246131897,
+       "logps": -70.6362075805664,
+       "loss": 7.0971,
+       "objective": 6.708657264709473,
+       "ranking_idealized": 0.5333333611488342,
+       "ranking_idealized_expo": 0.5320833325386047,
+       "ranking_simple": 0.6329166889190674,
+       "regularize": 6.708657264709473,
+       "step": 400,
+       "wo_beta": 4.74643087387085
+     },
+     {
+       "epoch": 1.1336797354747283,
+       "eval_dpo_loss": 21.69021987915039,
+       "eval_logits": -0.6236207485198975,
+       "eval_logps": -76.14582824707031,
+       "eval_loss": 21.927854537963867,
+       "eval_objective": 21.69021987915039,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5388198494911194,
+       "eval_regularize": 21.69021987915039,
+       "eval_runtime": 308.4451,
+       "eval_samples_per_second": 18.772,
+       "eval_steps_per_second": 1.566,
+       "eval_wo_beta": 7.122740745544434,
+       "step": 400
+     },
+     {
+       "dpo_loss": 8.22778606414795,
+       "epoch": 1.2753897024090695,
+       "grad_norm": 701.1530681925066,
+       "learning_rate": 4.639847716126855e-06,
+       "logits": -0.6124467849731445,
+       "logps": -71.35508728027344,
+       "loss": 7.5725,
+       "objective": 8.22778606414795,
+       "ranking_idealized": 0.5195833444595337,
+       "ranking_idealized_expo": 0.5191666483879089,
+       "ranking_simple": 0.6312500238418579,
+       "regularize": 8.22778606414795,
+       "step": 450,
+       "wo_beta": 5.267808437347412
+     },
+     {
+       "epoch": 1.2753897024090695,
+       "eval_dpo_loss": 20.385303497314453,
+       "eval_logits": -0.8352137207984924,
+       "eval_logps": -76.3924331665039,
+       "eval_loss": 20.948013305664062,
+       "eval_objective": 20.385303497314453,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5372670888900757,
+       "eval_regularize": 20.385303497314453,
+       "eval_runtime": 307.7791,
+       "eval_samples_per_second": 18.812,
+       "eval_steps_per_second": 1.569,
+       "eval_wo_beta": 6.8499908447265625,
+       "step": 450
+     },
+     {
+       "dpo_loss": 7.319465160369873,
+       "epoch": 1.4170996693434104,
+       "grad_norm": 1027.4391137177338,
+       "learning_rate": 4.501353102310901e-06,
+       "logits": -0.7022644877433777,
+       "logps": -74.45861053466797,
+       "loss": 7.6466,
+       "objective": 7.319465160369873,
+       "ranking_idealized": 0.49791666865348816,
+       "ranking_idealized_expo": 0.4970833361148834,
+       "ranking_simple": 0.6225000023841858,
+       "regularize": 7.319465160369873,
+       "step": 500,
+       "wo_beta": 5.078485488891602
+     },
+     {
+       "epoch": 1.4170996693434104,
+       "eval_dpo_loss": 20.265100479125977,
+       "eval_logits": -0.7483307123184204,
+       "eval_logps": -80.78058624267578,
+       "eval_loss": 20.982105255126953,
+       "eval_objective": 20.265100479125977,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.532608687877655,
+       "eval_regularize": 20.265100479125977,
+       "eval_runtime": 307.6224,
+       "eval_samples_per_second": 18.822,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 6.882425785064697,
+       "step": 500
+     },
+     {
+       "dpo_loss": 6.880460739135742,
+       "epoch": 1.5588096362777515,
+       "grad_norm": 856.3100755197052,
+       "learning_rate": 4.34319334202531e-06,
+       "logits": -0.6065574884414673,
+       "logps": -75.99507141113281,
+       "loss": 6.9565,
+       "objective": 6.880460739135742,
+       "ranking_idealized": 0.5112500190734863,
+       "ranking_idealized_expo": 0.5104166865348816,
+       "ranking_simple": 0.6445833444595337,
+       "regularize": 6.880460739135742,
+       "step": 550,
+       "wo_beta": 4.776731967926025
+     },
+     {
+       "epoch": 1.5588096362777515,
+       "eval_dpo_loss": 20.566144943237305,
+       "eval_logits": -0.6148493885993958,
+       "eval_logps": -80.20514678955078,
+       "eval_loss": 21.350601196289062,
+       "eval_objective": 20.566144943237305,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5383023023605347,
+       "eval_regularize": 20.566144943237305,
+       "eval_runtime": 311.6281,
+       "eval_samples_per_second": 18.58,
+       "eval_steps_per_second": 1.55,
+       "eval_wo_beta": 6.651296138763428,
+       "step": 550
+     },
+     {
+       "dpo_loss": 6.715544700622559,
+       "epoch": 1.7005196032120926,
+       "grad_norm": 710.120603889053,
+       "learning_rate": 4.16692250129073e-06,
+       "logits": -0.4755525290966034,
+       "logps": -75.72002410888672,
+       "loss": 6.7183,
+       "objective": 6.715544700622559,
+       "ranking_idealized": 0.51541668176651,
+       "ranking_idealized_expo": 0.5149999856948853,
+       "ranking_simple": 0.637499988079071,
+       "regularize": 6.715544700622559,
+       "step": 600,
+       "wo_beta": 4.843540668487549
+     },
+     {
+       "epoch": 1.7005196032120926,
+       "eval_dpo_loss": 20.002656936645508,
+       "eval_logits": -0.606741189956665,
+       "eval_logps": -78.53438568115234,
+       "eval_loss": 21.126510620117188,
+       "eval_objective": 20.002656936645508,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5367494821548462,
+       "eval_regularize": 20.002656936645508,
+       "eval_runtime": 307.6088,
+       "eval_samples_per_second": 18.823,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 6.676760673522949,
+       "step": 600
+     },
+     {
+       "dpo_loss": 7.323308944702148,
+       "epoch": 1.8422295701464337,
+       "grad_norm": 790.4876193704064,
+       "learning_rate": 3.974272604254906e-06,
+       "logits": -0.5003318190574646,
+       "logps": -74.34846496582031,
+       "loss": 6.9931,
+       "objective": 7.323308944702148,
+       "ranking_idealized": 0.5291666388511658,
+       "ranking_idealized_expo": 0.527916669845581,
+       "ranking_simple": 0.64083331823349,
+       "regularize": 7.323308944702148,
+       "step": 650,
+       "wo_beta": 5.1112799644470215
+     },
+     {
+       "epoch": 1.8422295701464337,
+       "eval_dpo_loss": 21.445514678955078,
+       "eval_logits": -0.5872498154640198,
+       "eval_logps": -77.65087127685547,
+       "eval_loss": 22.20830535888672,
+       "eval_objective": 21.445514678955078,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5383023023605347,
+       "eval_regularize": 21.445514678955078,
+       "eval_runtime": 307.7733,
+       "eval_samples_per_second": 18.813,
+       "eval_steps_per_second": 1.569,
+       "eval_wo_beta": 6.819047451019287,
+       "step": 650
+     },
+     {
+       "dpo_loss": 6.620248317718506,
+       "epoch": 1.9839395370807746,
+       "grad_norm": 688.8652687295252,
+       "learning_rate": 3.767136614452458e-06,
+       "logits": -0.40135031938552856,
+       "logps": -73.09497833251953,
+       "loss": 6.1685,
+       "objective": 6.620248317718506,
+       "ranking_idealized": 0.5129166841506958,
+       "ranking_idealized_expo": 0.5108333230018616,
+       "ranking_simple": 0.6358333230018616,
+       "regularize": 6.620248317718506,
+       "step": 700,
+       "wo_beta": 5.023129463195801
+     },
+     {
+       "epoch": 1.9839395370807746,
+       "eval_dpo_loss": 21.55119514465332,
+       "eval_logits": -0.5436362028121948,
+       "eval_logps": -77.14934539794922,
+       "eval_loss": 22.36069679260254,
+       "eval_objective": 21.55119514465332,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5403726696968079,
+       "eval_regularize": 21.55119514465332,
+       "eval_runtime": 310.6834,
+       "eval_samples_per_second": 18.636,
+       "eval_steps_per_second": 1.555,
+       "eval_wo_beta": 6.729911804199219,
+       "step": 700
+     },
+     {
+       "dpo_loss": 3.5603878498077393,
+       "epoch": 2.1256495040151155,
+       "grad_norm": 579.769858214478,
+       "learning_rate": 3.547549834686222e-06,
+       "logits": -0.5370141863822937,
+       "logps": -73.9045639038086,
+       "loss": 3.4811,
+       "objective": 3.5603878498077393,
+       "ranking_idealized": 0.5129166841506958,
+       "ranking_idealized_expo": 0.5112500190734863,
+       "ranking_simple": 0.6691666841506958,
+       "regularize": 3.5603878498077393,
+       "step": 750,
+       "wo_beta": 4.534417152404785
+     },
+     {
+       "epoch": 2.1256495040151155,
+       "eval_dpo_loss": 21.137874603271484,
+       "eval_logits": -0.7312601804733276,
+       "eval_logps": -78.93118286132812,
+       "eval_loss": 21.834890365600586,
+       "eval_objective": 21.137874603271484,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.542443037033081,
+       "eval_regularize": 21.137874603271484,
+       "eval_runtime": 307.6064,
+       "eval_samples_per_second": 18.823,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 6.821295261383057,
+       "step": 750
+     },
+     {
+       "dpo_loss": 3.4036636352539062,
+       "epoch": 2.2673594709494567,
+       "grad_norm": 573.1633009551587,
+       "learning_rate": 3.3176699082935546e-06,
+       "logits": -0.5852146148681641,
+       "logps": -75.8536376953125,
+       "loss": 3.3995,
+       "objective": 3.4036636352539062,
+       "ranking_idealized": 0.512499988079071,
+       "ranking_idealized_expo": 0.512499988079071,
+       "ranking_simple": 0.6625000238418579,
+       "regularize": 3.4036636352539062,
+       "step": 800,
+       "wo_beta": 4.245257377624512
+     },
+     {
+       "epoch": 2.2673594709494567,
+       "eval_dpo_loss": 20.453168869018555,
+       "eval_logits": -0.5475257635116577,
+       "eval_logps": -79.71145629882812,
+       "eval_loss": 21.353944778442383,
+       "eval_objective": 20.453168869018555,
+       "eval_ranking_idealized": 0.5212215185165405,
+       "eval_ranking_idealized_expo": 0.5212215185165405,
+       "eval_ranking_simple": 0.5362318754196167,
+       "eval_regularize": 20.453168869018555,
+       "eval_runtime": 307.5814,
+       "eval_samples_per_second": 18.824,
+       "eval_steps_per_second": 1.57,
+       "eval_wo_beta": 6.686735153198242,
+       "step": 800
+     },
+     {
+       "epoch": 2.2673594709494567,
+       "step": 800,
+       "total_flos": 0.0,
+       "train_loss": 7.659533626437187,
+       "train_runtime": 21973.9049,
+       "train_samples_per_second": 11.56,
+       "train_steps_per_second": 0.08
+     }
+   ],
+   "logging_steps": 50,
+   "max_steps": 1760,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 5,
+   "save_steps": 50,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
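
The `stateful_callbacks` block above records the early-stopping setup that ended this run: the best `eval_wo_beta` (6.6513) was reached at step 550, and the five subsequent evaluations (steps 600 through 800) did not improve on it, so training stopped at step 800 of a possible 1760. A sketch of how such a setup is typically wired with `transformers` (the metric name and the surrounding trainer are assumptions; only the callback arguments appear in the state file):

```python
from transformers import EarlyStoppingCallback, TrainingArguments

args = TrainingArguments(
    output_dir="qwen2.5-0.5b-expo-DPO-ES-10",  # hypothetical output path
    eval_strategy="steps",
    eval_steps=50,                     # matches "eval_steps": 50 above
    save_steps=50,
    load_best_model_at_end=True,       # required by EarlyStoppingCallback
    metric_for_best_model="wo_beta",   # assumed: best_metric above matches eval_wo_beta
    greater_is_better=False,           # lower wo_beta is treated as better
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=5,         # from "early_stopping_patience": 5
    early_stopping_threshold=0.0,
)

# The callback is then passed to the trainer, e.g.:
# trainer = Trainer(model=model, args=args, callbacks=[early_stopping], ...)  # or the project's DPO trainer
```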