hZzy committed on
Commit
f3678be
1 Parent(s): d6a1977

Model save

Browse files
README.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
+ tags:
5
+ - trl
6
+ - expo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: qwen2.5-0.5b-expo-L1EXPO-ES-10
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/5dedaauf)
17
+ # qwen2.5-0.5b-expo-L1EXPO-ES-10
18
+
19
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 51.5859
22
+ - Logps: -84.1485
23
+ - Logits: -0.4568
24
+ - Objective: 51.6626
25
+ - Dpo Loss: 26.3073
26
+ - Regularize: 51.6626
27
+ - Ranking Simple: 0.5254
28
+ - Ranking Idealized: 0.5212
29
+ - Ranking Idealized Expo: 0.5212
30
+ - Wo Beta: 14.1450
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 5e-06
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - distributed_type: multi-GPU
54
+ - num_devices: 3
55
+ - gradient_accumulation_steps: 12
56
+ - total_train_batch_size: 144
57
+ - total_eval_batch_size: 12
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: cosine
60
+ - lr_scheduler_warmup_ratio: 0.1
61
+ - num_epochs: 5
62
+
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Dpo Loss | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize | Wo Beta |
66
+ |:-------------:|:------:|:----:|:--------:|:-------:|:--------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|:-------:|
67
+ | 4.2778 | 0.1417 | 50 | 2.8787 | -1.4301 | -91.7813 | 5.6511 | 5.5786 | 0.5212 | 0.5212 | 0.5243 | 5.5786 | 16.1070 |
68
+ | 17.3516 | 0.2834 | 100 | 7.9687 | -1.3171 | -86.6635 | 15.6835 | 15.7548 | 0.5212 | 0.5212 | 0.5280 | 15.7548 | 15.6261 |
69
+ | 28.6009 | 0.4251 | 150 | 15.0002 | -1.1259 | -81.4986 | 29.0753 | 28.9045 | 0.5212 | 0.5212 | 0.5243 | 28.9045 | 15.2369 |
70
+ | 35.0698 | 0.5668 | 200 | 21.3918 | -0.8776 | -82.1578 | 41.1263 | 40.4593 | 0.5212 | 0.5212 | 0.5124 | 40.4593 | 14.9112 |
71
+ | 37.7822 | 0.7085 | 250 | 21.9288 | -0.6419 | -83.0039 | 44.0746 | 43.3933 | 0.5212 | 0.5212 | 0.5280 | 43.3933 | 14.6204 |
72
+ | 35.2811 | 0.8503 | 300 | 21.4307 | -0.5316 | -83.8429 | 43.6626 | 43.4643 | 0.5212 | 0.5212 | 0.5321 | 43.4643 | 14.5447 |
73
+ | 33.8034 | 0.9920 | 350 | 23.3301 | -0.5934 | -84.0573 | 45.2649 | 45.3586 | 0.5212 | 0.5212 | 0.5238 | 45.3586 | 14.6023 |
74
+ | 30.8702 | 1.1337 | 400 | 23.8270 | -0.6271 | -82.2022 | 47.2698 | 47.2674 | 0.5212 | 0.5212 | 0.5248 | 47.2674 | 14.3367 |
75
+ | 29.5027 | 1.2754 | 450 | 25.1794 | -0.5508 | -82.7233 | 49.3412 | 49.4737 | 0.5212 | 0.5212 | 0.5202 | 49.4737 | 14.3433 |
76
+ | 27.7693 | 1.4171 | 500 | 24.6274 | -0.5208 | -83.1404 | 48.4138 | 48.5616 | 0.5212 | 0.5212 | 0.5181 | 48.5616 | 14.3259 |
77
+ | 26.3455 | 1.5588 | 550 | 24.8876 | -0.5377 | -81.6711 | 49.4754 | 49.7513 | 0.5212 | 0.5212 | 0.5264 | 49.7513 | 14.2335 |
78
+ | 25.3777 | 1.7005 | 600 | 24.6279 | -0.5633 | -81.3699 | 48.8078 | 49.2645 | 0.5212 | 0.5212 | 0.5238 | 49.2645 | 14.1972 |
79
+ | 24.4429 | 1.8422 | 650 | 25.3419 | -0.4757 | -81.6565 | 49.7105 | 49.8172 | 0.5212 | 0.5212 | 0.5192 | 49.8172 | 14.3368 |
80
+ | 22.5358 | 1.9839 | 700 | 26.2794 | -0.5140 | -80.6186 | 51.6794 | 51.5628 | 0.5212 | 0.5212 | 0.5248 | 51.5628 | 14.0744 |
81
+ | 20.6864 | 2.1256 | 750 | 25.7920 | -0.4511 | -83.9474 | 50.9028 | 51.1398 | 0.5212 | 0.5212 | 0.5274 | 51.1398 | 14.2847 |
82
+ | 19.5881 | 2.2674 | 800 | 26.2232 | -0.4519 | -84.1413 | 51.4440 | 51.8351 | 0.5212 | 0.5212 | 0.5274 | 51.8351 | 14.2120 |
83
+ | 18.5246 | 2.4091 | 850 | 26.5269 | -0.5061 | -82.9639 | 52.2825 | 52.2313 | 0.5212 | 0.5212 | 0.5285 | 52.2313 | 14.1205 |
84
+ | 17.4115 | 2.5508 | 900 | 26.5477 | -0.5079 | -83.9889 | 52.2686 | 52.2795 | 0.5212 | 0.5212 | 0.5290 | 52.2795 | 14.1975 |
85
+ | 16.2052 | 2.6925 | 950 | 26.6571 | -0.4691 | -83.1267 | 52.4042 | 52.3891 | 0.5212 | 0.5212 | 0.5238 | 52.3891 | 14.2985 |
86
+ | 15.0384       | 2.8389 | 1000 | 26.1645  | -0.4551 | -82.8277 | 51.7636         | 51.6447   | 0.5212            | 0.5212                 | 0.5264         | 51.6447    | 14.2036 |
87
+ | 14.381        | 2.9806 | 1050 | 26.5043  | -0.4122 | -83.0540 | 51.8214         | 51.9024   | 0.5212            | 0.5212                 | 0.5248         | 51.9024    | 14.1669 |
88
+ | 12.5437       | 3.1223 | 1100 | 26.1851  | -0.4408 | -83.8731 | 51.6017         | 51.8998   | 0.5212            | 0.5212                 | 0.5254         | 51.8998    | 14.1769 |
89
+ | 11.3828       | 3.2641 | 1150 | 26.2023  | -0.4506 | -84.2104 | 51.5869         | 51.7268   | 0.5212            | 0.5212                 | 0.5259         | 51.7268    | 14.1768 |
90
+ | 10.5152       | 3.4058 | 1200 | 26.3073  | -0.4568 | -84.1485 | 51.5859         | 51.6626   | 0.5212            | 0.5212                 | 0.5254         | 51.6626    | 14.1450 |
91
+
92
+
93
+ ### Framework versions
94
+
95
+ - Transformers 4.42.0
96
+ - Pytorch 2.3.0+cu121
97
+ - Datasets 2.19.1
98
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.4057628719886632,
3
+ "total_flos": 0.0,
4
+ "train_loss": 2.660881093343099,
5
+ "train_runtime": 6833.7834,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 37.17,
8
+ "train_steps_per_second": 0.258
9
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151644,
3
+ "eos_token_id": 151645,
4
+ "max_new_tokens": 2048,
5
+ "pad_token_id": 151645,
6
+ "transformers_version": "4.42.0"
7
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:018a56210be107aa89f9990fdd1b99c7c848a78c527a0c80034e8d214014bcb0
3
  size 1975192208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c335c01beeef42869d9b4cb44f615faf6c0fd7d9ce2483063add81ba32d8b7
3
  size 1975192208
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.4057628719886632,
3
+ "total_flos": 0.0,
4
+ "train_loss": 2.660881093343099,
5
+ "train_runtime": 6833.7834,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 37.17,
8
+ "train_steps_per_second": 0.258
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,859 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 14.074385643005371,
3
+ "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-10/checkpoint-700",
4
+ "epoch": 3.4057628719886632,
5
+ "eval_steps": 50,
6
+ "global_step": 1200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "dpo_loss": 0.6931471824645996,
13
+ "epoch": 0.002834199338686821,
14
+ "grad_norm": 3688.5065763773923,
15
+ "learning_rate": 2.840909090909091e-08,
16
+ "logits": -1.359458565711975,
17
+ "logps": -84.69721221923828,
18
+ "loss": 0.0051,
19
+ "objective": 0.0046141319908201694,
20
+ "ranking_idealized": 0.5833333134651184,
21
+ "ranking_idealized_expo": 0.5833333134651184,
22
+ "ranking_simple": 0.5833333134651184,
23
+ "regularize": 0.0046141319908201694,
24
+ "step": 1,
25
+ "wo_beta": 14.840873718261719
26
+ },
27
+ {
28
+ "dpo_loss": 2.3264636993408203,
29
+ "epoch": 0.14170996693434104,
30
+ "grad_norm": 3322.6545378359765,
31
+ "learning_rate": 1.4204545454545458e-06,
32
+ "logits": -1.454339623451233,
33
+ "logps": -84.50347900390625,
34
+ "loss": 4.2778,
35
+ "objective": 4.120908737182617,
36
+ "ranking_idealized": 0.5225340127944946,
37
+ "ranking_idealized_expo": 0.5216836929321289,
38
+ "ranking_simple": 0.521258533000946,
39
+ "regularize": 4.120908737182617,
40
+ "step": 50,
41
+ "wo_beta": 15.655658721923828
42
+ },
43
+ {
44
+ "epoch": 0.14170996693434104,
45
+ "eval_dpo_loss": 2.8787100315093994,
46
+ "eval_logits": -1.4301204681396484,
47
+ "eval_logps": -91.78133392333984,
48
+ "eval_loss": 5.651101589202881,
49
+ "eval_objective": 5.578580379486084,
50
+ "eval_ranking_idealized": 0.5212215185165405,
51
+ "eval_ranking_idealized_expo": 0.5212215185165405,
52
+ "eval_ranking_simple": 0.5243270993232727,
53
+ "eval_regularize": 5.578580379486084,
54
+ "eval_runtime": 307.7497,
55
+ "eval_samples_per_second": 18.814,
56
+ "eval_steps_per_second": 1.569,
57
+ "eval_wo_beta": 16.107044219970703,
58
+ "step": 50
59
+ },
60
+ {
61
+ "dpo_loss": 8.834875106811523,
62
+ "epoch": 0.2834199338686821,
63
+ "grad_norm": 2883.7985857251942,
64
+ "learning_rate": 2.8409090909090916e-06,
65
+ "logits": -1.3840159177780151,
66
+ "logps": -82.65471649169922,
67
+ "loss": 17.3516,
68
+ "objective": 17.624128341674805,
69
+ "ranking_idealized": 0.5141666531562805,
70
+ "ranking_idealized_expo": 0.5137500166893005,
71
+ "ranking_simple": 0.5179166793823242,
72
+ "regularize": 17.624128341674805,
73
+ "step": 100,
74
+ "wo_beta": 15.28693675994873
75
+ },
76
+ {
77
+ "epoch": 0.2834199338686821,
78
+ "eval_dpo_loss": 7.968704700469971,
79
+ "eval_logits": -1.3171318769454956,
80
+ "eval_logps": -86.66349792480469,
81
+ "eval_loss": 15.683460235595703,
82
+ "eval_objective": 15.754798889160156,
83
+ "eval_ranking_idealized": 0.5212215185165405,
84
+ "eval_ranking_idealized_expo": 0.5212215185165405,
85
+ "eval_ranking_simple": 0.5279502868652344,
86
+ "eval_regularize": 15.754798889160156,
87
+ "eval_runtime": 307.2479,
88
+ "eval_samples_per_second": 18.845,
89
+ "eval_steps_per_second": 1.572,
90
+ "eval_wo_beta": 15.626057624816895,
91
+ "step": 100
92
+ },
93
+ {
94
+ "dpo_loss": 14.751253128051758,
95
+ "epoch": 0.42512990080302315,
96
+ "grad_norm": 2254.7228314416952,
97
+ "learning_rate": 4.2613636363636365e-06,
98
+ "logits": -1.1572282314300537,
99
+ "logps": -80.76160430908203,
100
+ "loss": 28.6009,
101
+ "objective": 28.620296478271484,
102
+ "ranking_idealized": 0.5287500023841858,
103
+ "ranking_idealized_expo": 0.527916669845581,
104
+ "ranking_simple": 0.5266666412353516,
105
+ "regularize": 28.620296478271484,
106
+ "step": 150,
107
+ "wo_beta": 15.1625394821167
108
+ },
109
+ {
110
+ "epoch": 0.42512990080302315,
111
+ "eval_dpo_loss": 15.000219345092773,
112
+ "eval_logits": -1.1259195804595947,
113
+ "eval_logps": -81.49861145019531,
114
+ "eval_loss": 29.075258255004883,
115
+ "eval_objective": 28.90445327758789,
116
+ "eval_ranking_idealized": 0.5212215185165405,
117
+ "eval_ranking_idealized_expo": 0.5212215185165405,
118
+ "eval_ranking_simple": 0.5243270993232727,
119
+ "eval_regularize": 28.90445327758789,
120
+ "eval_runtime": 307.0327,
121
+ "eval_samples_per_second": 18.858,
122
+ "eval_steps_per_second": 1.573,
123
+ "eval_wo_beta": 15.236913681030273,
124
+ "step": 150
125
+ },
126
+ {
127
+ "dpo_loss": 18.452308654785156,
128
+ "epoch": 0.5668398677373642,
129
+ "grad_norm": 2255.157628060128,
130
+ "learning_rate": 4.997168347957521e-06,
131
+ "logits": -0.9300950169563293,
132
+ "logps": -76.25523376464844,
133
+ "loss": 35.0698,
134
+ "objective": 35.79060745239258,
135
+ "ranking_idealized": 0.51583331823349,
136
+ "ranking_idealized_expo": 0.51541668176651,
137
+ "ranking_simple": 0.5104166865348816,
138
+ "regularize": 35.79060745239258,
139
+ "step": 200,
140
+ "wo_beta": 15.353928565979004
141
+ },
142
+ {
143
+ "epoch": 0.5668398677373642,
144
+ "eval_dpo_loss": 21.391788482666016,
145
+ "eval_logits": -0.8775973916053772,
146
+ "eval_logps": -82.15784454345703,
147
+ "eval_loss": 41.12628173828125,
148
+ "eval_objective": 40.45929718017578,
149
+ "eval_ranking_idealized": 0.5212215185165405,
150
+ "eval_ranking_idealized_expo": 0.5212215185165405,
151
+ "eval_ranking_simple": 0.5124223828315735,
152
+ "eval_regularize": 40.45929718017578,
153
+ "eval_runtime": 307.0481,
154
+ "eval_samples_per_second": 18.857,
155
+ "eval_steps_per_second": 1.573,
156
+ "eval_wo_beta": 14.911209106445312,
157
+ "step": 200
158
+ },
159
+ {
160
+ "dpo_loss": 19.97781753540039,
161
+ "epoch": 0.7085498346717053,
162
+ "grad_norm": 1862.9598435340467,
163
+ "learning_rate": 4.973122855144066e-06,
164
+ "logits": -0.7163826823234558,
165
+ "logps": -77.4970932006836,
166
+ "loss": 37.7822,
167
+ "objective": 38.173789978027344,
168
+ "ranking_idealized": 0.5166666507720947,
169
+ "ranking_idealized_expo": 0.5162500143051147,
170
+ "ranking_simple": 0.5112500190734863,
171
+ "regularize": 38.173789978027344,
172
+ "step": 250,
173
+ "wo_beta": 15.578652381896973
174
+ },
175
+ {
176
+ "epoch": 0.7085498346717053,
177
+ "eval_dpo_loss": 21.928752899169922,
178
+ "eval_logits": -0.641853392124176,
179
+ "eval_logps": -83.0038833618164,
180
+ "eval_loss": 44.07463836669922,
181
+ "eval_objective": 43.393341064453125,
182
+ "eval_ranking_idealized": 0.5212215185165405,
183
+ "eval_ranking_idealized_expo": 0.5212215185165405,
184
+ "eval_ranking_simple": 0.5279502868652344,
185
+ "eval_regularize": 43.393341064453125,
186
+ "eval_runtime": 307.199,
187
+ "eval_samples_per_second": 18.848,
188
+ "eval_steps_per_second": 1.572,
189
+ "eval_wo_beta": 14.620430946350098,
190
+ "step": 250
191
+ },
192
+ {
193
+ "dpo_loss": 17.413480758666992,
194
+ "epoch": 0.8502598016060463,
195
+ "grad_norm": 1744.6732754071961,
196
+ "learning_rate": 4.924776641419513e-06,
197
+ "logits": -0.40934881567955017,
198
+ "logps": -79.10726165771484,
199
+ "loss": 35.2811,
200
+ "objective": 35.4559326171875,
201
+ "ranking_idealized": 0.4962500035762787,
202
+ "ranking_idealized_expo": 0.4950000047683716,
203
+ "ranking_simple": 0.502916693687439,
204
+ "regularize": 35.4559326171875,
205
+ "step": 300,
206
+ "wo_beta": 15.202095031738281
207
+ },
208
+ {
209
+ "epoch": 0.8502598016060463,
210
+ "eval_dpo_loss": 21.43065071105957,
211
+ "eval_logits": -0.5315975546836853,
212
+ "eval_logps": -83.84294891357422,
213
+ "eval_loss": 43.6626091003418,
214
+ "eval_objective": 43.46427536010742,
215
+ "eval_ranking_idealized": 0.5212215185165405,
216
+ "eval_ranking_idealized_expo": 0.5212215185165405,
217
+ "eval_ranking_simple": 0.5320910811424255,
218
+ "eval_regularize": 43.46427536010742,
219
+ "eval_runtime": 307.206,
220
+ "eval_samples_per_second": 18.847,
221
+ "eval_steps_per_second": 1.572,
222
+ "eval_wo_beta": 14.544736862182617,
223
+ "step": 300
224
+ },
225
+ {
226
+ "dpo_loss": 17.524351119995117,
227
+ "epoch": 0.9919697685403873,
228
+ "grad_norm": 1787.213275862853,
229
+ "learning_rate": 4.8526047530778175e-06,
230
+ "logits": -0.5016722679138184,
231
+ "logps": -80.09149169921875,
232
+ "loss": 33.8034,
233
+ "objective": 34.494503021240234,
234
+ "ranking_idealized": 0.5262500047683716,
235
+ "ranking_idealized_expo": 0.5254166722297668,
236
+ "ranking_simple": 0.5249999761581421,
237
+ "regularize": 34.494503021240234,
238
+ "step": 350,
239
+ "wo_beta": 15.207830429077148
240
+ },
241
+ {
242
+ "epoch": 0.9919697685403873,
243
+ "eval_dpo_loss": 23.330080032348633,
244
+ "eval_logits": -0.593406081199646,
245
+ "eval_logps": -84.05725860595703,
246
+ "eval_loss": 45.264923095703125,
247
+ "eval_objective": 45.35862731933594,
248
+ "eval_ranking_idealized": 0.5212215185165405,
249
+ "eval_ranking_idealized_expo": 0.5212215185165405,
250
+ "eval_ranking_simple": 0.523809552192688,
251
+ "eval_regularize": 45.35862731933594,
252
+ "eval_runtime": 307.0631,
253
+ "eval_samples_per_second": 18.856,
254
+ "eval_steps_per_second": 1.573,
255
+ "eval_wo_beta": 14.60231876373291,
256
+ "step": 350
257
+ },
258
+ {
259
+ "dpo_loss": 16.205705642700195,
260
+ "epoch": 1.1336797354747283,
261
+ "grad_norm": 1658.338167111395,
262
+ "learning_rate": 4.757316345716554e-06,
263
+ "logits": -0.5499605536460876,
264
+ "logps": -80.1341552734375,
265
+ "loss": 30.8702,
266
+ "objective": 30.992847442626953,
267
+ "ranking_idealized": 0.5333333611488342,
268
+ "ranking_idealized_expo": 0.5320833325386047,
269
+ "ranking_simple": 0.528333306312561,
270
+ "regularize": 30.992847442626953,
271
+ "step": 400,
272
+ "wo_beta": 15.376312255859375
273
+ },
274
+ {
275
+ "epoch": 1.1336797354747283,
276
+ "eval_dpo_loss": 23.827035903930664,
277
+ "eval_logits": -0.62712162733078,
278
+ "eval_logps": -82.20217895507812,
279
+ "eval_loss": 47.269775390625,
280
+ "eval_objective": 47.26739501953125,
281
+ "eval_ranking_idealized": 0.5212215185165405,
282
+ "eval_ranking_idealized_expo": 0.5212215185165405,
283
+ "eval_ranking_simple": 0.5248447060585022,
284
+ "eval_regularize": 47.26739501953125,
285
+ "eval_runtime": 307.8491,
286
+ "eval_samples_per_second": 18.808,
287
+ "eval_steps_per_second": 1.569,
288
+ "eval_wo_beta": 14.336685180664062,
289
+ "step": 400
290
+ },
291
+ {
292
+ "dpo_loss": 14.983359336853027,
293
+ "epoch": 1.2753897024090695,
294
+ "grad_norm": 1630.7914622079197,
295
+ "learning_rate": 4.639847716126855e-06,
296
+ "logits": -0.5104279518127441,
297
+ "logps": -78.46994018554688,
298
+ "loss": 29.5027,
299
+ "objective": 29.416109085083008,
300
+ "ranking_idealized": 0.5195833444595337,
301
+ "ranking_idealized_expo": 0.5191666483879089,
302
+ "ranking_simple": 0.5170833468437195,
303
+ "regularize": 29.416109085083008,
304
+ "step": 450,
305
+ "wo_beta": 16.006542205810547
306
+ },
307
+ {
308
+ "epoch": 1.2753897024090695,
309
+ "eval_dpo_loss": 25.179445266723633,
310
+ "eval_logits": -0.5507553815841675,
311
+ "eval_logps": -82.72330474853516,
312
+ "eval_loss": 49.341182708740234,
313
+ "eval_objective": 49.47369384765625,
314
+ "eval_ranking_idealized": 0.5212215185165405,
315
+ "eval_ranking_idealized_expo": 0.5212215185165405,
316
+ "eval_ranking_simple": 0.5201863646507263,
317
+ "eval_regularize": 49.47369384765625,
318
+ "eval_runtime": 307.2653,
319
+ "eval_samples_per_second": 18.844,
320
+ "eval_steps_per_second": 1.572,
321
+ "eval_wo_beta": 14.343340873718262,
322
+ "step": 450
323
+ },
324
+ {
325
+ "dpo_loss": 13.962078094482422,
326
+ "epoch": 1.4170996693434104,
327
+ "grad_norm": 1627.1136853969401,
328
+ "learning_rate": 4.501353102310901e-06,
329
+ "logits": -0.4764183461666107,
330
+ "logps": -78.08194732666016,
331
+ "loss": 27.7693,
332
+ "objective": 28.35871696472168,
333
+ "ranking_idealized": 0.49791666865348816,
334
+ "ranking_idealized_expo": 0.4970833361148834,
335
+ "ranking_simple": 0.503333330154419,
336
+ "regularize": 28.35871696472168,
337
+ "step": 500,
338
+ "wo_beta": 15.235273361206055
339
+ },
340
+ {
341
+ "epoch": 1.4170996693434104,
342
+ "eval_dpo_loss": 24.62739372253418,
343
+ "eval_logits": -0.5208410024642944,
344
+ "eval_logps": -83.14039611816406,
345
+ "eval_loss": 48.41379928588867,
346
+ "eval_objective": 48.561553955078125,
347
+ "eval_ranking_idealized": 0.5212215185165405,
348
+ "eval_ranking_idealized_expo": 0.5212215185165405,
349
+ "eval_ranking_simple": 0.5181159377098083,
350
+ "eval_regularize": 48.561553955078125,
351
+ "eval_runtime": 313.5843,
352
+ "eval_samples_per_second": 18.464,
353
+ "eval_steps_per_second": 1.54,
354
+ "eval_wo_beta": 14.325936317443848,
355
+ "step": 500
356
+ },
357
+ {
358
+ "dpo_loss": 14.243717193603516,
359
+ "epoch": 1.5588096362777515,
360
+ "grad_norm": 1567.3979312158642,
361
+ "learning_rate": 4.34319334202531e-06,
362
+ "logits": -0.4176904857158661,
363
+ "logps": -79.26414489746094,
364
+ "loss": 26.3455,
365
+ "objective": 27.205766677856445,
366
+ "ranking_idealized": 0.5112500190734863,
367
+ "ranking_idealized_expo": 0.5104166865348816,
368
+ "ranking_simple": 0.5066666603088379,
369
+ "regularize": 27.205766677856445,
370
+ "step": 550,
371
+ "wo_beta": 15.118928909301758
372
+ },
373
+ {
374
+ "epoch": 1.5588096362777515,
375
+ "eval_dpo_loss": 24.8875732421875,
376
+ "eval_logits": -0.5377052426338196,
377
+ "eval_logps": -81.67108154296875,
378
+ "eval_loss": 49.475399017333984,
379
+ "eval_objective": 49.75130081176758,
380
+ "eval_ranking_idealized": 0.5212215185165405,
381
+ "eval_ranking_idealized_expo": 0.5212215185165405,
382
+ "eval_ranking_simple": 0.5263975262641907,
383
+ "eval_regularize": 49.75130081176758,
384
+ "eval_runtime": 307.1071,
385
+ "eval_samples_per_second": 18.853,
386
+ "eval_steps_per_second": 1.573,
387
+ "eval_wo_beta": 14.233548164367676,
388
+ "step": 550
389
+ },
390
+ {
391
+ "dpo_loss": 13.567865371704102,
392
+ "epoch": 1.7005196032120926,
393
+ "grad_norm": 1510.6295336293697,
394
+ "learning_rate": 4.16692250129073e-06,
395
+ "logits": -0.4348069727420807,
396
+ "logps": -78.36796569824219,
397
+ "loss": 25.3777,
398
+ "objective": 25.583778381347656,
399
+ "ranking_idealized": 0.51541668176651,
400
+ "ranking_idealized_expo": 0.5149999856948853,
401
+ "ranking_simple": 0.5049999952316284,
402
+ "regularize": 25.583778381347656,
403
+ "step": 600,
404
+ "wo_beta": 15.017353057861328
405
+ },
406
+ {
407
+ "epoch": 1.7005196032120926,
408
+ "eval_dpo_loss": 24.62792205810547,
409
+ "eval_logits": -0.5633407235145569,
410
+ "eval_logps": -81.369873046875,
411
+ "eval_loss": 48.80782699584961,
412
+ "eval_objective": 49.26447677612305,
413
+ "eval_ranking_idealized": 0.5212215185165405,
414
+ "eval_ranking_idealized_expo": 0.5212215185165405,
415
+ "eval_ranking_simple": 0.523809552192688,
416
+ "eval_regularize": 49.26447677612305,
417
+ "eval_runtime": 307.6769,
418
+ "eval_samples_per_second": 18.818,
419
+ "eval_steps_per_second": 1.57,
420
+ "eval_wo_beta": 14.197225570678711,
421
+ "step": 600
422
+ },
423
+ {
424
+ "dpo_loss": 12.823990821838379,
425
+ "epoch": 1.8422295701464337,
426
+ "grad_norm": 1590.0809438470442,
427
+ "learning_rate": 3.974272604254906e-06,
428
+ "logits": -0.45912277698516846,
429
+ "logps": -77.55583190917969,
430
+ "loss": 24.4429,
431
+ "objective": 24.74443817138672,
432
+ "ranking_idealized": 0.5291666388511658,
433
+ "ranking_idealized_expo": 0.527916669845581,
434
+ "ranking_simple": 0.5270833373069763,
435
+ "regularize": 24.74443817138672,
436
+ "step": 650,
437
+ "wo_beta": 15.796711921691895
438
+ },
439
+ {
440
+ "epoch": 1.8422295701464337,
441
+ "eval_dpo_loss": 25.341928482055664,
442
+ "eval_logits": -0.475749671459198,
443
+ "eval_logps": -81.65654754638672,
444
+ "eval_loss": 49.71050262451172,
445
+ "eval_objective": 49.81724548339844,
446
+ "eval_ranking_idealized": 0.5212215185165405,
447
+ "eval_ranking_idealized_expo": 0.5212215185165405,
448
+ "eval_ranking_simple": 0.5191511511802673,
449
+ "eval_regularize": 49.81724548339844,
450
+ "eval_runtime": 318.0633,
451
+ "eval_samples_per_second": 18.204,
452
+ "eval_steps_per_second": 1.519,
453
+ "eval_wo_beta": 14.336784362792969,
454
+ "step": 650
455
+ },
456
+ {
457
+ "dpo_loss": 11.803265571594238,
458
+ "epoch": 1.9839395370807746,
459
+ "grad_norm": 1573.6320557673569,
460
+ "learning_rate": 3.767136614452458e-06,
461
+ "logits": -0.44002941250801086,
462
+ "logps": -77.62532043457031,
463
+ "loss": 22.5358,
464
+ "objective": 22.4056339263916,
465
+ "ranking_idealized": 0.5129166841506958,
466
+ "ranking_idealized_expo": 0.5108333230018616,
467
+ "ranking_simple": 0.5058333277702332,
468
+ "regularize": 22.4056339263916,
469
+ "step": 700,
470
+ "wo_beta": 15.435830116271973
471
+ },
472
+ {
473
+ "epoch": 1.9839395370807746,
474
+ "eval_dpo_loss": 26.279430389404297,
475
+ "eval_logits": -0.5139885544776917,
476
+ "eval_logps": -80.61864471435547,
477
+ "eval_loss": 51.679359436035156,
478
+ "eval_objective": 51.56280517578125,
479
+ "eval_ranking_idealized": 0.5212215185165405,
480
+ "eval_ranking_idealized_expo": 0.5212215185165405,
481
+ "eval_ranking_simple": 0.5248447060585022,
482
+ "eval_regularize": 51.56280517578125,
483
+ "eval_runtime": 307.1755,
484
+ "eval_samples_per_second": 18.849,
485
+ "eval_steps_per_second": 1.572,
486
+ "eval_wo_beta": 14.074385643005371,
487
+ "step": 700
488
+ },
489
+ {
490
+ "dpo_loss": 10.530390739440918,
491
+ "epoch": 2.1256495040151155,
492
+ "grad_norm": 1447.9001618253178,
493
+ "learning_rate": 3.547549834686222e-06,
494
+ "logits": -0.4438280165195465,
495
+ "logps": -79.3443374633789,
496
+ "loss": 20.6864,
497
+ "objective": 20.564796447753906,
498
+ "ranking_idealized": 0.5129166841506958,
499
+ "ranking_idealized_expo": 0.5112500190734863,
500
+ "ranking_simple": 0.512499988079071,
501
+ "regularize": 20.564796447753906,
502
+ "step": 750,
503
+ "wo_beta": 15.44257640838623
504
+ },
505
+ {
506
+ "epoch": 2.1256495040151155,
507
+ "eval_dpo_loss": 25.791982650756836,
508
+ "eval_logits": -0.4510954022407532,
509
+ "eval_logps": -83.94737243652344,
510
+ "eval_loss": 50.90283966064453,
511
+ "eval_objective": 51.139808654785156,
512
+ "eval_ranking_idealized": 0.5212215185165405,
513
+ "eval_ranking_idealized_expo": 0.5212215185165405,
514
+ "eval_ranking_simple": 0.5274327397346497,
515
+ "eval_regularize": 51.139808654785156,
516
+ "eval_runtime": 307.3519,
517
+ "eval_samples_per_second": 18.838,
518
+ "eval_steps_per_second": 1.571,
519
+ "eval_wo_beta": 14.28470230102539,
520
+ "step": 750
521
+ },
522
+ {
523
+ "dpo_loss": 10.331942558288574,
524
+ "epoch": 2.2673594709494567,
525
+ "grad_norm": 1416.622520151804,
526
+ "learning_rate": 3.3176699082935546e-06,
527
+ "logits": -0.4105643630027771,
528
+ "logps": -81.301513671875,
529
+ "loss": 19.5881,
530
+ "objective": 19.708881378173828,
531
+ "ranking_idealized": 0.512499988079071,
532
+ "ranking_idealized_expo": 0.512499988079071,
533
+ "ranking_simple": 0.5162500143051147,
534
+ "regularize": 19.708881378173828,
535
+ "step": 800,
536
+ "wo_beta": 15.041363716125488
537
+ },
538
+ {
539
+ "epoch": 2.2673594709494567,
540
+ "eval_dpo_loss": 26.223230361938477,
541
+ "eval_logits": -0.45186811685562134,
542
+ "eval_logps": -84.14128112792969,
543
+ "eval_loss": 51.44403076171875,
544
+ "eval_objective": 51.835060119628906,
545
+ "eval_ranking_idealized": 0.5212215185165405,
546
+ "eval_ranking_idealized_expo": 0.5212215185165405,
547
+ "eval_ranking_simple": 0.5274327397346497,
548
+ "eval_regularize": 51.835060119628906,
549
+ "eval_runtime": 307.4841,
550
+ "eval_samples_per_second": 18.83,
551
+ "eval_steps_per_second": 1.571,
552
+ "eval_wo_beta": 14.21197509765625,
553
+ "step": 800
554
+ },
555
+ {
556
+ "dpo_loss": 9.117318153381348,
557
+ "epoch": 2.409069437883798,
558
+ "grad_norm": 1511.1151822215572,
559
+ "learning_rate": 3.0797556183036582e-06,
560
+ "logits": -0.4155246615409851,
561
+ "logps": -80.53886413574219,
562
+ "loss": 18.5246,
563
+ "objective": 18.382122039794922,
564
+ "ranking_idealized": 0.5145833492279053,
565
+ "ranking_idealized_expo": 0.5133333206176758,
566
+ "ranking_simple": 0.5141666531562805,
567
+ "regularize": 18.382122039794922,
568
+ "step": 850,
569
+ "wo_beta": 15.248088836669922
570
+ },
571
+ {
572
+ "epoch": 2.409069437883798,
573
+ "eval_dpo_loss": 26.526891708374023,
574
+ "eval_logits": -0.5061497688293457,
575
+ "eval_logps": -82.96385192871094,
576
+ "eval_loss": 52.282501220703125,
577
+ "eval_objective": 52.2313346862793,
578
+ "eval_ranking_idealized": 0.5212215185165405,
579
+ "eval_ranking_idealized_expo": 0.5212215185165405,
580
+ "eval_ranking_simple": 0.5284678936004639,
581
+ "eval_regularize": 52.2313346862793,
582
+ "eval_runtime": 307.2591,
583
+ "eval_samples_per_second": 18.844,
584
+ "eval_steps_per_second": 1.572,
585
+ "eval_wo_beta": 14.120504379272461,
586
+ "step": 850
587
+ },
588
+ {
589
+ "dpo_loss": 8.65651798248291,
590
+ "epoch": 2.550779404818139,
591
+ "grad_norm": 1500.724487309093,
592
+ "learning_rate": 2.8361446928038298e-06,
593
+ "logits": -0.4497624337673187,
594
+ "logps": -79.77722930908203,
595
+ "loss": 17.4115,
596
+ "objective": 17.32391929626465,
597
+ "ranking_idealized": 0.518750011920929,
598
+ "ranking_idealized_expo": 0.5183333158493042,
599
+ "ranking_simple": 0.5179166793823242,
600
+ "regularize": 17.32391929626465,
601
+ "step": 900,
602
+ "wo_beta": 15.50606918334961
603
+ },
604
+ {
605
+ "epoch": 2.550779404818139,
606
+ "eval_dpo_loss": 26.54765510559082,
607
+ "eval_logits": -0.5079280138015747,
608
+ "eval_logps": -83.98892211914062,
609
+ "eval_loss": 52.268577575683594,
610
+ "eval_objective": 52.27949905395508,
611
+ "eval_ranking_idealized": 0.5212215185165405,
612
+ "eval_ranking_idealized_expo": 0.5212215185165405,
613
+ "eval_ranking_simple": 0.5289855003356934,
614
+ "eval_regularize": 52.27949905395508,
615
+ "eval_runtime": 307.3895,
616
+ "eval_samples_per_second": 18.836,
617
+ "eval_steps_per_second": 1.571,
618
+ "eval_wo_beta": 14.197465896606445,
619
+ "step": 900
620
+ },
621
+ {
622
+ "dpo_loss": 8.308319091796875,
623
+ "epoch": 2.69248937175248,
624
+ "grad_norm": 1453.978726592987,
625
+ "learning_rate": 2.5892308345974517e-06,
626
+ "logits": -0.4583713412284851,
627
+ "logps": -80.14180755615234,
628
+ "loss": 16.2052,
629
+ "objective": 16.429227828979492,
630
+ "ranking_idealized": 0.5079166889190674,
631
+ "ranking_idealized_expo": 0.5058333277702332,
632
+ "ranking_simple": 0.5074999928474426,
633
+ "regularize": 16.429227828979492,
634
+ "step": 950,
635
+ "wo_beta": 15.596735000610352
636
+ },
637
+ {
638
+ "epoch": 2.69248937175248,
639
+ "eval_dpo_loss": 26.657089233398438,
640
+ "eval_logits": -0.46912574768066406,
641
+ "eval_logps": -83.12673950195312,
642
+ "eval_loss": 52.40416717529297,
643
+ "eval_objective": 52.389137268066406,
644
+ "eval_ranking_idealized": 0.5212215185165405,
645
+ "eval_ranking_idealized_expo": 0.5212215185165405,
646
+ "eval_ranking_simple": 0.523809552192688,
647
+ "eval_regularize": 52.389137268066406,
648
+ "eval_runtime": 307.39,
649
+ "eval_samples_per_second": 18.836,
650
+ "eval_steps_per_second": 1.571,
651
+ "eval_wo_beta": 14.298489570617676,
652
+ "step": 950
653
+ },
654
+ {
655
+ "dpo_loss": 7.868130683898926,
656
+ "epoch": 2.838923004251299,
657
+ "grad_norm": 1371.5890318912852,
658
+ "learning_rate": 2.341440200858589e-06,
659
+ "logits": -0.3988785743713379,
660
+ "logps": -78.35469055175781,
661
+ "loss": 15.0384,
662
+ "objective": 15.024641990661621,
663
+ "ranking_idealized": 0.5112500190734863,
664
+ "ranking_idealized_expo": 0.5112500190734863,
665
+ "ranking_simple": 0.5066666603088379,
666
+ "regularize": 15.024641990661621,
667
+ "step": 1000,
668
+ "wo_beta": 15.029138565063477
669
+ },
670
+ {
671
+ "epoch": 2.838923004251299,
672
+ "eval_dpo_loss": 26.16453742980957,
673
+ "eval_logits": -0.4550507366657257,
674
+ "eval_logps": -82.82769012451172,
675
+ "eval_loss": 51.76364517211914,
676
+ "eval_objective": 51.644718170166016,
677
+ "eval_ranking_idealized": 0.5212215185165405,
678
+ "eval_ranking_idealized_expo": 0.5212215185165405,
679
+ "eval_ranking_simple": 0.5263975262641907,
680
+ "eval_regularize": 51.644718170166016,
681
+ "eval_runtime": 307.9823,
682
+ "eval_samples_per_second": 18.8,
683
+ "eval_steps_per_second": 1.568,
684
+ "eval_wo_beta": 14.203557968139648,
685
+ "step": 1000
686
+ },
687
+ {
688
+ "dpo_loss": 7.561364650726318,
689
+ "epoch": 2.9806329711856403,
690
+ "grad_norm": 1438.5247466117469,
691
+ "learning_rate": 2.0952075638923656e-06,
692
+ "logits": -0.39186450839042664,
693
+ "logps": -79.17125701904297,
694
+ "loss": 14.381,
695
+ "objective": 14.444308280944824,
696
+ "ranking_idealized": 0.5183333158493042,
697
+ "ranking_idealized_expo": 0.5174999833106995,
698
+ "ranking_simple": 0.5245833396911621,
699
+ "regularize": 14.444308280944824,
700
+ "step": 1050,
701
+ "wo_beta": 15.485770225524902
702
+ },
703
+ {
704
+ "epoch": 2.9806329711856403,
705
+ "eval_dpo_loss": 26.504281997680664,
706
+ "eval_logits": -0.4121534526348114,
707
+ "eval_logps": -83.05400848388672,
708
+ "eval_loss": 51.82139587402344,
709
+ "eval_objective": 51.90236282348633,
710
+ "eval_ranking_idealized": 0.5212215185165405,
711
+ "eval_ranking_idealized_expo": 0.5212215185165405,
712
+ "eval_ranking_simple": 0.5248447060585022,
713
+ "eval_regularize": 51.90236282348633,
714
+ "eval_runtime": 307.2005,
715
+ "eval_samples_per_second": 18.848,
716
+ "eval_steps_per_second": 1.572,
717
+ "eval_wo_beta": 14.16685962677002,
718
+ "step": 1050
719
+ },
720
+ {
721
+ "dpo_loss": 6.576974868774414,
722
+ "epoch": 3.122342938119981,
723
+ "grad_norm": 1479.1539218663233,
724
+ "learning_rate": 1.852952387243698e-06,
725
+ "logits": -0.37988409399986267,
726
+ "logps": -80.17594146728516,
727
+ "loss": 12.5437,
728
+ "objective": 12.73067855834961,
729
+ "ranking_idealized": 0.5299999713897705,
730
+ "ranking_idealized_expo": 0.528333306312561,
731
+ "ranking_simple": 0.5266666412353516,
732
+ "regularize": 12.73067855834961,
733
+ "step": 1100,
734
+ "wo_beta": 15.62684440612793
735
+ },
736
+ {
737
+ "epoch": 3.122342938119981,
738
+ "eval_dpo_loss": 26.185077667236328,
739
+ "eval_logits": -0.4407959282398224,
740
+ "eval_logps": -83.87307739257812,
741
+ "eval_loss": 51.601688385009766,
742
+ "eval_objective": 51.89978790283203,
743
+ "eval_ranking_idealized": 0.5212215185165405,
744
+ "eval_ranking_idealized_expo": 0.5212215185165405,
745
+ "eval_ranking_simple": 0.5253623127937317,
746
+ "eval_regularize": 51.89978790283203,
747
+ "eval_runtime": 308.2578,
748
+ "eval_samples_per_second": 18.783,
749
+ "eval_steps_per_second": 1.567,
750
+ "eval_wo_beta": 14.176854133605957,
751
+ "step": 1100
752
+ },
753
+ {
754
+ "dpo_loss": 5.700263023376465,
755
+ "epoch": 3.264052905054322,
756
+ "grad_norm": 1402.4578249025758,
757
+ "learning_rate": 1.617055052228768e-06,
758
+ "logits": -0.39078637957572937,
759
+ "logps": -80.27751159667969,
760
+ "loss": 11.3828,
761
+ "objective": 11.245396614074707,
762
+ "ranking_idealized": 0.5091666579246521,
763
+ "ranking_idealized_expo": 0.5083333253860474,
764
+ "ranking_simple": 0.5104166865348816,
765
+ "regularize": 11.245396614074707,
766
+ "step": 1150,
767
+ "wo_beta": 15.349074363708496
768
+ },
769
+ {
770
+ "epoch": 3.264052905054322,
771
+ "eval_dpo_loss": 26.20229148864746,
772
+ "eval_logits": -0.4506087601184845,
773
+ "eval_logps": -84.2103500366211,
774
+ "eval_loss": 51.586910247802734,
775
+ "eval_objective": 51.72679138183594,
776
+ "eval_ranking_idealized": 0.5212215185165405,
777
+ "eval_ranking_idealized_expo": 0.5212215185165405,
778
+ "eval_ranking_simple": 0.5258799195289612,
779
+ "eval_regularize": 51.72679138183594,
780
+ "eval_runtime": 307.5329,
781
+ "eval_samples_per_second": 18.827,
782
+ "eval_steps_per_second": 1.571,
783
+ "eval_wo_beta": 14.176774024963379,
784
+ "step": 1150
785
+ },
786
+ {
787
+ "dpo_loss": 5.425318241119385,
788
+ "epoch": 3.4057628719886632,
789
+ "grad_norm": 1477.9539586967678,
790
+ "learning_rate": 1.3898334684855647e-06,
791
+ "logits": -0.3910551071166992,
792
+ "logps": -81.23528289794922,
793
+ "loss": 10.5152,
794
+ "objective": 10.480737686157227,
795
+ "ranking_idealized": 0.5079166889190674,
796
+ "ranking_idealized_expo": 0.5079166889190674,
797
+ "ranking_simple": 0.5049999952316284,
798
+ "regularize": 10.480737686157227,
799
+ "step": 1200,
800
+ "wo_beta": 15.531842231750488
801
+ },
802
+ {
803
+ "epoch": 3.4057628719886632,
804
+ "eval_dpo_loss": 26.307344436645508,
805
+ "eval_logits": -0.4568469524383545,
806
+ "eval_logps": -84.14852905273438,
807
+ "eval_loss": 51.58594512939453,
808
+ "eval_objective": 51.662628173828125,
809
+ "eval_ranking_idealized": 0.5212215185165405,
810
+ "eval_ranking_idealized_expo": 0.5212215185165405,
811
+ "eval_ranking_simple": 0.5253623127937317,
812
+ "eval_regularize": 51.662628173828125,
813
+ "eval_runtime": 307.0369,
814
+ "eval_samples_per_second": 18.858,
815
+ "eval_steps_per_second": 1.573,
816
+ "eval_wo_beta": 14.14501953125,
817
+ "step": 1200
818
+ },
819
+ {
820
+ "epoch": 3.4057628719886632,
821
+ "step": 1200,
822
+ "total_flos": 0.0,
823
+ "train_loss": 2.660881093343099,
824
+ "train_runtime": 6833.7834,
825
+ "train_samples_per_second": 37.17,
826
+ "train_steps_per_second": 0.258
827
+ }
828
+ ],
829
+ "logging_steps": 50,
830
+ "max_steps": 1760,
831
+ "num_input_tokens_seen": 0,
832
+ "num_train_epochs": 5,
833
+ "save_steps": 50,
834
+ "stateful_callbacks": {
835
+ "EarlyStoppingCallback": {
836
+ "args": {
837
+ "early_stopping_patience": 5,
838
+ "early_stopping_threshold": 0.0
839
+ },
840
+ "attributes": {
841
+ "early_stopping_patience_counter": 0
842
+ }
843
+ },
844
+ "TrainerControl": {
845
+ "args": {
846
+ "should_epoch_stop": false,
847
+ "should_evaluate": false,
848
+ "should_log": false,
849
+ "should_save": true,
850
+ "should_training_stop": true
851
+ },
852
+ "attributes": {}
853
+ }
854
+ },
855
+ "total_flos": 0.0,
856
+ "train_batch_size": 4,
857
+ "trial_name": null,
858
+ "trial_params": null
859
+ }