hZzy committed on
Commit
77d4d54
1 Parent(s): aba672e

Model save

Browse files
README.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
+ tags:
5
+ - trl
6
+ - expo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: qwen2.5-0.5b-expo-L1EXPO-ES-0.1
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/f736bh29)
17
+ # qwen2.5-0.5b-expo-L1EXPO-ES-0.1
18
+
19
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.5175
22
+ - Logps: -83.3791
23
+ - Logits: -0.5087
24
+ - Objective: 0.5185
25
+ - Dpo Loss: 0.7500
26
+ - Regularize: 0.5185
27
+ - Ranking Simple: 0.5311
28
+ - Ranking Idealized: 0.6030
29
+ - Ranking Idealized Expo: 0.5223
30
+ - Wo Beta: 14.2158
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 5e-06
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - distributed_type: multi-GPU
54
+ - num_devices: 3
55
+ - gradient_accumulation_steps: 12
56
+ - total_train_batch_size: 144
57
+ - total_eval_batch_size: 12
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: cosine
60
+ - lr_scheduler_warmup_ratio: 0.1
61
+ - num_epochs: 5
62
+
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Dpo Loss | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize | Wo Beta |
66
+ |:-------------:|:------:|:----:|:--------:|:-------:|:--------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|:-------:|
67
+ | 0.0448 | 0.1417 | 50 | 0.6936 | -1.4299 | -90.3888 | 0.0622 | 0.0621 | 0.6030 | 0.5223 | 0.5243 | 0.0621 | 16.0768 |
68
+ | 0.1716 | 0.2834 | 100 | 0.6982 | -1.3597 | -88.7675 | 0.1556 | 0.1559 | 0.6030 | 0.5223 | 0.5274 | 0.1559 | 15.9436 |
69
+ | 0.2858 | 0.4251 | 150 | 0.7183 | -1.2546 | -79.5067 | 0.2912 | 0.2923 | 0.6030 | 0.5223 | 0.5228 | 0.2923 | 15.0570 |
70
+ | 0.3544 | 0.5668 | 200 | 0.7309 | -0.8432 | -83.8485 | 0.3898 | 0.3890 | 0.6030 | 0.5223 | 0.5228 | 0.3890 | 14.7122 |
71
+ | 0.375 | 0.7085 | 250 | 0.7353 | -0.6734 | -81.2900 | 0.4398 | 0.4375 | 0.6030 | 0.5223 | 0.5243 | 0.4375 | 14.4729 |
72
+ | 0.3592 | 0.8503 | 300 | 0.7348 | -0.5501 | -84.4144 | 0.4422 | 0.4388 | 0.6030 | 0.5223 | 0.5233 | 0.4388 | 14.4403 |
73
+ | 0.3351 | 0.9920 | 350 | 0.7354 | -0.5360 | -82.9375 | 0.4676 | 0.4602 | 0.6030 | 0.5223 | 0.5342 | 0.4602 | 14.2722 |
74
+ | 0.3056 | 1.1337 | 400 | 0.7470 | -0.5686 | -80.5606 | 0.4842 | 0.4804 | 0.6030 | 0.5223 | 0.5254 | 0.4804 | 14.2812 |
75
+ | 0.2932 | 1.2754 | 450 | 0.7439 | -0.5565 | -83.6231 | 0.4805 | 0.4755 | 0.6030 | 0.5223 | 0.5280 | 0.4755 | 14.4640 |
76
+ | 0.2864 | 1.4171 | 500 | 0.7510 | -0.6557 | -82.9178 | 0.4964 | 0.4971 | 0.6030 | 0.5223 | 0.5274 | 0.4971 | 14.2823 |
77
+ | 0.2635 | 1.5588 | 550 | 0.7503 | -0.6184 | -81.1614 | 0.5023 | 0.5043 | 0.6030 | 0.5223 | 0.5228 | 0.5043 | 14.0632 |
78
+ | 0.2561 | 1.7005 | 600 | 0.7487 | -0.5805 | -84.7039 | 0.4980 | 0.4964 | 0.6030 | 0.5223 | 0.5233 | 0.4964 | 14.3352 |
79
+ | 0.2448 | 1.8422 | 650 | 0.7503 | -0.4274 | -83.4629 | 0.5171 | 0.5191 | 0.6030 | 0.5223 | 0.5233 | 0.5191 | 14.2153 |
80
+ | 0.2235 | 1.9839 | 700 | 0.7483 | -0.5057 | -81.7196 | 0.4963 | 0.4949 | 0.6030 | 0.5223 | 0.5233 | 0.4949 | 14.2026 |
81
+ | 0.21 | 2.1256 | 750 | 0.7512 | -0.4757 | -82.5192 | 0.5234 | 0.5225 | 0.6030 | 0.5223 | 0.5254 | 0.5225 | 14.0055 |
82
+ | 0.1988 | 2.2674 | 800 | 0.7496 | -0.5578 | -81.0564 | 0.5140 | 0.5114 | 0.6030 | 0.5223 | 0.5295 | 0.5114 | 14.1030 |
83
+ | 0.1845 | 2.4091 | 850 | 0.7516 | -0.5129 | -82.6326 | 0.5205 | 0.5186 | 0.6030 | 0.5223 | 0.5311 | 0.5186 | 14.1518 |
84
+ | 0.1741 | 2.5508 | 900 | 0.7507 | -0.4790 | -82.9809 | 0.5132 | 0.5118 | 0.6030 | 0.5223 | 0.5238 | 0.5118 | 14.2459 |
85
+ | 0.1659 | 2.6925 | 950 | 0.7500 | -0.4840 | -83.8330 | 0.5189 | 0.5193 | 0.6030 | 0.5223 | 0.5238 | 0.5193 | 14.3029 |
86
+ | 0.1539 | 2.8342 | 1000 | 0.7499 | -0.4671 | -82.8831 | 0.5137 | 0.5127 | 0.6030 | 0.5223 | 0.5269 | 0.5127 | 14.1925 |
87
+ | 0.1445 | 2.9806 | 1050 | 0.7478 | -0.5531 | -83.1677 | 0.5116 | 0.5112 | 0.6030 | 0.5223 | 0.5248 | 0.5112 | 14.2141 |
88
+ | 0.1261 | 3.1223 | 1100 | 0.7515 | -0.5488 | -83.5954 | 0.5157 | 0.5165 | 0.6030 | 0.5223 | 0.5233 | 0.5165 | 14.1783 |
89
+ | 0.1146 | 3.2641 | 1150 | 0.7487 | -0.5372 | -83.4265 | 0.5175 | 0.5161 | 0.6030 | 0.5223 | 0.5264 | 0.5161 | 14.1956 |
90
+ | 0.1076 | 3.4058 | 1200 | 0.7492 | -0.4946 | -83.9912 | 0.5169 | 0.5160 | 0.6030 | 0.5223 | 0.5274 | 0.5160 | 14.1241 |
91
+ | 0.0981 | 3.5475 | 1250 | 0.7500 | -0.5087 | -83.3791 | 0.5175 | 0.5185 | 0.6030 | 0.5223 | 0.5311 | 0.5185 | 14.2158 |
92
+
93
+
94
+ ### Framework versions
95
+
96
+ - Transformers 4.42.0
97
+ - Pytorch 2.3.0+cu121
98
+ - Datasets 2.19.1
99
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.5474728389230044,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.02363297004699707,
5
+ "train_runtime": 6884.3268,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 36.897,
8
+ "train_steps_per_second": 0.256
9
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151644,
3
+ "eos_token_id": 151645,
4
+ "max_new_tokens": 2048,
5
+ "pad_token_id": 151645,
6
+ "transformers_version": "4.42.0"
7
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ae54586a166e11103d8ec73d922ba0117d148e302b1e516a449d6a22ad478c7
3
  size 1975192208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86b9cdf79660b95dd9c80e5ec372eea48253b74b82348ad4bb5bd1fe5a76fc5
3
  size 1975192208
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.5474728389230044,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.02363297004699707,
5
+ "train_runtime": 6884.3268,
6
+ "train_samples": 50802,
7
+ "train_samples_per_second": 36.897,
8
+ "train_steps_per_second": 0.256
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,892 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 14.005528450012207,
3
+ "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-0.1/checkpoint-750",
4
+ "epoch": 3.5474728389230044,
5
+ "eval_steps": 50,
6
+ "global_step": 1250,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "dpo_loss": 0.6931471824645996,
13
+ "epoch": 0.002834199338686821,
14
+ "grad_norm": 36.885068816813735,
15
+ "learning_rate": 2.840909090909091e-08,
16
+ "logits": -1.359458565711975,
17
+ "logps": -84.69721221923828,
18
+ "loss": 0.0051,
19
+ "objective": 0.0046141319908201694,
20
+ "ranking_idealized": 0.6458333134651184,
21
+ "ranking_idealized_expo": 0.5833333134651184,
22
+ "ranking_simple": 0.5833333134651184,
23
+ "regularize": 0.0046141319908201694,
24
+ "step": 1,
25
+ "wo_beta": 14.840873718261719
26
+ },
27
+ {
28
+ "dpo_loss": 0.6924303770065308,
29
+ "epoch": 0.14170996693434104,
30
+ "grad_norm": 33.742835742644765,
31
+ "learning_rate": 1.4204545454545458e-06,
32
+ "logits": -1.4566550254821777,
33
+ "logps": -84.23489379882812,
34
+ "loss": 0.0448,
35
+ "objective": 0.04280169680714607,
36
+ "ranking_idealized": 0.608418345451355,
37
+ "ranking_idealized_expo": 0.5229591727256775,
38
+ "ranking_simple": 0.5221088528633118,
39
+ "regularize": 0.04280169680714607,
40
+ "step": 50,
41
+ "wo_beta": 15.654285430908203
42
+ },
43
+ {
44
+ "epoch": 0.14170996693434104,
45
+ "eval_dpo_loss": 0.6935604810714722,
46
+ "eval_logits": -1.4298962354660034,
47
+ "eval_logps": -90.38883209228516,
48
+ "eval_loss": 0.062233828008174896,
49
+ "eval_objective": 0.062142737209796906,
50
+ "eval_ranking_idealized": 0.6030020713806152,
51
+ "eval_ranking_idealized_expo": 0.5222567319869995,
52
+ "eval_ranking_simple": 0.5243270993232727,
53
+ "eval_regularize": 0.062142737209796906,
54
+ "eval_runtime": 308.7283,
55
+ "eval_samples_per_second": 18.754,
56
+ "eval_steps_per_second": 1.564,
57
+ "eval_wo_beta": 16.076759338378906,
58
+ "step": 50
59
+ },
60
+ {
61
+ "dpo_loss": 0.7021370530128479,
62
+ "epoch": 0.2834199338686821,
63
+ "grad_norm": 31.729424259376046,
64
+ "learning_rate": 2.8409090909090916e-06,
65
+ "logits": -1.3946272134780884,
66
+ "logps": -82.87468719482422,
67
+ "loss": 0.1716,
68
+ "objective": 0.17463459074497223,
69
+ "ranking_idealized": 0.6016666889190674,
70
+ "ranking_idealized_expo": 0.5141666531562805,
71
+ "ranking_simple": 0.51541668176651,
72
+ "regularize": 0.17463459074497223,
73
+ "step": 100,
74
+ "wo_beta": 15.276419639587402
75
+ },
76
+ {
77
+ "epoch": 0.2834199338686821,
78
+ "eval_dpo_loss": 0.6981683969497681,
79
+ "eval_logits": -1.3597227334976196,
80
+ "eval_logps": -88.76753234863281,
81
+ "eval_loss": 0.1556352823972702,
82
+ "eval_objective": 0.15588510036468506,
83
+ "eval_ranking_idealized": 0.6030020713806152,
84
+ "eval_ranking_idealized_expo": 0.5222567319869995,
85
+ "eval_ranking_simple": 0.5274327397346497,
86
+ "eval_regularize": 0.15588510036468506,
87
+ "eval_runtime": 308.3116,
88
+ "eval_samples_per_second": 18.78,
89
+ "eval_steps_per_second": 1.567,
90
+ "eval_wo_beta": 15.943617820739746,
91
+ "step": 100
92
+ },
93
+ {
94
+ "dpo_loss": 0.7128496170043945,
95
+ "epoch": 0.42512990080302315,
96
+ "grad_norm": 24.11205441144116,
97
+ "learning_rate": 4.2613636363636365e-06,
98
+ "logits": -1.251375675201416,
99
+ "logps": -79.16511535644531,
100
+ "loss": 0.2858,
101
+ "objective": 0.2846659719944,
102
+ "ranking_idealized": 0.6066666841506958,
103
+ "ranking_idealized_expo": 0.5287500023841858,
104
+ "ranking_simple": 0.5274999737739563,
105
+ "regularize": 0.2846659719944,
106
+ "step": 150,
107
+ "wo_beta": 15.22218132019043
108
+ },
109
+ {
110
+ "epoch": 0.42512990080302315,
111
+ "eval_dpo_loss": 0.718317449092865,
112
+ "eval_logits": -1.2545664310455322,
113
+ "eval_logps": -79.50674438476562,
114
+ "eval_loss": 0.2911944091320038,
115
+ "eval_objective": 0.2922578752040863,
116
+ "eval_ranking_idealized": 0.6030020713806152,
117
+ "eval_ranking_idealized_expo": 0.5222567319869995,
118
+ "eval_ranking_simple": 0.522774338722229,
119
+ "eval_regularize": 0.2922578752040863,
120
+ "eval_runtime": 308.3872,
121
+ "eval_samples_per_second": 18.775,
122
+ "eval_steps_per_second": 1.566,
123
+ "eval_wo_beta": 15.05699634552002,
124
+ "step": 150
125
+ },
126
+ {
127
+ "dpo_loss": 0.7332326173782349,
128
+ "epoch": 0.5668398677373642,
129
+ "grad_norm": 19.83023586651878,
130
+ "learning_rate": 4.997168347957521e-06,
131
+ "logits": -1.015448808670044,
132
+ "logps": -76.77922058105469,
133
+ "loss": 0.3544,
134
+ "objective": 0.3552749752998352,
135
+ "ranking_idealized": 0.5924999713897705,
136
+ "ranking_idealized_expo": 0.5166666507720947,
137
+ "ranking_simple": 0.5020833611488342,
138
+ "regularize": 0.3552749752998352,
139
+ "step": 200,
140
+ "wo_beta": 15.356170654296875
141
+ },
142
+ {
143
+ "epoch": 0.5668398677373642,
144
+ "eval_dpo_loss": 0.7308588027954102,
145
+ "eval_logits": -0.8432308435440063,
146
+ "eval_logps": -83.84849548339844,
147
+ "eval_loss": 0.3898463547229767,
148
+ "eval_objective": 0.3890216052532196,
149
+ "eval_ranking_idealized": 0.6030020713806152,
150
+ "eval_ranking_idealized_expo": 0.5222567319869995,
151
+ "eval_ranking_simple": 0.522774338722229,
152
+ "eval_regularize": 0.3890216052532196,
153
+ "eval_runtime": 307.8908,
154
+ "eval_samples_per_second": 18.805,
155
+ "eval_steps_per_second": 1.569,
156
+ "eval_wo_beta": 14.712230682373047,
157
+ "step": 200
158
+ },
159
+ {
160
+ "dpo_loss": 0.7182620763778687,
161
+ "epoch": 0.7085498346717053,
162
+ "grad_norm": 19.421703617823624,
163
+ "learning_rate": 4.973122855144066e-06,
164
+ "logits": -0.7580794095993042,
165
+ "logps": -78.16065216064453,
166
+ "loss": 0.375,
167
+ "objective": 0.37717047333717346,
168
+ "ranking_idealized": 0.5991666913032532,
169
+ "ranking_idealized_expo": 0.5170833468437195,
170
+ "ranking_simple": 0.5195833444595337,
171
+ "regularize": 0.37717047333717346,
172
+ "step": 250,
173
+ "wo_beta": 15.653904914855957
174
+ },
175
+ {
176
+ "epoch": 0.7085498346717053,
177
+ "eval_dpo_loss": 0.735299289226532,
178
+ "eval_logits": -0.673379123210907,
179
+ "eval_logps": -81.28996276855469,
180
+ "eval_loss": 0.4397831857204437,
181
+ "eval_objective": 0.43750080466270447,
182
+ "eval_ranking_idealized": 0.6030020713806152,
183
+ "eval_ranking_idealized_expo": 0.5222567319869995,
184
+ "eval_ranking_simple": 0.5243270993232727,
185
+ "eval_regularize": 0.43750080466270447,
186
+ "eval_runtime": 351.635,
187
+ "eval_samples_per_second": 16.466,
188
+ "eval_steps_per_second": 1.374,
189
+ "eval_wo_beta": 14.472906112670898,
190
+ "step": 250
191
+ },
192
+ {
193
+ "dpo_loss": 0.72139972448349,
194
+ "epoch": 0.8502598016060463,
195
+ "grad_norm": 19.146332148877114,
196
+ "learning_rate": 4.924776641419513e-06,
197
+ "logits": -0.564231812953949,
198
+ "logps": -79.54463195800781,
199
+ "loss": 0.3592,
200
+ "objective": 0.3580860495567322,
201
+ "ranking_idealized": 0.5799999833106995,
202
+ "ranking_idealized_expo": 0.4970833361148834,
203
+ "ranking_simple": 0.5024999976158142,
204
+ "regularize": 0.3580860495567322,
205
+ "step": 300,
206
+ "wo_beta": 15.114410400390625
207
+ },
208
+ {
209
+ "epoch": 0.8502598016060463,
210
+ "eval_dpo_loss": 0.734784722328186,
211
+ "eval_logits": -0.5500932335853577,
212
+ "eval_logps": -84.41443634033203,
213
+ "eval_loss": 0.442239373922348,
214
+ "eval_objective": 0.43877851963043213,
215
+ "eval_ranking_idealized": 0.6030020713806152,
216
+ "eval_ranking_idealized_expo": 0.5222567319869995,
217
+ "eval_ranking_simple": 0.5232919454574585,
218
+ "eval_regularize": 0.43877851963043213,
219
+ "eval_runtime": 404.6503,
220
+ "eval_samples_per_second": 14.309,
221
+ "eval_steps_per_second": 1.194,
222
+ "eval_wo_beta": 14.44029712677002,
223
+ "step": 300
224
+ },
225
+ {
226
+ "dpo_loss": 0.717363178730011,
227
+ "epoch": 0.9919697685403873,
228
+ "grad_norm": 17.11167089420277,
229
+ "learning_rate": 4.8526047530778175e-06,
230
+ "logits": -0.5000432133674622,
231
+ "logps": -79.52243041992188,
232
+ "loss": 0.3351,
233
+ "objective": 0.33388689160346985,
234
+ "ranking_idealized": 0.60916668176651,
235
+ "ranking_idealized_expo": 0.5270833373069763,
236
+ "ranking_simple": 0.5262500047683716,
237
+ "regularize": 0.33388689160346985,
238
+ "step": 350,
239
+ "wo_beta": 15.228816986083984
240
+ },
241
+ {
242
+ "epoch": 0.9919697685403873,
243
+ "eval_dpo_loss": 0.735403299331665,
244
+ "eval_logits": -0.5360206961631775,
245
+ "eval_logps": -82.93754577636719,
246
+ "eval_loss": 0.4675760865211487,
247
+ "eval_objective": 0.4601868689060211,
248
+ "eval_ranking_idealized": 0.6030020713806152,
249
+ "eval_ranking_idealized_expo": 0.5222567319869995,
250
+ "eval_ranking_simple": 0.5341615080833435,
251
+ "eval_regularize": 0.4601868689060211,
252
+ "eval_runtime": 308.044,
253
+ "eval_samples_per_second": 18.796,
254
+ "eval_steps_per_second": 1.568,
255
+ "eval_wo_beta": 14.272198677062988,
256
+ "step": 350
257
+ },
258
+ {
259
+ "dpo_loss": 0.7120790481567383,
260
+ "epoch": 1.1336797354747283,
261
+ "grad_norm": 17.95944897166815,
262
+ "learning_rate": 4.757316345716554e-06,
263
+ "logits": -0.5945844054222107,
264
+ "logps": -77.92745208740234,
265
+ "loss": 0.3056,
266
+ "objective": 0.3030960261821747,
267
+ "ranking_idealized": 0.6087499856948853,
268
+ "ranking_idealized_expo": 0.5337499976158142,
269
+ "ranking_simple": 0.5316666960716248,
270
+ "regularize": 0.3030960261821747,
271
+ "step": 400,
272
+ "wo_beta": 15.400132179260254
273
+ },
274
+ {
275
+ "epoch": 1.1336797354747283,
276
+ "eval_dpo_loss": 0.7469586730003357,
277
+ "eval_logits": -0.5685753226280212,
278
+ "eval_logps": -80.56059265136719,
279
+ "eval_loss": 0.48422977328300476,
280
+ "eval_objective": 0.48037421703338623,
281
+ "eval_ranking_idealized": 0.6030020713806152,
282
+ "eval_ranking_idealized_expo": 0.5222567319869995,
283
+ "eval_ranking_simple": 0.5253623127937317,
284
+ "eval_regularize": 0.48037421703338623,
285
+ "eval_runtime": 314.7441,
286
+ "eval_samples_per_second": 18.396,
287
+ "eval_steps_per_second": 1.535,
288
+ "eval_wo_beta": 14.281224250793457,
289
+ "step": 400
290
+ },
291
+ {
292
+ "dpo_loss": 0.7077716588973999,
293
+ "epoch": 1.2753897024090695,
294
+ "grad_norm": 15.643717424703908,
295
+ "learning_rate": 4.639847716126855e-06,
296
+ "logits": -0.5214452743530273,
297
+ "logps": -78.54476928710938,
298
+ "loss": 0.2932,
299
+ "objective": 0.29641959071159363,
300
+ "ranking_idealized": 0.5975000262260437,
301
+ "ranking_idealized_expo": 0.5199999809265137,
302
+ "ranking_simple": 0.5199999809265137,
303
+ "regularize": 0.29641959071159363,
304
+ "step": 450,
305
+ "wo_beta": 15.791983604431152
306
+ },
307
+ {
308
+ "epoch": 1.2753897024090695,
309
+ "eval_dpo_loss": 0.7439451217651367,
310
+ "eval_logits": -0.5565418004989624,
311
+ "eval_logps": -83.62307739257812,
312
+ "eval_loss": 0.48051390051841736,
313
+ "eval_objective": 0.47548484802246094,
314
+ "eval_ranking_idealized": 0.6030020713806152,
315
+ "eval_ranking_idealized_expo": 0.5222567319869995,
316
+ "eval_ranking_simple": 0.5279502868652344,
317
+ "eval_regularize": 0.47548484802246094,
318
+ "eval_runtime": 308.6602,
319
+ "eval_samples_per_second": 18.758,
320
+ "eval_steps_per_second": 1.565,
321
+ "eval_wo_beta": 14.464012145996094,
322
+ "step": 450
323
+ },
324
+ {
325
+ "dpo_loss": 0.7124494314193726,
326
+ "epoch": 1.4170996693434104,
327
+ "grad_norm": 15.324248073271528,
328
+ "learning_rate": 4.501353102310901e-06,
329
+ "logits": -0.5437880754470825,
330
+ "logps": -78.15093994140625,
331
+ "loss": 0.2864,
332
+ "objective": 0.2836955189704895,
333
+ "ranking_idealized": 0.57833331823349,
334
+ "ranking_idealized_expo": 0.4983333349227905,
335
+ "ranking_simple": 0.5,
336
+ "regularize": 0.2836955189704895,
337
+ "step": 500,
338
+ "wo_beta": 15.279319763183594
339
+ },
340
+ {
341
+ "epoch": 1.4170996693434104,
342
+ "eval_dpo_loss": 0.7510210275650024,
343
+ "eval_logits": -0.6556914448738098,
344
+ "eval_logps": -82.91778564453125,
345
+ "eval_loss": 0.4964132010936737,
346
+ "eval_objective": 0.4971453845500946,
347
+ "eval_ranking_idealized": 0.6030020713806152,
348
+ "eval_ranking_idealized_expo": 0.5222567319869995,
349
+ "eval_ranking_simple": 0.5274327397346497,
350
+ "eval_regularize": 0.4971453845500946,
351
+ "eval_runtime": 307.59,
352
+ "eval_samples_per_second": 18.824,
353
+ "eval_steps_per_second": 1.57,
354
+ "eval_wo_beta": 14.282269477844238,
355
+ "step": 500
356
+ },
357
+ {
358
+ "dpo_loss": 0.7097735404968262,
359
+ "epoch": 1.5588096362777515,
360
+ "grad_norm": 15.632233049837359,
361
+ "learning_rate": 4.34319334202531e-06,
362
+ "logits": -0.5551173686981201,
363
+ "logps": -78.2597885131836,
364
+ "loss": 0.2635,
365
+ "objective": 0.264424592256546,
366
+ "ranking_idealized": 0.5945833325386047,
367
+ "ranking_idealized_expo": 0.5116666555404663,
368
+ "ranking_simple": 0.512499988079071,
369
+ "regularize": 0.264424592256546,
370
+ "step": 550,
371
+ "wo_beta": 15.108202934265137
372
+ },
373
+ {
374
+ "epoch": 1.5588096362777515,
375
+ "eval_dpo_loss": 0.7502700686454773,
376
+ "eval_logits": -0.6183538436889648,
377
+ "eval_logps": -81.16139221191406,
378
+ "eval_loss": 0.502347469329834,
379
+ "eval_objective": 0.5043270587921143,
380
+ "eval_ranking_idealized": 0.6030020713806152,
381
+ "eval_ranking_idealized_expo": 0.5222567319869995,
382
+ "eval_ranking_simple": 0.522774338722229,
383
+ "eval_regularize": 0.5043270587921143,
384
+ "eval_runtime": 307.9389,
385
+ "eval_samples_per_second": 18.802,
386
+ "eval_steps_per_second": 1.568,
387
+ "eval_wo_beta": 14.063152313232422,
388
+ "step": 550
389
+ },
390
+ {
391
+ "dpo_loss": 0.708111047744751,
392
+ "epoch": 1.7005196032120926,
393
+ "grad_norm": 15.258212996967583,
394
+ "learning_rate": 4.16692250129073e-06,
395
+ "logits": -0.48317351937294006,
396
+ "logps": -79.20243835449219,
397
+ "loss": 0.2561,
398
+ "objective": 0.2518368065357208,
399
+ "ranking_idealized": 0.6004166603088379,
400
+ "ranking_idealized_expo": 0.51583331823349,
401
+ "ranking_simple": 0.5074999928474426,
402
+ "regularize": 0.2518368065357208,
403
+ "step": 600,
404
+ "wo_beta": 15.102338790893555
405
+ },
406
+ {
407
+ "epoch": 1.7005196032120926,
408
+ "eval_dpo_loss": 0.7486839294433594,
409
+ "eval_logits": -0.5805073380470276,
410
+ "eval_logps": -84.7038803100586,
411
+ "eval_loss": 0.49804311990737915,
412
+ "eval_objective": 0.49642521142959595,
413
+ "eval_ranking_idealized": 0.6030020713806152,
414
+ "eval_ranking_idealized_expo": 0.5222567319869995,
415
+ "eval_ranking_simple": 0.5232919454574585,
416
+ "eval_regularize": 0.49642521142959595,
417
+ "eval_runtime": 308.8037,
418
+ "eval_samples_per_second": 18.75,
419
+ "eval_steps_per_second": 1.564,
420
+ "eval_wo_beta": 14.335240364074707,
421
+ "step": 600
422
+ },
423
+ {
424
+ "dpo_loss": 0.7057015299797058,
425
+ "epoch": 1.8422295701464337,
426
+ "grad_norm": 15.137988457806076,
427
+ "learning_rate": 3.974272604254906e-06,
428
+ "logits": -0.44393202662467957,
429
+ "logps": -80.69384002685547,
430
+ "loss": 0.2448,
431
+ "objective": 0.24766255915164948,
432
+ "ranking_idealized": 0.6058333516120911,
433
+ "ranking_idealized_expo": 0.5295833349227905,
434
+ "ranking_simple": 0.5350000262260437,
435
+ "regularize": 0.24766255915164948,
436
+ "step": 650,
437
+ "wo_beta": 15.657441139221191
438
+ },
439
+ {
440
+ "epoch": 1.8422295701464337,
441
+ "eval_dpo_loss": 0.750303328037262,
442
+ "eval_logits": -0.42741408944129944,
443
+ "eval_logps": -83.46288299560547,
444
+ "eval_loss": 0.5170512795448303,
445
+ "eval_objective": 0.5191380381584167,
446
+ "eval_ranking_idealized": 0.6030020713806152,
447
+ "eval_ranking_idealized_expo": 0.5222567319869995,
448
+ "eval_ranking_simple": 0.5232919454574585,
449
+ "eval_regularize": 0.5191380381584167,
450
+ "eval_runtime": 311.9724,
451
+ "eval_samples_per_second": 18.559,
452
+ "eval_steps_per_second": 1.548,
453
+ "eval_wo_beta": 14.215264320373535,
454
+ "step": 650
455
+ },
456
+ {
457
+ "dpo_loss": 0.7065611481666565,
458
+ "epoch": 1.9839395370807746,
459
+ "grad_norm": 16.215629133943352,
460
+ "learning_rate": 3.767136614452458e-06,
461
+ "logits": -0.43040552735328674,
462
+ "logps": -79.25086212158203,
463
+ "loss": 0.2235,
464
+ "objective": 0.22704358398914337,
465
+ "ranking_idealized": 0.5954166650772095,
466
+ "ranking_idealized_expo": 0.5129166841506958,
467
+ "ranking_simple": 0.5083333253860474,
468
+ "regularize": 0.22704358398914337,
469
+ "step": 700,
470
+ "wo_beta": 15.360807418823242
471
+ },
472
+ {
473
+ "epoch": 1.9839395370807746,
474
+ "eval_dpo_loss": 0.7482582330703735,
475
+ "eval_logits": -0.5056679844856262,
476
+ "eval_logps": -81.71964263916016,
477
+ "eval_loss": 0.49625101685523987,
478
+ "eval_objective": 0.4948585331439972,
479
+ "eval_ranking_idealized": 0.6030020713806152,
480
+ "eval_ranking_idealized_expo": 0.5222567319869995,
481
+ "eval_ranking_simple": 0.5232919454574585,
482
+ "eval_regularize": 0.4948585331439972,
483
+ "eval_runtime": 312.5183,
484
+ "eval_samples_per_second": 18.527,
485
+ "eval_steps_per_second": 1.546,
486
+ "eval_wo_beta": 14.20261001586914,
487
+ "step": 700
488
+ },
489
+ {
490
+ "dpo_loss": 0.7034626007080078,
491
+ "epoch": 2.1256495040151155,
492
+ "grad_norm": 14.439374560790109,
493
+ "learning_rate": 3.547549834686222e-06,
494
+ "logits": -0.4137415587902069,
495
+ "logps": -78.55575561523438,
496
+ "loss": 0.21,
497
+ "objective": 0.20508375763893127,
498
+ "ranking_idealized": 0.6066666841506958,
499
+ "ranking_idealized_expo": 0.5133333206176758,
500
+ "ranking_simple": 0.5099999904632568,
501
+ "regularize": 0.20508375763893127,
502
+ "step": 750,
503
+ "wo_beta": 15.407984733581543
504
+ },
505
+ {
506
+ "epoch": 2.1256495040151155,
507
+ "eval_dpo_loss": 0.7511767148971558,
508
+ "eval_logits": -0.47565215826034546,
509
+ "eval_logps": -82.51917266845703,
510
+ "eval_loss": 0.5233561396598816,
511
+ "eval_objective": 0.5224636197090149,
512
+ "eval_ranking_idealized": 0.6030020713806152,
513
+ "eval_ranking_idealized_expo": 0.5222567319869995,
514
+ "eval_ranking_simple": 0.5253623127937317,
515
+ "eval_regularize": 0.5224636197090149,
516
+ "eval_runtime": 307.6951,
517
+ "eval_samples_per_second": 18.817,
518
+ "eval_steps_per_second": 1.57,
519
+ "eval_wo_beta": 14.005528450012207,
520
+ "step": 750
521
+ },
522
+ {
523
+ "dpo_loss": 0.6996860504150391,
524
+ "epoch": 2.2673594709494567,
525
+ "grad_norm": 14.763670008384054,
526
+ "learning_rate": 3.3176699082935546e-06,
527
+ "logits": -0.44839462637901306,
528
+ "logps": -79.29713439941406,
529
+ "loss": 0.1988,
530
+ "objective": 0.19862671196460724,
531
+ "ranking_idealized": 0.5941666960716248,
532
+ "ranking_idealized_expo": 0.5129166841506958,
533
+ "ranking_simple": 0.5116666555404663,
534
+ "regularize": 0.19862671196460724,
535
+ "step": 800,
536
+ "wo_beta": 14.99027156829834
537
+ },
538
+ {
539
+ "epoch": 2.2673594709494567,
540
+ "eval_dpo_loss": 0.7496048808097839,
541
+ "eval_logits": -0.5577788949012756,
542
+ "eval_logps": -81.05644226074219,
543
+ "eval_loss": 0.5139943361282349,
544
+ "eval_objective": 0.511448085308075,
545
+ "eval_ranking_idealized": 0.6030020713806152,
546
+ "eval_ranking_idealized_expo": 0.5222567319869995,
547
+ "eval_ranking_simple": 0.5295031070709229,
548
+ "eval_regularize": 0.511448085308075,
549
+ "eval_runtime": 308.0865,
550
+ "eval_samples_per_second": 18.793,
551
+ "eval_steps_per_second": 1.568,
552
+ "eval_wo_beta": 14.102994918823242,
553
+ "step": 800
554
+ },
555
+ {
556
+ "dpo_loss": 0.7008050680160522,
557
+ "epoch": 2.409069437883798,
558
+ "grad_norm": 14.833532828768124,
559
+ "learning_rate": 3.0797556183036582e-06,
560
+ "logits": -0.4541783630847931,
561
+ "logps": -78.78826141357422,
562
+ "loss": 0.1845,
563
+ "objective": 0.18498587608337402,
564
+ "ranking_idealized": 0.5979166626930237,
565
+ "ranking_idealized_expo": 0.5166666507720947,
566
+ "ranking_simple": 0.5095833539962769,
567
+ "regularize": 0.18498587608337402,
568
+ "step": 850,
569
+ "wo_beta": 15.201929092407227
570
+ },
571
+ {
572
+ "epoch": 2.409069437883798,
573
+ "eval_dpo_loss": 0.7516361474990845,
574
+ "eval_logits": -0.512949526309967,
575
+ "eval_logps": -82.63258361816406,
576
+ "eval_loss": 0.520516574382782,
577
+ "eval_objective": 0.518623948097229,
578
+ "eval_ranking_idealized": 0.6030020713806152,
579
+ "eval_ranking_idealized_expo": 0.5222567319869995,
580
+ "eval_ranking_simple": 0.5310559272766113,
581
+ "eval_regularize": 0.518623948097229,
582
+ "eval_runtime": 309.5699,
583
+ "eval_samples_per_second": 18.703,
584
+ "eval_steps_per_second": 1.56,
585
+ "eval_wo_beta": 14.151838302612305,
586
+ "step": 850
587
+ },
588
+ {
589
+ "dpo_loss": 0.7016371488571167,
590
+ "epoch": 2.550779404818139,
591
+ "grad_norm": 16.31271001796413,
592
+ "learning_rate": 2.8361446928038298e-06,
593
+ "logits": -0.42467382550239563,
594
+ "logps": -79.11641693115234,
595
+ "loss": 0.1741,
596
+ "objective": 0.1748921126127243,
597
+ "ranking_idealized": 0.5887500047683716,
598
+ "ranking_idealized_expo": 0.518750011920929,
599
+ "ranking_simple": 0.51583331823349,
600
+ "regularize": 0.1748921126127243,
601
+ "step": 900,
602
+ "wo_beta": 15.531429290771484
603
+ },
604
+ {
605
+ "epoch": 2.550779404818139,
606
+ "eval_dpo_loss": 0.7507295608520508,
607
+ "eval_logits": -0.4789924621582031,
608
+ "eval_logps": -82.98091125488281,
609
+ "eval_loss": 0.5132278800010681,
610
+ "eval_objective": 0.5117725729942322,
611
+ "eval_ranking_idealized": 0.6030020713806152,
612
+ "eval_ranking_idealized_expo": 0.5222567319869995,
613
+ "eval_ranking_simple": 0.523809552192688,
614
+ "eval_regularize": 0.5117725729942322,
615
+ "eval_runtime": 311.1948,
616
+ "eval_samples_per_second": 18.606,
617
+ "eval_steps_per_second": 1.552,
618
+ "eval_wo_beta": 14.245877265930176,
619
+ "step": 900
620
+ },
621
+ {
622
+ "dpo_loss": 0.6993770003318787,
623
+ "epoch": 2.69248937175248,
624
+ "grad_norm": 14.7867391875567,
625
+ "learning_rate": 2.5892308345974517e-06,
626
+ "logits": -0.4542914927005768,
627
+ "logps": -79.9416732788086,
628
+ "loss": 0.1659,
629
+ "objective": 0.16475924849510193,
630
+ "ranking_idealized": 0.5975000262260437,
631
+ "ranking_idealized_expo": 0.5087500214576721,
632
+ "ranking_simple": 0.5141666531562805,
633
+ "regularize": 0.16475924849510193,
634
+ "step": 950,
635
+ "wo_beta": 15.521940231323242
636
+ },
637
+ {
638
+ "epoch": 2.69248937175248,
639
+ "eval_dpo_loss": 0.7500060796737671,
640
+ "eval_logits": -0.4840329587459564,
641
+ "eval_logps": -83.8330307006836,
642
+ "eval_loss": 0.5188658237457275,
643
+ "eval_objective": 0.5192957520484924,
644
+ "eval_ranking_idealized": 0.6030020713806152,
645
+ "eval_ranking_idealized_expo": 0.5222567319869995,
646
+ "eval_ranking_simple": 0.523809552192688,
647
+ "eval_regularize": 0.5192957520484924,
648
+ "eval_runtime": 310.1876,
649
+ "eval_samples_per_second": 18.666,
650
+ "eval_steps_per_second": 1.557,
651
+ "eval_wo_beta": 14.302889823913574,
652
+ "step": 950
653
+ },
654
+ {
655
+ "dpo_loss": 0.7024207711219788,
656
+ "epoch": 2.8341993386868207,
657
+ "grad_norm": 14.42838893249385,
658
+ "learning_rate": 2.341440200858589e-06,
659
+ "logits": -0.4285065233707428,
660
+ "logps": -78.83317565917969,
661
+ "loss": 0.1539,
662
+ "objective": 0.15508733689785004,
663
+ "ranking_idealized": 0.6020833253860474,
664
+ "ranking_idealized_expo": 0.5104166865348816,
665
+ "ranking_simple": 0.5091666579246521,
666
+ "regularize": 0.15508733689785004,
667
+ "step": 1000,
668
+ "wo_beta": 15.06278133392334
669
+ },
670
+ {
671
+ "epoch": 2.8341993386868207,
672
+ "eval_dpo_loss": 0.7498777508735657,
673
+ "eval_logits": -0.46712055802345276,
674
+ "eval_logps": -82.88314056396484,
675
+ "eval_loss": 0.5136557817459106,
676
+ "eval_objective": 0.5127004981040955,
677
+ "eval_ranking_idealized": 0.6030020713806152,
678
+ "eval_ranking_idealized_expo": 0.5222567319869995,
679
+ "eval_ranking_simple": 0.5269151329994202,
680
+ "eval_regularize": 0.5127004981040955,
681
+ "eval_runtime": 308.4784,
682
+ "eval_samples_per_second": 18.77,
683
+ "eval_steps_per_second": 1.566,
684
+ "eval_wo_beta": 14.192536354064941,
685
+ "step": 1000
686
+ },
687
+ {
688
+ "dpo_loss": 0.6952893733978271,
689
+ "epoch": 2.9806329711856403,
690
+ "grad_norm": 14.77922239882152,
691
+ "learning_rate": 2.0952075638923656e-06,
692
+ "logits": -0.4941651523113251,
693
+ "logps": -79.95951080322266,
694
+ "loss": 0.1445,
695
+ "objective": 0.14514465630054474,
696
+ "ranking_idealized": 0.6079166531562805,
697
+ "ranking_idealized_expo": 0.5183333158493042,
698
+ "ranking_simple": 0.5220833420753479,
699
+ "regularize": 0.14514465630054474,
700
+ "step": 1050,
701
+ "wo_beta": 15.50859546661377
702
+ },
703
+ {
704
+ "epoch": 2.9806329711856403,
705
+ "eval_dpo_loss": 0.7478482127189636,
706
+ "eval_logits": -0.5530552864074707,
707
+ "eval_logps": -83.16773986816406,
708
+ "eval_loss": 0.5116256475448608,
709
+ "eval_objective": 0.511193573474884,
710
+ "eval_ranking_idealized": 0.6030020713806152,
711
+ "eval_ranking_idealized_expo": 0.5222567319869995,
712
+ "eval_ranking_simple": 0.5248447060585022,
713
+ "eval_regularize": 0.511193573474884,
714
+ "eval_runtime": 310.4184,
715
+ "eval_samples_per_second": 18.652,
716
+ "eval_steps_per_second": 1.556,
717
+ "eval_wo_beta": 14.214123725891113,
718
+ "step": 1050
719
+ },
720
+ {
721
+ "dpo_loss": 0.6958988308906555,
722
+ "epoch": 3.122342938119981,
723
+ "grad_norm": 14.047766487106658,
724
+ "learning_rate": 1.852952387243698e-06,
725
+ "logits": -0.4758566915988922,
726
+ "logps": -80.04509735107422,
727
+ "loss": 0.1261,
728
+ "objective": 0.12395481020212173,
729
+ "ranking_idealized": 0.6070833206176758,
730
+ "ranking_idealized_expo": 0.5304166674613953,
731
+ "ranking_simple": 0.5270833373069763,
732
+ "regularize": 0.12395481020212173,
733
+ "step": 1100,
734
+ "wo_beta": 15.6008939743042
735
+ },
736
+ {
737
+ "epoch": 3.122342938119981,
738
+ "eval_dpo_loss": 0.7515185475349426,
739
+ "eval_logits": -0.5487966537475586,
740
+ "eval_logps": -83.59542846679688,
741
+ "eval_loss": 0.515699565410614,
742
+ "eval_objective": 0.5165062546730042,
743
+ "eval_ranking_idealized": 0.6030020713806152,
744
+ "eval_ranking_idealized_expo": 0.5222567319869995,
745
+ "eval_ranking_simple": 0.5232919454574585,
746
+ "eval_regularize": 0.5165062546730042,
747
+ "eval_runtime": 309.3286,
748
+ "eval_samples_per_second": 18.718,
749
+ "eval_steps_per_second": 1.561,
750
+ "eval_wo_beta": 14.178275108337402,
751
+ "step": 1100
752
+ },
753
+ {
754
+ "dpo_loss": 0.6931909322738647,
755
+ "epoch": 3.264052905054322,
756
+ "grad_norm": 14.963308385384513,
757
+ "learning_rate": 1.617055052228768e-06,
758
+ "logits": -0.4729629456996918,
759
+ "logps": -79.84102630615234,
760
+ "loss": 0.1146,
761
+ "objective": 0.11260777711868286,
762
+ "ranking_idealized": 0.5950000286102295,
763
+ "ranking_idealized_expo": 0.5095833539962769,
764
+ "ranking_simple": 0.5149999856948853,
765
+ "regularize": 0.11260777711868286,
766
+ "step": 1150,
767
+ "wo_beta": 15.298945426940918
768
+ },
769
+ {
770
+ "epoch": 3.264052905054322,
771
+ "eval_dpo_loss": 0.7487252354621887,
772
+ "eval_logits": -0.5372445583343506,
773
+ "eval_logps": -83.42646789550781,
774
+ "eval_loss": 0.5174793601036072,
775
+ "eval_objective": 0.5160741806030273,
776
+ "eval_ranking_idealized": 0.6030020713806152,
777
+ "eval_ranking_idealized_expo": 0.5222567319869995,
778
+ "eval_ranking_simple": 0.5263975262641907,
779
+ "eval_regularize": 0.5160741806030273,
780
+ "eval_runtime": 308.2096,
781
+ "eval_samples_per_second": 18.786,
782
+ "eval_steps_per_second": 1.567,
783
+ "eval_wo_beta": 14.195608139038086,
784
+ "step": 1150
785
+ },
786
+ {
787
+ "dpo_loss": 0.6932617425918579,
788
+ "epoch": 3.4057628719886632,
789
+ "grad_norm": 14.929601093854455,
790
+ "learning_rate": 1.3898334684855647e-06,
791
+ "logits": -0.4731375575065613,
792
+ "logps": -80.88990783691406,
793
+ "loss": 0.1076,
794
+ "objective": 0.10945354402065277,
795
+ "ranking_idealized": 0.5933333039283752,
796
+ "ranking_idealized_expo": 0.5083333253860474,
797
+ "ranking_simple": 0.5070833563804626,
798
+ "regularize": 0.10945354402065277,
799
+ "step": 1200,
800
+ "wo_beta": 15.574357032775879
801
+ },
802
+ {
803
+ "epoch": 3.4057628719886632,
804
+ "eval_dpo_loss": 0.7491946816444397,
805
+ "eval_logits": -0.4946048855781555,
806
+ "eval_logps": -83.99122619628906,
807
+ "eval_loss": 0.5169116258621216,
808
+ "eval_objective": 0.5159533619880676,
809
+ "eval_ranking_idealized": 0.6030020713806152,
810
+ "eval_ranking_idealized_expo": 0.5222567319869995,
811
+ "eval_ranking_simple": 0.5274327397346497,
812
+ "eval_regularize": 0.5159533619880676,
813
+ "eval_runtime": 309.8199,
814
+ "eval_samples_per_second": 18.688,
815
+ "eval_steps_per_second": 1.559,
816
+ "eval_wo_beta": 14.124122619628906,
817
+ "step": 1200
818
+ },
819
+ {
820
+ "dpo_loss": 0.6921232342720032,
821
+ "epoch": 3.5474728389230044,
822
+ "grad_norm": 14.184398569090499,
823
+ "learning_rate": 1.1735202983664803e-06,
824
+ "logits": -0.45394301414489746,
825
+ "logps": -80.82902526855469,
826
+ "loss": 0.0981,
827
+ "objective": 0.09646416455507278,
828
+ "ranking_idealized": 0.5962499976158142,
829
+ "ranking_idealized_expo": 0.5166666507720947,
830
+ "ranking_simple": 0.51541668176651,
831
+ "regularize": 0.09646416455507278,
832
+ "step": 1250,
833
+ "wo_beta": 15.384990692138672
834
+ },
835
+ {
836
+ "epoch": 3.5474728389230044,
837
+ "eval_dpo_loss": 0.750022828578949,
838
+ "eval_logits": -0.5087407231330872,
839
+ "eval_logps": -83.37907409667969,
840
+ "eval_loss": 0.5174669623374939,
841
+ "eval_objective": 0.5184855461120605,
842
+ "eval_ranking_idealized": 0.6030020713806152,
843
+ "eval_ranking_idealized_expo": 0.5222567319869995,
844
+ "eval_ranking_simple": 0.5310559272766113,
845
+ "eval_regularize": 0.5184855461120605,
846
+ "eval_runtime": 307.8564,
847
+ "eval_samples_per_second": 18.807,
848
+ "eval_steps_per_second": 1.569,
849
+ "eval_wo_beta": 14.215774536132812,
850
+ "step": 1250
851
+ },
852
+ {
853
+ "epoch": 3.5474728389230044,
854
+ "step": 1250,
855
+ "total_flos": 0.0,
856
+ "train_loss": 0.02363297004699707,
857
+ "train_runtime": 6884.3268,
858
+ "train_samples_per_second": 36.897,
859
+ "train_steps_per_second": 0.256
860
+ }
861
+ ],
862
+ "logging_steps": 50,
863
+ "max_steps": 1760,
864
+ "num_input_tokens_seen": 0,
865
+ "num_train_epochs": 5,
866
+ "save_steps": 50,
867
+ "stateful_callbacks": {
868
+ "EarlyStoppingCallback": {
869
+ "args": {
870
+ "early_stopping_patience": 5,
871
+ "early_stopping_threshold": 0.0
872
+ },
873
+ "attributes": {
874
+ "early_stopping_patience_counter": 0
875
+ }
876
+ },
877
+ "TrainerControl": {
878
+ "args": {
879
+ "should_epoch_stop": false,
880
+ "should_evaluate": false,
881
+ "should_log": false,
882
+ "should_save": true,
883
+ "should_training_stop": true
884
+ },
885
+ "attributes": {}
886
+ }
887
+ },
888
+ "total_flos": 0.0,
889
+ "train_batch_size": 4,
890
+ "trial_name": null,
891
+ "trial_params": null
892
+ }