Model save
Browse files- README.md +22 -19
- all_results.json +4 -4
- model.safetensors +1 -1
- train_results.json +4 -4
- trainer_state.json +1875 -9
README.md
CHANGED
@@ -2,18 +2,11 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- ndcg
|
7 |
-
- trl
|
8 |
-
- expo
|
9 |
-
- generated_from_trainer
|
10 |
- trl
|
11 |
- expo
|
12 |
- alignment-handbook
|
13 |
- ndcg
|
14 |
- generated_from_trainer
|
15 |
-
datasets:
|
16 |
-
- hZzy/train_pairwise
|
17 |
model-index:
|
18 |
- name: qwen2.5-0.5b-expo-DPO-ES-TRY
|
19 |
results: []
|
@@ -22,21 +15,21 @@ model-index:
|
|
22 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
23 |
should probably proofread and complete it, then remove this comment. -->
|
24 |
|
25 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/
|
26 |
# qwen2.5-0.5b-expo-DPO-ES-TRY
|
27 |
|
28 |
-
This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on
|
29 |
It achieves the following results on the evaluation set:
|
30 |
-
- Loss: 0.
|
31 |
-
- Logps: -
|
32 |
-
- Logits: -2.
|
33 |
-
- Objective: 0.
|
34 |
-
- Dpo Loss: 0.
|
35 |
-
- Regularize: 0.
|
36 |
-
- Ranking Simple: 0.
|
37 |
- Ranking Idealized: 0.6046
|
38 |
- Ranking Idealized Expo: 0.5280
|
39 |
-
- Dpo Wo Beta: -6.
|
40 |
|
41 |
## Model description
|
42 |
|
@@ -61,8 +54,8 @@ The following hyperparameters were used during training:
|
|
61 |
- seed: 42
|
62 |
- distributed_type: multi-GPU
|
63 |
- num_devices: 6
|
64 |
-
- gradient_accumulation_steps:
|
65 |
-
- total_train_batch_size:
|
66 |
- total_eval_batch_size: 12
|
67 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
68 |
- lr_scheduler_type: cosine
|
@@ -82,6 +75,16 @@ The following hyperparameters were used during training:
|
|
82 |
| 0.1227 | 2.1030 | 371 | 0.9224 | -6.4510 | -1.8645 | -122.0016 | 0.8844 | 0.9224 | 0.5888 | 0.5093 | 0.5424 | 0.9224 |
|
83 |
| 0.133 | 2.4034 | 424 | 0.8786 | -5.8878 | -2.0277 | -117.1217 | 0.8448 | 0.8786 | 0.5888 | 0.5093 | 0.5413 | 0.8786 |
|
84 |
| 0.1211 | 2.7085 | 477 | 0.8739 | -5.8152 | -2.0272 | -116.4230 | 0.8371 | 0.8739 | 0.5888 | 0.5093 | 0.5403 | 0.8739 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
|
87 |
### Framework versions
|
|
|
2 |
license: apache-2.0
|
3 |
base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
|
4 |
tags:
|
|
|
|
|
|
|
|
|
|
|
5 |
- trl
|
6 |
- expo
|
7 |
- alignment-handbook
|
8 |
- ndcg
|
9 |
- generated_from_trainer
|
|
|
|
|
10 |
model-index:
|
11 |
- name: qwen2.5-0.5b-expo-DPO-ES-TRY
|
12 |
results: []
|
|
|
15 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
16 |
should probably proofread and complete it, then remove this comment. -->
|
17 |
|
18 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/rg2tu2w0)
|
19 |
# qwen2.5-0.5b-expo-DPO-ES-TRY
|
20 |
|
21 |
+
This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 0.8682
|
24 |
+
- Logps: -123.1454
|
25 |
+
- Logits: -2.5127
|
26 |
+
- Objective: 0.8882
|
27 |
+
- Dpo Loss: 0.8882
|
28 |
+
- Regularize: 0.8882
|
29 |
+
- Ranking Simple: 0.5600
|
30 |
- Ranking Idealized: 0.6046
|
31 |
- Ranking Idealized Expo: 0.5280
|
32 |
+
- Dpo Wo Beta: -6.1537
|
33 |
|
34 |
## Model description
|
35 |
|
|
|
54 |
- seed: 42
|
55 |
- distributed_type: multi-GPU
|
56 |
- num_devices: 6
|
57 |
+
- gradient_accumulation_steps: 6
|
58 |
+
- total_train_batch_size: 72
|
59 |
- total_eval_batch_size: 12
|
60 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
61 |
- lr_scheduler_type: cosine
|
|
|
75 |
| 0.1227 | 2.1030 | 371 | 0.9224 | -6.4510 | -1.8645 | -122.0016 | 0.8844 | 0.9224 | 0.5888 | 0.5093 | 0.5424 | 0.9224 |
|
76 |
| 0.133 | 2.4034 | 424 | 0.8786 | -5.8878 | -2.0277 | -117.1217 | 0.8448 | 0.8786 | 0.5888 | 0.5093 | 0.5413 | 0.8786 |
|
77 |
| 0.1211 | 2.7085 | 477 | 0.8739 | -5.8152 | -2.0272 | -116.4230 | 0.8371 | 0.8739 | 0.5888 | 0.5093 | 0.5403 | 0.8739 |
|
78 |
+
| 0.0858 | 1.5045 | 530 | 0.8753 | -5.9229 | -2.4530 | -118.2529 | 0.8505 | 0.8753 | 0.6046 | 0.5280 | 0.5683 | 0.8753 |
|
79 |
+
| 0.1274 | 1.6547 | 583 | 0.8264 | -5.2847 | -2.4380 | -119.5907 | 0.8086 | 0.8264 | 0.6046 | 0.5280 | 0.5870 | 0.8264 |
|
80 |
+
| 0.1614 | 1.8049 | 636 | 0.8243 | -5.2813 | -2.4850 | -117.8585 | 0.8209 | 0.8243 | 0.6046 | 0.5280 | 0.5818 | 0.8243 |
|
81 |
+
| 0.1616 | 1.9551 | 689 | 0.8576 | -5.7234 | -2.4656 | -119.3221 | 0.8383 | 0.8576 | 0.6046 | 0.5280 | 0.5797 | 0.8576 |
|
82 |
+
| 0.1063 | 2.1053 | 742 | 0.9824 | -7.3310 | -2.2712 | -133.3637 | 0.9486 | 0.9824 | 0.6046 | 0.5280 | 0.5518 | 0.9824 |
|
83 |
+
| 0.1017 | 2.2556 | 795 | 0.8904 | -6.2055 | -2.4490 | -123.5745 | 0.8711 | 0.8904 | 0.6046 | 0.5280 | 0.5683 | 0.8904 |
|
84 |
+
| 0.1225 | 2.4058 | 848 | 0.9035 | -6.3529 | -2.4743 | -124.5336 | 0.8822 | 0.9035 | 0.6046 | 0.5280 | 0.5569 | 0.9035 |
|
85 |
+
| 0.1157 | 2.5583 | 901 | 0.8718 | -124.4583 | -2.4886 | 0.8941 | 0.8941 | 0.8941 | 0.5621 | 0.6046 | 0.5280 | -6.2136 |
|
86 |
+
| 0.1387 | 2.7085 | 954 | 0.8688 | -123.4143 | -2.5086 | 0.8892 | 0.8892 | 0.8892 | 0.5580 | 0.6046 | 0.5280 | -6.1610 |
|
87 |
+
| 0.1219 | 2.8588 | 1007 | 0.8682 | -123.1454 | -2.5127 | 0.8882 | 0.8882 | 0.8882 | 0.5600 | 0.6046 | 0.5280 | -6.1537 |
|
88 |
|
89 |
|
90 |
### Framework versions
|
all_results.json
CHANGED
@@ -15,9 +15,9 @@
|
|
15 |
"eval_samples_per_second": 18.1,
|
16 |
"eval_steps_per_second": 1.51,
|
17 |
"total_flos": 0.0,
|
18 |
-
"train_loss": 0.
|
19 |
-
"train_runtime":
|
20 |
"train_samples": 50802,
|
21 |
-
"train_samples_per_second":
|
22 |
-
"train_steps_per_second":
|
23 |
}
|
|
|
15 |
"eval_samples_per_second": 18.1,
|
16 |
"eval_steps_per_second": 1.51,
|
17 |
"total_flos": 0.0,
|
18 |
+
"train_loss": 0.022545777056648425,
|
19 |
+
"train_runtime": 4386.5835,
|
20 |
"train_samples": 50802,
|
21 |
+
"train_samples_per_second": 34.744,
|
22 |
+
"train_steps_per_second": 0.241
|
23 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1975192208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:194207d30450b9e4443cc4f6e08ed7bac1e41c86af1d51b105835e6a19278e3d
|
3 |
size 1975192208
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 2.9976381672177608,
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 50802,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second":
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.9976381672177608,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.022545777056648425,
|
5 |
+
"train_runtime": 4386.5835,
|
6 |
"train_samples": 50802,
|
7 |
+
"train_samples_per_second": 34.744,
|
8 |
+
"train_steps_per_second": 0.241
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-
|
4 |
"epoch": 2.9976381672177608,
|
5 |
"eval_steps": 53,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1857,18 +1857,1884 @@
|
|
1857 |
"regularize": 0.13335375487804413,
|
1858 |
"step": 525
|
1859 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1860 |
{
|
1861 |
"epoch": 2.9976381672177608,
|
1862 |
-
"step":
|
1863 |
"total_flos": 0.0,
|
1864 |
-
"train_loss": 0.
|
1865 |
-
"train_runtime":
|
1866 |
-
"train_samples_per_second":
|
1867 |
-
"train_steps_per_second":
|
1868 |
}
|
1869 |
],
|
1870 |
"logging_steps": 5,
|
1871 |
-
"max_steps":
|
1872 |
"num_input_tokens_seen": 0,
|
1873 |
"num_train_epochs": 3,
|
1874 |
"save_steps": 53,
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5869565010070801,
|
3 |
+
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-583",
|
4 |
"epoch": 2.9976381672177608,
|
5 |
"eval_steps": 53,
|
6 |
+
"global_step": 1056,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1857 |
"regularize": 0.13335375487804413,
|
1858 |
"step": 525
|
1859 |
},
|
1860 |
+
{
|
1861 |
+
"dpo_loss": 0.08052093535661697,
|
1862 |
+
"dpo_wo_beta": -0.004379949066787958,
|
1863 |
+
"epoch": 1.5044874822862542,
|
1864 |
+
"grad_norm": 9.015683090612498,
|
1865 |
+
"learning_rate": 2.9196388040863695e-06,
|
1866 |
+
"logits": -1.833287239074707,
|
1867 |
+
"logps": -114.10733795166016,
|
1868 |
+
"loss": 0.0858,
|
1869 |
+
"objective": 0.08052093535661697,
|
1870 |
+
"ranking_idealized": 0.6666666865348816,
|
1871 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
1872 |
+
"ranking_simple": 0.9375,
|
1873 |
+
"regularize": 0.08052093535661697,
|
1874 |
+
"step": 530
|
1875 |
+
},
|
1876 |
+
{
|
1877 |
+
"epoch": 1.5044874822862542,
|
1878 |
+
"eval_dpo_loss": 0.8753401041030884,
|
1879 |
+
"eval_dpo_wo_beta": -5.922874450683594,
|
1880 |
+
"eval_logits": -2.4529590606689453,
|
1881 |
+
"eval_logps": -118.25288391113281,
|
1882 |
+
"eval_loss": 0.8505071997642517,
|
1883 |
+
"eval_objective": 0.8753401041030884,
|
1884 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
1885 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
1886 |
+
"eval_ranking_simple": 0.5683229565620422,
|
1887 |
+
"eval_regularize": 0.8753401041030884,
|
1888 |
+
"eval_runtime": 344.4459,
|
1889 |
+
"eval_samples_per_second": 16.81,
|
1890 |
+
"eval_steps_per_second": 1.402,
|
1891 |
+
"step": 530
|
1892 |
+
},
|
1893 |
+
{
|
1894 |
+
"dpo_loss": 0.06306228041648865,
|
1895 |
+
"dpo_wo_beta": -0.08629266172647476,
|
1896 |
+
"epoch": 1.5186584789796882,
|
1897 |
+
"grad_norm": 12.634106279395441,
|
1898 |
+
"learning_rate": 2.8788330549198512e-06,
|
1899 |
+
"logits": -1.8371531963348389,
|
1900 |
+
"logps": -115.510009765625,
|
1901 |
+
"loss": 0.0766,
|
1902 |
+
"objective": 0.06306228041648865,
|
1903 |
+
"ranking_idealized": 0.550000011920929,
|
1904 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
1905 |
+
"ranking_simple": 0.9166666865348816,
|
1906 |
+
"regularize": 0.06306228041648865,
|
1907 |
+
"step": 535
|
1908 |
+
},
|
1909 |
+
{
|
1910 |
+
"dpo_loss": 0.13102607429027557,
|
1911 |
+
"dpo_wo_beta": -0.41934680938720703,
|
1912 |
+
"epoch": 1.5328294756731222,
|
1913 |
+
"grad_norm": 14.583212522491863,
|
1914 |
+
"learning_rate": 2.8379237365787426e-06,
|
1915 |
+
"logits": -1.7649121284484863,
|
1916 |
+
"logps": -127.44564819335938,
|
1917 |
+
"loss": 0.0928,
|
1918 |
+
"objective": 0.13102607429027557,
|
1919 |
+
"ranking_idealized": 0.5833333134651184,
|
1920 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
1921 |
+
"ranking_simple": 0.8999999761581421,
|
1922 |
+
"regularize": 0.13102607429027557,
|
1923 |
+
"step": 540
|
1924 |
+
},
|
1925 |
+
{
|
1926 |
+
"dpo_loss": 0.12010473757982254,
|
1927 |
+
"dpo_wo_beta": -0.2892196476459503,
|
1928 |
+
"epoch": 1.5470004723665565,
|
1929 |
+
"grad_norm": 18.74636858022955,
|
1930 |
+
"learning_rate": 2.7969220332622004e-06,
|
1931 |
+
"logits": -1.7403244972229004,
|
1932 |
+
"logps": -126.89453125,
|
1933 |
+
"loss": 0.1114,
|
1934 |
+
"objective": 0.12010473757982254,
|
1935 |
+
"ranking_idealized": 0.6499999761581421,
|
1936 |
+
"ranking_idealized_expo": 0.550000011920929,
|
1937 |
+
"ranking_simple": 0.925000011920929,
|
1938 |
+
"regularize": 0.12010473757982254,
|
1939 |
+
"step": 545
|
1940 |
+
},
|
1941 |
+
{
|
1942 |
+
"dpo_loss": 0.05550822243094444,
|
1943 |
+
"dpo_wo_beta": -0.0006199590279720724,
|
1944 |
+
"epoch": 1.5611714690599907,
|
1945 |
+
"grad_norm": 16.65748534309405,
|
1946 |
+
"learning_rate": 2.7558391544265127e-06,
|
1947 |
+
"logits": -1.7434070110321045,
|
1948 |
+
"logps": -120.79520416259766,
|
1949 |
+
"loss": 0.0872,
|
1950 |
+
"objective": 0.05550822243094444,
|
1951 |
+
"ranking_idealized": 0.6000000238418579,
|
1952 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
1953 |
+
"ranking_simple": 0.9750000238418579,
|
1954 |
+
"regularize": 0.05550822243094444,
|
1955 |
+
"step": 550
|
1956 |
+
},
|
1957 |
+
{
|
1958 |
+
"dpo_loss": 0.12240471690893173,
|
1959 |
+
"dpo_wo_beta": -0.22643856704235077,
|
1960 |
+
"epoch": 1.5753424657534247,
|
1961 |
+
"grad_norm": 12.27653222683965,
|
1962 |
+
"learning_rate": 2.714686331720543e-06,
|
1963 |
+
"logits": -1.8163702487945557,
|
1964 |
+
"logps": -123.55280303955078,
|
1965 |
+
"loss": 0.103,
|
1966 |
+
"objective": 0.12240471690893173,
|
1967 |
+
"ranking_idealized": 0.6083333492279053,
|
1968 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
1969 |
+
"ranking_simple": 0.875,
|
1970 |
+
"regularize": 0.12240471690893173,
|
1971 |
+
"step": 555
|
1972 |
+
},
|
1973 |
+
{
|
1974 |
+
"dpo_loss": 0.1260891556739807,
|
1975 |
+
"dpo_wo_beta": -0.384922057390213,
|
1976 |
+
"epoch": 1.5895134624468588,
|
1977 |
+
"grad_norm": 14.054554594902122,
|
1978 |
+
"learning_rate": 2.6734748159151104e-06,
|
1979 |
+
"logits": -1.8347235918045044,
|
1980 |
+
"logps": -115.50324249267578,
|
1981 |
+
"loss": 0.1171,
|
1982 |
+
"objective": 0.1260891556739807,
|
1983 |
+
"ranking_idealized": 0.5333333611488342,
|
1984 |
+
"ranking_idealized_expo": 0.4749999940395355,
|
1985 |
+
"ranking_simple": 0.8916666507720947,
|
1986 |
+
"regularize": 0.1260891556739807,
|
1987 |
+
"step": 560
|
1988 |
+
},
|
1989 |
+
{
|
1990 |
+
"dpo_loss": 0.11428937315940857,
|
1991 |
+
"dpo_wo_beta": -0.142462819814682,
|
1992 |
+
"epoch": 1.6036844591402928,
|
1993 |
+
"grad_norm": 8.888993907970832,
|
1994 |
+
"learning_rate": 2.632215873827142e-06,
|
1995 |
+
"logits": -1.9027162790298462,
|
1996 |
+
"logps": -117.14366912841797,
|
1997 |
+
"loss": 0.1142,
|
1998 |
+
"objective": 0.11428937315940857,
|
1999 |
+
"ranking_idealized": 0.5416666865348816,
|
2000 |
+
"ranking_idealized_expo": 0.4583333432674408,
|
2001 |
+
"ranking_simple": 0.8916666507720947,
|
2002 |
+
"regularize": 0.11428937315940857,
|
2003 |
+
"step": 565
|
2004 |
+
},
|
2005 |
+
{
|
2006 |
+
"dpo_loss": 0.14770367741584778,
|
2007 |
+
"dpo_wo_beta": -0.5861695408821106,
|
2008 |
+
"epoch": 1.6178554558337268,
|
2009 |
+
"grad_norm": 9.396256315509223,
|
2010 |
+
"learning_rate": 2.5909207852394363e-06,
|
2011 |
+
"logits": -2.088587760925293,
|
2012 |
+
"logps": -113.1082992553711,
|
2013 |
+
"loss": 0.1098,
|
2014 |
+
"objective": 0.14770367741584778,
|
2015 |
+
"ranking_idealized": 0.6000000238418579,
|
2016 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2017 |
+
"ranking_simple": 0.9083333611488342,
|
2018 |
+
"regularize": 0.14770367741584778,
|
2019 |
+
"step": 570
|
2020 |
+
},
|
2021 |
+
{
|
2022 |
+
"dpo_loss": 0.1413314789533615,
|
2023 |
+
"dpo_wo_beta": -0.3796103298664093,
|
2024 |
+
"epoch": 1.632026452527161,
|
2025 |
+
"grad_norm": 16.325704474226345,
|
2026 |
+
"learning_rate": 2.5496008398168844e-06,
|
2027 |
+
"logits": -1.9472541809082031,
|
2028 |
+
"logps": -117.12679290771484,
|
2029 |
+
"loss": 0.1404,
|
2030 |
+
"objective": 0.1413314789533615,
|
2031 |
+
"ranking_idealized": 0.6583333611488342,
|
2032 |
+
"ranking_idealized_expo": 0.6166666746139526,
|
2033 |
+
"ranking_simple": 0.9166666865348816,
|
2034 |
+
"regularize": 0.1413314789533615,
|
2035 |
+
"step": 575
|
2036 |
+
},
|
2037 |
+
{
|
2038 |
+
"dpo_loss": 0.1715042144060135,
|
2039 |
+
"dpo_wo_beta": -0.48858124017715454,
|
2040 |
+
"epoch": 1.6461974492205953,
|
2041 |
+
"grad_norm": 11.97898567349363,
|
2042 |
+
"learning_rate": 2.508267334019988e-06,
|
2043 |
+
"logits": -1.935112476348877,
|
2044 |
+
"logps": -107.23208618164062,
|
2045 |
+
"loss": 0.1274,
|
2046 |
+
"objective": 0.1715042144060135,
|
2047 |
+
"ranking_idealized": 0.5583333373069763,
|
2048 |
+
"ranking_idealized_expo": 0.5,
|
2049 |
+
"ranking_simple": 0.8999999761581421,
|
2050 |
+
"regularize": 0.1715042144060135,
|
2051 |
+
"step": 580
|
2052 |
+
},
|
2053 |
+
{
|
2054 |
+
"epoch": 1.6547000472366555,
|
2055 |
+
"eval_dpo_loss": 0.8264312148094177,
|
2056 |
+
"eval_dpo_wo_beta": -5.2847466468811035,
|
2057 |
+
"eval_logits": -2.4379913806915283,
|
2058 |
+
"eval_logps": -119.59071350097656,
|
2059 |
+
"eval_loss": 0.808626651763916,
|
2060 |
+
"eval_objective": 0.8264312148094177,
|
2061 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2062 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2063 |
+
"eval_ranking_simple": 0.5869565010070801,
|
2064 |
+
"eval_regularize": 0.8264312148094177,
|
2065 |
+
"eval_runtime": 355.6593,
|
2066 |
+
"eval_samples_per_second": 16.28,
|
2067 |
+
"eval_steps_per_second": 1.358,
|
2068 |
+
"step": 583
|
2069 |
+
},
|
2070 |
+
{
|
2071 |
+
"dpo_loss": 0.1341490000486374,
|
2072 |
+
"dpo_wo_beta": -0.5180007815361023,
|
2073 |
+
"epoch": 1.6603684459140293,
|
2074 |
+
"grad_norm": 15.27561747473412,
|
2075 |
+
"learning_rate": 2.46693156801652e-06,
|
2076 |
+
"logits": -1.8945667743682861,
|
2077 |
+
"logps": -110.8204116821289,
|
2078 |
+
"loss": 0.1227,
|
2079 |
+
"objective": 0.1341490000486374,
|
2080 |
+
"ranking_idealized": 0.6000000238418579,
|
2081 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
2082 |
+
"ranking_simple": 0.9083333611488342,
|
2083 |
+
"regularize": 0.1341490000486374,
|
2084 |
+
"step": 585
|
2085 |
+
},
|
2086 |
+
{
|
2087 |
+
"dpo_loss": 0.13649246096611023,
|
2088 |
+
"dpo_wo_beta": -0.5407892465591431,
|
2089 |
+
"epoch": 1.6745394426074633,
|
2090 |
+
"grad_norm": 8.066407586221112,
|
2091 |
+
"learning_rate": 2.4256048425921693e-06,
|
2092 |
+
"logits": -1.8402847051620483,
|
2093 |
+
"logps": -117.5975112915039,
|
2094 |
+
"loss": 0.1263,
|
2095 |
+
"objective": 0.13649246096611023,
|
2096 |
+
"ranking_idealized": 0.6000000238418579,
|
2097 |
+
"ranking_idealized_expo": 0.46666666865348816,
|
2098 |
+
"ranking_simple": 0.9083333611488342,
|
2099 |
+
"regularize": 0.13649246096611023,
|
2100 |
+
"step": 590
|
2101 |
+
},
|
2102 |
+
{
|
2103 |
+
"dpo_loss": 0.14796380698680878,
|
2104 |
+
"dpo_wo_beta": -0.5909832119941711,
|
2105 |
+
"epoch": 1.6887104393008974,
|
2106 |
+
"grad_norm": 17.4246056389601,
|
2107 |
+
"learning_rate": 2.384298456061023e-06,
|
2108 |
+
"logits": -1.8368481397628784,
|
2109 |
+
"logps": -117.76559448242188,
|
2110 |
+
"loss": 0.1544,
|
2111 |
+
"objective": 0.14796380698680878,
|
2112 |
+
"ranking_idealized": 0.675000011920929,
|
2113 |
+
"ranking_idealized_expo": 0.5916666388511658,
|
2114 |
+
"ranking_simple": 0.8916666507720947,
|
2115 |
+
"regularize": 0.14796380698680878,
|
2116 |
+
"step": 595
|
2117 |
+
},
|
2118 |
+
{
|
2119 |
+
"dpo_loss": 0.15043622255325317,
|
2120 |
+
"dpo_wo_beta": -0.6607655882835388,
|
2121 |
+
"epoch": 1.7028814359943316,
|
2122 |
+
"grad_norm": 18.447927668502214,
|
2123 |
+
"learning_rate": 2.3430237011767166e-06,
|
2124 |
+
"logits": -1.839497447013855,
|
2125 |
+
"logps": -112.93590545654297,
|
2126 |
+
"loss": 0.15,
|
2127 |
+
"objective": 0.15043622255325317,
|
2128 |
+
"ranking_idealized": 0.6333333253860474,
|
2129 |
+
"ranking_idealized_expo": 0.550000011920929,
|
2130 |
+
"ranking_simple": 0.875,
|
2131 |
+
"regularize": 0.15043622255325317,
|
2132 |
+
"step": 600
|
2133 |
+
},
|
2134 |
+
{
|
2135 |
+
"dpo_loss": 0.11465544998645782,
|
2136 |
+
"dpo_wo_beta": -0.34068503975868225,
|
2137 |
+
"epoch": 1.7170524326877659,
|
2138 |
+
"grad_norm": 12.561303443006217,
|
2139 |
+
"learning_rate": 2.30179186204511e-06,
|
2140 |
+
"logits": -1.7405670881271362,
|
2141 |
+
"logps": -112.9049072265625,
|
2142 |
+
"loss": 0.1278,
|
2143 |
+
"objective": 0.11465544998645782,
|
2144 |
+
"ranking_idealized": 0.5583333373069763,
|
2145 |
+
"ranking_idealized_expo": 0.5,
|
2146 |
+
"ranking_simple": 0.8999999761581421,
|
2147 |
+
"regularize": 0.11465544998645782,
|
2148 |
+
"step": 605
|
2149 |
+
},
|
2150 |
+
{
|
2151 |
+
"dpo_loss": 0.1477740854024887,
|
2152 |
+
"dpo_wo_beta": -0.4079127609729767,
|
2153 |
+
"epoch": 1.7312234293811999,
|
2154 |
+
"grad_norm": 17.631548474702864,
|
2155 |
+
"learning_rate": 2.2606142110393248e-06,
|
2156 |
+
"logits": -1.7865701913833618,
|
2157 |
+
"logps": -109.32853698730469,
|
2158 |
+
"loss": 0.1331,
|
2159 |
+
"objective": 0.1477740854024887,
|
2160 |
+
"ranking_idealized": 0.5833333134651184,
|
2161 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2162 |
+
"ranking_simple": 0.875,
|
2163 |
+
"regularize": 0.1477740854024887,
|
2164 |
+
"step": 610
|
2165 |
+
},
|
2166 |
+
{
|
2167 |
+
"dpo_loss": 0.18279190361499786,
|
2168 |
+
"dpo_wo_beta": -0.8598226308822632,
|
2169 |
+
"epoch": 1.745394426074634,
|
2170 |
+
"grad_norm": 10.88386046876842,
|
2171 |
+
"learning_rate": 2.2195020057179897e-06,
|
2172 |
+
"logits": -1.7821184396743774,
|
2173 |
+
"logps": -111.59290313720703,
|
2174 |
+
"loss": 0.136,
|
2175 |
+
"objective": 0.18279190361499786,
|
2176 |
+
"ranking_idealized": 0.5833333134651184,
|
2177 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
2178 |
+
"ranking_simple": 0.875,
|
2179 |
+
"regularize": 0.18279190361499786,
|
2180 |
+
"step": 615
|
2181 |
+
},
|
2182 |
+
{
|
2183 |
+
"dpo_loss": 0.1030309647321701,
|
2184 |
+
"dpo_wo_beta": -0.05606275424361229,
|
2185 |
+
"epoch": 1.759565422768068,
|
2186 |
+
"grad_norm": 12.949368950505932,
|
2187 |
+
"learning_rate": 2.1784664857475356e-06,
|
2188 |
+
"logits": -1.780458688735962,
|
2189 |
+
"logps": -108.14535522460938,
|
2190 |
+
"loss": 0.1303,
|
2191 |
+
"objective": 0.1030309647321701,
|
2192 |
+
"ranking_idealized": 0.6416666507720947,
|
2193 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2194 |
+
"ranking_simple": 0.9666666388511658,
|
2195 |
+
"regularize": 0.1030309647321701,
|
2196 |
+
"step": 620
|
2197 |
+
},
|
2198 |
+
{
|
2199 |
+
"dpo_loss": 0.158640518784523,
|
2200 |
+
"dpo_wo_beta": -0.4262932240962982,
|
2201 |
+
"epoch": 1.7737364194615022,
|
2202 |
+
"grad_norm": 15.36682532028932,
|
2203 |
+
"learning_rate": 2.1375188698293855e-06,
|
2204 |
+
"logits": -1.657003402709961,
|
2205 |
+
"logps": -110.36738586425781,
|
2206 |
+
"loss": 0.161,
|
2207 |
+
"objective": 0.158640518784523,
|
2208 |
+
"ranking_idealized": 0.6166666746139526,
|
2209 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
2210 |
+
"ranking_simple": 0.9083333611488342,
|
2211 |
+
"regularize": 0.158640518784523,
|
2212 |
+
"step": 625
|
2213 |
+
},
|
2214 |
+
{
|
2215 |
+
"dpo_loss": 0.21892648935317993,
|
2216 |
+
"dpo_wo_beta": -0.9036411046981812,
|
2217 |
+
"epoch": 1.7879074161549362,
|
2218 |
+
"grad_norm": 14.959909753098716,
|
2219 |
+
"learning_rate": 2.096670352632873e-06,
|
2220 |
+
"logits": -1.7339377403259277,
|
2221 |
+
"logps": -107.9156723022461,
|
2222 |
+
"loss": 0.1673,
|
2223 |
+
"objective": 0.21892648935317993,
|
2224 |
+
"ranking_idealized": 0.5833333134651184,
|
2225 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
2226 |
+
"ranking_simple": 0.7833333611488342,
|
2227 |
+
"regularize": 0.21892648935317993,
|
2228 |
+
"step": 630
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"dpo_loss": 0.14205454289913177,
|
2232 |
+
"dpo_wo_beta": -0.47540512681007385,
|
2233 |
+
"epoch": 1.8020784128483704,
|
2234 |
+
"grad_norm": 22.25702396524442,
|
2235 |
+
"learning_rate": 2.0559321017347286e-06,
|
2236 |
+
"logits": -2.000821352005005,
|
2237 |
+
"logps": -119.52375793457031,
|
2238 |
+
"loss": 0.1614,
|
2239 |
+
"objective": 0.14205454289913177,
|
2240 |
+
"ranking_idealized": 0.675000011920929,
|
2241 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
2242 |
+
"ranking_simple": 0.8999999761581421,
|
2243 |
+
"regularize": 0.14205454289913177,
|
2244 |
+
"step": 635
|
2245 |
+
},
|
2246 |
+
{
|
2247 |
+
"epoch": 1.8049126121870571,
|
2248 |
+
"eval_dpo_loss": 0.8243346214294434,
|
2249 |
+
"eval_dpo_wo_beta": -5.281310081481934,
|
2250 |
+
"eval_logits": -2.4850430488586426,
|
2251 |
+
"eval_logps": -117.85846710205078,
|
2252 |
+
"eval_loss": 0.8209081292152405,
|
2253 |
+
"eval_objective": 0.8243346214294434,
|
2254 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2255 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2256 |
+
"eval_ranking_simple": 0.5817805528640747,
|
2257 |
+
"eval_regularize": 0.8243346214294434,
|
2258 |
+
"eval_runtime": 351.818,
|
2259 |
+
"eval_samples_per_second": 16.457,
|
2260 |
+
"eval_steps_per_second": 1.373,
|
2261 |
+
"step": 636
|
2262 |
+
},
|
2263 |
+
{
|
2264 |
+
"dpo_loss": 0.13119691610336304,
|
2265 |
+
"dpo_wo_beta": -0.36706313490867615,
|
2266 |
+
"epoch": 1.8162494095418045,
|
2267 |
+
"grad_norm": 20.631621209893076,
|
2268 |
+
"learning_rate": 2.01531525456598e-06,
|
2269 |
+
"logits": -1.8479942083358765,
|
2270 |
+
"logps": -111.6270523071289,
|
2271 |
+
"loss": 0.1417,
|
2272 |
+
"objective": 0.13119691610336304,
|
2273 |
+
"ranking_idealized": 0.5249999761581421,
|
2274 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
2275 |
+
"ranking_simple": 0.8583333492279053,
|
2276 |
+
"regularize": 0.13119691610336304,
|
2277 |
+
"step": 640
|
2278 |
+
},
|
2279 |
+
{
|
2280 |
+
"dpo_loss": 0.1571781039237976,
|
2281 |
+
"dpo_wo_beta": -0.5080724954605103,
|
2282 |
+
"epoch": 1.8304204062352385,
|
2283 |
+
"grad_norm": 20.508347622723388,
|
2284 |
+
"learning_rate": 1.974830915367086e-06,
|
2285 |
+
"logits": -1.9629262685775757,
|
2286 |
+
"logps": -122.33467102050781,
|
2287 |
+
"loss": 0.1732,
|
2288 |
+
"objective": 0.1571781039237976,
|
2289 |
+
"ranking_idealized": 0.6333333253860474,
|
2290 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2291 |
+
"ranking_simple": 0.875,
|
2292 |
+
"regularize": 0.1571781039237976,
|
2293 |
+
"step": 645
|
2294 |
+
},
|
2295 |
+
{
|
2296 |
+
"dpo_loss": 0.18711452186107635,
|
2297 |
+
"dpo_wo_beta": -0.5334885120391846,
|
2298 |
+
"epoch": 1.8445914029286725,
|
2299 |
+
"grad_norm": 17.78377983289135,
|
2300 |
+
"learning_rate": 1.93449015215215e-06,
|
2301 |
+
"logits": -1.960700511932373,
|
2302 |
+
"logps": -116.45585632324219,
|
2303 |
+
"loss": 0.1873,
|
2304 |
+
"objective": 0.18711452186107635,
|
2305 |
+
"ranking_idealized": 0.6333333253860474,
|
2306 |
+
"ranking_idealized_expo": 0.5,
|
2307 |
+
"ranking_simple": 0.875,
|
2308 |
+
"regularize": 0.18711452186107635,
|
2309 |
+
"step": 650
|
2310 |
+
},
|
2311 |
+
{
|
2312 |
+
"dpo_loss": 0.14089132845401764,
|
2313 |
+
"dpo_wo_beta": -0.257717490196228,
|
2314 |
+
"epoch": 1.8587623996221068,
|
2315 |
+
"grad_norm": 17.423684978791957,
|
2316 |
+
"learning_rate": 1.8943039936830347e-06,
|
2317 |
+
"logits": -1.7539128065109253,
|
2318 |
+
"logps": -105.96385192871094,
|
2319 |
+
"loss": 0.1703,
|
2320 |
+
"objective": 0.14089132845401764,
|
2321 |
+
"ranking_idealized": 0.5666666626930237,
|
2322 |
+
"ranking_idealized_expo": 0.46666666865348816,
|
2323 |
+
"ranking_simple": 0.8416666388511658,
|
2324 |
+
"regularize": 0.14089132845401764,
|
2325 |
+
"step": 655
|
2326 |
+
},
|
2327 |
+
{
|
2328 |
+
"dpo_loss": 0.12644439935684204,
|
2329 |
+
"dpo_wo_beta": -0.18994450569152832,
|
2330 |
+
"epoch": 1.872933396315541,
|
2331 |
+
"grad_norm": 14.114136095930363,
|
2332 |
+
"learning_rate": 1.8542834264542091e-06,
|
2333 |
+
"logits": -1.8740805387496948,
|
2334 |
+
"logps": -112.58109283447266,
|
2335 |
+
"loss": 0.1679,
|
2336 |
+
"objective": 0.12644439935684204,
|
2337 |
+
"ranking_idealized": 0.5666666626930237,
|
2338 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
2339 |
+
"ranking_simple": 0.8833333253860474,
|
2340 |
+
"regularize": 0.12644439935684204,
|
2341 |
+
"step": 660
|
2342 |
+
},
|
2343 |
+
{
|
2344 |
+
"dpo_loss": 0.1596754938364029,
|
2345 |
+
"dpo_wo_beta": -0.5118387341499329,
|
2346 |
+
"epoch": 1.887104393008975,
|
2347 |
+
"grad_norm": 17.02657500840854,
|
2348 |
+
"learning_rate": 1.814439391689151e-06,
|
2349 |
+
"logits": -2.012057065963745,
|
2350 |
+
"logps": -106.6546401977539,
|
2351 |
+
"loss": 0.1748,
|
2352 |
+
"objective": 0.1596754938364029,
|
2353 |
+
"ranking_idealized": 0.6166666746139526,
|
2354 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
2355 |
+
"ranking_simple": 0.8583333492279053,
|
2356 |
+
"regularize": 0.1596754938364029,
|
2357 |
+
"step": 665
|
2358 |
+
},
|
2359 |
+
{
|
2360 |
+
"dpo_loss": 0.1905670315027237,
|
2361 |
+
"dpo_wo_beta": -0.7486369609832764,
|
2362 |
+
"epoch": 1.901275389702409,
|
2363 |
+
"grad_norm": 14.652072743525036,
|
2364 |
+
"learning_rate": 1.7747827823491253e-06,
|
2365 |
+
"logits": -1.7807596921920776,
|
2366 |
+
"logps": -107.37975311279297,
|
2367 |
+
"loss": 0.1695,
|
2368 |
+
"objective": 0.1905670315027237,
|
2369 |
+
"ranking_idealized": 0.6000000238418579,
|
2370 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
2371 |
+
"ranking_simple": 0.8666666746139526,
|
2372 |
+
"regularize": 0.1905670315027237,
|
2373 |
+
"step": 670
|
2374 |
+
},
|
2375 |
+
{
|
2376 |
+
"dpo_loss": 0.14669080078601837,
|
2377 |
+
"dpo_wo_beta": -0.3554477095603943,
|
2378 |
+
"epoch": 1.915446386395843,
|
2379 |
+
"grad_norm": 15.076428386388748,
|
2380 |
+
"learning_rate": 1.7353244401551566e-06,
|
2381 |
+
"logits": -1.8374218940734863,
|
2382 |
+
"logps": -107.03987884521484,
|
2383 |
+
"loss": 0.1555,
|
2384 |
+
"objective": 0.14669080078601837,
|
2385 |
+
"ranking_idealized": 0.6416666507720947,
|
2386 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
2387 |
+
"ranking_simple": 0.8999999761581421,
|
2388 |
+
"regularize": 0.14669080078601837,
|
2389 |
+
"step": 675
|
2390 |
+
},
|
2391 |
+
{
|
2392 |
+
"dpo_loss": 0.15277433395385742,
|
2393 |
+
"dpo_wo_beta": -0.5579003095626831,
|
2394 |
+
"epoch": 1.9296173830892773,
|
2395 |
+
"grad_norm": 14.063016020977683,
|
2396 |
+
"learning_rate": 1.6960751526240122e-06,
|
2397 |
+
"logits": -1.9377697706222534,
|
2398 |
+
"logps": -118.40213775634766,
|
2399 |
+
"loss": 0.1487,
|
2400 |
+
"objective": 0.15277433395385742,
|
2401 |
+
"ranking_idealized": 0.625,
|
2402 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
2403 |
+
"ranking_simple": 0.8833333253860474,
|
2404 |
+
"regularize": 0.15277433395385742,
|
2405 |
+
"step": 680
|
2406 |
+
},
|
2407 |
+
{
|
2408 |
+
"dpo_loss": 0.1468452513217926,
|
2409 |
+
"dpo_wo_beta": -0.34248843789100647,
|
2410 |
+
"epoch": 1.9437883797827114,
|
2411 |
+
"grad_norm": 20.037932580562952,
|
2412 |
+
"learning_rate": 1.6570456501189996e-06,
|
2413 |
+
"logits": -1.822561502456665,
|
2414 |
+
"logps": -116.04502868652344,
|
2415 |
+
"loss": 0.1616,
|
2416 |
+
"objective": 0.1468452513217926,
|
2417 |
+
"ranking_idealized": 0.5083333253860474,
|
2418 |
+
"ranking_idealized_expo": 0.46666666865348816,
|
2419 |
+
"ranking_simple": 0.8999999761581421,
|
2420 |
+
"regularize": 0.1468452513217926,
|
2421 |
+
"step": 685
|
2422 |
+
},
|
2423 |
+
{
|
2424 |
+
"epoch": 1.9551251771374587,
|
2425 |
+
"eval_dpo_loss": 0.857575535774231,
|
2426 |
+
"eval_dpo_wo_beta": -5.723405361175537,
|
2427 |
+
"eval_logits": -2.465576410293579,
|
2428 |
+
"eval_logps": -119.32209777832031,
|
2429 |
+
"eval_loss": 0.8382942080497742,
|
2430 |
+
"eval_objective": 0.857575535774231,
|
2431 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2432 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2433 |
+
"eval_ranking_simple": 0.5797101259231567,
|
2434 |
+
"eval_regularize": 0.857575535774231,
|
2435 |
+
"eval_runtime": 359.3448,
|
2436 |
+
"eval_samples_per_second": 16.113,
|
2437 |
+
"eval_steps_per_second": 1.344,
|
2438 |
+
"step": 689
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"dpo_loss": 0.11434569954872131,
|
2442 |
+
"dpo_wo_beta": -0.045675624161958694,
|
2443 |
+
"epoch": 1.9579593764761456,
|
2444 |
+
"grad_norm": 13.437704226981893,
|
2445 |
+
"learning_rate": 1.6182466029163974e-06,
|
2446 |
+
"logits": -1.8100759983062744,
|
2447 |
+
"logps": -115.22444915771484,
|
2448 |
+
"loss": 0.1309,
|
2449 |
+
"objective": 0.11434569954872131,
|
2450 |
+
"ranking_idealized": 0.6166666746139526,
|
2451 |
+
"ranking_idealized_expo": 0.574999988079071,
|
2452 |
+
"ranking_simple": 0.8999999761581421,
|
2453 |
+
"regularize": 0.11434569954872131,
|
2454 |
+
"step": 690
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"dpo_loss": 0.19163179397583008,
|
2458 |
+
"dpo_wo_beta": -0.46952199935913086,
|
2459 |
+
"epoch": 1.9721303731695796,
|
2460 |
+
"grad_norm": 20.21550526964092,
|
2461 |
+
"learning_rate": 1.5796886182883053e-06,
|
2462 |
+
"logits": -1.829925775527954,
|
2463 |
+
"logps": -114.886962890625,
|
2464 |
+
"loss": 0.1753,
|
2465 |
+
"objective": 0.19163179397583008,
|
2466 |
+
"ranking_idealized": 0.5666666626930237,
|
2467 |
+
"ranking_idealized_expo": 0.4416666626930237,
|
2468 |
+
"ranking_simple": 0.8166666626930237,
|
2469 |
+
"regularize": 0.19163179397583008,
|
2470 |
+
"step": 695
|
2471 |
+
},
|
2472 |
+
{
|
2473 |
+
"dpo_loss": 0.18629121780395508,
|
2474 |
+
"dpo_wo_beta": -0.7477880120277405,
|
2475 |
+
"epoch": 1.9863013698630136,
|
2476 |
+
"grad_norm": 14.017066914132515,
|
2477 |
+
"learning_rate": 1.541382237602721e-06,
|
2478 |
+
"logits": -1.7919304370880127,
|
2479 |
+
"logps": -115.88526916503906,
|
2480 |
+
"loss": 0.176,
|
2481 |
+
"objective": 0.18629121780395508,
|
2482 |
+
"ranking_idealized": 0.5916666388511658,
|
2483 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
2484 |
+
"ranking_simple": 0.8833333253860474,
|
2485 |
+
"regularize": 0.18629121780395508,
|
2486 |
+
"step": 700
|
2487 |
+
},
|
2488 |
+
{
|
2489 |
+
"dpo_loss": 0.20123900473117828,
|
2490 |
+
"dpo_wo_beta": -0.6124710440635681,
|
2491 |
+
"epoch": 2.0004723665564477,
|
2492 |
+
"grad_norm": 21.887245138739928,
|
2493 |
+
"learning_rate": 1.5033379334416376e-06,
|
2494 |
+
"logits": -1.786551594734192,
|
2495 |
+
"logps": -114.37857055664062,
|
2496 |
+
"loss": 0.2015,
|
2497 |
+
"objective": 0.20123900473117828,
|
2498 |
+
"ranking_idealized": 0.625,
|
2499 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
2500 |
+
"ranking_simple": 0.8333333134651184,
|
2501 |
+
"regularize": 0.20123900473117828,
|
2502 |
+
"step": 705
|
2503 |
+
},
|
2504 |
+
{
|
2505 |
+
"dpo_loss": 0.08176574856042862,
|
2506 |
+
"dpo_wo_beta": -0.0886942520737648,
|
2507 |
+
"epoch": 2.0146433632498817,
|
2508 |
+
"grad_norm": 12.07978325252889,
|
2509 |
+
"learning_rate": 1.465566106737942e-06,
|
2510 |
+
"logits": -1.8880345821380615,
|
2511 |
+
"logps": -114.7125244140625,
|
2512 |
+
"loss": 0.1005,
|
2513 |
+
"objective": 0.08176574856042862,
|
2514 |
+
"ranking_idealized": 0.6000000238418579,
|
2515 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
2516 |
+
"ranking_simple": 0.925000011920929,
|
2517 |
+
"regularize": 0.08176574856042862,
|
2518 |
+
"step": 710
|
2519 |
+
},
|
2520 |
+
{
|
2521 |
+
"dpo_loss": 0.07800193130970001,
|
2522 |
+
"dpo_wo_beta": -0.004813884384930134,
|
2523 |
+
"epoch": 2.028814359943316,
|
2524 |
+
"grad_norm": 10.892350166600437,
|
2525 |
+
"learning_rate": 1.4280770839319073e-06,
|
2526 |
+
"logits": -1.8223975896835327,
|
2527 |
+
"logps": -109.677001953125,
|
2528 |
+
"loss": 0.1064,
|
2529 |
+
"objective": 0.07800193130970001,
|
2530 |
+
"ranking_idealized": 0.6916666626930237,
|
2531 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2532 |
+
"ranking_simple": 0.949999988079071,
|
2533 |
+
"regularize": 0.07800193130970001,
|
2534 |
+
"step": 715
|
2535 |
+
},
|
2536 |
+
{
|
2537 |
+
"dpo_loss": 0.07249681651592255,
|
2538 |
+
"dpo_wo_beta": -0.03800208121538162,
|
2539 |
+
"epoch": 2.04298535663675,
|
2540 |
+
"grad_norm": 10.585775059236884,
|
2541 |
+
"learning_rate": 1.3908811141480408e-06,
|
2542 |
+
"logits": -1.7804607152938843,
|
2543 |
+
"logps": -119.00810241699219,
|
2544 |
+
"loss": 0.1011,
|
2545 |
+
"objective": 0.07249681651592255,
|
2546 |
+
"ranking_idealized": 0.6333333253860474,
|
2547 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
2548 |
+
"ranking_simple": 0.949999988079071,
|
2549 |
+
"regularize": 0.07249681651592255,
|
2550 |
+
"step": 720
|
2551 |
+
},
|
2552 |
+
{
|
2553 |
+
"dpo_loss": 0.10758433490991592,
|
2554 |
+
"dpo_wo_beta": -0.4225389361381531,
|
2555 |
+
"epoch": 2.057156353330184,
|
2556 |
+
"grad_norm": 16.634608237356296,
|
2557 |
+
"learning_rate": 1.353988366393083e-06,
|
2558 |
+
"logits": -1.8837405443191528,
|
2559 |
+
"logps": -122.06383514404297,
|
2560 |
+
"loss": 0.1068,
|
2561 |
+
"objective": 0.10758433490991592,
|
2562 |
+
"ranking_idealized": 0.6499999761581421,
|
2563 |
+
"ranking_idealized_expo": 0.5583333373069763,
|
2564 |
+
"ranking_simple": 0.925000011920929,
|
2565 |
+
"regularize": 0.10758433490991592,
|
2566 |
+
"step": 725
|
2567 |
+
},
|
2568 |
+
{
|
2569 |
+
"dpo_loss": 0.16440601646900177,
|
2570 |
+
"dpo_wo_beta": -0.5969924926757812,
|
2571 |
+
"epoch": 2.0713273500236182,
|
2572 |
+
"grad_norm": 12.618281047842604,
|
2573 |
+
"learning_rate": 1.3174089267758983e-06,
|
2574 |
+
"logits": -1.7312003374099731,
|
2575 |
+
"logps": -118.56900024414062,
|
2576 |
+
"loss": 0.107,
|
2577 |
+
"objective": 0.16440601646900177,
|
2578 |
+
"ranking_idealized": 0.5166666507720947,
|
2579 |
+
"ranking_idealized_expo": 0.4333333373069763,
|
2580 |
+
"ranking_simple": 0.8583333492279053,
|
2581 |
+
"regularize": 0.16440601646900177,
|
2582 |
+
"step": 730
|
2583 |
+
},
|
2584 |
+
{
|
2585 |
+
"dpo_loss": 0.15623989701271057,
|
2586 |
+
"dpo_wo_beta": -0.9556598663330078,
|
2587 |
+
"epoch": 2.0854983467170523,
|
2588 |
+
"grad_norm": 16.05614648021729,
|
2589 |
+
"learning_rate": 1.2811527957500344e-06,
|
2590 |
+
"logits": -1.6055046319961548,
|
2591 |
+
"logps": -127.75144958496094,
|
2592 |
+
"loss": 0.1095,
|
2593 |
+
"objective": 0.15623989701271057,
|
2594 |
+
"ranking_idealized": 0.5583333373069763,
|
2595 |
+
"ranking_idealized_expo": 0.46666666865348816,
|
2596 |
+
"ranking_simple": 0.949999988079071,
|
2597 |
+
"regularize": 0.15623989701271057,
|
2598 |
+
"step": 735
|
2599 |
+
},
|
2600 |
+
{
|
2601 |
+
"dpo_loss": 0.11614324897527695,
|
2602 |
+
"dpo_wo_beta": -0.22688980400562286,
|
2603 |
+
"epoch": 2.0996693434104867,
|
2604 |
+
"grad_norm": 14.923960342956232,
|
2605 |
+
"learning_rate": 1.245229885379699e-06,
|
2606 |
+
"logits": -1.688416838645935,
|
2607 |
+
"logps": -123.08829498291016,
|
2608 |
+
"loss": 0.1063,
|
2609 |
+
"objective": 0.11614324897527695,
|
2610 |
+
"ranking_idealized": 0.6333333253860474,
|
2611 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
2612 |
+
"ranking_simple": 0.949999988079071,
|
2613 |
+
"regularize": 0.11614324897527695,
|
2614 |
+
"step": 740
|
2615 |
+
},
|
2616 |
+
{
|
2617 |
+
"epoch": 2.10533774208786,
|
2618 |
+
"eval_dpo_loss": 0.9823706746101379,
|
2619 |
+
"eval_dpo_wo_beta": -7.331023216247559,
|
2620 |
+
"eval_logits": -2.2711641788482666,
|
2621 |
+
"eval_logps": -133.36373901367188,
|
2622 |
+
"eval_loss": 0.9485942721366882,
|
2623 |
+
"eval_objective": 0.9823706746101379,
|
2624 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2625 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2626 |
+
"eval_ranking_simple": 0.5517598390579224,
|
2627 |
+
"eval_regularize": 0.9823706746101379,
|
2628 |
+
"eval_runtime": 364.9494,
|
2629 |
+
"eval_samples_per_second": 15.865,
|
2630 |
+
"eval_steps_per_second": 1.323,
|
2631 |
+
"step": 742
|
2632 |
+
},
|
2633 |
+
{
|
2634 |
+
"dpo_loss": 0.11820446699857712,
|
2635 |
+
"dpo_wo_beta": -0.5452965497970581,
|
2636 |
+
"epoch": 2.1138403401039207,
|
2637 |
+
"grad_norm": 14.405167306826879,
|
2638 |
+
"learning_rate": 1.2096500166298992e-06,
|
2639 |
+
"logits": -1.4971224069595337,
|
2640 |
+
"logps": -132.55892944335938,
|
2641 |
+
"loss": 0.1031,
|
2642 |
+
"objective": 0.11820446699857712,
|
2643 |
+
"ranking_idealized": 0.6916666626930237,
|
2644 |
+
"ranking_idealized_expo": 0.6166666746139526,
|
2645 |
+
"ranking_simple": 0.925000011920929,
|
2646 |
+
"regularize": 0.11820446699857712,
|
2647 |
+
"step": 745
|
2648 |
+
},
|
2649 |
+
{
|
2650 |
+
"dpo_loss": 0.08092837035655975,
|
2651 |
+
"dpo_wo_beta": -0.21483030915260315,
|
2652 |
+
"epoch": 2.1280113367973548,
|
2653 |
+
"grad_norm": 12.035987152428243,
|
2654 |
+
"learning_rate": 1.1744229166814889e-06,
|
2655 |
+
"logits": -1.698511004447937,
|
2656 |
+
"logps": -129.5937042236328,
|
2657 |
+
"loss": 0.0957,
|
2658 |
+
"objective": 0.08092837035655975,
|
2659 |
+
"ranking_idealized": 0.6666666865348816,
|
2660 |
+
"ranking_idealized_expo": 0.574999988079071,
|
2661 |
+
"ranking_simple": 0.9583333134651184,
|
2662 |
+
"regularize": 0.08092837035655975,
|
2663 |
+
"step": 750
|
2664 |
+
},
|
2665 |
+
{
|
2666 |
+
"dpo_loss": 0.09838299453258514,
|
2667 |
+
"dpo_wo_beta": -0.330010324716568,
|
2668 |
+
"epoch": 2.142182333490789,
|
2669 |
+
"grad_norm": 15.638085348810199,
|
2670 |
+
"learning_rate": 1.1395582162718524e-06,
|
2671 |
+
"logits": -1.6223360300064087,
|
2672 |
+
"logps": -128.86538696289062,
|
2673 |
+
"loss": 0.1147,
|
2674 |
+
"objective": 0.09838299453258514,
|
2675 |
+
"ranking_idealized": 0.5916666388511658,
|
2676 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
2677 |
+
"ranking_simple": 0.925000011920929,
|
2678 |
+
"regularize": 0.09838299453258514,
|
2679 |
+
"step": 755
|
2680 |
+
},
|
2681 |
+
{
|
2682 |
+
"dpo_loss": 0.10984232276678085,
|
2683 |
+
"dpo_wo_beta": -0.2521561086177826,
|
2684 |
+
"epoch": 2.156353330184223,
|
2685 |
+
"grad_norm": 18.280317761955644,
|
2686 |
+
"learning_rate": 1.1050654470619602e-06,
|
2687 |
+
"logits": -1.6547772884368896,
|
2688 |
+
"logps": -118.33650970458984,
|
2689 |
+
"loss": 0.1127,
|
2690 |
+
"objective": 0.10984232276678085,
|
2691 |
+
"ranking_idealized": 0.5833333134651184,
|
2692 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
2693 |
+
"ranking_simple": 0.9166666865348816,
|
2694 |
+
"regularize": 0.10984232276678085,
|
2695 |
+
"step": 760
|
2696 |
+
},
|
2697 |
+
{
|
2698 |
+
"dpo_loss": 0.11440528929233551,
|
2699 |
+
"dpo_wo_beta": -0.2931906580924988,
|
2700 |
+
"epoch": 2.170524326877657,
|
2701 |
+
"grad_norm": 12.536707104746414,
|
2702 |
+
"learning_rate": 1.0709540390305061e-06,
|
2703 |
+
"logits": -1.692717432975769,
|
2704 |
+
"logps": -118.69541931152344,
|
2705 |
+
"loss": 0.1215,
|
2706 |
+
"objective": 0.11440528929233551,
|
2707 |
+
"ranking_idealized": 0.5833333134651184,
|
2708 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2709 |
+
"ranking_simple": 0.9333333373069763,
|
2710 |
+
"regularize": 0.11440528929233551,
|
2711 |
+
"step": 765
|
2712 |
+
},
|
2713 |
+
{
|
2714 |
+
"dpo_loss": 0.06226298585534096,
|
2715 |
+
"dpo_wo_beta": -0.023008961230516434,
|
2716 |
+
"epoch": 2.1846953235710913,
|
2717 |
+
"grad_norm": 12.278836680753214,
|
2718 |
+
"learning_rate": 1.0372333178958462e-06,
|
2719 |
+
"logits": -1.8234201669692993,
|
2720 |
+
"logps": -122.00631713867188,
|
2721 |
+
"loss": 0.1046,
|
2722 |
+
"objective": 0.06226298585534096,
|
2723 |
+
"ranking_idealized": 0.5833333134651184,
|
2724 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
2725 |
+
"ranking_simple": 0.9583333134651184,
|
2726 |
+
"regularize": 0.06226298585534096,
|
2727 |
+
"step": 770
|
2728 |
+
},
|
2729 |
+
{
|
2730 |
+
"dpo_loss": 0.13250760734081268,
|
2731 |
+
"dpo_wo_beta": -0.465701699256897,
|
2732 |
+
"epoch": 2.1988663202645253,
|
2733 |
+
"grad_norm": 18.99094104963921,
|
2734 |
+
"learning_rate": 1.0039125025664392e-06,
|
2735 |
+
"logits": -1.7803070545196533,
|
2736 |
+
"logps": -124.71762084960938,
|
2737 |
+
"loss": 0.1111,
|
2738 |
+
"objective": 0.13250760734081268,
|
2739 |
+
"ranking_idealized": 0.5416666865348816,
|
2740 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
2741 |
+
"ranking_simple": 0.875,
|
2742 |
+
"regularize": 0.13250760734081268,
|
2743 |
+
"step": 775
|
2744 |
+
},
|
2745 |
+
{
|
2746 |
+
"dpo_loss": 0.13606421649456024,
|
2747 |
+
"dpo_wo_beta": -0.6162300705909729,
|
2748 |
+
"epoch": 2.2130373169579594,
|
2749 |
+
"grad_norm": 16.36095800479351,
|
2750 |
+
"learning_rate": 9.710007026204896e-07,
|
2751 |
+
"logits": -1.7376734018325806,
|
2752 |
+
"logps": -122.50430297851562,
|
2753 |
+
"loss": 0.1045,
|
2754 |
+
"objective": 0.13606421649456024,
|
2755 |
+
"ranking_idealized": 0.5,
|
2756 |
+
"ranking_idealized_expo": 0.4333333373069763,
|
2757 |
+
"ranking_simple": 0.8999999761581421,
|
2758 |
+
"regularize": 0.13606421649456024,
|
2759 |
+
"step": 780
|
2760 |
+
},
|
2761 |
+
{
|
2762 |
+
"dpo_loss": 0.11954029649496078,
|
2763 |
+
"dpo_wo_beta": -0.5017859935760498,
|
2764 |
+
"epoch": 2.2272083136513934,
|
2765 |
+
"grad_norm": 14.897356026126795,
|
2766 |
+
"learning_rate": 9.385069158154805e-07,
|
2767 |
+
"logits": -1.7108873128890991,
|
2768 |
+
"logps": -119.73731994628906,
|
2769 |
+
"loss": 0.1223,
|
2770 |
+
"objective": 0.11954029649496078,
|
2771 |
+
"ranking_idealized": 0.5583333373069763,
|
2772 |
+
"ranking_idealized_expo": 0.4583333432674408,
|
2773 |
+
"ranking_simple": 0.8916666507720947,
|
2774 |
+
"regularize": 0.11954029649496078,
|
2775 |
+
"step": 785
|
2776 |
+
},
|
2777 |
+
{
|
2778 |
+
"dpo_loss": 0.0877259224653244,
|
2779 |
+
"dpo_wo_beta": -0.3332770764827728,
|
2780 |
+
"epoch": 2.2413793103448274,
|
2781 |
+
"grad_norm": 14.08354106572375,
|
2782 |
+
"learning_rate": 9.064400256282757e-07,
|
2783 |
+
"logits": -1.7486475706100464,
|
2784 |
+
"logps": -122.89460754394531,
|
2785 |
+
"loss": 0.1063,
|
2786 |
+
"objective": 0.0877259224653244,
|
2787 |
+
"ranking_idealized": 0.625,
|
2788 |
+
"ranking_idealized_expo": 0.5666666626930237,
|
2789 |
+
"ranking_simple": 0.949999988079071,
|
2790 |
+
"regularize": 0.0877259224653244,
|
2791 |
+
"step": 790
|
2792 |
+
},
|
2793 |
+
{
|
2794 |
+
"dpo_loss": 0.09928978979587555,
|
2795 |
+
"dpo_wo_beta": -0.10460276901721954,
|
2796 |
+
"epoch": 2.255550307038262,
|
2797 |
+
"grad_norm": 15.557463813809257,
|
2798 |
+
"learning_rate": 8.74808798826467e-07,
|
2799 |
+
"logits": -1.8421998023986816,
|
2800 |
+
"logps": -120.40747833251953,
|
2801 |
+
"loss": 0.1017,
|
2802 |
+
"objective": 0.09928978979587555,
|
2803 |
+
"ranking_idealized": 0.699999988079071,
|
2804 |
+
"ranking_idealized_expo": 0.6000000238418579,
|
2805 |
+
"ranking_simple": 0.9416666626930237,
|
2806 |
+
"regularize": 0.09928978979587555,
|
2807 |
+
"step": 795
|
2808 |
+
},
|
2809 |
+
{
|
2810 |
+
"epoch": 2.255550307038262,
|
2811 |
+
"eval_dpo_loss": 0.8903655409812927,
|
2812 |
+
"eval_dpo_wo_beta": -6.205545902252197,
|
2813 |
+
"eval_logits": -2.4489927291870117,
|
2814 |
+
"eval_logps": -123.57449340820312,
|
2815 |
+
"eval_loss": 0.8711386919021606,
|
2816 |
+
"eval_objective": 0.8903655409812927,
|
2817 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2818 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2819 |
+
"eval_ranking_simple": 0.5683229565620422,
|
2820 |
+
"eval_regularize": 0.8903655409812927,
|
2821 |
+
"eval_runtime": 347.0103,
|
2822 |
+
"eval_samples_per_second": 16.685,
|
2823 |
+
"eval_steps_per_second": 1.392,
|
2824 |
+
"step": 795
|
2825 |
+
},
|
2826 |
+
{
|
2827 |
+
"dpo_loss": 0.1340600550174713,
|
2828 |
+
"dpo_wo_beta": -0.5811701416969299,
|
2829 |
+
"epoch": 2.269721303731696,
|
2830 |
+
"grad_norm": 13.668309173603514,
|
2831 |
+
"learning_rate": 8.436218830716259e-07,
|
2832 |
+
"logits": -1.8454309701919556,
|
2833 |
+
"logps": -120.35511779785156,
|
2834 |
+
"loss": 0.1264,
|
2835 |
+
"objective": 0.1340600550174713,
|
2836 |
+
"ranking_idealized": 0.6499999761581421,
|
2837 |
+
"ranking_idealized_expo": 0.550000011920929,
|
2838 |
+
"ranking_simple": 0.925000011920929,
|
2839 |
+
"regularize": 0.1340600550174713,
|
2840 |
+
"step": 800
|
2841 |
+
},
|
2842 |
+
{
|
2843 |
+
"dpo_loss": 0.10816308110952377,
|
2844 |
+
"dpo_wo_beta": -0.1633211225271225,
|
2845 |
+
"epoch": 2.28389230042513,
|
2846 |
+
"grad_norm": 13.957508503091352,
|
2847 |
+
"learning_rate": 8.1288780455512e-07,
|
2848 |
+
"logits": -1.8811193704605103,
|
2849 |
+
"logps": -119.9056625366211,
|
2850 |
+
"loss": 0.1133,
|
2851 |
+
"objective": 0.10816308110952377,
|
2852 |
+
"ranking_idealized": 0.6000000238418579,
|
2853 |
+
"ranking_idealized_expo": 0.550000011920929,
|
2854 |
+
"ranking_simple": 0.9166666865348816,
|
2855 |
+
"regularize": 0.10816308110952377,
|
2856 |
+
"step": 805
|
2857 |
+
},
|
2858 |
+
{
|
2859 |
+
"dpo_loss": 0.1715787798166275,
|
2860 |
+
"dpo_wo_beta": -0.7547404766082764,
|
2861 |
+
"epoch": 2.298063297118564,
|
2862 |
+
"grad_norm": 12.765274097098457,
|
2863 |
+
"learning_rate": 7.826149656671386e-07,
|
2864 |
+
"logits": -1.9726245403289795,
|
2865 |
+
"logps": -114.58699798583984,
|
2866 |
+
"loss": 0.1146,
|
2867 |
+
"objective": 0.1715787798166275,
|
2868 |
+
"ranking_idealized": 0.6416666507720947,
|
2869 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
2870 |
+
"ranking_simple": 0.9166666865348816,
|
2871 |
+
"regularize": 0.1715787798166275,
|
2872 |
+
"step": 810
|
2873 |
+
},
|
2874 |
+
{
|
2875 |
+
"dpo_loss": 0.09882104396820068,
|
2876 |
+
"dpo_wo_beta": -0.18077202141284943,
|
2877 |
+
"epoch": 2.312234293811998,
|
2878 |
+
"grad_norm": 11.818365173405272,
|
2879 |
+
"learning_rate": 7.528116426995605e-07,
|
2880 |
+
"logits": -1.7834192514419556,
|
2881 |
+
"logps": -118.5341796875,
|
2882 |
+
"loss": 0.0843,
|
2883 |
+
"objective": 0.09882104396820068,
|
2884 |
+
"ranking_idealized": 0.550000011920929,
|
2885 |
+
"ranking_idealized_expo": 0.4333333373069763,
|
2886 |
+
"ranking_simple": 0.8916666507720947,
|
2887 |
+
"regularize": 0.09882104396820068,
|
2888 |
+
"step": 815
|
2889 |
+
},
|
2890 |
+
{
|
2891 |
+
"dpo_loss": 0.1070082038640976,
|
2892 |
+
"dpo_wo_beta": -0.44529280066490173,
|
2893 |
+
"epoch": 2.3264052905054324,
|
2894 |
+
"grad_norm": 13.918285230217345,
|
2895 |
+
"learning_rate": 7.234859835833022e-07,
|
2896 |
+
"logits": -1.8069401979446411,
|
2897 |
+
"logps": -123.11463928222656,
|
2898 |
+
"loss": 0.1153,
|
2899 |
+
"objective": 0.1070082038640976,
|
2900 |
+
"ranking_idealized": 0.6083333492279053,
|
2901 |
+
"ranking_idealized_expo": 0.5583333373069763,
|
2902 |
+
"ranking_simple": 0.949999988079071,
|
2903 |
+
"regularize": 0.1070082038640976,
|
2904 |
+
"step": 820
|
2905 |
+
},
|
2906 |
+
{
|
2907 |
+
"dpo_loss": 0.14794430136680603,
|
2908 |
+
"dpo_wo_beta": -0.5907248258590698,
|
2909 |
+
"epoch": 2.3405762871988665,
|
2910 |
+
"grad_norm": 14.849580151366643,
|
2911 |
+
"learning_rate": 6.94646005660749e-07,
|
2912 |
+
"logits": -1.808493971824646,
|
2913 |
+
"logps": -116.64714050292969,
|
2914 |
+
"loss": 0.1107,
|
2915 |
+
"objective": 0.14794430136680603,
|
2916 |
+
"ranking_idealized": 0.6333333253860474,
|
2917 |
+
"ranking_idealized_expo": 0.5583333373069763,
|
2918 |
+
"ranking_simple": 0.9083333611488342,
|
2919 |
+
"regularize": 0.14794430136680603,
|
2920 |
+
"step": 825
|
2921 |
+
},
|
2922 |
+
{
|
2923 |
+
"dpo_loss": 0.08306514471769333,
|
2924 |
+
"dpo_wo_beta": -0.10000230371952057,
|
2925 |
+
"epoch": 2.3547472838923005,
|
2926 |
+
"grad_norm": 12.857101782602014,
|
2927 |
+
"learning_rate": 6.662995934939007e-07,
|
2928 |
+
"logits": -1.7857582569122314,
|
2929 |
+
"logps": -123.92733764648438,
|
2930 |
+
"loss": 0.1063,
|
2931 |
+
"objective": 0.08306514471769333,
|
2932 |
+
"ranking_idealized": 0.5833333134651184,
|
2933 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
2934 |
+
"ranking_simple": 0.9583333134651184,
|
2935 |
+
"regularize": 0.08306514471769333,
|
2936 |
+
"step": 830
|
2937 |
+
},
|
2938 |
+
{
|
2939 |
+
"dpo_loss": 0.11827471107244492,
|
2940 |
+
"dpo_wo_beta": -0.40130358934402466,
|
2941 |
+
"epoch": 2.3689182805857345,
|
2942 |
+
"grad_norm": 13.825829745811577,
|
2943 |
+
"learning_rate": 6.384544967088063e-07,
|
2944 |
+
"logits": -1.8356945514678955,
|
2945 |
+
"logps": -122.48320770263672,
|
2946 |
+
"loss": 0.124,
|
2947 |
+
"objective": 0.11827471107244492,
|
2948 |
+
"ranking_idealized": 0.6000000238418579,
|
2949 |
+
"ranking_idealized_expo": 0.4583333432674408,
|
2950 |
+
"ranking_simple": 0.9333333373069763,
|
2951 |
+
"regularize": 0.11827471107244492,
|
2952 |
+
"step": 835
|
2953 |
+
},
|
2954 |
+
{
|
2955 |
+
"dpo_loss": 0.14169135689735413,
|
2956 |
+
"dpo_wo_beta": -0.3359481692314148,
|
2957 |
+
"epoch": 2.3830892772791685,
|
2958 |
+
"grad_norm": 18.243146391325514,
|
2959 |
+
"learning_rate": 6.111183278768956e-07,
|
2960 |
+
"logits": -1.8658840656280518,
|
2961 |
+
"logps": -123.42705535888672,
|
2962 |
+
"loss": 0.1321,
|
2963 |
+
"objective": 0.14169135689735413,
|
2964 |
+
"ranking_idealized": 0.5,
|
2965 |
+
"ranking_idealized_expo": 0.44999998807907104,
|
2966 |
+
"ranking_simple": 0.9083333611488342,
|
2967 |
+
"regularize": 0.14169135689735413,
|
2968 |
+
"step": 840
|
2969 |
+
},
|
2970 |
+
{
|
2971 |
+
"dpo_loss": 0.09381429105997086,
|
2972 |
+
"dpo_wo_beta": -0.14171645045280457,
|
2973 |
+
"epoch": 2.3972602739726026,
|
2974 |
+
"grad_norm": 24.500088347031785,
|
2975 |
+
"learning_rate": 5.842985604337769e-07,
|
2976 |
+
"logits": -1.7731019258499146,
|
2977 |
+
"logps": -125.81861877441406,
|
2978 |
+
"loss": 0.1225,
|
2979 |
+
"objective": 0.09381429105997086,
|
2980 |
+
"ranking_idealized": 0.6083333492279053,
|
2981 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
2982 |
+
"ranking_simple": 0.8999999761581421,
|
2983 |
+
"regularize": 0.09381429105997086,
|
2984 |
+
"step": 845
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 2.4057628719886632,
|
2988 |
+
"eval_dpo_loss": 0.9035148620605469,
|
2989 |
+
"eval_dpo_wo_beta": -6.352902889251709,
|
2990 |
+
"eval_logits": -2.4742591381073,
|
2991 |
+
"eval_logps": -124.53355407714844,
|
2992 |
+
"eval_loss": 0.882164478302002,
|
2993 |
+
"eval_objective": 0.9035148620605469,
|
2994 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
2995 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
2996 |
+
"eval_ranking_simple": 0.5569358468055725,
|
2997 |
+
"eval_regularize": 0.9035148620605469,
|
2998 |
+
"eval_runtime": 366.478,
|
2999 |
+
"eval_samples_per_second": 15.799,
|
3000 |
+
"eval_steps_per_second": 1.318,
|
3001 |
+
"step": 848
|
3002 |
+
},
|
3003 |
+
{
|
3004 |
+
"dpo_loss": 0.10245585441589355,
|
3005 |
+
"dpo_wo_beta": -0.2030431628227234,
|
3006 |
+
"epoch": 2.413793103448276,
|
3007 |
+
"grad_norm": 22.363955547911008,
|
3008 |
+
"learning_rate": 5.580025266360764e-07,
|
3009 |
+
"logits": -1.7464776039123535,
|
3010 |
+
"logps": -122.80999755859375,
|
3011 |
+
"loss": 0.1449,
|
3012 |
+
"objective": 0.10245585441589355,
|
3013 |
+
"ranking_idealized": 0.6666666865348816,
|
3014 |
+
"ranking_idealized_expo": 0.5625,
|
3015 |
+
"ranking_simple": 0.9583333134651184,
|
3016 |
+
"regularize": 0.10245585441589355,
|
3017 |
+
"step": 850
|
3018 |
+
},
|
3019 |
+
{
|
3020 |
+
"dpo_loss": 0.14077231287956238,
|
3021 |
+
"dpo_wo_beta": -0.15207929909229279,
|
3022 |
+
"epoch": 2.42796410014171,
|
3023 |
+
"grad_norm": 14.848409925173506,
|
3024 |
+
"learning_rate": 5.322374155568688e-07,
|
3025 |
+
"logits": -1.8929237127304077,
|
3026 |
+
"logps": -115.12696838378906,
|
3027 |
+
"loss": 0.1124,
|
3028 |
+
"objective": 0.14077231287956238,
|
3029 |
+
"ranking_idealized": 0.5666666626930237,
|
3030 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
3031 |
+
"ranking_simple": 0.9166666865348816,
|
3032 |
+
"regularize": 0.14077231287956238,
|
3033 |
+
"step": 855
|
3034 |
+
},
|
3035 |
+
{
|
3036 |
+
"dpo_loss": 0.1414971649646759,
|
3037 |
+
"dpo_wo_beta": -0.5974557995796204,
|
3038 |
+
"epoch": 2.442135096835144,
|
3039 |
+
"grad_norm": 11.710235822935351,
|
3040 |
+
"learning_rate": 5.070102711202606e-07,
|
3041 |
+
"logits": -1.7974507808685303,
|
3042 |
+
"logps": -121.47347259521484,
|
3043 |
+
"loss": 0.1083,
|
3044 |
+
"objective": 0.1414971649646759,
|
3045 |
+
"ranking_idealized": 0.6166666746139526,
|
3046 |
+
"ranking_idealized_expo": 0.5583333373069763,
|
3047 |
+
"ranking_simple": 0.9333333373069763,
|
3048 |
+
"regularize": 0.1414971649646759,
|
3049 |
+
"step": 860
|
3050 |
+
},
|
3051 |
+
{
|
3052 |
+
"dpo_loss": 0.09845638275146484,
|
3053 |
+
"dpo_wo_beta": -0.1998511403799057,
|
3054 |
+
"epoch": 2.4563060935285783,
|
3055 |
+
"grad_norm": 15.827446562598988,
|
3056 |
+
"learning_rate": 4.823279901756498e-07,
|
3057 |
+
"logits": -1.816353440284729,
|
3058 |
+
"logps": -122.7919692993164,
|
3059 |
+
"loss": 0.1063,
|
3060 |
+
"objective": 0.09845638275146484,
|
3061 |
+
"ranking_idealized": 0.5833333134651184,
|
3062 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
3063 |
+
"ranking_simple": 0.9083333611488342,
|
3064 |
+
"regularize": 0.09845638275146484,
|
3065 |
+
"step": 865
|
3066 |
+
},
|
3067 |
+
{
|
3068 |
+
"dpo_loss": 0.11614971607923508,
|
3069 |
+
"dpo_wo_beta": -0.34934201836586,
|
3070 |
+
"epoch": 2.4704770902220123,
|
3071 |
+
"grad_norm": 15.618833316654502,
|
3072 |
+
"learning_rate": 4.581973206121948e-07,
|
3073 |
+
"logits": -1.9400283098220825,
|
3074 |
+
"logps": -119.73554992675781,
|
3075 |
+
"loss": 0.1153,
|
3076 |
+
"objective": 0.11614971607923508,
|
3077 |
+
"ranking_idealized": 0.5916666388511658,
|
3078 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
3079 |
+
"ranking_simple": 0.925000011920929,
|
3080 |
+
"regularize": 0.11614971607923508,
|
3081 |
+
"step": 870
|
3082 |
+
},
|
3083 |
+
{
|
3084 |
+
"dpo_loss": 0.2065262645483017,
|
3085 |
+
"dpo_wo_beta": -1.1751881837844849,
|
3086 |
+
"epoch": 2.4846480869154464,
|
3087 |
+
"grad_norm": 17.223610041291963,
|
3088 |
+
"learning_rate": 4.3462485951401126e-07,
|
3089 |
+
"logits": -1.7437902688980103,
|
3090 |
+
"logps": -120.61251831054688,
|
3091 |
+
"loss": 0.1238,
|
3092 |
+
"objective": 0.2065262645483017,
|
3093 |
+
"ranking_idealized": 0.625,
|
3094 |
+
"ranking_idealized_expo": 0.574999988079071,
|
3095 |
+
"ranking_simple": 0.8666666746139526,
|
3096 |
+
"regularize": 0.2065262645483017,
|
3097 |
+
"step": 875
|
3098 |
+
},
|
3099 |
+
{
|
3100 |
+
"dpo_loss": 0.13995474576950073,
|
3101 |
+
"dpo_wo_beta": -0.27328214049339294,
|
3102 |
+
"epoch": 2.4988190836088804,
|
3103 |
+
"grad_norm": 11.937037662011573,
|
3104 |
+
"learning_rate": 4.116170513565942e-07,
|
3105 |
+
"logits": -1.9172199964523315,
|
3106 |
+
"logps": -115.04023742675781,
|
3107 |
+
"loss": 0.1093,
|
3108 |
+
"objective": 0.13995474576950073,
|
3109 |
+
"ranking_idealized": 0.5249999761581421,
|
3110 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
3111 |
+
"ranking_simple": 0.8999999761581421,
|
3112 |
+
"regularize": 0.13995474576950073,
|
3113 |
+
"step": 880
|
3114 |
+
},
|
3115 |
+
{
|
3116 |
+
"dpo_loss": 0.13740381598472595,
|
3117 |
+
"dpo_wo_beta": -0.38727322220802307,
|
3118 |
+
"epoch": 2.5129900803023144,
|
3119 |
+
"grad_norm": 16.35266837824131,
|
3120 |
+
"learning_rate": 3.891801862449629e-07,
|
3121 |
+
"logits": -1.8533929586410522,
|
3122 |
+
"logps": -115.91497039794922,
|
3123 |
+
"loss": 0.1385,
|
3124 |
+
"objective": 0.13740381598472595,
|
3125 |
+
"ranking_idealized": 0.6083333492279053,
|
3126 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
3127 |
+
"ranking_simple": 0.8916666507720947,
|
3128 |
+
"regularize": 0.13740381598472595,
|
3129 |
+
"step": 885
|
3130 |
+
},
|
3131 |
+
{
|
3132 |
+
"dpo_loss": 0.11444827914237976,
|
3133 |
+
"dpo_wo_beta": -0.31205496191978455,
|
3134 |
+
"epoch": 2.527161076995749,
|
3135 |
+
"grad_norm": 12.141683375579714,
|
3136 |
+
"learning_rate": 3.6732039819400686e-07,
|
3137 |
+
"logits": -1.6747931241989136,
|
3138 |
+
"logps": -116.25071716308594,
|
3139 |
+
"loss": 0.1173,
|
3140 |
+
"objective": 0.11444827914237976,
|
3141 |
+
"ranking_idealized": 0.5166666507720947,
|
3142 |
+
"ranking_idealized_expo": 0.4416666626930237,
|
3143 |
+
"ranking_simple": 0.9083333611488342,
|
3144 |
+
"regularize": 0.11444827914237976,
|
3145 |
+
"step": 890
|
3146 |
+
},
|
3147 |
+
{
|
3148 |
+
"dpo_loss": 0.21558310091495514,
|
3149 |
+
"dpo_wo_beta": -0.7720097303390503,
|
3150 |
+
"epoch": 2.541332073689183,
|
3151 |
+
"grad_norm": 17.5258932616616,
|
3152 |
+
"learning_rate": 3.46043663451511e-07,
|
3153 |
+
"logits": -1.938331127166748,
|
3154 |
+
"logps": -121.76246643066406,
|
3155 |
+
"loss": 0.1324,
|
3156 |
+
"objective": 0.21558310091495514,
|
3157 |
+
"ranking_idealized": 0.550000011920929,
|
3158 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
3159 |
+
"ranking_simple": 0.8333333134651184,
|
3160 |
+
"regularize": 0.21558310091495514,
|
3161 |
+
"step": 895
|
3162 |
+
},
|
3163 |
+
{
|
3164 |
+
"dpo_loss": 0.1236240565776825,
|
3165 |
+
"dpo_wo_beta": -0.21698738634586334,
|
3166 |
+
"epoch": 2.555503070382617,
|
3167 |
+
"grad_norm": 16.69855766001795,
|
3168 |
+
"learning_rate": 3.253557988643072e-07,
|
3169 |
+
"logits": -1.8755207061767578,
|
3170 |
+
"logps": -119.14775848388672,
|
3171 |
+
"loss": 0.1157,
|
3172 |
+
"objective": 0.1236240565776825,
|
3173 |
+
"ranking_idealized": 0.625,
|
3174 |
+
"ranking_idealized_expo": 0.550000011920929,
|
3175 |
+
"ranking_simple": 0.9083333611488342,
|
3176 |
+
"regularize": 0.1236240565776825,
|
3177 |
+
"step": 900
|
3178 |
+
},
|
3179 |
+
{
|
3180 |
+
"epoch": 2.558337269721304,
|
3181 |
+
"eval_dpo_loss": 0.894111156463623,
|
3182 |
+
"eval_dpo_wo_beta": -6.213596343994141,
|
3183 |
+
"eval_logits": -2.4885809421539307,
|
3184 |
+
"eval_logps": -124.45829010009766,
|
3185 |
+
"eval_loss": 0.8717960715293884,
|
3186 |
+
"eval_objective": 0.894111156463623,
|
3187 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
3188 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
3189 |
+
"eval_ranking_simple": 0.5621117949485779,
|
3190 |
+
"eval_regularize": 0.894111156463623,
|
3191 |
+
"eval_runtime": 342.6574,
|
3192 |
+
"eval_samples_per_second": 16.897,
|
3193 |
+
"eval_steps_per_second": 1.41,
|
3194 |
+
"step": 901
|
3195 |
+
},
|
3196 |
+
{
|
3197 |
+
"dpo_loss": 0.09204068034887314,
|
3198 |
+
"dpo_wo_beta": -0.17730669677257538,
|
3199 |
+
"epoch": 2.569674067076051,
|
3200 |
+
"grad_norm": 14.161016786369684,
|
3201 |
+
"learning_rate": 3.052624602880064e-07,
|
3202 |
+
"logits": -1.8424724340438843,
|
3203 |
+
"logps": -117.90782928466797,
|
3204 |
+
"loss": 0.0986,
|
3205 |
+
"objective": 0.09204068034887314,
|
3206 |
+
"ranking_idealized": 0.6000000238418579,
|
3207 |
+
"ranking_idealized_expo": 0.5333333611488342,
|
3208 |
+
"ranking_simple": 0.925000011920929,
|
3209 |
+
"regularize": 0.09204068034887314,
|
3210 |
+
"step": 905
|
3211 |
+
},
|
3212 |
+
{
|
3213 |
+
"dpo_loss": 0.11361932754516602,
|
3214 |
+
"dpo_wo_beta": -0.38244467973709106,
|
3215 |
+
"epoch": 2.583845063769485,
|
3216 |
+
"grad_norm": 12.421803606538058,
|
3217 |
+
"learning_rate": 2.8576914104074425e-07,
|
3218 |
+
"logits": -2.0089211463928223,
|
3219 |
+
"logps": -116.39904022216797,
|
3220 |
+
"loss": 0.1122,
|
3221 |
+
"objective": 0.11361932754516602,
|
3222 |
+
"ranking_idealized": 0.5916666388511658,
|
3223 |
+
"ranking_idealized_expo": 0.4583333432674408,
|
3224 |
+
"ranking_simple": 0.9166666865348816,
|
3225 |
+
"regularize": 0.11361932754516602,
|
3226 |
+
"step": 910
|
3227 |
+
},
|
3228 |
+
{
|
3229 |
+
"dpo_loss": 0.13471105694770813,
|
3230 |
+
"dpo_wo_beta": -0.5014829039573669,
|
3231 |
+
"epoch": 2.5980160604629194,
|
3232 |
+
"grad_norm": 15.12060209950672,
|
3233 |
+
"learning_rate": 2.6688117040136463e-07,
|
3234 |
+
"logits": -1.981037974357605,
|
3235 |
+
"logps": -121.86930084228516,
|
3236 |
+
"loss": 0.1153,
|
3237 |
+
"objective": 0.13471105694770813,
|
3238 |
+
"ranking_idealized": 0.625,
|
3239 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
3240 |
+
"ranking_simple": 0.949999988079071,
|
3241 |
+
"regularize": 0.13471105694770813,
|
3242 |
+
"step": 915
|
3243 |
+
},
|
3244 |
+
{
|
3245 |
+
"dpo_loss": 0.08700807392597198,
|
3246 |
+
"dpo_wo_beta": -0.1876840889453888,
|
3247 |
+
"epoch": 2.6121870571563535,
|
3248 |
+
"grad_norm": 17.811975214160903,
|
3249 |
+
"learning_rate": 2.486037121524448e-07,
|
3250 |
+
"logits": -1.898934245109558,
|
3251 |
+
"logps": -120.13009643554688,
|
3252 |
+
"loss": 0.114,
|
3253 |
+
"objective": 0.08700807392597198,
|
3254 |
+
"ranking_idealized": 0.6333333253860474,
|
3255 |
+
"ranking_idealized_expo": 0.550000011920929,
|
3256 |
+
"ranking_simple": 0.9416666626930237,
|
3257 |
+
"regularize": 0.08700807392597198,
|
3258 |
+
"step": 920
|
3259 |
+
},
|
3260 |
+
{
|
3261 |
+
"dpo_loss": 0.0918925479054451,
|
3262 |
+
"dpo_wo_beta": -0.14420188963413239,
|
3263 |
+
"epoch": 2.6263580538497875,
|
3264 |
+
"grad_norm": 16.6284598820075,
|
3265 |
+
"learning_rate": 2.3094176316856982e-07,
|
3266 |
+
"logits": -1.8268101215362549,
|
3267 |
+
"logps": -121.2860107421875,
|
3268 |
+
"loss": 0.1064,
|
3269 |
+
"objective": 0.0918925479054451,
|
3270 |
+
"ranking_idealized": 0.550000011920929,
|
3271 |
+
"ranking_idealized_expo": 0.4749999940395355,
|
3272 |
+
"ranking_simple": 0.9083333611488342,
|
3273 |
+
"regularize": 0.0918925479054451,
|
3274 |
+
"step": 925
|
3275 |
+
},
|
3276 |
+
{
|
3277 |
+
"dpo_loss": 0.15886452794075012,
|
3278 |
+
"dpo_wo_beta": -0.7213179469108582,
|
3279 |
+
"epoch": 2.6405290505432215,
|
3280 |
+
"grad_norm": 15.32952011074324,
|
3281 |
+
"learning_rate": 2.13900152050239e-07,
|
3282 |
+
"logits": -1.9606980085372925,
|
3283 |
+
"logps": -110.05363464355469,
|
3284 |
+
"loss": 0.1179,
|
3285 |
+
"objective": 0.15886452794075012,
|
3286 |
+
"ranking_idealized": 0.5416666865348816,
|
3287 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
3288 |
+
"ranking_simple": 0.8999999761581421,
|
3289 |
+
"regularize": 0.15886452794075012,
|
3290 |
+
"step": 930
|
3291 |
+
},
|
3292 |
+
{
|
3293 |
+
"dpo_loss": 0.13784056901931763,
|
3294 |
+
"dpo_wo_beta": -0.5006576776504517,
|
3295 |
+
"epoch": 2.6547000472366555,
|
3296 |
+
"grad_norm": 14.658353796887713,
|
3297 |
+
"learning_rate": 1.9748353780377234e-07,
|
3298 |
+
"logits": -1.9395031929016113,
|
3299 |
+
"logps": -119.9269790649414,
|
3300 |
+
"loss": 0.1254,
|
3301 |
+
"objective": 0.13784056901931763,
|
3302 |
+
"ranking_idealized": 0.6583333611488342,
|
3303 |
+
"ranking_idealized_expo": 0.6000000238418579,
|
3304 |
+
"ranking_simple": 0.8916666507720947,
|
3305 |
+
"regularize": 0.13784056901931763,
|
3306 |
+
"step": 935
|
3307 |
+
},
|
3308 |
+
{
|
3309 |
+
"dpo_loss": 0.12213913351297379,
|
3310 |
+
"dpo_wo_beta": -0.43640393018722534,
|
3311 |
+
"epoch": 2.66887104393009,
|
3312 |
+
"grad_norm": 14.844912954939305,
|
3313 |
+
"learning_rate": 1.8169640856758652e-07,
|
3314 |
+
"logits": -1.952646017074585,
|
3315 |
+
"logps": -121.75474548339844,
|
3316 |
+
"loss": 0.1117,
|
3317 |
+
"objective": 0.12213913351297379,
|
3318 |
+
"ranking_idealized": 0.6000000238418579,
|
3319 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
3320 |
+
"ranking_simple": 0.9166666865348816,
|
3321 |
+
"regularize": 0.12213913351297379,
|
3322 |
+
"step": 940
|
3323 |
+
},
|
3324 |
+
{
|
3325 |
+
"dpo_loss": 0.1136295348405838,
|
3326 |
+
"dpo_wo_beta": -0.17752434313297272,
|
3327 |
+
"epoch": 2.6830420406235236,
|
3328 |
+
"grad_norm": 13.220834082246482,
|
3329 |
+
"learning_rate": 1.6654308038518057e-07,
|
3330 |
+
"logits": -1.7970060110092163,
|
3331 |
+
"logps": -117.90103149414062,
|
3332 |
+
"loss": 0.1054,
|
3333 |
+
"objective": 0.1136295348405838,
|
3334 |
+
"ranking_idealized": 0.5833333134651184,
|
3335 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
3336 |
+
"ranking_simple": 0.9083333611488342,
|
3337 |
+
"regularize": 0.1136295348405838,
|
3338 |
+
"step": 945
|
3339 |
+
},
|
3340 |
+
{
|
3341 |
+
"dpo_loss": 0.16729401051998138,
|
3342 |
+
"dpo_wo_beta": -0.7473469972610474,
|
3343 |
+
"epoch": 2.697213037316958,
|
3344 |
+
"grad_norm": 14.369145851061829,
|
3345 |
+
"learning_rate": 1.5202769602517514e-07,
|
3346 |
+
"logits": -1.8816020488739014,
|
3347 |
+
"logps": -115.14582061767578,
|
3348 |
+
"loss": 0.1387,
|
3349 |
+
"objective": 0.16729401051998138,
|
3350 |
+
"ranking_idealized": 0.625,
|
3351 |
+
"ranking_idealized_expo": 0.5,
|
3352 |
+
"ranking_simple": 0.8999999761581421,
|
3353 |
+
"regularize": 0.16729401051998138,
|
3354 |
+
"step": 950
|
3355 |
+
},
|
3356 |
+
{
|
3357 |
+
"epoch": 2.708549834671705,
|
3358 |
+
"eval_dpo_loss": 0.8891981840133667,
|
3359 |
+
"eval_dpo_wo_beta": -6.16098690032959,
|
3360 |
+
"eval_logits": -2.508572816848755,
|
3361 |
+
"eval_logps": -123.41434478759766,
|
3362 |
+
"eval_loss": 0.8687644004821777,
|
3363 |
+
"eval_objective": 0.8891981840133667,
|
3364 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
3365 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
3366 |
+
"eval_ranking_simple": 0.5579710006713867,
|
3367 |
+
"eval_regularize": 0.8891981840133667,
|
3368 |
+
"eval_runtime": 379.9179,
|
3369 |
+
"eval_samples_per_second": 15.24,
|
3370 |
+
"eval_steps_per_second": 1.271,
|
3371 |
+
"step": 954
|
3372 |
+
},
|
3373 |
+
{
|
3374 |
+
"dpo_loss": 0.16232462227344513,
|
3375 |
+
"dpo_wo_beta": -0.5988053679466248,
|
3376 |
+
"epoch": 2.711384034010392,
|
3377 |
+
"grad_norm": 18.96223814657741,
|
3378 |
+
"learning_rate": 1.381542238487188e-07,
|
3379 |
+
"logits": -1.8838648796081543,
|
3380 |
+
"logps": -119.06771087646484,
|
3381 |
+
"loss": 0.1298,
|
3382 |
+
"objective": 0.16232462227344513,
|
3383 |
+
"ranking_idealized": 0.5833333134651184,
|
3384 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
3385 |
+
"ranking_simple": 0.8999999761581421,
|
3386 |
+
"regularize": 0.16232462227344513,
|
3387 |
+
"step": 955
|
3388 |
+
},
|
3389 |
+
{
|
3390 |
+
"dpo_loss": 0.14441066980361938,
|
3391 |
+
"dpo_wo_beta": -0.4333815276622772,
|
3392 |
+
"epoch": 2.725555030703826,
|
3393 |
+
"grad_norm": 13.716940458471806,
|
3394 |
+
"learning_rate": 1.2492645672457838e-07,
|
3395 |
+
"logits": -2.032045364379883,
|
3396 |
+
"logps": -113.05756378173828,
|
3397 |
+
"loss": 0.1189,
|
3398 |
+
"objective": 0.14441066980361938,
|
3399 |
+
"ranking_idealized": 0.6000000238418579,
|
3400 |
+
"ranking_idealized_expo": 0.5083333253860474,
|
3401 |
+
"ranking_simple": 0.8999999761581421,
|
3402 |
+
"regularize": 0.14441066980361938,
|
3403 |
+
"step": 960
|
3404 |
+
},
|
3405 |
+
{
|
3406 |
+
"dpo_loss": 0.0981813296675682,
|
3407 |
+
"dpo_wo_beta": -0.19802381098270416,
|
3408 |
+
"epoch": 2.73972602739726,
|
3409 |
+
"grad_norm": 15.207158850260772,
|
3410 |
+
"learning_rate": 1.1234801099220787e-07,
|
3411 |
+
"logits": -1.7988998889923096,
|
3412 |
+
"logps": -122.7618408203125,
|
3413 |
+
"loss": 0.1111,
|
3414 |
+
"objective": 0.0981813296675682,
|
3415 |
+
"ranking_idealized": 0.625,
|
3416 |
+
"ranking_idealized_expo": 0.5666666626930237,
|
3417 |
+
"ranking_simple": 0.9333333373069763,
|
3418 |
+
"regularize": 0.0981813296675682,
|
3419 |
+
"step": 965
|
3420 |
+
},
|
3421 |
+
{
|
3422 |
+
"dpo_loss": 0.1318301260471344,
|
3423 |
+
"dpo_wo_beta": -0.2933843731880188,
|
3424 |
+
"epoch": 2.753897024090694,
|
3425 |
+
"grad_norm": 19.758130540339153,
|
3426 |
+
"learning_rate": 1.004223254730749e-07,
|
3427 |
+
"logits": -1.7169368267059326,
|
3428 |
+
"logps": -120.43925476074219,
|
3429 |
+
"loss": 0.1278,
|
3430 |
+
"objective": 0.1318301260471344,
|
3431 |
+
"ranking_idealized": 0.6083333492279053,
|
3432 |
+
"ranking_idealized_expo": 0.4833333194255829,
|
3433 |
+
"ranking_simple": 0.8916666507720947,
|
3434 |
+
"regularize": 0.1318301260471344,
|
3435 |
+
"step": 970
|
3436 |
+
},
|
3437 |
+
{
|
3438 |
+
"dpo_loss": 0.058168552815914154,
|
3439 |
+
"dpo_wo_beta": -8.394511678488925e-05,
|
3440 |
+
"epoch": 2.7680680207841286,
|
3441 |
+
"grad_norm": 12.662905511630496,
|
3442 |
+
"learning_rate": 8.915266053052374e-08,
|
3443 |
+
"logits": -1.902711033821106,
|
3444 |
+
"logps": -116.11229705810547,
|
3445 |
+
"loss": 0.0999,
|
3446 |
+
"objective": 0.058168552815914154,
|
3447 |
+
"ranking_idealized": 0.6333333253860474,
|
3448 |
+
"ranking_idealized_expo": 0.5,
|
3449 |
+
"ranking_simple": 0.925000011920929,
|
3450 |
+
"regularize": 0.058168552815914154,
|
3451 |
+
"step": 975
|
3452 |
+
},
|
3453 |
+
{
|
3454 |
+
"dpo_loss": 0.10343047231435776,
|
3455 |
+
"dpo_wo_beta": -0.17137210071086884,
|
3456 |
+
"epoch": 2.7822390174775626,
|
3457 |
+
"grad_norm": 18.09899373584344,
|
3458 |
+
"learning_rate": 7.854209717842231e-08,
|
3459 |
+
"logits": -1.8915067911148071,
|
3460 |
+
"logps": -118.23885345458984,
|
3461 |
+
"loss": 0.1108,
|
3462 |
+
"objective": 0.10343047231435776,
|
3463 |
+
"ranking_idealized": 0.5416666865348816,
|
3464 |
+
"ranking_idealized_expo": 0.4416666626930237,
|
3465 |
+
"ranking_simple": 0.949999988079071,
|
3466 |
+
"regularize": 0.10343047231435776,
|
3467 |
+
"step": 980
|
3468 |
+
},
|
3469 |
+
{
|
3470 |
+
"dpo_loss": 0.11380515992641449,
|
3471 |
+
"dpo_wo_beta": -0.2935677468776703,
|
3472 |
+
"epoch": 2.7964100141709967,
|
3473 |
+
"grad_norm": 15.185634936609162,
|
3474 |
+
"learning_rate": 6.859353623884569e-08,
|
3475 |
+
"logits": -1.851272463798523,
|
3476 |
+
"logps": -114.0352783203125,
|
3477 |
+
"loss": 0.1005,
|
3478 |
+
"objective": 0.11380515992641449,
|
3479 |
+
"ranking_idealized": 0.625,
|
3480 |
+
"ranking_idealized_expo": 0.5166666507720947,
|
3481 |
+
"ranking_simple": 0.8833333253860474,
|
3482 |
+
"regularize": 0.11380515992641449,
|
3483 |
+
"step": 985
|
3484 |
+
},
|
3485 |
+
{
|
3486 |
+
"dpo_loss": 0.10726428776979446,
|
3487 |
+
"dpo_wo_beta": -0.24069656431674957,
|
3488 |
+
"epoch": 2.8105810108644307,
|
3489 |
+
"grad_norm": 16.660819641239655,
|
3490 |
+
"learning_rate": 5.930969754901844e-08,
|
3491 |
+
"logits": -1.8860033750534058,
|
3492 |
+
"logps": -116.69475555419922,
|
3493 |
+
"loss": 0.1285,
|
3494 |
+
"objective": 0.10726428776979446,
|
3495 |
+
"ranking_idealized": 0.49166667461395264,
|
3496 |
+
"ranking_idealized_expo": 0.4333333373069763,
|
3497 |
+
"ranking_simple": 0.8833333253860474,
|
3498 |
+
"regularize": 0.10726428776979446,
|
3499 |
+
"step": 990
|
3500 |
+
},
|
3501 |
+
{
|
3502 |
+
"dpo_loss": 0.12048947066068649,
|
3503 |
+
"dpo_wo_beta": -0.4086553454399109,
|
3504 |
+
"epoch": 2.8247520075578647,
|
3505 |
+
"grad_norm": 18.459518556479477,
|
3506 |
+
"learning_rate": 5.069311921774039e-08,
|
3507 |
+
"logits": -1.8980218172073364,
|
3508 |
+
"logps": -120.89018249511719,
|
3509 |
+
"loss": 0.1282,
|
3510 |
+
"objective": 0.12048947066068649,
|
3511 |
+
"ranking_idealized": 0.6416666507720947,
|
3512 |
+
"ranking_idealized_expo": 0.550000011920929,
|
3513 |
+
"ranking_simple": 0.925000011920929,
|
3514 |
+
"regularize": 0.12048947066068649,
|
3515 |
+
"step": 995
|
3516 |
+
},
|
3517 |
+
{
|
3518 |
+
"dpo_loss": 0.08448319137096405,
|
3519 |
+
"dpo_wo_beta": -0.17254652082920074,
|
3520 |
+
"epoch": 2.838923004251299,
|
3521 |
+
"grad_norm": 13.073827319036173,
|
3522 |
+
"learning_rate": 4.2746156931490756e-08,
|
3523 |
+
"logits": -1.8106515407562256,
|
3524 |
+
"logps": -113.39408874511719,
|
3525 |
+
"loss": 0.1237,
|
3526 |
+
"objective": 0.08448319137096405,
|
3527 |
+
"ranking_idealized": 0.6000000238418579,
|
3528 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
3529 |
+
"ranking_simple": 0.9333333373069763,
|
3530 |
+
"regularize": 0.08448319137096405,
|
3531 |
+
"step": 1000
|
3532 |
+
},
|
3533 |
+
{
|
3534 |
+
"dpo_loss": 0.12625885009765625,
|
3535 |
+
"dpo_wo_beta": -0.28751522302627563,
|
3536 |
+
"epoch": 2.853094000944733,
|
3537 |
+
"grad_norm": 11.352338165734487,
|
3538 |
+
"learning_rate": 3.547098331040916e-08,
|
3539 |
+
"logits": -1.8715885877609253,
|
3540 |
+
"logps": -115.85346984863281,
|
3541 |
+
"loss": 0.1219,
|
3542 |
+
"objective": 0.12625885009765625,
|
3543 |
+
"ranking_idealized": 0.5833333134651184,
|
3544 |
+
"ranking_idealized_expo": 0.4749999940395355,
|
3545 |
+
"ranking_simple": 0.8833333253860474,
|
3546 |
+
"regularize": 0.12625885009765625,
|
3547 |
+
"step": 1005
|
3548 |
+
},
|
3549 |
+
{
|
3550 |
+
"epoch": 2.8587623996221065,
|
3551 |
+
"eval_dpo_loss": 0.8881875872612,
|
3552 |
+
"eval_dpo_wo_beta": -6.153732776641846,
|
3553 |
+
"eval_logits": -2.5127134323120117,
|
3554 |
+
"eval_logps": -123.14542388916016,
|
3555 |
+
"eval_loss": 0.868183434009552,
|
3556 |
+
"eval_objective": 0.8881875872612,
|
3557 |
+
"eval_ranking_idealized": 0.6045548915863037,
|
3558 |
+
"eval_ranking_idealized_expo": 0.5279502868652344,
|
3559 |
+
"eval_ranking_simple": 0.5600414276123047,
|
3560 |
+
"eval_regularize": 0.8881875872612,
|
3561 |
+
"eval_runtime": 375.0227,
|
3562 |
+
"eval_samples_per_second": 15.439,
|
3563 |
+
"eval_steps_per_second": 1.288,
|
3564 |
+
"step": 1007
|
3565 |
+
},
|
3566 |
+
{
|
3567 |
+
"dpo_loss": 0.12578138709068298,
|
3568 |
+
"dpo_wo_beta": -0.4055772125720978,
|
3569 |
+
"epoch": 2.8672649976381672,
|
3570 |
+
"grad_norm": 13.654143333766829,
|
3571 |
+
"learning_rate": 2.8869587314321324e-08,
|
3572 |
+
"logits": -1.871021032333374,
|
3573 |
+
"logps": -118.54988861083984,
|
3574 |
+
"loss": 0.1301,
|
3575 |
+
"objective": 0.12578138709068298,
|
3576 |
+
"ranking_idealized": 0.6499999761581421,
|
3577 |
+
"ranking_idealized_expo": 0.5666666626930237,
|
3578 |
+
"ranking_simple": 0.9166666865348816,
|
3579 |
+
"regularize": 0.12578138709068298,
|
3580 |
+
"step": 1010
|
3581 |
+
},
|
3582 |
+
{
|
3583 |
+
"dpo_loss": 0.11141829192638397,
|
3584 |
+
"dpo_wo_beta": -0.452913373708725,
|
3585 |
+
"epoch": 2.8814359943316012,
|
3586 |
+
"grad_norm": 14.00967768494451,
|
3587 |
+
"learning_rate": 2.2943773698977935e-08,
|
3588 |
+
"logits": -1.8538991212844849,
|
3589 |
+
"logps": -119.40221405029297,
|
3590 |
+
"loss": 0.1157,
|
3591 |
+
"objective": 0.11141829192638397,
|
3592 |
+
"ranking_idealized": 0.6000000238418579,
|
3593 |
+
"ranking_idealized_expo": 0.5,
|
3594 |
+
"ranking_simple": 0.9416666626930237,
|
3595 |
+
"regularize": 0.11141829192638397,
|
3596 |
+
"step": 1015
|
3597 |
+
},
|
3598 |
+
{
|
3599 |
+
"dpo_loss": 0.10787668824195862,
|
3600 |
+
"dpo_wo_beta": -0.35029396414756775,
|
3601 |
+
"epoch": 2.8956069910250353,
|
3602 |
+
"grad_norm": 12.590503217808422,
|
3603 |
+
"learning_rate": 1.7695162522652352e-08,
|
3604 |
+
"logits": -1.9000986814498901,
|
3605 |
+
"logps": -122.90519714355469,
|
3606 |
+
"loss": 0.1076,
|
3607 |
+
"objective": 0.10787668824195862,
|
3608 |
+
"ranking_idealized": 0.6083333492279053,
|
3609 |
+
"ranking_idealized_expo": 0.5,
|
3610 |
+
"ranking_simple": 0.8999999761581421,
|
3611 |
+
"regularize": 0.10787668824195862,
|
3612 |
+
"step": 1020
|
3613 |
+
},
|
3614 |
+
{
|
3615 |
+
"dpo_loss": 0.11394120752811432,
|
3616 |
+
"dpo_wo_beta": -0.36261746287345886,
|
3617 |
+
"epoch": 2.9097779877184697,
|
3618 |
+
"grad_norm": 10.606906345600125,
|
3619 |
+
"learning_rate": 1.3125188703233815e-08,
|
3620 |
+
"logits": -1.8986002206802368,
|
3621 |
+
"logps": -116.403564453125,
|
3622 |
+
"loss": 0.1089,
|
3623 |
+
"objective": 0.11394120752811432,
|
3624 |
+
"ranking_idealized": 0.5583333373069763,
|
3625 |
+
"ranking_idealized_expo": 0.44999998807907104,
|
3626 |
+
"ranking_simple": 0.9333333373069763,
|
3627 |
+
"regularize": 0.11394120752811432,
|
3628 |
+
"step": 1025
|
3629 |
+
},
|
3630 |
+
{
|
3631 |
+
"dpo_loss": 0.0906638652086258,
|
3632 |
+
"dpo_wo_beta": -0.1376449316740036,
|
3633 |
+
"epoch": 2.9239489844119038,
|
3634 |
+
"grad_norm": 12.750602018189479,
|
3635 |
+
"learning_rate": 9.235101625932885e-09,
|
3636 |
+
"logits": -2.033400058746338,
|
3637 |
+
"logps": -113.65220642089844,
|
3638 |
+
"loss": 0.1197,
|
3639 |
+
"objective": 0.0906638652086258,
|
3640 |
+
"ranking_idealized": 0.6000000238418579,
|
3641 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
3642 |
+
"ranking_simple": 0.925000011920929,
|
3643 |
+
"regularize": 0.0906638652086258,
|
3644 |
+
"step": 1030
|
3645 |
+
},
|
3646 |
+
{
|
3647 |
+
"dpo_loss": 0.10265343636274338,
|
3648 |
+
"dpo_wo_beta": -0.1061137467622757,
|
3649 |
+
"epoch": 2.938119981105338,
|
3650 |
+
"grad_norm": 15.296605965797069,
|
3651 |
+
"learning_rate": 6.025964801714412e-09,
|
3652 |
+
"logits": -1.8468897342681885,
|
3653 |
+
"logps": -119.85134887695312,
|
3654 |
+
"loss": 0.1049,
|
3655 |
+
"objective": 0.10265343636274338,
|
3656 |
+
"ranking_idealized": 0.6083333492279053,
|
3657 |
+
"ranking_idealized_expo": 0.49166667461395264,
|
3658 |
+
"ranking_simple": 0.8916666507720947,
|
3659 |
+
"regularize": 0.10265343636274338,
|
3660 |
+
"step": 1035
|
3661 |
+
},
|
3662 |
+
{
|
3663 |
+
"dpo_loss": 0.09396873414516449,
|
3664 |
+
"dpo_wo_beta": -0.1912200003862381,
|
3665 |
+
"epoch": 2.952290977798772,
|
3666 |
+
"grad_norm": 15.90972962002085,
|
3667 |
+
"learning_rate": 3.4986555765434415e-09,
|
3668 |
+
"logits": -1.8800926208496094,
|
3669 |
+
"logps": -122.51961517333984,
|
3670 |
+
"loss": 0.0994,
|
3671 |
+
"objective": 0.09396873414516449,
|
3672 |
+
"ranking_idealized": 0.6499999761581421,
|
3673 |
+
"ranking_idealized_expo": 0.5583333373069763,
|
3674 |
+
"ranking_simple": 0.9083333611488342,
|
3675 |
+
"regularize": 0.09396873414516449,
|
3676 |
+
"step": 1040
|
3677 |
+
},
|
3678 |
+
{
|
3679 |
+
"dpo_loss": 0.14692950248718262,
|
3680 |
+
"dpo_wo_beta": -0.6586350798606873,
|
3681 |
+
"epoch": 2.966461974492206,
|
3682 |
+
"grad_norm": 8.0573279067109,
|
3683 |
+
"learning_rate": 1.6538648915270794e-09,
|
3684 |
+
"logits": -1.8756026029586792,
|
3685 |
+
"logps": -119.65303039550781,
|
3686 |
+
"loss": 0.1082,
|
3687 |
+
"objective": 0.14692950248718262,
|
3688 |
+
"ranking_idealized": 0.6416666507720947,
|
3689 |
+
"ranking_idealized_expo": 0.5416666865348816,
|
3690 |
+
"ranking_simple": 0.9166666865348816,
|
3691 |
+
"regularize": 0.14692950248718262,
|
3692 |
+
"step": 1045
|
3693 |
+
},
|
3694 |
+
{
|
3695 |
+
"dpo_loss": 0.07982174307107925,
|
3696 |
+
"dpo_wo_beta": -0.0584401935338974,
|
3697 |
+
"epoch": 2.9806329711856403,
|
3698 |
+
"grad_norm": 9.88886611903832,
|
3699 |
+
"learning_rate": 4.920970940180958e-10,
|
3700 |
+
"logits": -1.912126898765564,
|
3701 |
+
"logps": -116.61032104492188,
|
3702 |
+
"loss": 0.0891,
|
3703 |
+
"objective": 0.07982174307107925,
|
3704 |
+
"ranking_idealized": 0.6166666746139526,
|
3705 |
+
"ranking_idealized_expo": 0.5249999761581421,
|
3706 |
+
"ranking_simple": 0.925000011920929,
|
3707 |
+
"regularize": 0.07982174307107925,
|
3708 |
+
"step": 1050
|
3709 |
+
},
|
3710 |
+
{
|
3711 |
+
"dpo_loss": 0.07663024961948395,
|
3712 |
+
"dpo_wo_beta": -0.015355088748037815,
|
3713 |
+
"epoch": 2.9948039678790743,
|
3714 |
+
"grad_norm": 13.025122561532541,
|
3715 |
+
"learning_rate": 1.3669799732163314e-11,
|
3716 |
+
"logits": -1.775391697883606,
|
3717 |
+
"logps": -116.977294921875,
|
3718 |
+
"loss": 0.0869,
|
3719 |
+
"objective": 0.07663024961948395,
|
3720 |
+
"ranking_idealized": 0.6333333253860474,
|
3721 |
+
"ranking_idealized_expo": 0.550000011920929,
|
3722 |
+
"ranking_simple": 0.8999999761581421,
|
3723 |
+
"regularize": 0.07663024961948395,
|
3724 |
+
"step": 1055
|
3725 |
+
},
|
3726 |
{
|
3727 |
"epoch": 2.9976381672177608,
|
3728 |
+
"step": 1056,
|
3729 |
"total_flos": 0.0,
|
3730 |
+
"train_loss": 0.022545777056648425,
|
3731 |
+
"train_runtime": 4386.5835,
|
3732 |
+
"train_samples_per_second": 34.744,
|
3733 |
+
"train_steps_per_second": 0.241
|
3734 |
}
|
3735 |
],
|
3736 |
"logging_steps": 5,
|
3737 |
+
"max_steps": 1056,
|
3738 |
"num_input_tokens_seen": 0,
|
3739 |
"num_train_epochs": 3,
|
3740 |
"save_steps": 53,
|