Model save

Files changed:

- README.md (+24 -32)
- adapter_config.json (+5 -5)
- adapter_model.safetensors (+2 -2)
- all_results.json (+4 -4)
- train_results.json (+4 -4)
- trainer_state.json (+366 -414)
- training_args.bin (+1 -1)
README.md
CHANGED
@@ -2,10 +2,6 @@
 license: apache-2.0
 library_name: peft
 tags:
-- choo-choo
-- trl
-- dpo
-- generated_from_trainer
 - trl
 - dpo
 - generated_from_trainer
@@ -20,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-lora-dpo-dibt-v0
 
-This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: 0.
-- Rewards/rejected:
-- Rewards/accuracies: 0.
-- Rewards/margins:
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -2.
-- Logits/chosen: -2.
+- Loss: 0.1666
+- Rewards/chosen: -0.7428
+- Rewards/rejected: -5.5139
+- Rewards/accuracies: 0.9375
+- Rewards/margins: 4.7711
+- Logps/rejected: -387.5656
+- Logps/chosen: -341.2073
+- Logits/rejected: -2.1864
+- Logits/chosen: -2.2314
 
 ## Model description
 
@@ -49,12 +45,11 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 5e-
-- train_batch_size:
-- eval_batch_size:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 16
 - seed: 42
--
-- gradient_accumulation_steps: 2
+- gradient_accumulation_steps: 4
 - total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
@@ -65,19 +60,16 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.6848 | 1.6 | 165 | 0.6885 | 0.0171 | 0.0069 | 0.5924 | 0.0102 | -302.1031 | -309.3650 | -2.7007 | -2.7426 |
-| 0.6906 | 1.75 | 180 | 0.6897 | 0.0183 | 0.0107 | 0.5652 | 0.0077 | -302.0652 | -309.3525 | -2.7007 | -2.7426 |
-| 0.6878 | 1.89 | 195 | 0.6880 | 0.0206 | 0.0095 | 0.5543 | 0.0111 | -302.0764 | -309.3295 | -2.7006 | -2.7426 |
+| 0.6028 | 0.19 | 20 | 0.5286 | 0.8789 | 0.4471 | 0.8125 | 0.4318 | -327.9556 | -324.9910 | -2.6143 | -2.6401 |
+| 0.3363 | 0.39 | 40 | 0.3232 | 0.5215 | -1.1097 | 0.8594 | 1.6312 | -343.5236 | -328.5651 | -2.5076 | -2.5352 |
+| 0.2458 | 0.58 | 60 | 0.2501 | 0.5738 | -1.8685 | 0.9115 | 2.4423 | -351.1114 | -328.0413 | -2.5602 | -2.5924 |
+| 0.2116 | 0.78 | 80 | 0.1991 | -0.6755 | -3.8274 | 0.9167 | 3.1519 | -370.7006 | -340.5351 | -2.3129 | -2.3427 |
+| 0.1386 | 0.97 | 100 | 0.2002 | 0.2920 | -3.0192 | 0.9375 | 3.3111 | -362.6181 | -330.8600 | -2.3132 | -2.3535 |
+| 0.0458 | 1.17 | 120 | 0.1748 | -1.3802 | -5.8772 | 0.9479 | 4.4969 | -391.1983 | -347.5820 | -2.2290 | -2.2717 |
+| 0.0426 | 1.36 | 140 | 0.1755 | -0.0635 | -4.3090 | 0.9375 | 4.2455 | -375.5160 | -334.4143 | -2.1959 | -2.2403 |
+| 0.029 | 1.55 | 160 | 0.1692 | -0.7990 | -5.4881 | 0.9375 | 4.6891 | -387.3076 | -341.7697 | -2.1893 | -2.2329 |
+| 0.0676 | 1.75 | 180 | 0.1676 | -0.6944 | -5.4513 | 0.9375 | 4.7569 | -386.9397 | -340.7238 | -2.1864 | -2.2314 |
+| 0.0517 | 1.94 | 200 | 0.1666 | -0.7428 | -5.5139 | 0.9375 | 4.7711 | -387.5656 | -341.2073 | -2.1864 | -2.2314 |
 
 
 ### Framework versions
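Not part of the commit itself, but as a usage note for the updated model card: a minimal inference sketch with PEFT, assuming the adapter is published under a hypothetical repo id `your-org/zephyr-7b-lora-dpo-dibt-v0` (placeholder) and that the base checkpoint is the `alignment-handbook/zephyr-7b-sft-full` model named above.

```python
# Sketch only: attach this LoRA adapter to the SFT base model for generation.
# "your-org/zephyr-7b-lora-dpo-dibt-v0" is a placeholder id, not confirmed by the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "alignment-handbook/zephyr-7b-sft-full"
adapter_id = "your-org/zephyr-7b-lora-dpo-dibt-v0"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # reads adapter_config.json + adapter_model.safetensors

prompt = "Explain in one sentence what DPO training changes about a model."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```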
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
+    "v_proj",
     "q_proj",
-    "o_proj",
-    "up_proj",
     "gate_proj",
-    "
-    "
+    "down_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
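For reference, a minimal sketch of how the updated `target_modules` list would be expressed with peft's `LoraConfig`; the adapter's rank and alpha are not visible in this hunk, so those values below are placeholders.

```python
# Sketch only: target_modules mirrors the new adapter_config.json; r/lora_alpha are assumed.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,           # placeholder; the actual rank is not shown in this hunk
    lora_alpha=32,  # placeholder
    target_modules=["v_proj", "q_proj", "gate_proj", "down_proj", "up_proj", "k_proj", "o_proj"],
    task_type="CAUSAL_LM",
    use_rslora=False,
)
print(lora_config)
```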
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e9f9dcda5a1bb845877e783f80b70edbd516461f7ed542c3bdbef85cd88c3304
+size 83946192
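The weights themselves live in Git LFS; only the pointer (oid and size) changed in this commit. A small sketch, assuming the real file has been pulled locally, for checking that the downloaded adapter_model.safetensors matches the new pointer:

```python
# Verify a downloaded LFS object against the pointer's sha256 and size.
import hashlib
import os

path = "adapter_model.safetensors"
expected_oid = "e9f9dcda5a1bb845877e783f80b70edbd516461f7ed542c3bdbef85cd88c3304"
expected_size = 83946192

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("local file matches the LFS pointer")
```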
all_results.json
CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 2.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_loss": 0.1882365908726905,
+    "train_runtime": 5068.0756,
+    "train_samples_per_second": 0.65,
+    "train_steps_per_second": 0.041
 }
train_results.json
CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 2.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_loss": 0.1882365908726905,
+    "train_runtime": 5068.0756,
+    "train_samples_per_second": 0.65,
+    "train_steps_per_second": 0.041
 }
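Both all_results.json and train_results.json carry the same four training statistics in this commit; a small sketch for reading them back:

```python
# Read the saved training statistics (same keys in all_results.json and train_results.json).
import json

with open("train_results.json") as f:
    stats = json.load(f)

print(f"epoch:              {stats['epoch']}")
print(f"train_loss:         {stats['train_loss']:.4f}")
print(f"train_runtime:      {stats['train_runtime']:.1f} s")
print(f"samples per second: {stats['train_samples_per_second']}")
print(f"steps per second:   {stats['train_steps_per_second']}")
```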
trainer_state.json
CHANGED
@@ -2,7 +2,7 @@
   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 2.0,
+  "eval_steps": 20,
   "global_step": 206,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
@@ -10,12 +10,12 @@
   "log_history": [
     {
       "epoch": 0.01,
+      "grad_norm": 8.9375,
+      "learning_rate": 2.3809523809523808e-06,
+      "logits/chosen": -2.7700600624084473,
+      "logits/rejected": -2.8606302738189697,
+      "logps/chosen": -421.64996337890625,
+      "logps/rejected": -531.4378662109375,
       "loss": 0.6931,
       "rewards/accuracies": 0.0,
       "rewards/chosen": 0.0,
@@ -25,520 +25,472 @@
     },
     {
       "epoch": 0.1,
+      "grad_norm": 7.375,
+      "learning_rate": 2.380952380952381e-05,
+      "logits/chosen": -2.7684054374694824,
+      "logits/rejected": -2.7337145805358887,
+      "logps/chosen": -333.7870178222656,
+      "logps/rejected": -312.4859313964844,
+      "loss": 0.6852,
+      "rewards/accuracies": 0.5277777910232544,
+      "rewards/chosen": 0.030088074505329132,
+      "rewards/margins": 0.01666567102074623,
+      "rewards/rejected": 0.013422403484582901,
       "step": 10
     },
-    {
-      "epoch": 0.15,
-      "eval_logits/chosen": -2.7432966232299805,
-      "eval_logits/rejected": -2.7012619972229004,
-      "eval_logps/chosen": -309.50311279296875,
-      "eval_logps/rejected": -302.12677001953125,
-      "eval_loss": 0.6940569281578064,
-      "eval_rewards/accuracies": 0.42934781312942505,
-      "eval_rewards/chosen": 0.0032746284268796444,
-      "eval_rewards/margins": -0.0012154963333159685,
-      "eval_rewards/rejected": 0.004490124061703682,
-      "eval_runtime": 80.1148,
-      "eval_samples_per_second": 2.297,
-      "eval_steps_per_second": 0.287,
-      "step": 15
-    },
     {
       "epoch": 0.19,
+      "grad_norm": 6.125,
+      "learning_rate": 4.761904761904762e-05,
+      "logits/chosen": -2.8010494709014893,
+      "logits/rejected": -2.79127836227417,
+      "logps/chosen": -331.8260498046875,
+      "logps/rejected": -332.01409912109375,
+      "loss": 0.6028,
+      "rewards/accuracies": 0.8187500238418579,
+      "rewards/chosen": 0.41256317496299744,
+      "rewards/margins": 0.21184520423412323,
+      "rewards/rejected": 0.2007180005311966,
       "step": 20
     },
     {
+      "epoch": 0.19,
+      "eval_logits/chosen": -2.6401147842407227,
+      "eval_logits/rejected": -2.614283800125122,
+      "eval_logps/chosen": -324.9909973144531,
+      "eval_logps/rejected": -327.9555969238281,
+      "eval_loss": 0.5285959243774414,
+      "eval_rewards/accuracies": 0.8125,
+      "eval_rewards/chosen": 0.878866970539093,
+      "eval_rewards/margins": 0.43177998065948486,
+      "eval_rewards/rejected": 0.44708704948425293,
+      "eval_runtime": 114.9886,
+      "eval_samples_per_second": 1.6,
+      "eval_steps_per_second": 0.104,
+      "step": 20
     },
     {
       "epoch": 0.29,
+      "grad_norm": 4.84375,
+      "learning_rate": 4.9708589101037306e-05,
+      "logits/chosen": -2.6763195991516113,
+      "logits/rejected": -2.651015043258667,
+      "logps/chosen": -356.8539733886719,
+      "logps/rejected": -363.6021423339844,
+      "loss": 0.4643,
+      "rewards/accuracies": 0.887499988079071,
+      "rewards/chosen": 0.943565845489502,
+      "rewards/margins": 0.708370566368103,
+      "rewards/rejected": 0.23519524931907654,
       "step": 30
     },
     {
       "epoch": 0.39,
+      "grad_norm": 5.75,
+      "learning_rate": 4.870996167038154e-05,
+      "logits/chosen": -2.655568838119507,
+      "logits/rejected": -2.6175591945648193,
+      "logps/chosen": -353.34619140625,
+      "logps/rejected": -359.96832275390625,
+      "loss": 0.3363,
+      "rewards/accuracies": 0.8500000238418579,
+      "rewards/chosen": 0.6268302798271179,
+      "rewards/margins": 1.4118897914886475,
+      "rewards/rejected": -0.7850595712661743,
       "step": 40
     },
     {
+      "epoch": 0.39,
+      "eval_logits/chosen": -2.535161256790161,
+      "eval_logits/rejected": -2.5076351165771484,
+      "eval_logps/chosen": -328.5650939941406,
+      "eval_logps/rejected": -343.5235900878906,
+      "eval_loss": 0.32319265604019165,
+      "eval_rewards/accuracies": 0.859375,
+      "eval_rewards/chosen": 0.5214586853981018,
+      "eval_rewards/margins": 1.6311697959899902,
+      "eval_rewards/rejected": -1.1097110509872437,
+      "eval_runtime": 114.9563,
+      "eval_samples_per_second": 1.601,
+      "eval_steps_per_second": 0.104,
+      "step": 40
     },
     {
       "epoch": 0.49,
+      "grad_norm": 4.4375,
+      "learning_rate": 4.7029241811087457e-05,
+      "logits/chosen": -2.682722806930542,
+      "logits/rejected": -2.627808094024658,
+      "logps/chosen": -382.26690673828125,
+      "logps/rejected": -376.25689697265625,
+      "loss": 0.3043,
+      "rewards/accuracies": 0.875,
+      "rewards/chosen": 0.3637928366661072,
+      "rewards/margins": 1.71381413936615,
+      "rewards/rejected": -1.3500211238861084,
       "step": 50
     },
     {
       "epoch": 0.58,
+      "grad_norm": 5.875,
+      "learning_rate": 4.471478077342798e-05,
+      "logits/chosen": -2.6791253089904785,
+      "logits/rejected": -2.641322374343872,
+      "logps/chosen": -344.8480529785156,
+      "logps/rejected": -372.0831298828125,
+      "loss": 0.2458,
+      "rewards/accuracies": 0.9125000238418579,
+      "rewards/chosen": 0.6151469349861145,
+      "rewards/margins": 2.2607688903808594,
+      "rewards/rejected": -1.6456218957901,
       "step": 60
     },
     {
       "epoch": 0.58,
+      "eval_logits/chosen": -2.592442274093628,
+      "eval_logits/rejected": -2.560177803039551,
+      "eval_logps/chosen": -328.04132080078125,
+      "eval_logps/rejected": -351.1114196777344,
+      "eval_loss": 0.2501268982887268,
+      "eval_rewards/accuracies": 0.9114583134651184,
+      "eval_rewards/chosen": 0.5738345980644226,
+      "eval_rewards/margins": 2.4423279762268066,
+      "eval_rewards/rejected": -1.8684934377670288,
+      "eval_runtime": 115.0094,
+      "eval_samples_per_second": 1.6,
+      "eval_steps_per_second": 0.104,
       "step": 60
     },
     {
       "epoch": 0.68,
+      "grad_norm": 4.3125,
+      "learning_rate": 4.1833161387527986e-05,
+      "logits/chosen": -2.6796765327453613,
+      "logits/rejected": -2.6534857749938965,
+      "logps/chosen": -371.1224670410156,
+      "logps/rejected": -358.3480529785156,
+      "loss": 0.2487,
+      "rewards/accuracies": 0.8999999761581421,
+      "rewards/chosen": -1.5859086513519287,
+      "rewards/margins": 2.399681329727173,
+      "rewards/rejected": -3.9855899810791016,
       "step": 70
     },
-    {
-      "epoch": 0.73,
-      "eval_logits/chosen": -2.742759943008423,
-      "eval_logits/rejected": -2.700887680053711,
-      "eval_logps/chosen": -309.398681640625,
-      "eval_logps/rejected": -302.1106872558594,
-      "eval_loss": 0.6897082328796387,
-      "eval_rewards/accuracies": 0.5489130616188049,
-      "eval_rewards/chosen": 0.01371287927031517,
-      "eval_rewards/margins": 0.0076101175509393215,
-      "eval_rewards/rejected": 0.006102761719375849,
-      "eval_runtime": 80.0628,
-      "eval_samples_per_second": 2.298,
-      "eval_steps_per_second": 0.287,
-      "step": 75
-    },
     {
       "epoch": 0.78,
+      "grad_norm": 4.4375,
+      "learning_rate": 3.84672825965686e-05,
+      "logits/chosen": -2.568530559539795,
+      "logits/rejected": -2.5246570110321045,
+      "logps/chosen": -354.64984130859375,
+      "logps/rejected": -360.8916931152344,
+      "loss": 0.2116,
+      "rewards/accuracies": 0.9125000238418579,
+      "rewards/chosen": -0.9132896661758423,
+      "rewards/margins": 2.766739845275879,
+      "rewards/rejected": -3.680030107498169,
       "step": 80
     },
     {
+      "epoch": 0.78,
+      "eval_logits/chosen": -2.3427236080169678,
+      "eval_logits/rejected": -2.3129446506500244,
+      "eval_logps/chosen": -340.5351257324219,
+      "eval_logps/rejected": -370.7005920410156,
+      "eval_loss": 0.19913233816623688,
+      "eval_rewards/accuracies": 0.9166666865348816,
+      "eval_rewards/chosen": -0.6755423545837402,
+      "eval_rewards/margins": 3.1518704891204834,
+      "eval_rewards/rejected": -3.8274126052856445,
+      "eval_runtime": 114.9725,
+      "eval_samples_per_second": 1.6,
+      "eval_steps_per_second": 0.104,
+      "step": 80
     },
     {
       "epoch": 0.87,
+      "grad_norm": 2.0,
+      "learning_rate": 3.471397460512563e-05,
+      "logits/chosen": -2.428431272506714,
+      "logits/rejected": -2.410618782043457,
+      "logps/chosen": -361.0326232910156,
+      "logps/rejected": -401.57269287109375,
+      "loss": 0.1841,
+      "rewards/accuracies": 0.925000011920929,
+      "rewards/chosen": -0.7255961298942566,
+      "rewards/margins": 3.4665279388427734,
+      "rewards/rejected": -4.192124366760254,
       "step": 90
     },
     {
       "epoch": 0.97,
+      "grad_norm": 5.4375,
+      "learning_rate": 3.0681213250482255e-05,
+      "logits/chosen": -2.3709776401519775,
+      "logits/rejected": -2.353501796722412,
+      "logps/chosen": -331.63623046875,
+      "logps/rejected": -377.9563903808594,
+      "loss": 0.1386,
+      "rewards/accuracies": 0.949999988079071,
+      "rewards/chosen": -0.6168140172958374,
+      "rewards/margins": 3.699888229370117,
+      "rewards/rejected": -4.316702365875244,
       "step": 100
     },
     {
+      "epoch": 0.97,
+      "eval_logits/chosen": -2.3535118103027344,
+      "eval_logits/rejected": -2.3131775856018066,
+      "eval_logps/chosen": -330.8599548339844,
+      "eval_logps/rejected": -362.6181335449219,
+      "eval_loss": 0.20019526779651642,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/chosen": 0.29197368025779724,
+      "eval_rewards/margins": 3.311133623123169,
+      "eval_rewards/rejected": -3.019160032272339,
+      "eval_runtime": 114.9013,
+      "eval_samples_per_second": 1.601,
+      "eval_steps_per_second": 0.104,
+      "step": 100
     },
     {
       "epoch": 1.07,
+      "grad_norm": 0.76953125,
+      "learning_rate": 2.648501373438142e-05,
+      "logits/chosen": -2.4437708854675293,
+      "logits/rejected": -2.4320626258850098,
+      "logps/chosen": -344.7204895019531,
+      "logps/rejected": -410.47601318359375,
+      "loss": 0.0711,
+      "rewards/accuracies": 0.987500011920929,
+      "rewards/chosen": -0.051882706582546234,
+      "rewards/margins": 5.166382789611816,
+      "rewards/rejected": -5.218265533447266,
       "step": 110
     },
     {
       "epoch": 1.17,
+      "grad_norm": 1.4375,
+      "learning_rate": 2.2246093076900144e-05,
+      "logits/chosen": -2.430386781692505,
+      "logits/rejected": -2.34106183052063,
+      "logps/chosen": -400.32452392578125,
+      "logps/rejected": -425.37457275390625,
+      "loss": 0.0458,
+      "rewards/accuracies": 0.987500011920929,
+      "rewards/chosen": -1.0012158155441284,
+      "rewards/margins": 6.266473293304443,
+      "rewards/rejected": -7.2676897048950195,
       "step": 120
     },
     {
       "epoch": 1.17,
+      "eval_logits/chosen": -2.2717294692993164,
+      "eval_logits/rejected": -2.2290468215942383,
+      "eval_logps/chosen": -347.5820007324219,
+      "eval_logps/rejected": -391.1982727050781,
+      "eval_loss": 0.17477566003799438,
+      "eval_rewards/accuracies": 0.9479166865348816,
+      "eval_rewards/chosen": -1.3802350759506226,
+      "eval_rewards/margins": 4.496945858001709,
+      "eval_rewards/rejected": -5.877180576324463,
+      "eval_runtime": 114.9627,
+      "eval_samples_per_second": 1.601,
+      "eval_steps_per_second": 0.104,
       "step": 120
     },
     {
       "epoch": 1.26,
+      "grad_norm": 1.0546875,
+      "learning_rate": 1.8086397307570723e-05,
+      "logits/chosen": -2.376091957092285,
+      "logits/rejected": -2.3415114879608154,
+      "logps/chosen": -337.0244140625,
+      "logps/rejected": -408.39263916015625,
+      "loss": 0.0283,
+      "rewards/accuracies": 0.9937499761581421,
+      "rewards/chosen": -0.006544408388435841,
+      "rewards/margins": 6.594322204589844,
+      "rewards/rejected": -6.600866794586182,
       "step": 130
     },
-    {
-      "epoch": 1.31,
-      "eval_logits/chosen": -2.7425851821899414,
-      "eval_logits/rejected": -2.7006468772888184,
-      "eval_logps/chosen": -309.3547058105469,
-      "eval_logps/rejected": -302.1122741699219,
-      "eval_loss": 0.6874393820762634,
-      "eval_rewards/accuracies": 0.5489130616188049,
-      "eval_rewards/chosen": 0.018117737025022507,
-      "eval_rewards/margins": 0.012173162773251534,
-      "eval_rewards/rejected": 0.005944571923464537,
-      "eval_runtime": 80.1104,
-      "eval_samples_per_second": 2.297,
-      "eval_steps_per_second": 0.287,
-      "step": 135
-    },
     {
       "epoch": 1.36,
+      "grad_norm": 1.3515625,
+      "learning_rate": 1.4125593300137766e-05,
+      "logits/chosen": -2.364224672317505,
+      "logits/rejected": -2.310997724533081,
+      "logps/chosen": -343.6619567871094,
+      "logps/rejected": -398.0953674316406,
+      "loss": 0.0426,
+      "rewards/accuracies": 0.9937499761581421,
+      "rewards/chosen": 0.7415364384651184,
+      "rewards/margins": 6.200386047363281,
+      "rewards/rejected": -5.4588494300842285,
       "step": 140
     },
     {
+      "epoch": 1.36,
+      "eval_logits/chosen": -2.240306854248047,
+      "eval_logits/rejected": -2.195923328399658,
+      "eval_logps/chosen": -334.4142761230469,
+      "eval_logps/rejected": -375.5160217285156,
+      "eval_loss": 0.17553412914276123,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/chosen": -0.06346017122268677,
+      "eval_rewards/margins": 4.2454915046691895,
+      "eval_rewards/rejected": -4.3089518547058105,
+      "eval_runtime": 115.021,
+      "eval_samples_per_second": 1.6,
+      "eval_steps_per_second": 0.104,
+      "step": 140
     },
     {
       "epoch": 1.46,
+      "grad_norm": 0.8515625,
+      "learning_rate": 1.0477626174477404e-05,
+      "logits/chosen": -2.3424103260040283,
+      "logits/rejected": -2.2978971004486084,
+      "logps/chosen": -347.8079528808594,
+      "logps/rejected": -375.98260498046875,
+      "loss": 0.0508,
+      "rewards/accuracies": 0.9750000238418579,
+      "rewards/chosen": 0.3227364122867584,
+      "rewards/margins": 5.873551845550537,
+      "rewards/rejected": -5.550815582275391,
       "step": 150
     },
     {
       "epoch": 1.55,
+      "grad_norm": 1.96875,
+      "learning_rate": 7.247441302957858e-06,
+      "logits/chosen": -2.3495125770568848,
+      "logits/rejected": -2.307555675506592,
+      "logps/chosen": -331.29718017578125,
+      "logps/rejected": -415.452392578125,
+      "loss": 0.029,
+      "rewards/accuracies": 0.9937499761581421,
+      "rewards/chosen": 0.16269809007644653,
+      "rewards/margins": 6.475255012512207,
+      "rewards/rejected": -6.312556266784668,
       "step": 160
     },
     {
+      "epoch": 1.55,
+      "eval_logits/chosen": -2.232851266860962,
+      "eval_logits/rejected": -2.1893069744110107,
+      "eval_logps/chosen": -341.7696533203125,
+      "eval_logps/rejected": -387.3076171875,
+      "eval_loss": 0.16915130615234375,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/chosen": -0.7989979386329651,
+      "eval_rewards/margins": 4.689115524291992,
+      "eval_rewards/rejected": -5.4881134033203125,
+      "eval_runtime": 114.9918,
+      "eval_samples_per_second": 1.6,
+      "eval_steps_per_second": 0.104,
+      "step": 160
     },
     {
       "epoch": 1.65,
+      "grad_norm": 0.345703125,
+      "learning_rate": 4.527965223149957e-06,
+      "logits/chosen": -2.407200336456299,
+      "logits/rejected": -2.3430123329162598,
+      "logps/chosen": -387.9550476074219,
+      "logps/rejected": -445.9234313964844,
+      "loss": 0.0175,
+      "rewards/accuracies": 0.9937499761581421,
+      "rewards/chosen": 0.19500017166137695,
+      "rewards/margins": 7.179248809814453,
+      "rewards/rejected": -6.984248161315918,
       "step": 170
     },
     {
       "epoch": 1.75,
+      "grad_norm": 3.015625,
+      "learning_rate": 2.397432310532133e-06,
+      "logits/chosen": -2.3570303916931152,
+      "logits/rejected": -2.300320863723755,
+      "logps/chosen": -367.35577392578125,
+      "logps/rejected": -424.9029235839844,
+      "loss": 0.0676,
+      "rewards/accuracies": 0.981249988079071,
+      "rewards/chosen": 0.1322220414876938,
+      "rewards/margins": 6.755249977111816,
+      "rewards/rejected": -6.623027801513672,
       "step": 180
     },
     {
       "epoch": 1.75,
+      "eval_logits/chosen": -2.2314395904541016,
+      "eval_logits/rejected": -2.1864326000213623,
+      "eval_logps/chosen": -340.7237854003906,
+      "eval_logps/rejected": -386.9397277832031,
+      "eval_loss": 0.16764594614505768,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/chosen": -0.6944115161895752,
+      "eval_rewards/margins": 4.756911754608154,
+      "eval_rewards/rejected": -5.45132303237915,
+      "eval_runtime": 114.865,
+      "eval_samples_per_second": 1.602,
+      "eval_steps_per_second": 0.104,
       "step": 180
     },
     {
       "epoch": 1.84,
+      "grad_norm": 0.56640625,
+      "learning_rate": 9.171341179489034e-07,
+      "logits/chosen": -2.3660504817962646,
+      "logits/rejected": -2.2959539890289307,
+      "logps/chosen": -335.60052490234375,
+      "logps/rejected": -383.60040283203125,
+      "loss": 0.0178,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": 0.19160650670528412,
+      "rewards/margins": 6.720318794250488,
+      "rewards/rejected": -6.528712272644043,
       "step": 190
     },
     {
+      "epoch": 1.94,
+      "grad_norm": 3.9375,
+      "learning_rate": 1.296561292287446e-07,
+      "logits/chosen": -2.3115243911743164,
+      "logits/rejected": -2.281430959701538,
+      "logps/chosen": -323.0104675292969,
+      "logps/rejected": -385.94757080078125,
+      "loss": 0.0517,
+      "rewards/accuracies": 0.987500011920929,
+      "rewards/chosen": -0.006203270051628351,
+      "rewards/margins": 6.517538547515869,
+      "rewards/rejected": -6.523741722106934,
+      "step": 200
     },
     {
       "epoch": 1.94,
+      "eval_logits/chosen": -2.231421709060669,
+      "eval_logits/rejected": -2.186391830444336,
+      "eval_logps/chosen": -341.20733642578125,
+      "eval_logps/rejected": -387.5655517578125,
+      "eval_loss": 0.16659200191497803,
+      "eval_rewards/accuracies": 0.9375,
+      "eval_rewards/chosen": -0.7427660822868347,
+      "eval_rewards/margins": 4.771137237548828,
+      "eval_rewards/rejected": -5.513904094696045,
+      "eval_runtime": 114.34,
+      "eval_samples_per_second": 1.609,
+      "eval_steps_per_second": 0.105,
       "step": 200
     },
     {
       "epoch": 2.0,
       "step": 206,
       "total_flos": 0.0,
+      "train_loss": 0.1882365908726905,
+      "train_runtime": 5068.0756,
+      "train_samples_per_second": 0.65,
+      "train_steps_per_second": 0.041
     }
   ],
   "logging_steps": 10,
@@ -547,7 +499,7 @@
   "num_train_epochs": 2,
   "save_steps": 500,
   "total_flos": 0.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }
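A minimal sketch for working with the rewritten log_history, splitting it into training-step and evaluation-step records (entries whose keys start with eval_), for example to re-plot the run:

```python
# Split trainer_state.json's log_history into train-step and eval-step records.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e and "eval_loss" not in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_logs:
    print(f"step {e['step']:>3}: eval_loss={e['eval_loss']:.4f} "
          f"margin={e['eval_rewards/margins']:.3f} "
          f"acc={e['eval_rewards/accuracies']:.3f}")
```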
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:76581c05c1b2da8bbdb25fc2365035a91b842667aea3cc6aa3a8ecf3c7cded68
 size 5112
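training_args.bin is the pickled training-arguments object rather than readable text; a sketch for inspecting it locally, assuming a compatible transformers/trl environment is installed (recent torch versions need weights_only=False to unpickle arbitrary objects):

```python
# Inspect the serialized training arguments; requires the training libraries to unpickle.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print("learning_rate:", args.learning_rate)
print("per_device_train_batch_size:", args.per_device_train_batch_size)
print("gradient_accumulation_steps:", args.gradient_accumulation_steps)
```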