Model save
Browse files- README.md +25 -25
- all_results.json +5 -5
- model.safetensors +1 -1
- train_results.json +5 -5
- trainer_state.json +173 -8
README.md
CHANGED
@@ -2,16 +2,11 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- ndcg
|
7 |
-
- trl
|
8 |
-
- expo
|
9 |
-
- generated_from_trainer
|
10 |
- trl
|
11 |
- expo
|
|
|
|
|
12 |
- generated_from_trainer
|
13 |
-
datasets:
|
14 |
-
- hZzy/train_pairwise
|
15 |
model-index:
|
16 |
- name: qwen2.5-0.5b-expo-DPO-ES-0.01
|
17 |
results: []
|
@@ -20,21 +15,21 @@ model-index:
|
|
20 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
21 |
should probably proofread and complete it, then remove this comment. -->
|
22 |
|
23 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/
|
24 |
# qwen2.5-0.5b-expo-DPO-ES-0.01
|
25 |
|
26 |
-
This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on
|
27 |
It achieves the following results on the evaluation set:
|
28 |
-
- Loss: 0.
|
29 |
-
- Logps: -
|
30 |
-
- Logits: -
|
31 |
-
- Objective: 0.
|
32 |
-
- Dpo Loss: 0.
|
33 |
-
- Regularize: 0.
|
34 |
-
- Ranking Simple: 0.
|
35 |
- Ranking Idealized: 0.8732
|
36 |
- Ranking Idealized Expo: 0.5321
|
37 |
-
- Wo Beta:
|
38 |
|
39 |
## Model description
|
40 |
|
@@ -69,14 +64,19 @@ The following hyperparameters were used during training:
|
|
69 |
|
70 |
### Training results
|
71 |
|
72 |
-
| Training Loss | Epoch | Step |
|
73 |
-
|
74 |
-
| 0.6907 | 0.1417 | 50 | 0.
|
75 |
-
| 0.6701 | 0.2834 | 100 | 0.
|
76 |
-
| 0.637 | 0.4251 | 150 | 0.
|
77 |
-
| 0.5833 | 0.5668 | 200 | 0.
|
78 |
-
| 0.5439 | 0.7085 | 250 | 0.
|
79 |
-
| 0.5077 | 0.8503 | 300 | 0.
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
|
82 |
### Framework versions
|
|
|
2 |
license: apache-2.0
|
3 |
base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
|
4 |
tags:
|
|
|
|
|
|
|
|
|
|
|
5 |
- trl
|
6 |
- expo
|
7 |
+
- alignment-handbook
|
8 |
+
- ndcg
|
9 |
- generated_from_trainer
|
|
|
|
|
10 |
model-index:
|
11 |
- name: qwen2.5-0.5b-expo-DPO-ES-0.01
|
12 |
results: []
|
|
|
15 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
16 |
should probably proofread and complete it, then remove this comment. -->
|
17 |
|
18 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/r1jti62c)
|
19 |
# qwen2.5-0.5b-expo-DPO-ES-0.01
|
20 |
|
21 |
+
This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 0.6901
|
24 |
+
- Logps: -349.5348
|
25 |
+
- Logits: -5.2492
|
26 |
+
- Objective: 0.6972
|
27 |
+
- Dpo Loss: 0.6972
|
28 |
+
- Regularize: 0.6972
|
29 |
+
- Ranking Simple: 0.5719
|
30 |
- Ranking Idealized: 0.8732
|
31 |
- Ranking Idealized Expo: 0.5321
|
32 |
+
- Wo Beta: 31.1765
|
33 |
|
34 |
## Model description
|
35 |
|
|
|
64 |
|
65 |
### Training results
|
66 |
|
67 |
+
| Training Loss | Epoch | Step | Dpo Loss | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize | Wo Beta |
|
68 |
+
|:-------------:|:------:|:----:|:--------:|:-------:|:---------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|:-------:|
|
69 |
+
| 0.6907 | 0.1417 | 50 | 0.6902 | -1.7611 | -99.3821 | 0.6894 | 0.6902 | 0.8732 | 0.5321 | 0.5305 | 0.6902 | 9.3575 |
|
70 |
+
| 0.6701 | 0.2834 | 100 | 0.6896 | -1.8467 | -153.5716 | 0.6837 | 0.6896 | 0.8732 | 0.5321 | 0.5518 | 0.6896 | 14.1741 |
|
71 |
+
| 0.637 | 0.4251 | 150 | 0.6765 | -2.4614 | -198.9269 | 0.6723 | 0.6765 | 0.8732 | 0.5321 | 0.5823 | 0.6765 | 17.2098 |
|
72 |
+
| 0.5833 | 0.5668 | 200 | 0.6780 | -3.3478 | -256.0312 | 0.6729 | 0.6780 | 0.8732 | 0.5321 | 0.5797 | 0.6780 | 21.7109 |
|
73 |
+
| 0.5439 | 0.7085 | 250 | 0.6858 | -3.6269 | -257.3546 | 0.6781 | 0.6858 | 0.8732 | 0.5321 | 0.5683 | 0.6858 | 22.9139 |
|
74 |
+
| 0.5077 | 0.8503 | 300 | 0.6685 | -4.6100 | -319.3935 | 0.6640 | 0.6685 | 0.8732 | 0.5321 | 0.5828 | 0.6685 | 23.6506 |
|
75 |
+
| 0.4786 | 0.9920 | 350 | 0.6897 | -4.9192 | -368.7014 | 0.6867 | 0.6897 | 0.8732 | 0.5321 | 0.5751 | 0.6897 | 27.7113 |
|
76 |
+
| 0.3619 | 1.1337 | 400 | 0.6990 | -5.5801 | -392.7018 | 0.6961 | 0.6990 | 0.8732 | 0.5321 | 0.5849 | 0.6990 | 32.0730 |
|
77 |
+
| 0.3679 | 1.2754 | 450 | 0.6953 | -5.2450 | -349.2029 | 0.6843 | 0.6953 | 0.8732 | 0.5321 | 0.5885 | 0.6953 | 31.3199 |
|
78 |
+
| 0.3662 | 1.4171 | 500 | 0.6903 | -5.8233 | -350.7137 | 0.6858 | 0.6903 | 0.8732 | 0.5321 | 0.5890 | 0.6903 | 30.2726 |
|
79 |
+
| 0.3485 | 1.5588 | 550 | 0.6972 | -5.2492 | -349.5348 | 0.6901 | 0.6972 | 0.8732 | 0.5321 | 0.5719 | 0.6972 | 31.1765 |
|
80 |
|
81 |
|
82 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_dpo_loss": 0.6902037858963013,
|
4 |
"eval_logits": -1.7611440420150757,
|
5 |
"eval_logps": -99.38214111328125,
|
@@ -15,9 +15,9 @@
|
|
15 |
"eval_steps_per_second": 1.567,
|
16 |
"eval_wo_beta": 9.357504844665527,
|
17 |
"total_flos": 0.0,
|
18 |
-
"train_loss": 0.
|
19 |
-
"train_runtime":
|
20 |
"train_samples": 50802,
|
21 |
-
"train_samples_per_second":
|
22 |
-
"train_steps_per_second": 0.
|
23 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.5588096362777515,
|
3 |
"eval_dpo_loss": 0.6902037858963013,
|
4 |
"eval_logits": -1.7611440420150757,
|
5 |
"eval_logps": -99.38214111328125,
|
|
|
15 |
"eval_steps_per_second": 1.567,
|
16 |
"eval_wo_beta": 9.357504844665527,
|
17 |
"total_flos": 0.0,
|
18 |
+
"train_loss": 0.17483963706276634,
|
19 |
+
"train_runtime": 6877.8936,
|
20 |
"train_samples": 50802,
|
21 |
+
"train_samples_per_second": 36.931,
|
22 |
+
"train_steps_per_second": 0.256
|
23 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1975192208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6fbadd5f72d783e4eef10b0ec932f1d5e13f20e83ebacd844cdbf5fb249cb55
|
3 |
size 1975192208
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 50802,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.5588096362777515,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.17483963706276634,
|
5 |
+
"train_runtime": 6877.8936,
|
6 |
"train_samples": 50802,
|
7 |
+
"train_samples_per_second": 36.931,
|
8 |
+
"train_steps_per_second": 0.256
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 9.357504844665527,
|
3 |
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.01/checkpoint-50",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -223,13 +223,178 @@
|
|
223 |
"step": 300
|
224 |
},
|
225 |
{
|
226 |
-
"
|
227 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
"total_flos": 0.0,
|
229 |
-
"train_loss": 0.
|
230 |
-
"train_runtime":
|
231 |
-
"train_samples_per_second":
|
232 |
-
"train_steps_per_second": 0.
|
233 |
}
|
234 |
],
|
235 |
"logging_steps": 50,
|
|
|
1 |
{
|
2 |
"best_metric": 9.357504844665527,
|
3 |
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.01/checkpoint-50",
|
4 |
+
"epoch": 1.5588096362777515,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 550,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
223 |
"step": 300
|
224 |
},
|
225 |
{
|
226 |
+
"dpo_loss": 0.4693358838558197,
|
227 |
+
"epoch": 0.9919697685403873,
|
228 |
+
"grad_norm": 17.283303885786346,
|
229 |
+
"learning_rate": 4.8526047530778175e-06,
|
230 |
+
"logits": -4.669376373291016,
|
231 |
+
"logps": -331.3940124511719,
|
232 |
+
"loss": 0.4786,
|
233 |
+
"objective": 0.4693358838558197,
|
234 |
+
"ranking_idealized": 0.8804166913032532,
|
235 |
+
"ranking_idealized_expo": 0.5387499928474426,
|
236 |
+
"ranking_simple": 0.7683333158493042,
|
237 |
+
"regularize": 0.4693358838558197,
|
238 |
+
"step": 350,
|
239 |
+
"wo_beta": 12.894118309020996
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"epoch": 0.9919697685403873,
|
243 |
+
"eval_dpo_loss": 0.6896602511405945,
|
244 |
+
"eval_logits": -4.919209957122803,
|
245 |
+
"eval_logps": -368.701416015625,
|
246 |
+
"eval_loss": 0.6866692304611206,
|
247 |
+
"eval_objective": 0.6896602511405945,
|
248 |
+
"eval_ranking_idealized": 0.8731883764266968,
|
249 |
+
"eval_ranking_idealized_expo": 0.5320910811424255,
|
250 |
+
"eval_ranking_simple": 0.5750517845153809,
|
251 |
+
"eval_regularize": 0.6896602511405945,
|
252 |
+
"eval_runtime": 308.8788,
|
253 |
+
"eval_samples_per_second": 18.745,
|
254 |
+
"eval_steps_per_second": 1.564,
|
255 |
+
"eval_wo_beta": 27.71128273010254,
|
256 |
+
"step": 350
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"dpo_loss": 0.3561079800128937,
|
260 |
+
"epoch": 1.1336797354747283,
|
261 |
+
"grad_norm": 15.512579363366164,
|
262 |
+
"learning_rate": 4.757316345716554e-06,
|
263 |
+
"logits": -4.89206075668335,
|
264 |
+
"logps": -377.2569580078125,
|
265 |
+
"loss": 0.3619,
|
266 |
+
"objective": 0.3561079800128937,
|
267 |
+
"ranking_idealized": 0.8895833492279053,
|
268 |
+
"ranking_idealized_expo": 0.5450000166893005,
|
269 |
+
"ranking_simple": 0.8650000095367432,
|
270 |
+
"regularize": 0.3561079800128937,
|
271 |
+
"step": 400,
|
272 |
+
"wo_beta": 8.194127082824707
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"epoch": 1.1336797354747283,
|
276 |
+
"eval_dpo_loss": 0.6989732384681702,
|
277 |
+
"eval_logits": -5.580094814300537,
|
278 |
+
"eval_logps": -392.70184326171875,
|
279 |
+
"eval_loss": 0.6960746049880981,
|
280 |
+
"eval_objective": 0.6989732384681702,
|
281 |
+
"eval_ranking_idealized": 0.8731883764266968,
|
282 |
+
"eval_ranking_idealized_expo": 0.5320910811424255,
|
283 |
+
"eval_ranking_simple": 0.5848861336708069,
|
284 |
+
"eval_regularize": 0.6989732384681702,
|
285 |
+
"eval_runtime": 308.969,
|
286 |
+
"eval_samples_per_second": 18.74,
|
287 |
+
"eval_steps_per_second": 1.563,
|
288 |
+
"eval_wo_beta": 32.072998046875,
|
289 |
+
"step": 400
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"dpo_loss": 0.3608836829662323,
|
293 |
+
"epoch": 1.2753897024090695,
|
294 |
+
"grad_norm": 16.08489095978575,
|
295 |
+
"learning_rate": 4.639847716126855e-06,
|
296 |
+
"logits": -5.059500217437744,
|
297 |
+
"logps": -364.7832336425781,
|
298 |
+
"loss": 0.3679,
|
299 |
+
"objective": 0.3608836829662323,
|
300 |
+
"ranking_idealized": 0.8845833539962769,
|
301 |
+
"ranking_idealized_expo": 0.5266666412353516,
|
302 |
+
"ranking_simple": 0.8412500023841858,
|
303 |
+
"regularize": 0.3608836829662323,
|
304 |
+
"step": 450,
|
305 |
+
"wo_beta": 9.603814125061035
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"epoch": 1.2753897024090695,
|
309 |
+
"eval_dpo_loss": 0.6952692866325378,
|
310 |
+
"eval_logits": -5.245004177093506,
|
311 |
+
"eval_logps": -349.20294189453125,
|
312 |
+
"eval_loss": 0.6843227744102478,
|
313 |
+
"eval_objective": 0.6952692866325378,
|
314 |
+
"eval_ranking_idealized": 0.8731883764266968,
|
315 |
+
"eval_ranking_idealized_expo": 0.5320910811424255,
|
316 |
+
"eval_ranking_simple": 0.5885093212127686,
|
317 |
+
"eval_regularize": 0.6952692866325378,
|
318 |
+
"eval_runtime": 308.243,
|
319 |
+
"eval_samples_per_second": 18.784,
|
320 |
+
"eval_steps_per_second": 1.567,
|
321 |
+
"eval_wo_beta": 31.319866180419922,
|
322 |
+
"step": 450
|
323 |
+
},
|
324 |
+
{
|
325 |
+
"dpo_loss": 0.3647218942642212,
|
326 |
+
"epoch": 1.4170996693434104,
|
327 |
+
"grad_norm": 16.426230236098732,
|
328 |
+
"learning_rate": 4.501353102310901e-06,
|
329 |
+
"logits": -5.431642055511475,
|
330 |
+
"logps": -371.9469299316406,
|
331 |
+
"loss": 0.3662,
|
332 |
+
"objective": 0.3647218942642212,
|
333 |
+
"ranking_idealized": 0.877916693687439,
|
334 |
+
"ranking_idealized_expo": 0.5095833539962769,
|
335 |
+
"ranking_simple": 0.8420833349227905,
|
336 |
+
"regularize": 0.3647218942642212,
|
337 |
+
"step": 500,
|
338 |
+
"wo_beta": 10.912171363830566
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 1.4170996693434104,
|
342 |
+
"eval_dpo_loss": 0.6903234720230103,
|
343 |
+
"eval_logits": -5.823331832885742,
|
344 |
+
"eval_logps": -350.71368408203125,
|
345 |
+
"eval_loss": 0.6858127117156982,
|
346 |
+
"eval_objective": 0.6903234720230103,
|
347 |
+
"eval_ranking_idealized": 0.8731883764266968,
|
348 |
+
"eval_ranking_idealized_expo": 0.5320910811424255,
|
349 |
+
"eval_ranking_simple": 0.589026927947998,
|
350 |
+
"eval_regularize": 0.6903234720230103,
|
351 |
+
"eval_runtime": 308.1061,
|
352 |
+
"eval_samples_per_second": 18.792,
|
353 |
+
"eval_steps_per_second": 1.568,
|
354 |
+
"eval_wo_beta": 30.27263641357422,
|
355 |
+
"step": 500
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"dpo_loss": 0.3470539152622223,
|
359 |
+
"epoch": 1.5588096362777515,
|
360 |
+
"grad_norm": 13.971929990602174,
|
361 |
+
"learning_rate": 4.34319334202531e-06,
|
362 |
+
"logits": -5.66475248336792,
|
363 |
+
"logps": -394.5787048339844,
|
364 |
+
"loss": 0.3485,
|
365 |
+
"objective": 0.3470539152622223,
|
366 |
+
"ranking_idealized": 0.8870833516120911,
|
367 |
+
"ranking_idealized_expo": 0.5220833420753479,
|
368 |
+
"ranking_simple": 0.8383333086967468,
|
369 |
+
"regularize": 0.3470539152622223,
|
370 |
+
"step": 550,
|
371 |
+
"wo_beta": 10.396123886108398
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.5588096362777515,
|
375 |
+
"eval_dpo_loss": 0.6972023844718933,
|
376 |
+
"eval_logits": -5.249249458312988,
|
377 |
+
"eval_logps": -349.5347900390625,
|
378 |
+
"eval_loss": 0.6901025772094727,
|
379 |
+
"eval_objective": 0.6972023844718933,
|
380 |
+
"eval_ranking_idealized": 0.8731883764266968,
|
381 |
+
"eval_ranking_idealized_expo": 0.5320910811424255,
|
382 |
+
"eval_ranking_simple": 0.5719461441040039,
|
383 |
+
"eval_regularize": 0.6972023844718933,
|
384 |
+
"eval_runtime": 309.5507,
|
385 |
+
"eval_samples_per_second": 18.705,
|
386 |
+
"eval_steps_per_second": 1.56,
|
387 |
+
"eval_wo_beta": 31.17648696899414,
|
388 |
+
"step": 550
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"epoch": 1.5588096362777515,
|
392 |
+
"step": 550,
|
393 |
"total_flos": 0.0,
|
394 |
+
"train_loss": 0.17483963706276634,
|
395 |
+
"train_runtime": 6877.8936,
|
396 |
+
"train_samples_per_second": 36.931,
|
397 |
+
"train_steps_per_second": 0.256
|
398 |
}
|
399 |
],
|
400 |
"logging_steps": 50,
|