hZzy committed on
Commit
020572d
1 Parent(s): 695354c

Model save

Browse files
Files changed (5) hide show
  1. README.md +25 -25
  2. all_results.json +5 -5
  3. model.safetensors +1 -1
  4. train_results.json +5 -5
  5. trainer_state.json +173 -8
README.md CHANGED
@@ -2,16 +2,11 @@
2
  license: apache-2.0
3
  base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
  tags:
5
- - alignment-handbook
6
- - ndcg
7
- - trl
8
- - expo
9
- - generated_from_trainer
10
  - trl
11
  - expo
 
 
12
  - generated_from_trainer
13
- datasets:
14
- - hZzy/train_pairwise
15
  model-index:
16
  - name: qwen2.5-0.5b-expo-DPO-ES-0.01
17
  results: []
@@ -20,21 +15,21 @@ model-index:
20
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
21
  should probably proofread and complete it, then remove this comment. -->
22
 
23
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/aeakfz58)
24
  # qwen2.5-0.5b-expo-DPO-ES-0.01
25
 
26
- This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on the hZzy/train_pairwise dataset.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.6894
29
- - Logps: -99.3821
30
- - Logits: -1.7611
31
- - Objective: 0.6902
32
- - Dpo Loss: 0.6902
33
- - Regularize: 0.6902
34
- - Ranking Simple: 0.5305
35
  - Ranking Idealized: 0.8732
36
  - Ranking Idealized Expo: 0.5321
37
- - Wo Beta: 9.3575
38
 
39
  ## Model description
40
 
@@ -69,14 +64,19 @@ The following hyperparameters were used during training:
69
 
70
  ### Training results
71
 
72
- | Training Loss | Epoch | Step | Validation Loss | Logps | Logits | Objective | Dpo Loss | Regularize | Ranking Simple | Ranking Idealized | Ranking Idealized Expo | Wo Beta |
73
- |:-------------:|:------:|:----:|:---------------:|:---------:|:-------:|:---------:|:--------:|:----------:|:--------------:|:-----------------:|:----------------------:|:-------:|
74
- | 0.6907 | 0.1417 | 50 | 0.6894 | -99.3821 | -1.7611 | 0.6902 | 0.6902 | 0.6902 | 0.5305 | 0.8732 | 0.5321 | 9.3575 |
75
- | 0.6701 | 0.2834 | 100 | 0.6837 | -153.5716 | -1.8467 | 0.6896 | 0.6896 | 0.6896 | 0.5518 | 0.8732 | 0.5321 | 14.1741 |
76
- | 0.637 | 0.4251 | 150 | 0.6723 | -198.9269 | -2.4614 | 0.6765 | 0.6765 | 0.6765 | 0.5823 | 0.8732 | 0.5321 | 17.2098 |
77
- | 0.5833 | 0.5668 | 200 | 0.6729 | -256.0312 | -3.3478 | 0.6780 | 0.6780 | 0.6780 | 0.5797 | 0.8732 | 0.5321 | 21.7109 |
78
- | 0.5439 | 0.7085 | 250 | 0.6781 | -257.3546 | -3.6269 | 0.6858 | 0.6858 | 0.6858 | 0.5683 | 0.8732 | 0.5321 | 22.9139 |
79
- | 0.5077 | 0.8503 | 300 | 0.6640 | -319.3935 | -4.6100 | 0.6685 | 0.6685 | 0.6685 | 0.5828 | 0.8732 | 0.5321 | 23.6506 |
 
 
 
 
 
80
 
81
 
82
  ### Framework versions
 
2
  license: apache-2.0
3
  base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
  tags:
 
 
 
 
 
5
  - trl
6
  - expo
7
+ - alignment-handbook
8
+ - ndcg
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: qwen2.5-0.5b-expo-DPO-ES-0.01
12
  results: []
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/r1jti62c)
19
  # qwen2.5-0.5b-expo-DPO-ES-0.01
20
 
21
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.6901
24
+ - Logps: -349.5348
25
+ - Logits: -5.2492
26
+ - Objective: 0.6972
27
+ - Dpo Loss: 0.6972
28
+ - Regularize: 0.6972
29
+ - Ranking Simple: 0.5719
30
  - Ranking Idealized: 0.8732
31
  - Ranking Idealized Expo: 0.5321
32
+ - Wo Beta: 31.1765
33
 
34
  ## Model description
35
 
 
64
 
65
  ### Training results
66
 
67
+ | Training Loss | Epoch | Step | Dpo Loss | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize | Wo Beta |
68
+ |:-------------:|:------:|:----:|:--------:|:-------:|:---------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|:-------:|
69
+ | 0.6907 | 0.1417 | 50 | 0.6902 | -1.7611 | -99.3821 | 0.6894 | 0.6902 | 0.8732 | 0.5321 | 0.5305 | 0.6902 | 9.3575 |
70
+ | 0.6701 | 0.2834 | 100 | 0.6896 | -1.8467 | -153.5716 | 0.6837 | 0.6896 | 0.8732 | 0.5321 | 0.5518 | 0.6896 | 14.1741 |
71
+ | 0.637 | 0.4251 | 150 | 0.6765 | -2.4614 | -198.9269 | 0.6723 | 0.6765 | 0.8732 | 0.5321 | 0.5823 | 0.6765 | 17.2098 |
72
+ | 0.5833 | 0.5668 | 200 | 0.6780 | -3.3478 | -256.0312 | 0.6729 | 0.6780 | 0.8732 | 0.5321 | 0.5797 | 0.6780 | 21.7109 |
73
+ | 0.5439 | 0.7085 | 250 | 0.6858 | -3.6269 | -257.3546 | 0.6781 | 0.6858 | 0.8732 | 0.5321 | 0.5683 | 0.6858 | 22.9139 |
74
+ | 0.5077 | 0.8503 | 300 | 0.6685 | -4.6100 | -319.3935 | 0.6640 | 0.6685 | 0.8732 | 0.5321 | 0.5828 | 0.6685 | 23.6506 |
75
+ | 0.4786 | 0.9920 | 350 | 0.6897 | -4.9192 | -368.7014 | 0.6867 | 0.6897 | 0.8732 | 0.5321 | 0.5751 | 0.6897 | 27.7113 |
76
+ | 0.3619 | 1.1337 | 400 | 0.6990 | -5.5801 | -392.7018 | 0.6961 | 0.6990 | 0.8732 | 0.5321 | 0.5849 | 0.6990 | 32.0730 |
77
+ | 0.3679 | 1.2754 | 450 | 0.6953 | -5.2450 | -349.2029 | 0.6843 | 0.6953 | 0.8732 | 0.5321 | 0.5885 | 0.6953 | 31.3199 |
78
+ | 0.3662 | 1.4171 | 500 | 0.6903 | -5.8233 | -350.7137 | 0.6858 | 0.6903 | 0.8732 | 0.5321 | 0.5890 | 0.6903 | 30.2726 |
79
+ | 0.3485 | 1.5588 | 550 | 0.6972 | -5.2492 | -349.5348 | 0.6901 | 0.6972 | 0.8732 | 0.5321 | 0.5719 | 0.6972 | 31.1765 |
80
 
81
 
82
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 0.8502598016060463,
3
  "eval_dpo_loss": 0.6902037858963013,
4
  "eval_logits": -1.7611440420150757,
5
  "eval_logps": -99.38214111328125,
@@ -15,9 +15,9 @@
15
  "eval_steps_per_second": 1.567,
16
  "eval_wo_beta": 9.357504844665527,
17
  "total_flos": 0.0,
18
- "train_loss": 0.6054576412836711,
19
- "train_runtime": 8287.5068,
20
  "train_samples": 50802,
21
- "train_samples_per_second": 30.65,
22
- "train_steps_per_second": 0.212
23
  }
 
1
  {
2
+ "epoch": 1.5588096362777515,
3
  "eval_dpo_loss": 0.6902037858963013,
4
  "eval_logits": -1.7611440420150757,
5
  "eval_logps": -99.38214111328125,
 
15
  "eval_steps_per_second": 1.567,
16
  "eval_wo_beta": 9.357504844665527,
17
  "total_flos": 0.0,
18
+ "train_loss": 0.17483963706276634,
19
+ "train_runtime": 6877.8936,
20
  "train_samples": 50802,
21
+ "train_samples_per_second": 36.931,
22
+ "train_steps_per_second": 0.256
23
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bef5fbd1a333a0938f0a0e465dcbd17cd3370807d8870ed7c029141e83f0648
3
  size 1975192208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6fbadd5f72d783e4eef10b0ec932f1d5e13f20e83ebacd844cdbf5fb249cb55
3
  size 1975192208
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.8502598016060463,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6054576412836711,
5
- "train_runtime": 8287.5068,
6
  "train_samples": 50802,
7
- "train_samples_per_second": 30.65,
8
- "train_steps_per_second": 0.212
9
  }
 
1
  {
2
+ "epoch": 1.5588096362777515,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.17483963706276634,
5
+ "train_runtime": 6877.8936,
6
  "train_samples": 50802,
7
+ "train_samples_per_second": 36.931,
8
+ "train_steps_per_second": 0.256
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 9.357504844665527,
3
  "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.01/checkpoint-50",
4
- "epoch": 0.8502598016060463,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -223,13 +223,178 @@
223
  "step": 300
224
  },
225
  {
226
- "epoch": 0.8502598016060463,
227
- "step": 300,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  "total_flos": 0.0,
229
- "train_loss": 0.6054576412836711,
230
- "train_runtime": 8287.5068,
231
- "train_samples_per_second": 30.65,
232
- "train_steps_per_second": 0.212
233
  }
234
  ],
235
  "logging_steps": 50,
 
1
  {
2
  "best_metric": 9.357504844665527,
3
  "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-0.01/checkpoint-50",
4
+ "epoch": 1.5588096362777515,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
223
  "step": 300
224
  },
225
  {
226
+ "dpo_loss": 0.4693358838558197,
227
+ "epoch": 0.9919697685403873,
228
+ "grad_norm": 17.283303885786346,
229
+ "learning_rate": 4.8526047530778175e-06,
230
+ "logits": -4.669376373291016,
231
+ "logps": -331.3940124511719,
232
+ "loss": 0.4786,
233
+ "objective": 0.4693358838558197,
234
+ "ranking_idealized": 0.8804166913032532,
235
+ "ranking_idealized_expo": 0.5387499928474426,
236
+ "ranking_simple": 0.7683333158493042,
237
+ "regularize": 0.4693358838558197,
238
+ "step": 350,
239
+ "wo_beta": 12.894118309020996
240
+ },
241
+ {
242
+ "epoch": 0.9919697685403873,
243
+ "eval_dpo_loss": 0.6896602511405945,
244
+ "eval_logits": -4.919209957122803,
245
+ "eval_logps": -368.701416015625,
246
+ "eval_loss": 0.6866692304611206,
247
+ "eval_objective": 0.6896602511405945,
248
+ "eval_ranking_idealized": 0.8731883764266968,
249
+ "eval_ranking_idealized_expo": 0.5320910811424255,
250
+ "eval_ranking_simple": 0.5750517845153809,
251
+ "eval_regularize": 0.6896602511405945,
252
+ "eval_runtime": 308.8788,
253
+ "eval_samples_per_second": 18.745,
254
+ "eval_steps_per_second": 1.564,
255
+ "eval_wo_beta": 27.71128273010254,
256
+ "step": 350
257
+ },
258
+ {
259
+ "dpo_loss": 0.3561079800128937,
260
+ "epoch": 1.1336797354747283,
261
+ "grad_norm": 15.512579363366164,
262
+ "learning_rate": 4.757316345716554e-06,
263
+ "logits": -4.89206075668335,
264
+ "logps": -377.2569580078125,
265
+ "loss": 0.3619,
266
+ "objective": 0.3561079800128937,
267
+ "ranking_idealized": 0.8895833492279053,
268
+ "ranking_idealized_expo": 0.5450000166893005,
269
+ "ranking_simple": 0.8650000095367432,
270
+ "regularize": 0.3561079800128937,
271
+ "step": 400,
272
+ "wo_beta": 8.194127082824707
273
+ },
274
+ {
275
+ "epoch": 1.1336797354747283,
276
+ "eval_dpo_loss": 0.6989732384681702,
277
+ "eval_logits": -5.580094814300537,
278
+ "eval_logps": -392.70184326171875,
279
+ "eval_loss": 0.6960746049880981,
280
+ "eval_objective": 0.6989732384681702,
281
+ "eval_ranking_idealized": 0.8731883764266968,
282
+ "eval_ranking_idealized_expo": 0.5320910811424255,
283
+ "eval_ranking_simple": 0.5848861336708069,
284
+ "eval_regularize": 0.6989732384681702,
285
+ "eval_runtime": 308.969,
286
+ "eval_samples_per_second": 18.74,
287
+ "eval_steps_per_second": 1.563,
288
+ "eval_wo_beta": 32.072998046875,
289
+ "step": 400
290
+ },
291
+ {
292
+ "dpo_loss": 0.3608836829662323,
293
+ "epoch": 1.2753897024090695,
294
+ "grad_norm": 16.08489095978575,
295
+ "learning_rate": 4.639847716126855e-06,
296
+ "logits": -5.059500217437744,
297
+ "logps": -364.7832336425781,
298
+ "loss": 0.3679,
299
+ "objective": 0.3608836829662323,
300
+ "ranking_idealized": 0.8845833539962769,
301
+ "ranking_idealized_expo": 0.5266666412353516,
302
+ "ranking_simple": 0.8412500023841858,
303
+ "regularize": 0.3608836829662323,
304
+ "step": 450,
305
+ "wo_beta": 9.603814125061035
306
+ },
307
+ {
308
+ "epoch": 1.2753897024090695,
309
+ "eval_dpo_loss": 0.6952692866325378,
310
+ "eval_logits": -5.245004177093506,
311
+ "eval_logps": -349.20294189453125,
312
+ "eval_loss": 0.6843227744102478,
313
+ "eval_objective": 0.6952692866325378,
314
+ "eval_ranking_idealized": 0.8731883764266968,
315
+ "eval_ranking_idealized_expo": 0.5320910811424255,
316
+ "eval_ranking_simple": 0.5885093212127686,
317
+ "eval_regularize": 0.6952692866325378,
318
+ "eval_runtime": 308.243,
319
+ "eval_samples_per_second": 18.784,
320
+ "eval_steps_per_second": 1.567,
321
+ "eval_wo_beta": 31.319866180419922,
322
+ "step": 450
323
+ },
324
+ {
325
+ "dpo_loss": 0.3647218942642212,
326
+ "epoch": 1.4170996693434104,
327
+ "grad_norm": 16.426230236098732,
328
+ "learning_rate": 4.501353102310901e-06,
329
+ "logits": -5.431642055511475,
330
+ "logps": -371.9469299316406,
331
+ "loss": 0.3662,
332
+ "objective": 0.3647218942642212,
333
+ "ranking_idealized": 0.877916693687439,
334
+ "ranking_idealized_expo": 0.5095833539962769,
335
+ "ranking_simple": 0.8420833349227905,
336
+ "regularize": 0.3647218942642212,
337
+ "step": 500,
338
+ "wo_beta": 10.912171363830566
339
+ },
340
+ {
341
+ "epoch": 1.4170996693434104,
342
+ "eval_dpo_loss": 0.6903234720230103,
343
+ "eval_logits": -5.823331832885742,
344
+ "eval_logps": -350.71368408203125,
345
+ "eval_loss": 0.6858127117156982,
346
+ "eval_objective": 0.6903234720230103,
347
+ "eval_ranking_idealized": 0.8731883764266968,
348
+ "eval_ranking_idealized_expo": 0.5320910811424255,
349
+ "eval_ranking_simple": 0.589026927947998,
350
+ "eval_regularize": 0.6903234720230103,
351
+ "eval_runtime": 308.1061,
352
+ "eval_samples_per_second": 18.792,
353
+ "eval_steps_per_second": 1.568,
354
+ "eval_wo_beta": 30.27263641357422,
355
+ "step": 500
356
+ },
357
+ {
358
+ "dpo_loss": 0.3470539152622223,
359
+ "epoch": 1.5588096362777515,
360
+ "grad_norm": 13.971929990602174,
361
+ "learning_rate": 4.34319334202531e-06,
362
+ "logits": -5.66475248336792,
363
+ "logps": -394.5787048339844,
364
+ "loss": 0.3485,
365
+ "objective": 0.3470539152622223,
366
+ "ranking_idealized": 0.8870833516120911,
367
+ "ranking_idealized_expo": 0.5220833420753479,
368
+ "ranking_simple": 0.8383333086967468,
369
+ "regularize": 0.3470539152622223,
370
+ "step": 550,
371
+ "wo_beta": 10.396123886108398
372
+ },
373
+ {
374
+ "epoch": 1.5588096362777515,
375
+ "eval_dpo_loss": 0.6972023844718933,
376
+ "eval_logits": -5.249249458312988,
377
+ "eval_logps": -349.5347900390625,
378
+ "eval_loss": 0.6901025772094727,
379
+ "eval_objective": 0.6972023844718933,
380
+ "eval_ranking_idealized": 0.8731883764266968,
381
+ "eval_ranking_idealized_expo": 0.5320910811424255,
382
+ "eval_ranking_simple": 0.5719461441040039,
383
+ "eval_regularize": 0.6972023844718933,
384
+ "eval_runtime": 309.5507,
385
+ "eval_samples_per_second": 18.705,
386
+ "eval_steps_per_second": 1.56,
387
+ "eval_wo_beta": 31.17648696899414,
388
+ "step": 550
389
+ },
390
+ {
391
+ "epoch": 1.5588096362777515,
392
+ "step": 550,
393
  "total_flos": 0.0,
394
+ "train_loss": 0.17483963706276634,
395
+ "train_runtime": 6877.8936,
396
+ "train_samples_per_second": 36.931,
397
+ "train_steps_per_second": 0.256
398
  }
399
  ],
400
  "logging_steps": 50,