hZzy committed on
Commit
10246fa
1 Parent(s): 8e7aec6

Model save

Browse files
Files changed (5) hide show
  1. README.md +23 -23
  2. all_results.json +5 -5
  3. model.safetensors +1 -1
  4. train_results.json +5 -5
  5. trainer_state.json +669 -8
README.md CHANGED
@@ -2,16 +2,11 @@
2
  license: apache-2.0
3
  base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
  tags:
5
- - alignment-handbook
6
- - ndcg
7
- - trl
8
- - expo
9
- - generated_from_trainer
10
  - trl
11
  - expo
 
 
12
  - generated_from_trainer
13
- datasets:
14
- - hZzy/train_pairwise
15
  model-index:
16
  - name: qwen2.5-0.5b-expo-DPO-ES-TRY
17
  results: []
@@ -20,21 +15,21 @@ model-index:
20
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
21
  should probably proofread and complete it, then remove this comment. -->
22
 
23
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/dd7oiosi)
24
  # qwen2.5-0.5b-expo-DPO-ES-TRY
25
 
26
- This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on the hZzy/train_pairwise dataset.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.6866
29
- - Logps: -91.4116
30
- - Logits: -1.5339
31
- - Objective: 0.6926
32
- - Dpo Loss: 0.6926
33
- - Regularize: 0.6926
34
- - Ranking Simple: 0.5052
35
  - Ranking Idealized: 0.5888
36
  - Ranking Idealized Expo: 0.5093
37
- - Dpo Wo Beta: -0.9551
38
 
39
  ## Model description
40
 
@@ -69,12 +64,17 @@ The following hyperparameters were used during training:
69
 
70
  ### Training results
71
 
72
- | Training Loss | Epoch | Step | Validation Loss | Logps | Logits | Objective | Dpo Loss | Regularize | Ranking Simple | Ranking Idealized | Ranking Idealized Expo | Dpo Wo Beta |
73
- |:-------------:|:------:|:----:|:---------------:|:--------:|:-------:|:---------:|:--------:|:----------:|:--------------:|:-----------------:|:----------------------:|:-----------:|
74
- | 0.672 | 0.2041 | 36 | 0.6866 | -91.4116 | -1.5339 | 0.6926 | 0.6926 | 0.6926 | 0.5052 | 0.5888 | 0.5093 | -0.9551 |
75
- | 0.6533 | 0.4081 | 72 | 0.6769 | -92.4758 | -1.6311 | 0.6885 | 0.6885 | 0.6885 | 0.5176 | 0.5888 | 0.5093 | -1.1473 |
76
- | 0.604 | 0.6122 | 108 | 0.6773 | -94.0584 | -1.7691 | 0.6853 | 0.6853 | 0.6853 | 0.5186 | 0.5888 | 0.5093 | -1.3961 |
77
- | 0.5911 | 0.8162 | 144 | 0.6775 | -95.7578 | -1.8165 | 0.6886 | 0.6886 | 0.6886 | 0.5186 | 0.5888 | 0.5093 | -1.5153 |
 
 
 
 
 
78
 
79
 
80
  ### Framework versions
 
2
  license: apache-2.0
3
  base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
4
  tags:
 
 
 
 
 
5
  - trl
6
  - expo
7
+ - alignment-handbook
8
+ - ndcg
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: qwen2.5-0.5b-expo-DPO-ES-TRY
12
  results: []
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/jz5qh3m8)
19
  # qwen2.5-0.5b-expo-DPO-ES-TRY
20
 
21
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.6965
24
+ - Logps: -101.5796
25
+ - Logits: -1.8981
26
+ - Objective: 0.7080
27
+ - Dpo Loss: 0.7080
28
+ - Regularize: 0.7080
29
+ - Ranking Simple: 0.5279
30
  - Ranking Idealized: 0.5888
31
  - Ranking Idealized Expo: 0.5093
32
+ - Dpo Wo Beta: -2.2649
33
 
34
  ## Model description
35
 
 
64
 
65
  ### Training results
66
 
67
+ | Training Loss | Epoch | Step | Dpo Loss | Dpo Wo Beta | Logits | Logps | Validation Loss | Objective | Ranking Idealized | Ranking Idealized Expo | Ranking Simple | Regularize |
68
+ |:-------------:|:------:|:----:|:--------:|:-----------:|:-------:|:---------:|:---------------:|:---------:|:-----------------:|:----------------------:|:--------------:|:----------:|
69
+ | 0.672 | 0.2041 | 36 | 0.6926 | -0.9551 | -1.5339 | -91.4116 | 0.6866 | 0.6926 | 0.5888 | 0.5093 | 0.5052 | 0.6926 |
70
+ | 0.6533 | 0.4081 | 72 | 0.6885 | -1.1473 | -1.6311 | -92.4758 | 0.6769 | 0.6885 | 0.5888 | 0.5093 | 0.5176 | 0.6885 |
71
+ | 0.604 | 0.6122 | 108 | 0.6853 | -1.3961 | -1.7691 | -94.0584 | 0.6773 | 0.6853 | 0.5888 | 0.5093 | 0.5186 | 0.6853 |
72
+ | 0.5911 | 0.8162 | 144 | 0.6886 | -1.5153 | -1.8165 | -95.7578 | 0.6775 | 0.6886 | 0.5888 | 0.5093 | 0.5186 | 0.6886 |
73
+ | 0.5482 | 1.0203 | 180 | 0.6998 | -1.8685 | -1.8457 | -98.1346 | 0.6859 | 0.6998 | 0.5888 | 0.5093 | 0.5238 | 0.6998 |
74
+ | 0.5171 | 1.2244 | 216 | 0.7004 | -2.0594 | -1.8859 | -99.3936 | 0.6891 | 0.7004 | 0.5888 | 0.5093 | 0.5248 | 0.7004 |
75
+ | 0.5093 | 1.4284 | 252 | 0.7119 | -2.3165 | -1.8968 | -102.0847 | 0.6999 | 0.7119 | 0.5888 | 0.5093 | 0.5238 | 0.7119 |
76
+ | 0.4986 | 1.6325 | 288 | 0.7090 | -2.2741 | -1.9005 | -102.1441 | 0.6981 | 0.7090 | 0.5888 | 0.5093 | 0.5279 | 0.7090 |
77
+ | 0.5055 | 1.8366 | 324 | 0.7080 | -2.2649 | -1.8981 | -101.5796 | 0.6965 | 0.7080 | 0.5888 | 0.5093 | 0.5279 | 0.7080 |
78
 
79
 
80
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 0.8162494095418045,
3
  "eval_dpo_loss": 0.6925506591796875,
4
  "eval_dpo_wo_beta": -0.9551481008529663,
5
  "eval_logits": -1.5338705778121948,
@@ -15,9 +15,9 @@
15
  "eval_samples_per_second": 27.496,
16
  "eval_steps_per_second": 1.149,
17
  "total_flos": 0.0,
18
- "train_loss": 0.6411769655015733,
19
- "train_runtime": 5149.1432,
20
  "train_samples": 50802,
21
- "train_samples_per_second": 19.732,
22
- "train_steps_per_second": 0.068
23
  }
 
1
  {
2
+ "epoch": 1.83656117146906,
3
  "eval_dpo_loss": 0.6925506591796875,
4
  "eval_dpo_wo_beta": -0.9551481008529663,
5
  "eval_logits": -1.5338705778121948,
 
15
  "eval_samples_per_second": 27.496,
16
  "eval_steps_per_second": 1.149,
17
  "total_flos": 0.0,
18
+ "train_loss": 0.29064991334338247,
19
+ "train_runtime": 6418.0206,
20
  "train_samples": 50802,
21
+ "train_samples_per_second": 15.831,
22
+ "train_steps_per_second": 0.055
23
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb0d7b4a7f5bb92d634e8120ceb0a8f5e5de8d7d8d42e2a61024f00cb1ffebd
3
  size 1975192208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34f6cf493c37b45ab541831d42d9bcffbfdbbebf22b7ab1553eec2be8bee018e
3
  size 1975192208
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.8162494095418045,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6411769655015733,
5
- "train_runtime": 5149.1432,
6
  "train_samples": 50802,
7
- "train_samples_per_second": 19.732,
8
- "train_steps_per_second": 0.068
9
  }
 
1
  {
2
+ "epoch": 1.83656117146906,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.29064991334338247,
5
+ "train_runtime": 6418.0206,
6
  "train_samples": 50802,
7
+ "train_samples_per_second": 15.831,
8
+ "train_steps_per_second": 0.055
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": -0.9551481008529663,
3
  "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-36",
4
- "epoch": 0.8162494095418045,
5
  "eval_steps": 36,
6
- "global_step": 144,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -541,13 +541,674 @@
541
  "step": 144
542
  },
543
  {
544
- "epoch": 0.8162494095418045,
545
- "step": 144,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  "total_flos": 0.0,
547
- "train_loss": 0.6411769655015733,
548
- "train_runtime": 5149.1432,
549
- "train_samples_per_second": 19.732,
550
- "train_steps_per_second": 0.068
551
  }
552
  ],
553
  "logging_steps": 5,
 
1
  {
2
  "best_metric": -0.9551481008529663,
3
  "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-DPO-ES-TRY/checkpoint-36",
4
+ "epoch": 1.83656117146906,
5
  "eval_steps": 36,
6
+ "global_step": 324,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
541
  "step": 144
542
  },
543
  {
544
+ "dpo_loss": 0.5598483085632324,
545
+ "dpo_wo_beta": -0.7996322512626648,
546
+ "epoch": 0.821917808219178,
547
+ "grad_norm": 14.122746042521586,
548
+ "learning_rate": 3.670263821323034e-07,
549
+ "logits": -1.738029956817627,
550
+ "logps": -87.6530532836914,
551
+ "loss": 0.5822,
552
+ "objective": 0.5598483085632324,
553
+ "ranking_idealized": 0.5416666865348816,
554
+ "ranking_idealized_expo": 0.5,
555
+ "ranking_simple": 0.5208333134651184,
556
+ "regularize": 0.5598483085632324,
557
+ "step": 145
558
+ },
559
+ {
560
+ "dpo_loss": 0.5972100496292114,
561
+ "dpo_wo_beta": -0.8620439171791077,
562
+ "epoch": 0.8502598016060463,
563
+ "grad_norm": 14.65905297499526,
564
+ "learning_rate": 3.5590478660213206e-07,
565
+ "logits": -1.7470637559890747,
566
+ "logps": -85.3743667602539,
567
+ "loss": 0.5857,
568
+ "objective": 0.5972100496292114,
569
+ "ranking_idealized": 0.6166666746139526,
570
+ "ranking_idealized_expo": 0.5416666865348816,
571
+ "ranking_simple": 0.5791666507720947,
572
+ "regularize": 0.5972100496292114,
573
+ "step": 150
574
+ },
575
+ {
576
+ "dpo_loss": 0.5853369235992432,
577
+ "dpo_wo_beta": -1.0713670253753662,
578
+ "epoch": 0.8786017949929145,
579
+ "grad_norm": 14.449237235852513,
580
+ "learning_rate": 3.4452155853680454e-07,
581
+ "logits": -1.7700653076171875,
582
+ "logps": -85.30365753173828,
583
+ "loss": 0.5778,
584
+ "objective": 0.5853369235992432,
585
+ "ranking_idealized": 0.612500011920929,
586
+ "ranking_idealized_expo": 0.5083333253860474,
587
+ "ranking_simple": 0.6000000238418579,
588
+ "regularize": 0.5853369235992432,
589
+ "step": 155
590
+ },
591
+ {
592
+ "dpo_loss": 0.579626202583313,
593
+ "dpo_wo_beta": -1.0907135009765625,
594
+ "epoch": 0.9069437883797827,
595
+ "grad_norm": 16.128315234232378,
596
+ "learning_rate": 3.3290481963801696e-07,
597
+ "logits": -1.7416605949401855,
598
+ "logps": -87.19976043701172,
599
+ "loss": 0.5926,
600
+ "objective": 0.579626202583313,
601
+ "ranking_idealized": 0.5708333253860474,
602
+ "ranking_idealized_expo": 0.49166667461395264,
603
+ "ranking_simple": 0.5625,
604
+ "regularize": 0.579626202583313,
605
+ "step": 160
606
+ },
607
+ {
608
+ "dpo_loss": 0.561370313167572,
609
+ "dpo_wo_beta": -1.004237174987793,
610
+ "epoch": 0.9352857817666509,
611
+ "grad_norm": 19.041287196836052,
612
+ "learning_rate": 3.2108326848442503e-07,
613
+ "logits": -1.71802818775177,
614
+ "logps": -89.13147735595703,
615
+ "loss": 0.574,
616
+ "objective": 0.561370313167572,
617
+ "ranking_idealized": 0.6041666865348816,
618
+ "ranking_idealized_expo": 0.5208333134651184,
619
+ "ranking_simple": 0.5916666388511658,
620
+ "regularize": 0.561370313167572,
621
+ "step": 165
622
+ },
623
+ {
624
+ "dpo_loss": 0.5719048976898193,
625
+ "dpo_wo_beta": -0.9809625148773193,
626
+ "epoch": 0.9636277751535192,
627
+ "grad_norm": 18.87203490676025,
628
+ "learning_rate": 3.0908610963322626e-07,
629
+ "logits": -1.7574745416641235,
630
+ "logps": -92.2130126953125,
631
+ "loss": 0.5857,
632
+ "objective": 0.5719048976898193,
633
+ "ranking_idealized": 0.5791666507720947,
634
+ "ranking_idealized_expo": 0.512499988079071,
635
+ "ranking_simple": 0.5916666388511658,
636
+ "regularize": 0.5719048976898193,
637
+ "step": 170
638
+ },
639
+ {
640
+ "dpo_loss": 0.5651528239250183,
641
+ "dpo_wo_beta": -0.7982299327850342,
642
+ "epoch": 0.9919697685403873,
643
+ "grad_norm": 15.136769327688043,
644
+ "learning_rate": 2.969429814717456e-07,
645
+ "logits": -1.6923960447311401,
646
+ "logps": -93.54885864257812,
647
+ "loss": 0.5728,
648
+ "objective": 0.5651528239250183,
649
+ "ranking_idealized": 0.6541666388511658,
650
+ "ranking_idealized_expo": 0.574999988079071,
651
+ "ranking_simple": 0.6166666746139526,
652
+ "regularize": 0.5651528239250183,
653
+ "step": 175
654
+ },
655
+ {
656
+ "dpo_loss": 0.541616678237915,
657
+ "dpo_wo_beta": -0.9241905212402344,
658
+ "epoch": 1.0203117619272555,
659
+ "grad_norm": 14.869186889382515,
660
+ "learning_rate": 2.846838829972671e-07,
661
+ "logits": -1.8582412004470825,
662
+ "logps": -90.80587005615234,
663
+ "loss": 0.5482,
664
+ "objective": 0.541616678237915,
665
+ "ranking_idealized": 0.6166666746139526,
666
+ "ranking_idealized_expo": 0.5416666865348816,
667
+ "ranking_simple": 0.612500011920929,
668
+ "regularize": 0.541616678237915,
669
+ "step": 180
670
+ },
671
+ {
672
+ "epoch": 1.0203117619272555,
673
+ "eval_dpo_loss": 0.6998419761657715,
674
+ "eval_dpo_wo_beta": -1.868453025817871,
675
+ "eval_logits": -1.8457019329071045,
676
+ "eval_logps": -98.13460540771484,
677
+ "eval_loss": 0.6859121918678284,
678
+ "eval_objective": 0.6998419761657715,
679
+ "eval_ranking_idealized": 0.5888429880142212,
680
+ "eval_ranking_idealized_expo": 0.5092975497245789,
681
+ "eval_ranking_simple": 0.5237603187561035,
682
+ "eval_regularize": 0.6998419761657715,
683
+ "eval_runtime": 211.2462,
684
+ "eval_samples_per_second": 27.409,
685
+ "eval_steps_per_second": 1.146,
686
+ "step": 180
687
+ },
688
+ {
689
+ "dpo_loss": 0.5197845697402954,
690
+ "dpo_wo_beta": -0.6562149524688721,
691
+ "epoch": 1.0486537553141237,
692
+ "grad_norm": 15.717261963609046,
693
+ "learning_rate": 2.7233909970599426e-07,
694
+ "logits": -1.735451579093933,
695
+ "logps": -90.08293914794922,
696
+ "loss": 0.5338,
697
+ "objective": 0.5197845697402954,
698
+ "ranking_idealized": 0.5833333134651184,
699
+ "ranking_idealized_expo": 0.49166667461395264,
700
+ "ranking_simple": 0.5833333134651184,
701
+ "regularize": 0.5197845697402954,
702
+ "step": 185
703
+ },
704
+ {
705
+ "dpo_loss": 0.5209748148918152,
706
+ "dpo_wo_beta": -0.7339054942131042,
707
+ "epoch": 1.076995748700992,
708
+ "grad_norm": 15.852085328699776,
709
+ "learning_rate": 2.5993912877423147e-07,
710
+ "logits": -1.7829344272613525,
711
+ "logps": -91.87959289550781,
712
+ "loss": 0.5286,
713
+ "objective": 0.5209748148918152,
714
+ "ranking_idealized": 0.625,
715
+ "ranking_idealized_expo": 0.5541666746139526,
716
+ "ranking_simple": 0.6166666746139526,
717
+ "regularize": 0.5209748148918152,
718
+ "step": 190
719
+ },
720
+ {
721
+ "dpo_loss": 0.5032804608345032,
722
+ "dpo_wo_beta": -0.56788170337677,
723
+ "epoch": 1.10533774208786,
724
+ "grad_norm": 18.497871574750942,
725
+ "learning_rate": 2.475146037166176e-07,
726
+ "logits": -1.726409912109375,
727
+ "logps": -91.2437744140625,
728
+ "loss": 0.5213,
729
+ "objective": 0.5032804608345032,
730
+ "ranking_idealized": 0.6333333253860474,
731
+ "ranking_idealized_expo": 0.5666666626930237,
732
+ "ranking_simple": 0.6416666507720947,
733
+ "regularize": 0.5032804608345032,
734
+ "step": 195
735
+ },
736
+ {
737
+ "dpo_loss": 0.5304009914398193,
738
+ "dpo_wo_beta": -0.8262697458267212,
739
+ "epoch": 1.1336797354747283,
740
+ "grad_norm": 17.200667685474183,
741
+ "learning_rate": 2.3509621870754504e-07,
742
+ "logits": -1.8239718675613403,
743
+ "logps": -90.6737289428711,
744
+ "loss": 0.5178,
745
+ "objective": 0.5304009914398193,
746
+ "ranking_idealized": 0.6000000238418579,
747
+ "ranking_idealized_expo": 0.5083333253860474,
748
+ "ranking_simple": 0.574999988079071,
749
+ "regularize": 0.5304009914398193,
750
+ "step": 200
751
+ },
752
+ {
753
+ "dpo_loss": 0.5203301310539246,
754
+ "dpo_wo_beta": -0.938244640827179,
755
+ "epoch": 1.1620217288615966,
756
+ "grad_norm": 17.29293963157863,
757
+ "learning_rate": 2.2271465275271983e-07,
758
+ "logits": -1.799241304397583,
759
+ "logps": -92.73731994628906,
760
+ "loss": 0.5275,
761
+ "objective": 0.5203301310539246,
762
+ "ranking_idealized": 0.5708333253860474,
763
+ "ranking_idealized_expo": 0.49166667461395264,
764
+ "ranking_simple": 0.574999988079071,
765
+ "regularize": 0.5203301310539246,
766
+ "step": 205
767
+ },
768
+ {
769
+ "dpo_loss": 0.5207778811454773,
770
+ "dpo_wo_beta": -0.8990126252174377,
771
+ "epoch": 1.1903637222484649,
772
+ "grad_norm": 16.801298356601897,
773
+ "learning_rate": 2.1040049389819624e-07,
774
+ "logits": -1.8129061460494995,
775
+ "logps": -92.11985778808594,
776
+ "loss": 0.5172,
777
+ "objective": 0.5207778811454773,
778
+ "ranking_idealized": 0.5833333134651184,
779
+ "ranking_idealized_expo": 0.5,
780
+ "ranking_simple": 0.574999988079071,
781
+ "regularize": 0.5207778811454773,
782
+ "step": 210
783
+ },
784
+ {
785
+ "dpo_loss": 0.5303699374198914,
786
+ "dpo_wo_beta": -0.8046938180923462,
787
+ "epoch": 1.2187057156353331,
788
+ "grad_norm": 15.406676908767183,
789
+ "learning_rate": 1.9818416366412275e-07,
790
+ "logits": -1.757646918296814,
791
+ "logps": -91.60792541503906,
792
+ "loss": 0.5171,
793
+ "objective": 0.5303699374198914,
794
+ "ranking_idealized": 0.5583333373069763,
795
+ "ranking_idealized_expo": 0.4833333194255829,
796
+ "ranking_simple": 0.5791666507720947,
797
+ "regularize": 0.5303699374198914,
798
+ "step": 215
799
+ },
800
+ {
801
+ "epoch": 1.2243741143127067,
802
+ "eval_dpo_loss": 0.7003704905509949,
803
+ "eval_dpo_wo_beta": -2.0594191551208496,
804
+ "eval_logits": -1.885943055152893,
805
+ "eval_logps": -99.3935775756836,
806
+ "eval_loss": 0.6891233325004578,
807
+ "eval_objective": 0.7003704905509949,
808
+ "eval_ranking_idealized": 0.5888429880142212,
809
+ "eval_ranking_idealized_expo": 0.5092975497245789,
810
+ "eval_ranking_simple": 0.5247933864593506,
811
+ "eval_regularize": 0.7003704905509949,
812
+ "eval_runtime": 210.3607,
813
+ "eval_samples_per_second": 27.524,
814
+ "eval_steps_per_second": 1.15,
815
+ "step": 216
816
+ },
817
+ {
818
+ "dpo_loss": 0.5014426112174988,
819
+ "dpo_wo_beta": -0.6835492849349976,
820
+ "epoch": 1.2470477090222012,
821
+ "grad_norm": 17.734567216291715,
822
+ "learning_rate": 1.8609584188988133e-07,
823
+ "logits": -1.7417553663253784,
824
+ "logps": -91.04163360595703,
825
+ "loss": 0.5162,
826
+ "objective": 0.5014426112174988,
827
+ "ranking_idealized": 0.6541666388511658,
828
+ "ranking_idealized_expo": 0.6041666865348816,
829
+ "ranking_simple": 0.6583333611488342,
830
+ "regularize": 0.5014426112174988,
831
+ "step": 220
832
+ },
833
+ {
834
+ "dpo_loss": 0.5014557242393494,
835
+ "dpo_wo_beta": -0.8168562650680542,
836
+ "epoch": 1.2753897024090695,
837
+ "grad_norm": 16.95873203496136,
838
+ "learning_rate": 1.741653921762879e-07,
839
+ "logits": -1.8029441833496094,
840
+ "logps": -92.49475860595703,
841
+ "loss": 0.5135,
842
+ "objective": 0.5014557242393494,
843
+ "ranking_idealized": 0.6166666746139526,
844
+ "ranking_idealized_expo": 0.5333333611488342,
845
+ "ranking_simple": 0.6291666626930237,
846
+ "regularize": 0.5014557242393494,
847
+ "step": 225
848
+ },
849
+ {
850
+ "dpo_loss": 0.5178931355476379,
851
+ "dpo_wo_beta": -0.9624936580657959,
852
+ "epoch": 1.3037316957959377,
853
+ "grad_norm": 16.74959646013195,
854
+ "learning_rate": 1.624222881090439e-07,
855
+ "logits": -1.7226511240005493,
856
+ "logps": -91.97869110107422,
857
+ "loss": 0.5168,
858
+ "objective": 0.5178931355476379,
859
+ "ranking_idealized": 0.5791666507720947,
860
+ "ranking_idealized_expo": 0.5041666626930237,
861
+ "ranking_simple": 0.5833333134651184,
862
+ "regularize": 0.5178931355476379,
863
+ "step": 230
864
+ },
865
+ {
866
+ "dpo_loss": 0.504324197769165,
867
+ "dpo_wo_beta": -0.7607553601264954,
868
+ "epoch": 1.3320736891828058,
869
+ "grad_norm": 16.27709469673636,
870
+ "learning_rate": 1.5089554044570149e-07,
871
+ "logits": -1.8560264110565186,
872
+ "logps": -91.13007354736328,
873
+ "loss": 0.5205,
874
+ "objective": 0.504324197769165,
875
+ "ranking_idealized": 0.6041666865348816,
876
+ "ranking_idealized_expo": 0.4833333194255829,
877
+ "ranking_simple": 0.6291666626930237,
878
+ "regularize": 0.504324197769165,
879
+ "step": 235
880
+ },
881
+ {
882
+ "dpo_loss": 0.5133432745933533,
883
+ "dpo_wo_beta": -0.8683770298957825,
884
+ "epoch": 1.360415682569674,
885
+ "grad_norm": 17.129996913619507,
886
+ "learning_rate": 1.3961362544602212e-07,
887
+ "logits": -1.8072270154953003,
888
+ "logps": -92.90058898925781,
889
+ "loss": 0.5032,
890
+ "objective": 0.5133432745933533,
891
+ "ranking_idealized": 0.5541666746139526,
892
+ "ranking_idealized_expo": 0.4791666567325592,
893
+ "ranking_simple": 0.574999988079071,
894
+ "regularize": 0.5133432745933533,
895
+ "step": 240
896
+ },
897
+ {
898
+ "dpo_loss": 0.5124314427375793,
899
+ "dpo_wo_beta": -0.7547214031219482,
900
+ "epoch": 1.3887576759565423,
901
+ "grad_norm": 16.459721846313148,
902
+ "learning_rate": 1.2860441452278574e-07,
903
+ "logits": -1.829980731010437,
904
+ "logps": -92.37997436523438,
905
+ "loss": 0.5115,
906
+ "objective": 0.5124314427375793,
907
+ "ranking_idealized": 0.5916666388511658,
908
+ "ranking_idealized_expo": 0.5208333134651184,
909
+ "ranking_simple": 0.6083333492279053,
910
+ "regularize": 0.5124314427375793,
911
+ "step": 245
912
+ },
913
+ {
914
+ "dpo_loss": 0.5035055875778198,
915
+ "dpo_wo_beta": -0.7095458507537842,
916
+ "epoch": 1.4170996693434104,
917
+ "grad_norm": 15.886363535231352,
918
+ "learning_rate": 1.1789510538684522e-07,
919
+ "logits": -1.8016122579574585,
920
+ "logps": -92.37277221679688,
921
+ "loss": 0.5093,
922
+ "objective": 0.5035055875778198,
923
+ "ranking_idealized": 0.574999988079071,
924
+ "ranking_idealized_expo": 0.47083333134651184,
925
+ "ranking_simple": 0.5791666507720947,
926
+ "regularize": 0.5035055875778198,
927
+ "step": 250
928
+ },
929
+ {
930
+ "epoch": 1.4284364666981577,
931
+ "eval_dpo_loss": 0.7118967771530151,
932
+ "eval_dpo_wo_beta": -2.3164827823638916,
933
+ "eval_logits": -1.896795630455017,
934
+ "eval_logps": -102.08467102050781,
935
+ "eval_loss": 0.6999199986457825,
936
+ "eval_objective": 0.7118967771530151,
937
+ "eval_ranking_idealized": 0.5888429880142212,
938
+ "eval_ranking_idealized_expo": 0.5092975497245789,
939
+ "eval_ranking_simple": 0.5237603187561035,
940
+ "eval_regularize": 0.7118967771530151,
941
+ "eval_runtime": 210.1951,
942
+ "eval_samples_per_second": 27.546,
943
+ "eval_steps_per_second": 1.151,
944
+ "step": 252
945
+ },
946
+ {
947
+ "dpo_loss": 0.5017536282539368,
948
+ "dpo_wo_beta": -0.8617421388626099,
949
+ "epoch": 1.4454416627302786,
950
+ "grad_norm": 18.01058733365972,
951
+ "learning_rate": 1.0751215485652643e-07,
952
+ "logits": -1.7449065446853638,
953
+ "logps": -94.58292388916016,
954
+ "loss": 0.5028,
955
+ "objective": 0.5017536282539368,
956
+ "ranking_idealized": 0.6166666746139526,
957
+ "ranking_idealized_expo": 0.5458333492279053,
958
+ "ranking_simple": 0.6291666626930237,
959
+ "regularize": 0.5017536282539368,
960
+ "step": 255
961
+ },
962
+ {
963
+ "dpo_loss": 0.49517151713371277,
964
+ "dpo_wo_beta": -0.7506776452064514,
965
+ "epoch": 1.473783656117147,
966
+ "grad_norm": 17.81035746121776,
967
+ "learning_rate": 9.748121349736891e-08,
968
+ "logits": -1.817198634147644,
969
+ "logps": -95.67717742919922,
970
+ "loss": 0.5086,
971
+ "objective": 0.49517151713371277,
972
+ "ranking_idealized": 0.5666666626930237,
973
+ "ranking_idealized_expo": 0.48750001192092896,
974
+ "ranking_simple": 0.5791666507720947,
975
+ "regularize": 0.49517151713371277,
976
+ "step": 260
977
+ },
978
+ {
979
+ "dpo_loss": 0.5204591751098633,
980
+ "dpo_wo_beta": -1.0298762321472168,
981
+ "epoch": 1.5021256495040152,
982
+ "grad_norm": 16.358665885069936,
983
+ "learning_rate": 8.78270622536716e-08,
984
+ "logits": -1.7978217601776123,
985
+ "logps": -93.08860778808594,
986
+ "loss": 0.5031,
987
+ "objective": 0.5204591751098633,
988
+ "ranking_idealized": 0.5791666507720947,
989
+ "ranking_idealized_expo": 0.5083333253860474,
990
+ "ranking_simple": 0.5916666388511658,
991
+ "regularize": 0.5204591751098633,
992
+ "step": 265
993
+ },
994
+ {
995
+ "dpo_loss": 0.5112512111663818,
996
+ "dpo_wo_beta": -0.8745156526565552,
997
+ "epoch": 1.5304676428908834,
998
+ "grad_norm": 16.95418568294612,
999
+ "learning_rate": 7.857355122839673e-08,
1000
+ "logits": -1.7797691822052002,
1001
+ "logps": -95.0974349975586,
1002
+ "loss": 0.5001,
1003
+ "objective": 0.5112512111663818,
1004
+ "ranking_idealized": 0.574999988079071,
1005
+ "ranking_idealized_expo": 0.5083333253860474,
1006
+ "ranking_simple": 0.612500011920929,
1007
+ "regularize": 0.5112512111663818,
1008
+ "step": 270
1009
+ },
1010
+ {
1011
+ "dpo_loss": 0.4850163459777832,
1012
+ "dpo_wo_beta": -0.7579888105392456,
1013
+ "epoch": 1.5588096362777515,
1014
+ "grad_norm": 16.810103741459905,
1015
+ "learning_rate": 6.97435407626708e-08,
1016
+ "logits": -1.7828667163848877,
1017
+ "logps": -95.46377563476562,
1018
+ "loss": 0.5032,
1019
+ "objective": 0.4850163459777832,
1020
+ "ranking_idealized": 0.6208333373069763,
1021
+ "ranking_idealized_expo": 0.5249999761581421,
1022
+ "ranking_simple": 0.6208333373069763,
1023
+ "regularize": 0.4850163459777832,
1024
+ "step": 275
1025
+ },
1026
+ {
1027
+ "dpo_loss": 0.500173032283783,
1028
+ "dpo_wo_beta": -0.9343259334564209,
1029
+ "epoch": 1.5871516296646198,
1030
+ "grad_norm": 19.77407532453596,
1031
+ "learning_rate": 6.135884496044244e-08,
1032
+ "logits": -1.7596685886383057,
1033
+ "logps": -94.32428741455078,
1034
+ "loss": 0.5082,
1035
+ "objective": 0.500173032283783,
1036
+ "ranking_idealized": 0.574999988079071,
1037
+ "ranking_idealized_expo": 0.5041666626930237,
1038
+ "ranking_simple": 0.5916666388511658,
1039
+ "regularize": 0.500173032283783,
1040
+ "step": 280
1041
+ },
1042
+ {
1043
+ "dpo_loss": 0.4930657148361206,
1044
+ "dpo_wo_beta": -0.8764402270317078,
1045
+ "epoch": 1.615493623051488,
1046
+ "grad_norm": 17.882115521129325,
1047
+ "learning_rate": 5.344017779781834e-08,
1048
+ "logits": -1.8836300373077393,
1049
+ "logps": -95.88809967041016,
1050
+ "loss": 0.4986,
1051
+ "objective": 0.4930657148361206,
1052
+ "ranking_idealized": 0.5874999761581421,
1053
+ "ranking_idealized_expo": 0.512499988079071,
1054
+ "ranking_simple": 0.6416666507720947,
1055
+ "regularize": 0.4930657148361206,
1056
+ "step": 285
1057
+ },
1058
+ {
1059
+ "epoch": 1.632498819083609,
1060
+ "eval_dpo_loss": 0.7089951038360596,
1061
+ "eval_dpo_wo_beta": -2.2741074562072754,
1062
+ "eval_logits": -1.9005335569381714,
1063
+ "eval_logps": -102.14405059814453,
1064
+ "eval_loss": 0.69806969165802,
1065
+ "eval_objective": 0.7089951038360596,
1066
+ "eval_ranking_idealized": 0.5888429880142212,
1067
+ "eval_ranking_idealized_expo": 0.5092975497245789,
1068
+ "eval_ranking_simple": 0.5278925895690918,
1069
+ "eval_regularize": 0.7089951038360596,
1070
+ "eval_runtime": 210.3642,
1071
+ "eval_samples_per_second": 27.524,
1072
+ "eval_steps_per_second": 1.15,
1073
+ "step": 288
1074
+ },
1075
+ {
1076
+ "dpo_loss": 0.48574885725975037,
1077
+ "dpo_wo_beta": -0.6394420266151428,
1078
+ "epoch": 1.643835616438356,
1079
+ "grad_norm": 18.69303873982025,
1080
+ "learning_rate": 4.600710195020982e-08,
1081
+ "logits": -1.8818236589431763,
1082
+ "logps": -93.7010726928711,
1083
+ "loss": 0.4998,
1084
+ "objective": 0.48574885725975037,
1085
+ "ranking_idealized": 0.612500011920929,
1086
+ "ranking_idealized_expo": 0.5625,
1087
+ "ranking_simple": 0.6666666865348816,
1088
+ "regularize": 0.48574885725975037,
1089
+ "step": 290
1090
+ },
1091
+ {
1092
+ "dpo_loss": 0.48394453525543213,
1093
+ "dpo_wo_beta": -0.8024871945381165,
1094
+ "epoch": 1.6721776098252243,
1095
+ "grad_norm": 16.956110275905026,
1096
+ "learning_rate": 3.9077980463711384e-08,
1097
+ "logits": -1.762637734413147,
1098
+ "logps": -92.77220153808594,
1099
+ "loss": 0.5044,
1100
+ "objective": 0.48394453525543213,
1101
+ "ranking_idealized": 0.5791666507720947,
1102
+ "ranking_idealized_expo": 0.4749999940395355,
1103
+ "ranking_simple": 0.6208333373069763,
1104
+ "regularize": 0.48394453525543213,
1105
+ "step": 295
1106
+ },
1107
+ {
1108
+ "dpo_loss": 0.49569427967071533,
1109
+ "dpo_wo_beta": -0.9217907190322876,
1110
+ "epoch": 1.7005196032120926,
1111
+ "grad_norm": 16.550791676698843,
1112
+ "learning_rate": 3.2669931390104374e-08,
1113
+ "logits": -1.8427069187164307,
1114
+ "logps": -94.1619873046875,
1115
+ "loss": 0.5001,
1116
+ "objective": 0.49569427967071533,
1117
+ "ranking_idealized": 0.6583333611488342,
1118
+ "ranking_idealized_expo": 0.574999988079071,
1119
+ "ranking_simple": 0.6499999761581421,
1120
+ "regularize": 0.49569427967071533,
1121
+ "step": 300
1122
+ },
1123
+ {
1124
+ "dpo_loss": 0.5322020053863525,
1125
+ "dpo_wo_beta": -1.2344766855239868,
1126
+ "epoch": 1.7288615965989607,
1127
+ "grad_norm": 18.336457660232934,
1128
+ "learning_rate": 2.679878549755618e-08,
1129
+ "logits": -1.769299864768982,
1130
+ "logps": -94.73688507080078,
1131
+ "loss": 0.5077,
1132
+ "objective": 0.5322020053863525,
1133
+ "ranking_idealized": 0.5625,
1134
+ "ranking_idealized_expo": 0.5083333253860474,
1135
+ "ranking_simple": 0.6041666865348816,
1136
+ "regularize": 0.5322020053863525,
1137
+ "step": 305
1138
+ },
1139
+ {
1140
+ "dpo_loss": 0.5099295973777771,
1141
+ "dpo_wo_beta": -0.9262399077415466,
1142
+ "epoch": 1.7572035899858292,
1143
+ "grad_norm": 17.848858958468,
1144
+ "learning_rate": 2.147904716149135e-08,
1145
+ "logits": -1.8594757318496704,
1146
+ "logps": -91.70903015136719,
1147
+ "loss": 0.5042,
1148
+ "objective": 0.5099295973777771,
1149
+ "ranking_idealized": 0.6083333492279053,
1150
+ "ranking_idealized_expo": 0.5249999761581421,
1151
+ "ranking_simple": 0.6166666746139526,
1152
+ "regularize": 0.5099295973777771,
1153
+ "step": 310
1154
+ },
1155
+ {
1156
+ "dpo_loss": 0.5144280791282654,
1157
+ "dpo_wo_beta": -0.8178034424781799,
1158
+ "epoch": 1.7855455833726972,
1159
+ "grad_norm": 16.57904461321692,
1160
+ "learning_rate": 1.6723858532249778e-08,
1161
+ "logits": -1.753740906715393,
1162
+ "logps": -92.84646606445312,
1163
+ "loss": 0.5027,
1164
+ "objective": 0.5144280791282654,
1165
+ "ranking_idealized": 0.6041666865348816,
1166
+ "ranking_idealized_expo": 0.5083333253860474,
1167
+ "ranking_simple": 0.5874999761581421,
1168
+ "regularize": 0.5144280791282654,
1169
+ "step": 315
1170
+ },
1171
+ {
1172
+ "dpo_loss": 0.5112444162368774,
1173
+ "dpo_wo_beta": -0.9777346253395081,
1174
+ "epoch": 1.8138875767595655,
1175
+ "grad_norm": 18.774453505894844,
1176
+ "learning_rate": 1.254496706805433e-08,
1177
+ "logits": -1.8467546701431274,
1178
+ "logps": -95.57693481445312,
1179
+ "loss": 0.5055,
1180
+ "objective": 0.5112444162368774,
1181
+ "ranking_idealized": 0.6000000238418579,
1182
+ "ranking_idealized_expo": 0.5,
1183
+ "ranking_simple": 0.5958333611488342,
1184
+ "regularize": 0.5112444162368774,
1185
+ "step": 320
1186
+ },
1187
+ {
1188
+ "epoch": 1.83656117146906,
1189
+ "eval_dpo_loss": 0.7079721689224243,
1190
+ "eval_dpo_wo_beta": -2.264868974685669,
1191
+ "eval_logits": -1.8981069326400757,
1192
+ "eval_logps": -101.57964324951172,
1193
+ "eval_loss": 0.6964598298072815,
1194
+ "eval_objective": 0.7079721689224243,
1195
+ "eval_ranking_idealized": 0.5888429880142212,
1196
+ "eval_ranking_idealized_expo": 0.5092975497245789,
1197
+ "eval_ranking_simple": 0.5278925895690918,
1198
+ "eval_regularize": 0.7079721689224243,
1199
+ "eval_runtime": 210.0424,
1200
+ "eval_samples_per_second": 27.566,
1201
+ "eval_steps_per_second": 1.152,
1202
+ "step": 324
1203
+ },
1204
+ {
1205
+ "epoch": 1.83656117146906,
1206
+ "step": 324,
1207
  "total_flos": 0.0,
1208
+ "train_loss": 0.29064991334338247,
1209
+ "train_runtime": 6418.0206,
1210
+ "train_samples_per_second": 15.831,
1211
+ "train_steps_per_second": 0.055
1212
  }
1213
  ],
1214
  "logging_steps": 5,