Jan Majkutewicz committed
Commit c097e33 · verified · 1 Parent(s): 30b84a4

Model save

README.md CHANGED
@@ -2,13 +2,10 @@
  license: apache-2.0
  library_name: peft
  tags:
- - alignment-handbook
  - trl
  - dpo
  - generated_from_trainer
  base_model: alignment-handbook/zephyr-7b-sft-full
- datasets:
- - HuggingFaceH4/ultrafeedback_binarized
  model-index:
  - name: zephyr-7b-dpo-lora
    results: []
@@ -19,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
  # zephyr-7b-dpo-lora
 
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.5894
- - Rewards/chosen: -0.2738
- - Rewards/rejected: -0.6020
- - Rewards/accuracies: 0.7035
- - Rewards/margins: 0.3282
- - Logps/rejected: -321.6407
- - Logps/chosen: -310.1199
- - Logits/rejected: -2.7529
- - Logits/chosen: -2.7746
+ - Loss: 0.6776
+ - Rewards/chosen: 0.0182
+ - Rewards/rejected: -0.0146
+ - Rewards/accuracies: 0.6855
+ - Rewards/margins: 0.0328
+ - Logps/rejected: -262.9002
+ - Logps/chosen: -280.9546
+ - Logits/rejected: -2.8233
+ - Logits/chosen: -2.8504
 
  ## Model description
 
@@ -63,50 +60,50 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6929 | 0.0262 | 100 | 0.6930 | -0.0001 | -0.0004 | 0.5250 | 0.0003 | -261.4788 | -282.7496 | -2.8388 | -2.8661 |
- | 0.6923 | 0.0523 | 200 | 0.6923 | 0.0008 | -0.0009 | 0.6050 | 0.0017 | -261.5316 | -282.6624 | -2.8380 | -2.8653 |
- | 0.6898 | 0.0785 | 300 | 0.6903 | 0.0035 | -0.0024 | 0.6640 | 0.0058 | -261.6760 | -282.3918 | -2.8350 | -2.8623 |
- | 0.6872 | 0.1047 | 400 | 0.6862 | 0.0165 | 0.0021 | 0.6670 | 0.0144 | -261.2256 | -281.0900 | -2.8308 | -2.8577 |
- | 0.6783 | 0.1309 | 500 | 0.6804 | 0.0209 | -0.0059 | 0.6835 | 0.0267 | -262.0230 | -280.6481 | -2.8215 | -2.8486 |
- | 0.6729 | 0.1570 | 600 | 0.6733 | 0.0154 | -0.0272 | 0.6840 | 0.0426 | -264.1608 | -281.1958 | -2.8138 | -2.8410 |
- | 0.6665 | 0.1832 | 700 | 0.6638 | -0.0035 | -0.0689 | 0.6755 | 0.0654 | -268.3266 | -283.0863 | -2.8060 | -2.8327 |
- | 0.6427 | 0.2094 | 800 | 0.6546 | -0.0214 | -0.1104 | 0.6815 | 0.0889 | -272.4747 | -284.8825 | -2.8020 | -2.8283 |
- | 0.6428 | 0.2355 | 900 | 0.6458 | -0.0247 | -0.1383 | 0.6770 | 0.1136 | -275.2685 | -285.2050 | -2.7942 | -2.8199 |
- | 0.6381 | 0.2617 | 1000 | 0.6358 | -0.0638 | -0.2074 | 0.6785 | 0.1436 | -282.1761 | -289.1206 | -2.7887 | -2.8138 |
- | 0.6488 | 0.2879 | 1100 | 0.6284 | -0.1378 | -0.3055 | 0.6790 | 0.1677 | -291.9890 | -296.5138 | -2.7826 | -2.8071 |
- | 0.6427 | 0.3141 | 1200 | 0.6223 | -0.1104 | -0.2986 | 0.6835 | 0.1882 | -291.3028 | -293.7785 | -2.7931 | -2.8165 |
- | 0.6131 | 0.3402 | 1300 | 0.6172 | -0.1466 | -0.3514 | 0.6865 | 0.2049 | -296.5806 | -297.3945 | -2.7951 | -2.8180 |
- | 0.6326 | 0.3664 | 1400 | 0.6155 | -0.1752 | -0.3896 | 0.6860 | 0.2144 | -300.3966 | -300.2597 | -2.7920 | -2.8147 |
- | 0.6128 | 0.3926 | 1500 | 0.6180 | -0.0630 | -0.2687 | 0.6890 | 0.2057 | -288.3090 | -289.0369 | -2.7980 | -2.8198 |
- | 0.6223 | 0.4187 | 1600 | 0.6088 | -0.1688 | -0.4097 | 0.6945 | 0.2409 | -302.4074 | -299.6220 | -2.7926 | -2.8148 |
- | 0.6338 | 0.4449 | 1700 | 0.6061 | -0.2152 | -0.4665 | 0.6925 | 0.2513 | -308.0869 | -304.2535 | -2.7961 | -2.8181 |
- | 0.585 | 0.4711 | 1800 | 0.6050 | -0.1327 | -0.3850 | 0.6915 | 0.2523 | -299.9368 | -296.0054 | -2.7949 | -2.8174 |
- | 0.577 | 0.4973 | 1900 | 0.6013 | -0.2170 | -0.4883 | 0.6965 | 0.2713 | -310.2670 | -304.4333 | -2.7954 | -2.8176 |
- | 0.5945 | 0.5234 | 2000 | 0.5992 | -0.2107 | -0.4899 | 0.6995 | 0.2793 | -310.4293 | -303.8028 | -2.7903 | -2.8122 |
- | 0.5913 | 0.5496 | 2100 | 0.5981 | -0.2373 | -0.5251 | 0.7025 | 0.2879 | -313.9529 | -306.4641 | -2.7863 | -2.8085 |
- | 0.5816 | 0.5758 | 2200 | 0.5989 | -0.2688 | -0.5570 | 0.6970 | 0.2883 | -317.1411 | -309.6146 | -2.7849 | -2.8070 |
- | 0.5824 | 0.6019 | 2300 | 0.5961 | -0.2227 | -0.5189 | 0.6955 | 0.2961 | -313.3233 | -305.0098 | -2.7821 | -2.8037 |
- | 0.602 | 0.6281 | 2400 | 0.5969 | -0.2683 | -0.5669 | 0.6990 | 0.2986 | -318.1251 | -309.5652 | -2.7744 | -2.7961 |
- | 0.5792 | 0.6543 | 2500 | 0.5963 | -0.2102 | -0.5041 | 0.6975 | 0.2938 | -311.8429 | -303.7615 | -2.7763 | -2.7980 |
- | 0.6028 | 0.6805 | 2600 | 0.5974 | -0.1896 | -0.4790 | 0.6920 | 0.2895 | -309.3417 | -301.6964 | -2.7717 | -2.7933 |
- | 0.5854 | 0.7066 | 2700 | 0.5930 | -0.2517 | -0.5615 | 0.7020 | 0.3098 | -317.5864 | -307.9027 | -2.7676 | -2.7892 |
- | 0.5994 | 0.7328 | 2800 | 0.5920 | -0.2607 | -0.5775 | 0.7045 | 0.3167 | -319.1838 | -308.8107 | -2.7636 | -2.7851 |
- | 0.5837 | 0.7590 | 2900 | 0.5913 | -0.2540 | -0.5721 | 0.7055 | 0.3181 | -318.6511 | -308.1379 | -2.7619 | -2.7834 |
- | 0.5858 | 0.7851 | 3000 | 0.5910 | -0.2625 | -0.5835 | 0.7055 | 0.3210 | -319.7853 | -308.9898 | -2.7605 | -2.7819 |
- | 0.5685 | 0.8113 | 3100 | 0.5914 | -0.2383 | -0.5571 | 0.7040 | 0.3188 | -317.1507 | -306.5707 | -2.7558 | -2.7777 |
- | 0.5753 | 0.8375 | 3200 | 0.5903 | -0.2623 | -0.5868 | 0.7020 | 0.3246 | -320.1224 | -308.9666 | -2.7567 | -2.7783 |
- | 0.5769 | 0.8636 | 3300 | 0.5900 | -0.2673 | -0.5934 | 0.7030 | 0.3260 | -320.7757 | -309.4716 | -2.7555 | -2.7771 |
- | 0.5608 | 0.8898 | 3400 | 0.5896 | -0.2716 | -0.5988 | 0.7020 | 0.3273 | -321.3196 | -309.8930 | -2.7520 | -2.7739 |
- | 0.6008 | 0.9160 | 3500 | 0.5895 | -0.2716 | -0.5994 | 0.7035 | 0.3277 | -321.3745 | -309.9000 | -2.7539 | -2.7755 |
- | 0.585 | 0.9422 | 3600 | 0.5895 | -0.2722 | -0.6000 | 0.7020 | 0.3279 | -321.4418 | -309.9531 | -2.7549 | -2.7764 |
- | 0.567 | 0.9683 | 3700 | 0.5893 | -0.2738 | -0.6022 | 0.7015 | 0.3284 | -321.6555 | -310.1171 | -2.7539 | -2.7755 |
- | 0.5834 | 0.9945 | 3800 | 0.5893 | -0.2740 | -0.6023 | 0.7025 | 0.3283 | -321.6666 | -310.1333 | -2.7525 | -2.7742 |
+ | 0.6929 | 0.0262 | 100 | 0.6930 | 0.0001 | -0.0001 | 0.5135 | 0.0002 | -261.4512 | -282.7630 | -2.8381 | -2.8655 |
+ | 0.693 | 0.0523 | 200 | 0.6928 | 0.0001 | -0.0005 | 0.5470 | 0.0007 | -261.4925 | -282.7611 | -2.8349 | -2.8626 |
+ | 0.692 | 0.0785 | 300 | 0.6921 | 0.0010 | -0.0011 | 0.6050 | 0.0021 | -261.5461 | -282.6746 | -2.8378 | -2.8650 |
+ | 0.6913 | 0.1047 | 400 | 0.6910 | 0.0036 | -0.0008 | 0.6395 | 0.0044 | -261.5211 | -282.4127 | -2.8349 | -2.8622 |
+ | 0.689 | 0.1309 | 500 | 0.6895 | 0.0049 | -0.0024 | 0.6700 | 0.0073 | -261.6805 | -282.2831 | -2.8389 | -2.8656 |
+ | 0.6875 | 0.1570 | 600 | 0.6880 | 0.0059 | -0.0047 | 0.6690 | 0.0106 | -261.9060 | -282.1841 | -2.8332 | -2.8603 |
+ | 0.6874 | 0.1832 | 700 | 0.6864 | 0.0084 | -0.0055 | 0.6785 | 0.0138 | -261.9842 | -281.9370 | -2.8342 | -2.8610 |
+ | 0.682 | 0.2094 | 800 | 0.6850 | 0.0107 | -0.0060 | 0.6800 | 0.0167 | -262.0419 | -281.7033 | -2.8307 | -2.8578 |
+ | 0.6837 | 0.2355 | 900 | 0.6840 | 0.0136 | -0.0054 | 0.6840 | 0.0190 | -261.9797 | -281.4180 | -2.8304 | -2.8573 |
+ | 0.6819 | 0.2617 | 1000 | 0.6828 | 0.0161 | -0.0054 | 0.6810 | 0.0215 | -261.9830 | -281.1678 | -2.8269 | -2.8540 |
+ | 0.6836 | 0.2879 | 1100 | 0.6818 | 0.0179 | -0.0057 | 0.6785 | 0.0236 | -262.0052 | -280.9853 | -2.8258 | -2.8529 |
+ | 0.685 | 0.3141 | 1200 | 0.6810 | 0.0221 | -0.0032 | 0.6810 | 0.0253 | -261.7610 | -280.5679 | -2.8238 | -2.8510 |
+ | 0.6785 | 0.3402 | 1300 | 0.6803 | 0.0209 | -0.0061 | 0.6840 | 0.0270 | -262.0453 | -280.6852 | -2.8259 | -2.8529 |
+ | 0.6828 | 0.3664 | 1400 | 0.6796 | 0.0217 | -0.0066 | 0.6865 | 0.0283 | -262.1007 | -280.6062 | -2.8233 | -2.8505 |
+ | 0.6795 | 0.3926 | 1500 | 0.6792 | 0.0226 | -0.0068 | 0.6830 | 0.0293 | -262.1143 | -280.5175 | -2.8250 | -2.8520 |
+ | 0.6801 | 0.4187 | 1600 | 0.6788 | 0.0194 | -0.0107 | 0.6845 | 0.0301 | -262.5066 | -280.8286 | -2.8245 | -2.8516 |
+ | 0.6839 | 0.4449 | 1700 | 0.6785 | 0.0204 | -0.0104 | 0.6855 | 0.0308 | -262.4770 | -280.7289 | -2.8261 | -2.8530 |
+ | 0.6793 | 0.4711 | 1800 | 0.6782 | 0.0188 | -0.0126 | 0.6870 | 0.0314 | -262.6961 | -280.8936 | -2.8248 | -2.8519 |
+ | 0.6766 | 0.4973 | 1900 | 0.6781 | 0.0188 | -0.0129 | 0.6810 | 0.0317 | -262.7311 | -280.8921 | -2.8281 | -2.8548 |
+ | 0.6762 | 0.5234 | 2000 | 0.6778 | 0.0190 | -0.0133 | 0.6840 | 0.0323 | -262.7651 | -280.8749 | -2.8270 | -2.8538 |
+ | 0.6796 | 0.5496 | 2100 | 0.6777 | 0.0184 | -0.0141 | 0.6795 | 0.0325 | -262.8513 | -280.9321 | -2.8299 | -2.8564 |
+ | 0.6736 | 0.5758 | 2200 | 0.6777 | 0.0181 | -0.0145 | 0.6825 | 0.0326 | -262.8893 | -280.9635 | -2.8306 | -2.8571 |
+ | 0.6779 | 0.6019 | 2300 | 0.6776 | 0.0176 | -0.0152 | 0.6875 | 0.0327 | -262.9558 | -281.0184 | -2.8281 | -2.8548 |
+ | 0.6782 | 0.6281 | 2400 | 0.6777 | 0.0179 | -0.0148 | 0.6835 | 0.0327 | -262.9155 | -280.9810 | -2.8273 | -2.8540 |
+ | 0.6753 | 0.6543 | 2500 | 0.6776 | 0.0181 | -0.0147 | 0.6805 | 0.0328 | -262.9074 | -280.9631 | -2.8256 | -2.8525 |
+ | 0.6776 | 0.6805 | 2600 | 0.6776 | 0.0181 | -0.0148 | 0.6775 | 0.0329 | -262.9167 | -280.9641 | -2.8226 | -2.8498 |
+ | 0.6774 | 0.7066 | 2700 | 0.6775 | 0.0182 | -0.0149 | 0.6860 | 0.0331 | -262.9263 | -280.9553 | -2.8261 | -2.8530 |
+ | 0.679 | 0.7328 | 2800 | 0.6774 | 0.0184 | -0.0148 | 0.6850 | 0.0332 | -262.9162 | -280.9359 | -2.8271 | -2.8539 |
+ | 0.6782 | 0.7590 | 2900 | 0.6775 | 0.0181 | -0.0150 | 0.6845 | 0.0330 | -262.9336 | -280.9681 | -2.8260 | -2.8529 |
+ | 0.6784 | 0.7851 | 3000 | 0.6774 | 0.0180 | -0.0152 | 0.6890 | 0.0332 | -262.9586 | -280.9731 | -2.8283 | -2.8550 |
+ | 0.6713 | 0.8113 | 3100 | 0.6775 | 0.0181 | -0.0149 | 0.6825 | 0.0330 | -262.9238 | -280.9596 | -2.8280 | -2.8547 |
+ | 0.6774 | 0.8375 | 3200 | 0.6774 | 0.0182 | -0.0150 | 0.6830 | 0.0332 | -262.9411 | -280.9583 | -2.8275 | -2.8543 |
+ | 0.6781 | 0.8636 | 3300 | 0.6775 | 0.0182 | -0.0148 | 0.6810 | 0.0329 | -262.9146 | -280.9559 | -2.8293 | -2.8559 |
+ | 0.6733 | 0.8898 | 3400 | 0.6775 | 0.0180 | -0.0150 | 0.6825 | 0.0330 | -262.9403 | -280.9770 | -2.8237 | -2.8508 |
+ | 0.6739 | 0.9160 | 3500 | 0.6775 | 0.0180 | -0.0150 | 0.6850 | 0.0331 | -262.9413 | -280.9686 | -2.8311 | -2.8575 |
+ | 0.6807 | 0.9422 | 3600 | 0.6775 | 0.0182 | -0.0148 | 0.6855 | 0.0330 | -262.9205 | -280.9524 | -2.8257 | -2.8527 |
+ | 0.6731 | 0.9683 | 3700 | 0.6775 | 0.0182 | -0.0147 | 0.6835 | 0.0330 | -262.9113 | -280.9514 | -2.8239 | -2.8510 |
+ | 0.675 | 0.9945 | 3800 | 0.6776 | 0.0182 | -0.0146 | 0.6855 | 0.0328 | -262.9002 | -280.9546 | -2.8233 | -2.8504 |
 
 
  ### Framework versions
 
  - PEFT 0.10.0
- - Transformers 4.40.0
+ - Transformers 4.40.2
  - Pytorch 2.2.0
  - Datasets 2.16.1
  - Tokenizers 0.19.1
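The Rewards/* columns in the table above are the implicit DPO rewards that trl's DPOTrainer logs during evaluation. A minimal sketch of how they are derived from policy and reference log-probabilities; the `beta` value used for this run is not visible in the diff, so the trl default of 0.1 is assumed:

```python
import torch

def dpo_reward_metrics(policy_chosen_logps, policy_rejected_logps,
                       ref_chosen_logps, ref_rejected_logps, beta=0.1):
    """Reproduce trl's Rewards/* metrics from per-pair summed log-probs."""
    # Implicit DPO reward: beta times the log-ratio between the trained
    # policy and the frozen reference model, per response.
    rewards_chosen = beta * (policy_chosen_logps - ref_chosen_logps)
    rewards_rejected = beta * (policy_rejected_logps - ref_rejected_logps)
    return {
        "rewards/chosen": rewards_chosen.mean(),
        "rewards/rejected": rewards_rejected.mean(),
        # Rewards/margins: mean gap between chosen and rejected rewards.
        "rewards/margins": (rewards_chosen - rewards_rejected).mean(),
        # Rewards/accuracies: fraction of pairs where chosen beats rejected.
        "rewards/accuracies": (rewards_chosen > rewards_rejected).float().mean(),
    }
```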
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57f6f6831f650677ba4077648352cd1acad1ebfe2f56602f9a1c08feea65dd25
+ oid sha256:194e6bfe4247ce75b342f152b59aa0facda256cb7c78ea0f88c5b290aaf10375
  size 335605144
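The updated adapter_model.safetensors is a LoRA adapter, not a full model, so it is loaded on top of the base model with PEFT. A minimal sketch; `"./zephyr-7b-dpo-lora"` is a placeholder path for a checkout containing this adapter file:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model named in the card's base_model metadata.
base = AutoModelForCausalLM.from_pretrained("alignment-handbook/zephyr-7b-sft-full")
tokenizer = AutoTokenizer.from_pretrained("alignment-handbook/zephyr-7b-sft-full")

# Placeholder path: point this at a local or hub copy of this repo.
model = PeftModel.from_pretrained(base, "./zephyr-7b-dpo-lora")
```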
all_results.json CHANGED
@@ -1,22 +1,9 @@
  {
    "epoch": 1.0,
-   "eval_logits/chosen": -2.7746472358703613,
-   "eval_logits/rejected": -2.752934455871582,
-   "eval_logps/chosen": -310.119873046875,
-   "eval_logps/rejected": -321.6407165527344,
-   "eval_loss": 0.5893968343734741,
-   "eval_rewards/accuracies": 0.703499972820282,
-   "eval_rewards/chosen": -0.27382245659828186,
-   "eval_rewards/margins": 0.32820531725883484,
-   "eval_rewards/rejected": -0.6020277142524719,
-   "eval_runtime": 692.2285,
-   "eval_samples": 2000,
-   "eval_samples_per_second": 2.889,
-   "eval_steps_per_second": 0.361,
    "total_flos": 0.0,
-   "train_loss": 0.6164219083351729,
-   "train_runtime": 73481.1174,
+   "train_loss": 0.680465580483626,
+   "train_runtime": 64957.9706,
    "train_samples": 61134,
-   "train_samples_per_second": 0.832,
-   "train_steps_per_second": 0.052
+   "train_samples_per_second": 0.941,
+   "train_steps_per_second": 0.059
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
    "epoch": 1.0,
    "total_flos": 0.0,
-   "train_loss": 0.6164219083351729,
-   "train_runtime": 73481.1174,
+   "train_loss": 0.680465580483626,
+   "train_runtime": 64957.9706,
    "train_samples": 61134,
-   "train_samples_per_second": 0.832,
-   "train_steps_per_second": 0.052
+   "train_samples_per_second": 0.941,
+   "train_steps_per_second": 0.059
  }
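The updated throughput figures are internally consistent; a quick check against the committed values:

```python
# Values from the new train_results.json above.
train_samples = 61134
train_runtime = 64957.9706  # seconds

print(round(train_samples / train_runtime, 3))  # 0.941 == train_samples_per_second
```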
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff