Jan Majkutewicz
committed on
Model save

- README.md +49 -52
- adapter_model.safetensors +1 -1
- all_results.json +4 -17
- train_results.json +4 -4
- trainer_state.json +0 -0
README.md
CHANGED
@@ -2,13 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
 base_model: alignment-handbook/zephyr-7b-sft-full
-datasets:
-- HuggingFaceH4/ultrafeedback_binarized
 model-index:
 - name: zephyr-7b-dpo-lora
   results: []
@@ -19,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-dpo-lora
 
-This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen:
-- Rewards/rejected: -0.
-- Rewards/accuracies: 0.
-- Rewards/margins: 0.
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -2.
-- Logits/chosen: -2.
+- Loss: 0.6776
+- Rewards/chosen: 0.0182
+- Rewards/rejected: -0.0146
+- Rewards/accuracies: 0.6855
+- Rewards/margins: 0.0328
+- Logps/rejected: -262.9002
+- Logps/chosen: -280.9546
+- Logits/rejected: -2.8233
+- Logits/chosen: -2.8504
 
 ## Model description
 
@@ -63,50 +60,50 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.6929 | 0.0262 | 100 | 0.6930 |
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 0.6929 | 0.0262 | 100 | 0.6930 | 0.0001 | -0.0001 | 0.5135 | 0.0002 | -261.4512 | -282.7630 | -2.8381 | -2.8655 |
+| 0.693 | 0.0523 | 200 | 0.6928 | 0.0001 | -0.0005 | 0.5470 | 0.0007 | -261.4925 | -282.7611 | -2.8349 | -2.8626 |
+| 0.692 | 0.0785 | 300 | 0.6921 | 0.0010 | -0.0011 | 0.6050 | 0.0021 | -261.5461 | -282.6746 | -2.8378 | -2.8650 |
+| 0.6913 | 0.1047 | 400 | 0.6910 | 0.0036 | -0.0008 | 0.6395 | 0.0044 | -261.5211 | -282.4127 | -2.8349 | -2.8622 |
+| 0.689 | 0.1309 | 500 | 0.6895 | 0.0049 | -0.0024 | 0.6700 | 0.0073 | -261.6805 | -282.2831 | -2.8389 | -2.8656 |
+| 0.6875 | 0.1570 | 600 | 0.6880 | 0.0059 | -0.0047 | 0.6690 | 0.0106 | -261.9060 | -282.1841 | -2.8332 | -2.8603 |
+| 0.6874 | 0.1832 | 700 | 0.6864 | 0.0084 | -0.0055 | 0.6785 | 0.0138 | -261.9842 | -281.9370 | -2.8342 | -2.8610 |
+| 0.682 | 0.2094 | 800 | 0.6850 | 0.0107 | -0.0060 | 0.6800 | 0.0167 | -262.0419 | -281.7033 | -2.8307 | -2.8578 |
+| 0.6837 | 0.2355 | 900 | 0.6840 | 0.0136 | -0.0054 | 0.6840 | 0.0190 | -261.9797 | -281.4180 | -2.8304 | -2.8573 |
+| 0.6819 | 0.2617 | 1000 | 0.6828 | 0.0161 | -0.0054 | 0.6810 | 0.0215 | -261.9830 | -281.1678 | -2.8269 | -2.8540 |
+| 0.6836 | 0.2879 | 1100 | 0.6818 | 0.0179 | -0.0057 | 0.6785 | 0.0236 | -262.0052 | -280.9853 | -2.8258 | -2.8529 |
+| 0.685 | 0.3141 | 1200 | 0.6810 | 0.0221 | -0.0032 | 0.6810 | 0.0253 | -261.7610 | -280.5679 | -2.8238 | -2.8510 |
+| 0.6785 | 0.3402 | 1300 | 0.6803 | 0.0209 | -0.0061 | 0.6840 | 0.0270 | -262.0453 | -280.6852 | -2.8259 | -2.8529 |
+| 0.6828 | 0.3664 | 1400 | 0.6796 | 0.0217 | -0.0066 | 0.6865 | 0.0283 | -262.1007 | -280.6062 | -2.8233 | -2.8505 |
+| 0.6795 | 0.3926 | 1500 | 0.6792 | 0.0226 | -0.0068 | 0.6830 | 0.0293 | -262.1143 | -280.5175 | -2.8250 | -2.8520 |
+| 0.6801 | 0.4187 | 1600 | 0.6788 | 0.0194 | -0.0107 | 0.6845 | 0.0301 | -262.5066 | -280.8286 | -2.8245 | -2.8516 |
+| 0.6839 | 0.4449 | 1700 | 0.6785 | 0.0204 | -0.0104 | 0.6855 | 0.0308 | -262.4770 | -280.7289 | -2.8261 | -2.8530 |
+| 0.6793 | 0.4711 | 1800 | 0.6782 | 0.0188 | -0.0126 | 0.6870 | 0.0314 | -262.6961 | -280.8936 | -2.8248 | -2.8519 |
+| 0.6766 | 0.4973 | 1900 | 0.6781 | 0.0188 | -0.0129 | 0.6810 | 0.0317 | -262.7311 | -280.8921 | -2.8281 | -2.8548 |
+| 0.6762 | 0.5234 | 2000 | 0.6778 | 0.0190 | -0.0133 | 0.6840 | 0.0323 | -262.7651 | -280.8749 | -2.8270 | -2.8538 |
+| 0.6796 | 0.5496 | 2100 | 0.6777 | 0.0184 | -0.0141 | 0.6795 | 0.0325 | -262.8513 | -280.9321 | -2.8299 | -2.8564 |
+| 0.6736 | 0.5758 | 2200 | 0.6777 | 0.0181 | -0.0145 | 0.6825 | 0.0326 | -262.8893 | -280.9635 | -2.8306 | -2.8571 |
+| 0.6779 | 0.6019 | 2300 | 0.6776 | 0.0176 | -0.0152 | 0.6875 | 0.0327 | -262.9558 | -281.0184 | -2.8281 | -2.8548 |
+| 0.6782 | 0.6281 | 2400 | 0.6777 | 0.0179 | -0.0148 | 0.6835 | 0.0327 | -262.9155 | -280.9810 | -2.8273 | -2.8540 |
+| 0.6753 | 0.6543 | 2500 | 0.6776 | 0.0181 | -0.0147 | 0.6805 | 0.0328 | -262.9074 | -280.9631 | -2.8256 | -2.8525 |
+| 0.6776 | 0.6805 | 2600 | 0.6776 | 0.0181 | -0.0148 | 0.6775 | 0.0329 | -262.9167 | -280.9641 | -2.8226 | -2.8498 |
+| 0.6774 | 0.7066 | 2700 | 0.6775 | 0.0182 | -0.0149 | 0.6860 | 0.0331 | -262.9263 | -280.9553 | -2.8261 | -2.8530 |
+| 0.679 | 0.7328 | 2800 | 0.6774 | 0.0184 | -0.0148 | 0.6850 | 0.0332 | -262.9162 | -280.9359 | -2.8271 | -2.8539 |
+| 0.6782 | 0.7590 | 2900 | 0.6775 | 0.0181 | -0.0150 | 0.6845 | 0.0330 | -262.9336 | -280.9681 | -2.8260 | -2.8529 |
+| 0.6784 | 0.7851 | 3000 | 0.6774 | 0.0180 | -0.0152 | 0.6890 | 0.0332 | -262.9586 | -280.9731 | -2.8283 | -2.8550 |
+| 0.6713 | 0.8113 | 3100 | 0.6775 | 0.0181 | -0.0149 | 0.6825 | 0.0330 | -262.9238 | -280.9596 | -2.8280 | -2.8547 |
+| 0.6774 | 0.8375 | 3200 | 0.6774 | 0.0182 | -0.0150 | 0.6830 | 0.0332 | -262.9411 | -280.9583 | -2.8275 | -2.8543 |
+| 0.6781 | 0.8636 | 3300 | 0.6775 | 0.0182 | -0.0148 | 0.6810 | 0.0329 | -262.9146 | -280.9559 | -2.8293 | -2.8559 |
+| 0.6733 | 0.8898 | 3400 | 0.6775 | 0.0180 | -0.0150 | 0.6825 | 0.0330 | -262.9403 | -280.9770 | -2.8237 | -2.8508 |
+| 0.6739 | 0.9160 | 3500 | 0.6775 | 0.0180 | -0.0150 | 0.6850 | 0.0331 | -262.9413 | -280.9686 | -2.8311 | -2.8575 |
+| 0.6807 | 0.9422 | 3600 | 0.6775 | 0.0182 | -0.0148 | 0.6855 | 0.0330 | -262.9205 | -280.9524 | -2.8257 | -2.8527 |
+| 0.6731 | 0.9683 | 3700 | 0.6775 | 0.0182 | -0.0147 | 0.6835 | 0.0330 | -262.9113 | -280.9514 | -2.8239 | -2.8510 |
+| 0.675 | 0.9945 | 3800 | 0.6776 | 0.0182 | -0.0146 | 0.6855 | 0.0328 | -262.9002 | -280.9546 | -2.8233 | -2.8504 |
 
 
 ### Framework versions
 
 - PEFT 0.10.0
-- Transformers 4.40.
+- Transformers 4.40.2
 - Pytorch 2.2.0
 - Datasets 2.16.1
 - Tokenizers 0.19.1
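Since the commit publishes a LoRA adapter (`library_name: peft`) rather than full model weights, inference requires loading the base model and attaching the adapter on top. Below is a minimal loading sketch using the framework versions listed above; the adapter repo id is a placeholder, not something this commit confirms:

```python
# Hedged sketch: attach this LoRA adapter to its zephyr-7b-sft-full base model.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "alignment-handbook/zephyr-7b-sft-full"
ADAPTER_ID = "<username>/zephyr-7b-dpo-lora"  # placeholder: substitute the actual Hub repo id

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype="auto")
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # loads adapter_model.safetensors

inputs = tokenizer("Explain direct preference optimization.", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```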
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:194e6bfe4247ce75b342f152b59aa0facda256cb7c78ea0f88c5b290aaf10375
 size 335605144
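The adapter itself is stored through Git LFS, so the diff above changes only the pointer's object hash; the byte size is unchanged. A downloaded copy can be checked against the new pointer as follows (the local path is an assumption):

```python
# Verify a downloaded adapter file against the LFS pointer above.
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "194e6bfe4247ce75b342f152b59aa0facda256cb7c78ea0f88c5b290aaf10375"
EXPECTED_SIZE = 335605144  # bytes, from the pointer's "size" field

path = Path("adapter_model.safetensors")  # assumed local download location
assert path.stat().st_size == EXPECTED_SIZE, "size does not match the pointer"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 does not match the pointer"
print("adapter file matches the LFS pointer")
```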
all_results.json
CHANGED
@@ -1,22 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": -2.7746472358703613,
-    "eval_logits/rejected": -2.752934455871582,
-    "eval_logps/chosen": -310.119873046875,
-    "eval_logps/rejected": -321.6407165527344,
-    "eval_loss": 0.5893968343734741,
-    "eval_rewards/accuracies": 0.703499972820282,
-    "eval_rewards/chosen": -0.27382245659828186,
-    "eval_rewards/margins": 0.32820531725883484,
-    "eval_rewards/rejected": -0.6020277142524719,
-    "eval_runtime": 692.2285,
-    "eval_samples": 2000,
-    "eval_samples_per_second": 2.889,
-    "eval_steps_per_second": 0.361,
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.680465580483626,
+    "train_runtime": 64957.9706,
     "train_samples": 61134,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.941,
+    "train_steps_per_second": 0.059
 }
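The derived throughput fields are reproducible from the raw counts, which is a quick self-consistency check on the new values (the effective batch size at the end is an inference from two rounded rates, not a logged hyperparameter):

```python
# Reproduce the derived throughput fields in all_results.json.
train_samples = 61134
train_runtime = 64957.9706  # seconds

samples_per_sec = train_samples / train_runtime
print(f"{samples_per_sec:.3f}")  # 0.941, matches train_samples_per_second

# samples/s divided by steps/s gives samples consumed per optimizer step,
# i.e. the effective batch size; both logged rates are rounded.
print(f"{samples_per_sec / 0.059:.0f}")  # ~16
```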
train_results.json
CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.680465580483626,
+    "train_runtime": 64957.9706,
     "train_samples": 61134,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.941,
+    "train_steps_per_second": 0.059
 }
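The loss values in both JSON files and the model card sit just below ln 2 ≈ 0.6931, which is what the DPO objective predicts: per example the loss is −log σ(reward margin), so it starts at ln 2 when chosen and rejected rewards are tied (the first validation loss above is 0.6930) and declines only slowly for small margins. A quick check, assuming the logged margins already include the DPO β factor:

```python
import math

# DPO loss per example: -log(sigmoid(margin)), where margin is
# rewards/chosen - rewards/rejected as logged by the trainer.
def dpo_loss(margin: float) -> float:
    return math.log1p(math.exp(-margin))  # numerically stable -log(sigmoid(m))

print(f"{dpo_loss(0.0):.4f}")     # 0.6931 = ln 2, the zero-margin starting point
print(f"{dpo_loss(0.0328):.4f}")  # 0.6769, close to the final eval loss of 0.6776
```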
trainer_state.json
CHANGED
The diff for this file is too large to render. See raw diff.