Model save
Browse files
- README.md +124 -0
- adapter_model.safetensors +1 -1
- all_results.json +21 -0
- eval_results.json +16 -0
- runs/Mar06_15-34-32_SYS-4029GP-TRT/events.out.tfevents.1709710548.SYS-4029GP-TRT.2258942.0 +2 -2
- runs/Mar06_15-34-32_SYS-4029GP-TRT/events.out.tfevents.1709765707.SYS-4029GP-TRT.2258942.1 +3 -0
- train_results.json +8 -0
- trainer_state.json +0 -0
README.md
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
library_name: peft
|
4 |
+
tags:
|
5 |
+
- trl
|
6 |
+
- dpo
|
7 |
+
- generated_from_trainer
|
8 |
+
base_model: mistralai/Mistral-7B-v0.1
|
9 |
+
model-index:
|
10 |
+
- name: zephyr-7b-dpo-qlora
|
11 |
+
results: []
|
12 |
+
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
# zephyr-7b-dpo-qlora
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (the Trainer did not record a dataset name).
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.4880
|
22 |
+
- Rewards/chosen: -2.8615
|
23 |
+
- Rewards/rejected: -3.9313
|
24 |
+
- Rewards/accuracies: 0.7262
|
25 |
+
- Rewards/margins: 1.0698
|
26 |
+
- Logps/rejected: -626.2534
|
27 |
+
- Logps/chosen: -549.3907
|
28 |
+
- Logits/rejected: 1.3412
|
29 |
+
- Logits/chosen: 0.7713
|
30 |
+
|
31 |
+
## Model description
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Intended uses & limitations
|
36 |
+
|
37 |
+
More information needed
|
38 |
+
|
39 |
+
## Training and evaluation data
|
40 |
+
|
41 |
+
More information needed
|
42 |
+
|
43 |
+
## Training procedure
|
44 |
+
|
45 |
+
### Training hyperparameters
|
46 |
+
|
47 |
+
The following hyperparameters were used during training:
|
48 |
+
- learning_rate: 5e-06
|
49 |
+
- train_batch_size: 1
|
50 |
+
- eval_batch_size: 8
|
51 |
+
- seed: 42
|
52 |
+
- distributed_type: multi-GPU
|
53 |
+
- num_devices: 3
|
54 |
+
- gradient_accumulation_steps: 4
|
55 |
+
- total_train_batch_size: 12
|
56 |
+
- total_eval_batch_size: 24
|
57 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
58 |
+
- lr_scheduler_type: cosine
|
59 |
+
- lr_scheduler_warmup_ratio: 0.1
|
60 |
+
- num_epochs: 1
|
61 |
+
|
62 |
+
### Training results
|
63 |
+
|
64 |
+
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
65 |
+
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
66 |
+
| 0.6884 | 0.02 | 100 | 0.6868 | 0.0390 | 0.0284 | 0.6146 | 0.0106 | -230.2779 | -259.3362 | -2.3476 | -2.3366 |
|
67 |
+
| 0.6654 | 0.04 | 200 | 0.6657 | 0.0334 | -0.0194 | 0.6399 | 0.0528 | -235.0622 | -259.9052 | -2.2635 | -2.2585 |
|
68 |
+
| 0.6346 | 0.06 | 300 | 0.6431 | -0.2564 | -0.3692 | 0.6533 | 0.1128 | -270.0399 | -288.8787 | -2.2107 | -2.2217 |
|
69 |
+
| 0.5888 | 0.08 | 400 | 0.6162 | -0.4195 | -0.6312 | 0.6518 | 0.2118 | -296.2420 | -305.1884 | -1.9579 | -1.9905 |
|
70 |
+
| 0.5806 | 0.1 | 500 | 0.5916 | -1.3171 | -1.6507 | 0.6637 | 0.3337 | -398.1920 | -394.9468 | -0.4990 | -0.5253 |
|
71 |
+
| 0.6219 | 0.12 | 600 | 0.5753 | -1.1344 | -1.5063 | 0.6503 | 0.3719 | -383.7478 | -376.6808 | 0.0384 | -0.0361 |
|
72 |
+
| 0.5586 | 0.14 | 700 | 0.5733 | -0.7892 | -1.1878 | 0.6667 | 0.3986 | -351.8957 | -342.1609 | 0.3073 | 0.2473 |
|
73 |
+
| 0.6123 | 0.16 | 800 | 0.5578 | -1.2731 | -1.7042 | 0.6652 | 0.4311 | -403.5397 | -390.5542 | 1.0809 | 1.0327 |
|
74 |
+
| 0.555 | 0.18 | 900 | 0.5461 | -1.1941 | -1.8087 | 0.6771 | 0.6146 | -413.9875 | -382.6491 | 1.4158 | 1.3993 |
|
75 |
+
| 0.4905 | 0.2 | 1000 | 0.5463 | -1.2469 | -1.9528 | 0.6890 | 0.7058 | -428.3945 | -387.9334 | 0.8211 | 0.7732 |
|
76 |
+
| 0.5214 | 0.22 | 1100 | 0.5356 | -1.2786 | -1.8992 | 0.6979 | 0.6206 | -423.0347 | -391.1008 | 1.3945 | 1.4163 |
|
77 |
+
| 0.4988 | 0.24 | 1200 | 0.5307 | -1.2179 | -1.9293 | 0.6979 | 0.7115 | -426.0503 | -385.0261 | 1.0273 | 0.9228 |
|
78 |
+
| 0.5324 | 0.26 | 1300 | 0.5320 | -1.4512 | -2.1779 | 0.7024 | 0.7267 | -450.9060 | -408.3595 | 0.9344 | 0.5917 |
|
79 |
+
| 0.5286 | 0.27 | 1400 | 0.5193 | -1.3777 | -2.1412 | 0.7039 | 0.7634 | -447.2371 | -401.0145 | 1.1979 | 0.8244 |
|
80 |
+
| 0.6095 | 0.29 | 1500 | 0.5206 | -1.1730 | -1.8883 | 0.7009 | 0.7153 | -421.9497 | -380.5422 | 0.3598 | -0.0238 |
|
81 |
+
| 0.5627 | 0.31 | 1600 | 0.5225 | -1.8811 | -2.7733 | 0.6935 | 0.8922 | -510.4463 | -451.3462 | 0.7395 | 0.4147 |
|
82 |
+
| 0.5222 | 0.33 | 1700 | 0.5210 | -1.1883 | -1.8477 | 0.7143 | 0.6593 | -417.8853 | -382.0739 | -0.0643 | -0.3844 |
|
83 |
+
| 0.5163 | 0.35 | 1800 | 0.5219 | -1.1780 | -1.9783 | 0.7247 | 0.8003 | -430.9522 | -381.0428 | 1.3000 | 0.9605 |
|
84 |
+
| 0.511 | 0.37 | 1900 | 0.5214 | -1.8532 | -2.7395 | 0.7188 | 0.8863 | -507.0662 | -448.5622 | 1.3052 | 0.9550 |
|
85 |
+
| 0.484 | 0.39 | 2000 | 0.5161 | -1.7800 | -2.6182 | 0.7188 | 0.8382 | -494.9370 | -441.2427 | 1.6339 | 1.3132 |
|
86 |
+
| 0.4863 | 0.41 | 2100 | 0.5183 | -2.7826 | -3.8427 | 0.7158 | 1.0600 | -617.3857 | -541.5035 | 2.3428 | 2.0461 |
|
87 |
+
| 0.5233 | 0.43 | 2200 | 0.5115 | -1.7702 | -2.6185 | 0.7173 | 0.8483 | -494.9643 | -440.2580 | 0.9791 | 0.5628 |
|
88 |
+
| 0.5343 | 0.45 | 2300 | 0.5079 | -1.4313 | -2.2210 | 0.7202 | 0.7897 | -455.2213 | -406.3701 | 1.0255 | 0.5469 |
|
89 |
+
| 0.5251 | 0.47 | 2400 | 0.5088 | -2.7117 | -3.7995 | 0.7173 | 1.0878 | -613.0708 | -534.4126 | 2.1153 | 1.5133 |
|
90 |
+
| 0.5104 | 0.49 | 2500 | 0.5006 | -2.9970 | -4.0022 | 0.7202 | 1.0052 | -633.3362 | -562.9377 | 2.2889 | 1.7461 |
|
91 |
+
| 0.429 | 0.51 | 2600 | 0.5238 | -3.6282 | -4.8032 | 0.7143 | 1.1750 | -713.4386 | -626.0600 | 3.6631 | 3.2827 |
|
92 |
+
| 0.4255 | 0.53 | 2700 | 0.4993 | -2.4946 | -3.5067 | 0.7188 | 1.0121 | -583.7889 | -512.7010 | 2.1920 | 1.6873 |
|
93 |
+
| 0.4733 | 0.55 | 2800 | 0.4990 | -3.2116 | -4.2800 | 0.7202 | 1.0684 | -661.1174 | -584.3987 | 2.6796 | 2.2111 |
|
94 |
+
| 0.5394 | 0.57 | 2900 | 0.5040 | -2.9132 | -3.9276 | 0.7158 | 1.0143 | -625.8766 | -554.5653 | 1.7758 | 1.2351 |
|
95 |
+
| 0.5128 | 0.59 | 3000 | 0.5061 | -2.5974 | -3.5725 | 0.7173 | 0.9750 | -590.3638 | -522.9818 | 2.1284 | 1.6663 |
|
96 |
+
| 0.5215 | 0.61 | 3100 | 0.4960 | -2.2632 | -3.1876 | 0.7188 | 0.9245 | -551.8787 | -489.5560 | 1.4432 | 0.8594 |
|
97 |
+
| 0.5023 | 0.63 | 3200 | 0.4999 | -2.8630 | -3.9641 | 0.7128 | 1.1011 | -629.5237 | -549.5392 | 1.9057 | 1.2951 |
|
98 |
+
| 0.5042 | 0.65 | 3300 | 0.4904 | -2.8448 | -3.8793 | 0.7307 | 1.0345 | -621.0500 | -547.7245 | 1.9776 | 1.4334 |
|
99 |
+
| 0.498 | 0.67 | 3400 | 0.4879 | -2.8423 | -3.8097 | 0.7321 | 0.9673 | -614.0843 | -547.4754 | 1.4781 | 0.9608 |
|
100 |
+
| 0.4987 | 0.69 | 3500 | 0.4902 | -2.6926 | -3.7172 | 0.7307 | 1.0246 | -604.8372 | -532.4977 | 1.3819 | 0.8557 |
|
101 |
+
| 0.5824 | 0.71 | 3600 | 0.4908 | -2.5673 | -3.5933 | 0.7292 | 1.0260 | -592.4445 | -519.9661 | 1.1037 | 0.5336 |
|
102 |
+
| 0.425 | 0.73 | 3700 | 0.4906 | -2.7666 | -3.8246 | 0.7307 | 1.0580 | -615.5826 | -539.9020 | 1.2903 | 0.7257 |
|
103 |
+
| 0.4756 | 0.75 | 3800 | 0.4916 | -2.8732 | -3.9598 | 0.7292 | 1.0866 | -629.0961 | -550.5607 | 1.5015 | 0.9387 |
|
104 |
+
| 0.4597 | 0.77 | 3900 | 0.4896 | -2.8617 | -3.9425 | 0.7277 | 1.0808 | -627.3712 | -549.4086 | 1.3350 | 0.7636 |
|
105 |
+
| 0.4649 | 0.79 | 4000 | 0.4885 | -2.8682 | -3.9370 | 0.7232 | 1.0688 | -626.8230 | -550.0615 | 1.2903 | 0.7213 |
|
106 |
+
| 0.4689 | 0.8 | 4100 | 0.4880 | -2.8425 | -3.9060 | 0.7232 | 1.0634 | -623.7166 | -547.4950 | 1.2495 | 0.6763 |
|
107 |
+
| 0.4275 | 0.82 | 4200 | 0.4877 | -2.8671 | -3.9353 | 0.7232 | 1.0682 | -626.6478 | -549.9532 | 1.3067 | 0.7331 |
|
108 |
+
| 0.5325 | 0.84 | 4300 | 0.4881 | -2.8855 | -3.9630 | 0.7262 | 1.0775 | -629.4202 | -551.7905 | 1.3795 | 0.8070 |
|
109 |
+
| 0.532 | 0.86 | 4400 | 0.4881 | -2.8672 | -3.9406 | 0.7277 | 1.0734 | -627.1785 | -549.9610 | 1.3435 | 0.7732 |
|
110 |
+
| 0.4558 | 0.88 | 4500 | 0.4879 | -2.8560 | -3.9259 | 0.7262 | 1.0699 | -625.7067 | -548.8392 | 1.3411 | 0.7711 |
|
111 |
+
| 0.5541 | 0.9 | 4600 | 0.4882 | -2.8601 | -3.9295 | 0.7262 | 1.0694 | -626.0704 | -549.2481 | 1.3428 | 0.7729 |
|
112 |
+
| 0.5743 | 0.92 | 4700 | 0.4879 | -2.8641 | -3.9344 | 0.7262 | 1.0702 | -626.5551 | -549.6526 | 1.3445 | 0.7755 |
|
113 |
+
| 0.4657 | 0.94 | 4800 | 0.4880 | -2.8626 | -3.9322 | 0.7292 | 1.0696 | -626.3386 | -549.4993 | 1.3437 | 0.7749 |
|
114 |
+
| 0.5126 | 0.96 | 4900 | 0.4880 | -2.8636 | -3.9339 | 0.7277 | 1.0703 | -626.5126 | -549.6042 | 1.3440 | 0.7748 |
|
115 |
+
| 0.3967 | 0.98 | 5000 | 0.4880 | -2.8643 | -3.9344 | 0.7262 | 1.0702 | -626.5614 | -549.6658 | 1.3424 | 0.7736 |
|
116 |
+
|
117 |
+
|
118 |
+
### Framework versions
|
119 |
+
|
120 |
+
- PEFT 0.7.1
|
121 |
+
- Transformers 4.36.2
|
122 |
+
- PyTorch 2.2.1+cu121
|
123 |
+
- Datasets 2.14.6
|
124 |
+
- Tokenizers 0.15.2
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 671150064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:410e76cb28e04cb1edadb3e715cbc6d5d9bcad7252058a6ba7d0dcf856bb59e8
|
3 |
size 671150064
|
all_results.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": 0.7713278532028198,
|
4 |
+
"eval_logits/rejected": 1.3411734104156494,
|
5 |
+
"eval_logps/chosen": -549.3906860351562,
|
6 |
+
"eval_logps/rejected": -626.2533569335938,
|
7 |
+
"eval_loss": 0.487976998090744,
|
8 |
+
"eval_rewards/accuracies": 0.726190447807312,
|
9 |
+
"eval_rewards/chosen": -2.8615000247955322,
|
10 |
+
"eval_rewards/margins": 1.0698496103286743,
|
11 |
+
"eval_rewards/rejected": -3.931349515914917,
|
12 |
+
"eval_runtime": 475.1451,
|
13 |
+
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 4.209,
|
15 |
+
"eval_steps_per_second": 0.177,
|
16 |
+
"train_loss": 0.5211080308048501,
|
17 |
+
"train_runtime": 54683.4674,
|
18 |
+
"train_samples": 61135,
|
19 |
+
"train_samples_per_second": 1.118,
|
20 |
+
"train_steps_per_second": 0.093
|
21 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": 0.7713278532028198,
|
4 |
+
"eval_logits/rejected": 1.3411734104156494,
|
5 |
+
"eval_logps/chosen": -549.3906860351562,
|
6 |
+
"eval_logps/rejected": -626.2533569335938,
|
7 |
+
"eval_loss": 0.487976998090744,
|
8 |
+
"eval_rewards/accuracies": 0.726190447807312,
|
9 |
+
"eval_rewards/chosen": -2.8615000247955322,
|
10 |
+
"eval_rewards/margins": 1.0698496103286743,
|
11 |
+
"eval_rewards/rejected": -3.931349515914917,
|
12 |
+
"eval_runtime": 475.1451,
|
13 |
+
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 4.209,
|
15 |
+
"eval_steps_per_second": 0.177
|
16 |
+
}
|
runs/Mar06_15-34-32_SYS-4029GP-TRT/events.out.tfevents.1709710548.SYS-4029GP-TRT.2258942.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aebe0cc357f5f81188ad1816b2963875c4c6e54c7629b08b52184d10067bac7
|
3 |
+
size 365478
|
runs/Mar06_15-34-32_SYS-4029GP-TRT/events.out.tfevents.1709765707.SYS-4029GP-TRT.2258942.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a9d44dc7554be151800f908ce88c31d5a67165fc6c05b447c1de764d5534021
|
3 |
+
size 828
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5211080308048501,
|
4 |
+
"train_runtime": 54683.4674,
|
5 |
+
"train_samples": 61135,
|
6 |
+
"train_samples_per_second": 1.118,
|
7 |
+
"train_steps_per_second": 0.093
|
8 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|