lewtun HF staff committed on
Commit
df12624
1 Parent(s): e64e0ba

Model save

Browse files
README.md CHANGED
@@ -2,35 +2,31 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - HuggingFaceH4/ultrafeedback_binarized_fixed
12
  base_model: mistralai/Mistral-7B-v0.1
13
  model-index:
14
- - name: zephyr-7b-dpo-lora
15
  results: []
16
  ---
17
 
18
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
  should probably proofread and complete it, then remove this comment. -->
20
 
21
- # zephyr-7b-dpo-lora
22
 
23
- This model is a fine-tuned version of [lewtun/zephyr-7b-sft-qlora](https://huggingface.co/lewtun/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized_fixed dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 0.5133
26
- - Rewards/chosen: -1.2447
27
- - Rewards/rejected: -2.1118
28
- - Rewards/accuracies: 0.7539
29
- - Rewards/margins: 0.8671
30
- - Logps/rejected: -457.0128
31
- - Logps/chosen: -385.9082
32
- - Logits/rejected: 1.2523
33
- - Logits/chosen: 0.7989
34
 
35
  ## Model description
36
 
@@ -66,25 +62,25 @@ The following hyperparameters were used during training:
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
68
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
69
- | 0.6918 | 0.05 | 100 | 0.6914 | 0.0059 | 0.0018 | 0.7109 | 0.0041 | -245.6464 | -260.8458 | -2.1364 | -2.2285 |
70
- | 0.6619 | 0.1 | 200 | 0.6497 | -0.0263 | -0.1318 | 0.7070 | 0.1056 | -259.0110 | -264.0628 | -2.0537 | -2.1558 |
71
- | 0.6077 | 0.16 | 300 | 0.6083 | -0.2610 | -0.5505 | 0.7188 | 0.2895 | -300.8820 | -287.5379 | -1.8505 | -1.9870 |
72
- | 0.5813 | 0.21 | 400 | 0.5857 | -0.5019 | -0.9224 | 0.7344 | 0.4205 | -338.0691 | -311.6292 | -1.7834 | -1.9347 |
73
- | 0.6033 | 0.26 | 500 | 0.5684 | -0.6480 | -1.1327 | 0.7578 | 0.4847 | -359.0957 | -326.2360 | -1.0646 | -1.2844 |
74
- | 0.5338 | 0.31 | 600 | 0.5431 | -0.9068 | -1.6081 | 0.7539 | 0.7013 | -406.6367 | -352.1152 | -0.0058 | -0.3463 |
75
- | 0.5235 | 0.37 | 700 | 0.5304 | -1.0331 | -1.8281 | 0.7461 | 0.7951 | -428.6434 | -364.7436 | 0.2246 | -0.1374 |
76
- | 0.5241 | 0.42 | 800 | 0.5276 | -0.9760 | -1.7110 | 0.7578 | 0.7350 | -416.9325 | -359.0362 | 0.3361 | -0.0432 |
77
- | 0.5332 | 0.47 | 900 | 0.5257 | -1.2407 | -2.0657 | 0.75 | 0.8250 | -452.3993 | -385.5118 | 0.8926 | 0.4681 |
78
- | 0.531 | 0.52 | 1000 | 0.5232 | -1.1277 | -1.8553 | 0.7461 | 0.7276 | -431.3623 | -374.2120 | 0.2825 | -0.0766 |
79
- | 0.4864 | 0.58 | 1100 | 0.5172 | -1.1670 | -1.9894 | 0.75 | 0.8224 | -444.7675 | -378.1358 | 1.1814 | 0.7409 |
80
- | 0.5467 | 0.63 | 1200 | 0.5196 | -1.3633 | -2.1690 | 0.7383 | 0.8058 | -462.7306 | -397.7628 | 1.3020 | 0.8593 |
81
- | 0.5125 | 0.68 | 1300 | 0.5179 | -1.2033 | -2.0041 | 0.7422 | 0.8009 | -446.2437 | -381.7657 | 1.1045 | 0.6639 |
82
- | 0.4881 | 0.73 | 1400 | 0.5158 | -1.2792 | -2.1334 | 0.7539 | 0.8543 | -459.1728 | -389.3554 | 1.1891 | 0.7445 |
83
- | 0.5273 | 0.78 | 1500 | 0.5135 | -1.2081 | -2.0746 | 0.7539 | 0.8664 | -453.2860 | -382.2505 | 1.2533 | 0.7973 |
84
- | 0.5317 | 0.84 | 1600 | 0.5140 | -1.2815 | -2.1592 | 0.75 | 0.8777 | -461.7518 | -389.5859 | 1.2752 | 0.8202 |
85
- | 0.5384 | 0.89 | 1700 | 0.5134 | -1.2549 | -2.1287 | 0.7539 | 0.8738 | -458.7038 | -386.9291 | 1.2938 | 0.8384 |
86
- | 0.5619 | 0.94 | 1800 | 0.5135 | -1.2438 | -2.1108 | 0.7578 | 0.8670 | -456.9133 | -385.8195 | 1.2532 | 0.7986 |
87
- | 0.5169 | 0.99 | 1900 | 0.5133 | -1.2447 | -2.1118 | 0.7539 | 0.8671 | -457.0128 | -385.9082 | 1.2523 | 0.7989 |
88
 
89
 
90
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
8
  base_model: mistralai/Mistral-7B-v0.1
9
  model-index:
10
+ - name: zephyr-7b-dpo-qlora
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # zephyr-7b-dpo-qlora
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (the training dataset was not recorded in the model card metadata).
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5325
22
+ - Rewards/chosen: -1.2325
23
+ - Rewards/rejected: -2.0565
24
+ - Rewards/accuracies: 0.7656
25
+ - Rewards/margins: 0.8240
26
+ - Logps/rejected: -457.4398
27
+ - Logps/chosen: -373.4022
28
+ - Logits/rejected: 0.7596
29
+ - Logits/chosen: 0.5001
30
 
31
  ## Model description
32
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.6916 | 0.05 | 100 | 0.6912 | 0.0059 | 0.0019 | 0.6484 | 0.0041 | -251.6075 | -249.5596 | -2.2040 | -2.2621 |
66
+ | 0.655 | 0.1 | 200 | 0.6498 | -0.0559 | -0.1762 | 0.7070 | 0.1203 | -269.4106 | -255.7421 | -2.1011 | -2.1614 |
67
+ | 0.6342 | 0.16 | 300 | 0.6146 | -0.3407 | -0.6269 | 0.7031 | 0.2862 | -314.4839 | -284.2224 | -1.9037 | -1.9793 |
68
+ | 0.6121 | 0.21 | 400 | 0.5946 | -0.4657 | -0.8916 | 0.7031 | 0.4259 | -340.9551 | -296.7203 | -1.8717 | -1.9543 |
69
+ | 0.5973 | 0.26 | 500 | 0.5938 | -0.3681 | -0.7766 | 0.7305 | 0.4085 | -329.4522 | -286.9666 | -1.8440 | -1.9282 |
70
+ | 0.5473 | 0.31 | 600 | 0.5774 | -0.6893 | -1.2264 | 0.7344 | 0.5371 | -374.4341 | -319.0812 | -1.6815 | -1.7726 |
71
+ | 0.5792 | 0.37 | 700 | 0.5709 | -0.6635 | -1.2100 | 0.7578 | 0.5465 | -372.7989 | -316.5072 | -1.4783 | -1.5775 |
72
+ | 0.5194 | 0.42 | 800 | 0.5590 | -1.0208 | -1.6453 | 0.7461 | 0.6245 | -416.3269 | -352.2357 | -0.3791 | -0.5486 |
73
+ | 0.5367 | 0.47 | 900 | 0.5492 | -1.1477 | -1.8521 | 0.7266 | 0.7044 | -437.0040 | -364.9276 | -0.0908 | -0.2899 |
74
+ | 0.5575 | 0.52 | 1000 | 0.5450 | -1.1704 | -1.9048 | 0.7344 | 0.7344 | -442.2755 | -367.1964 | 0.2761 | 0.0498 |
75
+ | 0.5507 | 0.58 | 1100 | 0.5429 | -1.1040 | -1.8671 | 0.7422 | 0.7631 | -438.5026 | -360.5551 | 0.5339 | 0.2877 |
76
+ | 0.5305 | 0.63 | 1200 | 0.5366 | -1.1557 | -1.9243 | 0.7578 | 0.7686 | -444.2217 | -365.7241 | 0.7350 | 0.4755 |
77
+ | 0.5171 | 0.68 | 1300 | 0.5304 | -1.3741 | -2.1678 | 0.7656 | 0.7937 | -468.5735 | -387.5681 | 0.7686 | 0.5029 |
78
+ | 0.4875 | 0.73 | 1400 | 0.5321 | -1.3228 | -2.1513 | 0.7578 | 0.8285 | -466.9267 | -382.4329 | 0.8566 | 0.5926 |
79
+ | 0.5216 | 0.78 | 1500 | 0.5326 | -1.2006 | -2.0034 | 0.7617 | 0.8028 | -452.1298 | -370.2103 | 0.7189 | 0.4630 |
80
+ | 0.4894 | 0.84 | 1600 | 0.5327 | -1.2300 | -2.0556 | 0.7656 | 0.8256 | -457.3565 | -373.1585 | 0.7405 | 0.4828 |
81
+ | 0.5179 | 0.89 | 1700 | 0.5326 | -1.2313 | -2.0558 | 0.7656 | 0.8245 | -457.3720 | -373.2860 | 0.7604 | 0.5012 |
82
+ | 0.5534 | 0.94 | 1800 | 0.5325 | -1.2309 | -2.0558 | 0.7656 | 0.8249 | -457.3779 | -373.2437 | 0.7550 | 0.4957 |
83
+ | 0.5539 | 0.99 | 1900 | 0.5325 | -1.2325 | -2.0565 | 0.7656 | 0.8240 | -457.4398 | -373.4022 | 0.7596 | 0.5001 |
84
 
85
 
86
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a61e6c64f98d1de332121cd4934fc387468e1434815d637ddcad2b444c849f7e
3
  size 83945744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881e1b5a4dd0347641273b3dcdd5ce52a7e613d1712bb56b80cc13e114765f7c
3
  size 83945744
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 0.7988529205322266,
4
- "eval_logits/rejected": 1.2523157596588135,
5
- "eval_logps/chosen": -385.9081726074219,
6
- "eval_logps/rejected": -457.0127868652344,
7
- "eval_loss": 0.5133188962936401,
8
- "eval_rewards/accuracies": 0.75390625,
9
- "eval_rewards/chosen": -1.244707703590393,
10
- "eval_rewards/margins": 0.8671280741691589,
11
- "eval_rewards/rejected": -2.1118357181549072,
12
- "eval_runtime": 99.9074,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 20.019,
15
- "eval_steps_per_second": 0.32,
16
- "train_loss": 0.5577758540044768,
17
- "train_runtime": 7516.1301,
18
- "train_samples": 61155,
19
- "train_samples_per_second": 8.137,
20
- "train_steps_per_second": 0.254
21
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": 0.5000983476638794,
4
+ "eval_logits/rejected": 0.7595670819282532,
5
+ "eval_logps/chosen": -373.40216064453125,
6
+ "eval_logps/rejected": -457.4398498535156,
7
+ "eval_loss": 0.5325239300727844,
8
+ "eval_rewards/accuracies": 0.765625,
9
+ "eval_rewards/chosen": -1.2324851751327515,
10
+ "eval_rewards/margins": 0.8239741921424866,
11
+ "eval_rewards/rejected": -2.056459426879883,
12
+ "eval_runtime": 98.6631,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 20.271,
15
+ "eval_steps_per_second": 0.324,
16
+ "train_loss": 0.5648497628454511,
17
+ "train_runtime": 7573.6114,
18
+ "train_samples": 61135,
19
+ "train_samples_per_second": 8.072,
20
+ "train_steps_per_second": 0.252
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 0.7988529205322266,
4
- "eval_logits/rejected": 1.2523157596588135,
5
- "eval_logps/chosen": -385.9081726074219,
6
- "eval_logps/rejected": -457.0127868652344,
7
- "eval_loss": 0.5133188962936401,
8
- "eval_rewards/accuracies": 0.75390625,
9
- "eval_rewards/chosen": -1.244707703590393,
10
- "eval_rewards/margins": 0.8671280741691589,
11
- "eval_rewards/rejected": -2.1118357181549072,
12
- "eval_runtime": 99.9074,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 20.019,
15
- "eval_steps_per_second": 0.32
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": 0.5000983476638794,
4
+ "eval_logits/rejected": 0.7595670819282532,
5
+ "eval_logps/chosen": -373.40216064453125,
6
+ "eval_logps/rejected": -457.4398498535156,
7
+ "eval_loss": 0.5325239300727844,
8
+ "eval_rewards/accuracies": 0.765625,
9
+ "eval_rewards/chosen": -1.2324851751327515,
10
+ "eval_rewards/margins": 0.8239741921424866,
11
+ "eval_rewards/rejected": -2.056459426879883,
12
+ "eval_runtime": 98.6631,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 20.271,
15
+ "eval_steps_per_second": 0.324
16
  }
runs/Jan09_01-40-49_ip-26-0-161-142/events.out.tfevents.1704764776.ip-26-0-161-142.2956136.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c49df43589fb116653e66319d27b41f614cc5d95ced77651c38fc16b3523594
3
- size 139537
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf1a980365c77c85b76da1a9fe8e531129cbf9e1fad4fba5712af6c8a400640d
3
+ size 140525
runs/Jan09_01-40-49_ip-26-0-161-142/events.out.tfevents.1704772448.ip-26-0-161-142.2956136.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d682d900b7d6298737e62c3a131ec46013b934ae62d76cd985b2616db3db65c1
3
+ size 828
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5577758540044768,
4
- "train_runtime": 7516.1301,
5
- "train_samples": 61155,
6
- "train_samples_per_second": 8.137,
7
- "train_steps_per_second": 0.254
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5648497628454511,
4
+ "train_runtime": 7573.6114,
5
+ "train_samples": 61135,
6
+ "train_samples_per_second": 8.072,
7
+ "train_steps_per_second": 0.252
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff