Jan Majkutewicz
committed on
Model save

- README.md +49 -52
- adapter_model.safetensors +1 -1
- all_results.json +4 -17
- train_results.json +4 -4
- trainer_state.json +0 -0
README.md
CHANGED
@@ -2,13 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
 base_model: alignment-handbook/zephyr-7b-sft-full
-datasets:
-- HuggingFaceH4/ultrafeedback_binarized
 model-index:
 - name: zephyr-7b-dpo-lora
   results: []
@@ -19,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-dpo-lora
 
-This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen:
-- Rewards/rejected: -0.
-- Rewards/accuracies: 0.
-- Rewards/margins: 0.
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -2.
-- Logits/chosen: -2.
+- Loss: 0.6776
+- Rewards/chosen: 0.0182
+- Rewards/rejected: -0.0146
+- Rewards/accuracies: 0.6855
+- Rewards/margins: 0.0328
+- Logps/rejected: -262.9002
+- Logps/chosen: -280.9546
+- Logits/rejected: -2.8233
+- Logits/chosen: -2.8504
 
 ## Model description
 
@@ -63,50 +60,50 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.6929 | 0.0262 | 100 | 0.6930 |
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 0.6929 | 0.0262 | 100 | 0.6930 | 0.0001 | -0.0001 | 0.5135 | 0.0002 | -261.4512 | -282.7630 | -2.8381 | -2.8655 |
+| 0.693 | 0.0523 | 200 | 0.6928 | 0.0001 | -0.0005 | 0.5470 | 0.0007 | -261.4925 | -282.7611 | -2.8349 | -2.8626 |
+| 0.692 | 0.0785 | 300 | 0.6921 | 0.0010 | -0.0011 | 0.6050 | 0.0021 | -261.5461 | -282.6746 | -2.8378 | -2.8650 |
+| 0.6913 | 0.1047 | 400 | 0.6910 | 0.0036 | -0.0008 | 0.6395 | 0.0044 | -261.5211 | -282.4127 | -2.8349 | -2.8622 |
+| 0.689 | 0.1309 | 500 | 0.6895 | 0.0049 | -0.0024 | 0.6700 | 0.0073 | -261.6805 | -282.2831 | -2.8389 | -2.8656 |
+| 0.6875 | 0.1570 | 600 | 0.6880 | 0.0059 | -0.0047 | 0.6690 | 0.0106 | -261.9060 | -282.1841 | -2.8332 | -2.8603 |
+| 0.6874 | 0.1832 | 700 | 0.6864 | 0.0084 | -0.0055 | 0.6785 | 0.0138 | -261.9842 | -281.9370 | -2.8342 | -2.8610 |
+| 0.682 | 0.2094 | 800 | 0.6850 | 0.0107 | -0.0060 | 0.6800 | 0.0167 | -262.0419 | -281.7033 | -2.8307 | -2.8578 |
+| 0.6837 | 0.2355 | 900 | 0.6840 | 0.0136 | -0.0054 | 0.6840 | 0.0190 | -261.9797 | -281.4180 | -2.8304 | -2.8573 |
+| 0.6819 | 0.2617 | 1000 | 0.6828 | 0.0161 | -0.0054 | 0.6810 | 0.0215 | -261.9830 | -281.1678 | -2.8269 | -2.8540 |
+| 0.6836 | 0.2879 | 1100 | 0.6818 | 0.0179 | -0.0057 | 0.6785 | 0.0236 | -262.0052 | -280.9853 | -2.8258 | -2.8529 |
+| 0.685 | 0.3141 | 1200 | 0.6810 | 0.0221 | -0.0032 | 0.6810 | 0.0253 | -261.7610 | -280.5679 | -2.8238 | -2.8510 |
+| 0.6785 | 0.3402 | 1300 | 0.6803 | 0.0209 | -0.0061 | 0.6840 | 0.0270 | -262.0453 | -280.6852 | -2.8259 | -2.8529 |
+| 0.6828 | 0.3664 | 1400 | 0.6796 | 0.0217 | -0.0066 | 0.6865 | 0.0283 | -262.1007 | -280.6062 | -2.8233 | -2.8505 |
+| 0.6795 | 0.3926 | 1500 | 0.6792 | 0.0226 | -0.0068 | 0.6830 | 0.0293 | -262.1143 | -280.5175 | -2.8250 | -2.8520 |
+| 0.6801 | 0.4187 | 1600 | 0.6788 | 0.0194 | -0.0107 | 0.6845 | 0.0301 | -262.5066 | -280.8286 | -2.8245 | -2.8516 |
+| 0.6839 | 0.4449 | 1700 | 0.6785 | 0.0204 | -0.0104 | 0.6855 | 0.0308 | -262.4770 | -280.7289 | -2.8261 | -2.8530 |
+| 0.6793 | 0.4711 | 1800 | 0.6782 | 0.0188 | -0.0126 | 0.6870 | 0.0314 | -262.6961 | -280.8936 | -2.8248 | -2.8519 |
+| 0.6766 | 0.4973 | 1900 | 0.6781 | 0.0188 | -0.0129 | 0.6810 | 0.0317 | -262.7311 | -280.8921 | -2.8281 | -2.8548 |
+| 0.6762 | 0.5234 | 2000 | 0.6778 | 0.0190 | -0.0133 | 0.6840 | 0.0323 | -262.7651 | -280.8749 | -2.8270 | -2.8538 |
+| 0.6796 | 0.5496 | 2100 | 0.6777 | 0.0184 | -0.0141 | 0.6795 | 0.0325 | -262.8513 | -280.9321 | -2.8299 | -2.8564 |
+| 0.6736 | 0.5758 | 2200 | 0.6777 | 0.0181 | -0.0145 | 0.6825 | 0.0326 | -262.8893 | -280.9635 | -2.8306 | -2.8571 |
+| 0.6779 | 0.6019 | 2300 | 0.6776 | 0.0176 | -0.0152 | 0.6875 | 0.0327 | -262.9558 | -281.0184 | -2.8281 | -2.8548 |
+| 0.6782 | 0.6281 | 2400 | 0.6777 | 0.0179 | -0.0148 | 0.6835 | 0.0327 | -262.9155 | -280.9810 | -2.8273 | -2.8540 |
+| 0.6753 | 0.6543 | 2500 | 0.6776 | 0.0181 | -0.0147 | 0.6805 | 0.0328 | -262.9074 | -280.9631 | -2.8256 | -2.8525 |
+| 0.6776 | 0.6805 | 2600 | 0.6776 | 0.0181 | -0.0148 | 0.6775 | 0.0329 | -262.9167 | -280.9641 | -2.8226 | -2.8498 |
+| 0.6774 | 0.7066 | 2700 | 0.6775 | 0.0182 | -0.0149 | 0.6860 | 0.0331 | -262.9263 | -280.9553 | -2.8261 | -2.8530 |
+| 0.679 | 0.7328 | 2800 | 0.6774 | 0.0184 | -0.0148 | 0.6850 | 0.0332 | -262.9162 | -280.9359 | -2.8271 | -2.8539 |
+| 0.6782 | 0.7590 | 2900 | 0.6775 | 0.0181 | -0.0150 | 0.6845 | 0.0330 | -262.9336 | -280.9681 | -2.8260 | -2.8529 |
+| 0.6784 | 0.7851 | 3000 | 0.6774 | 0.0180 | -0.0152 | 0.6890 | 0.0332 | -262.9586 | -280.9731 | -2.8283 | -2.8550 |
+| 0.6713 | 0.8113 | 3100 | 0.6775 | 0.0181 | -0.0149 | 0.6825 | 0.0330 | -262.9238 | -280.9596 | -2.8280 | -2.8547 |
+| 0.6774 | 0.8375 | 3200 | 0.6774 | 0.0182 | -0.0150 | 0.6830 | 0.0332 | -262.9411 | -280.9583 | -2.8275 | -2.8543 |
+| 0.6781 | 0.8636 | 3300 | 0.6775 | 0.0182 | -0.0148 | 0.6810 | 0.0329 | -262.9146 | -280.9559 | -2.8293 | -2.8559 |
+| 0.6733 | 0.8898 | 3400 | 0.6775 | 0.0180 | -0.0150 | 0.6825 | 0.0330 | -262.9403 | -280.9770 | -2.8237 | -2.8508 |
+| 0.6739 | 0.9160 | 3500 | 0.6775 | 0.0180 | -0.0150 | 0.6850 | 0.0331 | -262.9413 | -280.9686 | -2.8311 | -2.8575 |
+| 0.6807 | 0.9422 | 3600 | 0.6775 | 0.0182 | -0.0148 | 0.6855 | 0.0330 | -262.9205 | -280.9524 | -2.8257 | -2.8527 |
+| 0.6731 | 0.9683 | 3700 | 0.6775 | 0.0182 | -0.0147 | 0.6835 | 0.0330 | -262.9113 | -280.9514 | -2.8239 | -2.8510 |
+| 0.675 | 0.9945 | 3800 | 0.6776 | 0.0182 | -0.0146 | 0.6855 | 0.0328 | -262.9002 | -280.9546 | -2.8233 | -2.8504 |
 
 
 ### Framework versions
 
 - PEFT 0.10.0
-- Transformers 4.40.
+- Transformers 4.40.2
 - Pytorch 2.2.0
 - Datasets 2.16.1
 - Tokenizers 0.19.1
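Since the commit publishes a LoRA adapter (`library_name: peft`) rather than full model weights, inference requires loading the base model and attaching the adapter on top. Below is a minimal loading sketch using the framework versions listed above; the adapter repo id is a placeholder, not something this commit confirms:

```python
# Hedged sketch: attach this LoRA adapter to its zephyr-7b-sft-full base model.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "alignment-handbook/zephyr-7b-sft-full"
ADAPTER_ID = "<username>/zephyr-7b-dpo-lora"  # placeholder: substitute the actual Hub repo id

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype="auto")
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # loads adapter_model.safetensors

inputs = tokenizer("Explain direct preference optimization.", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```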
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:194e6bfe4247ce75b342f152b59aa0facda256cb7c78ea0f88c5b290aaf10375
 size 335605144
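The adapter itself is stored through Git LFS, so the diff above changes only the pointer's object hash; the byte size is unchanged. A downloaded copy can be checked against the new pointer as follows (the local path is an assumption):

```python
# Verify a downloaded adapter file against the LFS pointer above.
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "194e6bfe4247ce75b342f152b59aa0facda256cb7c78ea0f88c5b290aaf10375"
EXPECTED_SIZE = 335605144  # bytes, from the pointer's "size" field

path = Path("adapter_model.safetensors")  # assumed local download location
assert path.stat().st_size == EXPECTED_SIZE, "size does not match the pointer"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 does not match the pointer"
print("adapter file matches the LFS pointer")
```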
all_results.json
CHANGED
@@ -1,22 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": -2.7746472358703613,
-    "eval_logits/rejected": -2.752934455871582,
-    "eval_logps/chosen": -310.119873046875,
-    "eval_logps/rejected": -321.6407165527344,
-    "eval_loss": 0.5893968343734741,
-    "eval_rewards/accuracies": 0.703499972820282,
-    "eval_rewards/chosen": -0.27382245659828186,
-    "eval_rewards/margins": 0.32820531725883484,
-    "eval_rewards/rejected": -0.6020277142524719,
-    "eval_runtime": 692.2285,
-    "eval_samples": 2000,
-    "eval_samples_per_second": 2.889,
-    "eval_steps_per_second": 0.361,
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.680465580483626,
+    "train_runtime": 64957.9706,
     "train_samples": 61134,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.941,
+    "train_steps_per_second": 0.059
 }
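The derived throughput fields are reproducible from the raw counts, which is a quick self-consistency check on the new values (the effective batch size at the end is an inference from two rounded rates, not a logged hyperparameter):

```python
# Reproduce the derived throughput fields in all_results.json.
train_samples = 61134
train_runtime = 64957.9706  # seconds

samples_per_sec = train_samples / train_runtime
print(f"{samples_per_sec:.3f}")  # 0.941, matches train_samples_per_second

# samples/s divided by steps/s gives samples consumed per optimizer step,
# i.e. the effective batch size; both logged rates are rounded.
print(f"{samples_per_sec / 0.059:.0f}")  # ~16
```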
train_results.json
CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.680465580483626,
+    "train_runtime": 64957.9706,
     "train_samples": 61134,
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_samples_per_second": 0.941,
+    "train_steps_per_second": 0.059
 }
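The loss values in both JSON files and the model card sit just below ln 2 ≈ 0.6931, which is what the DPO objective predicts: per example the loss is −log σ(reward margin), so it starts at ln 2 when chosen and rejected rewards are tied (the first validation loss above is 0.6930) and declines only slowly for small margins. A quick check, assuming the logged margins already include the DPO β factor:

```python
import math

# DPO loss per example: -log(sigmoid(margin)), where margin is
# rewards/chosen - rewards/rejected as logged by the trainer.
def dpo_loss(margin: float) -> float:
    return math.log1p(math.exp(-margin))  # numerically stable -log(sigmoid(m))

print(f"{dpo_loss(0.0):.4f}")     # 0.6931 = ln 2, the zero-margin starting point
print(f"{dpo_loss(0.0328):.4f}")  # 0.6769, close to the final eval loss of 0.6776
```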
trainer_state.json
CHANGED
The diff for this file is too large to render. See raw diff.