Model save

Browse files

Files changed (8) hide show

README.md +92 -0
adapter_model.safetensors +1 -1
all_results.json +21 -0
eval_results.json +16 -0
runs/Mar05_01-38-10_SYS-4029GP-TRT/events.out.tfevents.1709574193.SYS-4029GP-TRT.1942121.0 +2 -2
runs/Mar05_01-38-10_SYS-4029GP-TRT/events.out.tfevents.1709602315.SYS-4029GP-TRT.1942121.1 +3 -0
train_results.json +8 -0
trainer_state.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,92 @@

+---
+library_name: peft
+tags:
+- trl
+- dpo
+- generated_from_trainer
+base_model: meta-llama/Llama-2-7b-chat-hf
+model-index:
+- name: llama-7b-dpo-qlora
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# llama-7b-dpo-qlora
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unspecified dataset (no dataset name was recorded when this card was generated).
+It achieves the following results on the evaluation set:
+- Loss: 0.5797
+- Rewards/chosen: -0.7180
+- Rewards/rejected: -1.2522
+- Rewards/accuracies: 0.7163
+- Rewards/margins: 0.5342
+- Logps/rejected: -439.3930
+- Logps/chosen: -418.4136
+- Logits/rejected: -0.5278
+- Logits/chosen: -0.4875
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-06
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
+- total_eval_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6856        | 0.05  | 100  | 0.6868          | 0.0843         | 0.0692           | 0.5377             | 0.0151          | -307.2546      | -338.1842    | -0.3397         | -0.3142       |
+| 0.6704        | 0.1   | 200  | 0.6715          | 0.2423         | 0.1804           | 0.5714             | 0.0619          | -296.1337      | -322.3911    | -0.3758         | -0.3406       |
+| 0.6506        | 0.16  | 300  | 0.6529          | 0.1559         | 0.0442           | 0.6647             | 0.1117          | -309.7589      | -331.0275    | -0.4759         | -0.4428       |
+| 0.6372        | 0.21  | 400  | 0.6272          | -0.1132        | -0.3130          | 0.6865             | 0.1998          | -345.4769      | -357.9352    | -0.5776         | -0.5492       |
+| 0.6233        | 0.26  | 500  | 0.6162          | -0.1577        | -0.4261          | 0.6825             | 0.2685          | -356.7882      | -362.3849    | -0.5820         | -0.5495       |
+| 0.5951        | 0.31  | 600  | 0.6063          | -0.3417        | -0.6825          | 0.6806             | 0.3408          | -382.4303      | -380.7912    | -0.6100         | -0.5733       |
+| 0.6051        | 0.37  | 700  | 0.5973          | -0.4906        | -0.8807          | 0.6944             | 0.3901          | -402.2431      | -395.6783    | -0.6108         | -0.5761       |
+| 0.5632        | 0.42  | 800  | 0.5928          | -0.6334        | -1.0835          | 0.7024             | 0.4501          | -422.5295      | -409.9586    | -0.6245         | -0.5841       |
+| 0.6015        | 0.47  | 900  | 0.5896          | -0.6102        | -1.0642          | 0.7123             | 0.4540          | -420.5953      | -407.6412    | -0.5756         | -0.5359       |
+| 0.5756        | 0.52  | 1000 | 0.5865          | -0.6474        | -1.1215          | 0.6984             | 0.4742          | -426.3284      | -411.3543    | -0.5431         | -0.5058       |
+| 0.6024        | 0.58  | 1100 | 0.5855          | -0.7264        | -1.2283          | 0.7063             | 0.5018          | -437.0025      | -419.2626    | -0.5501         | -0.5104       |
+| 0.5578        | 0.63  | 1200 | 0.5823          | -0.6906        | -1.1994          | 0.7143             | 0.5087          | -434.1114      | -415.6815    | -0.5297         | -0.4896       |
+| 0.5243        | 0.68  | 1300 | 0.5803          | -0.7453        | -1.2720          | 0.7143             | 0.5267          | -441.3783      | -421.1522    | -0.5340         | -0.4930       |
+| 0.5343        | 0.73  | 1400 | 0.5805          | -0.7354        | -1.2662          | 0.7103             | 0.5308          | -440.8000      | -420.1602    | -0.5271         | -0.4872       |
+| 0.5707        | 0.79  | 1500 | 0.5799          | -0.7179        | -1.2504          | 0.7123             | 0.5326          | -439.2190      | -418.4040    | -0.5268         | -0.4864       |
+| 0.5582        | 0.84  | 1600 | 0.5795          | -0.7300        | -1.2655          | 0.7123             | 0.5355          | -440.7271      | -419.6230    | -0.5271         | -0.4870       |
+| 0.5722        | 0.89  | 1700 | 0.5798          | -0.7181        | -1.2517          | 0.7143             | 0.5336          | -439.3442      | -418.4286    | -0.5279         | -0.4876       |
+| 0.5964        | 0.94  | 1800 | 0.5796          | -0.7165        | -1.2507          | 0.7163             | 0.5342          | -439.2476      | -418.2664    | -0.5278         | -0.4875       |
+| 0.5896        | 0.99  | 1900 | 0.5797          | -0.7180        | -1.2521          | 0.7163             | 0.5341          | -439.3842      | -418.4147    | -0.5278         | -0.4875       |
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.36.2
+- PyTorch 2.2.1+cu121
+- Datasets 2.14.6
+- Tokenizers 0.15.2

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c25118f501537916a9b89de85ab7c31d16a0786bc7d89771d3b1c7bbedaabf31
 size 639692768

 version https://git-lfs.github.com/spec/v1
+oid sha256:4dbebda98adc5188e890aa5738d59119741858f3f2d9a1863a4affd649dabd93
 size 639692768

all_results.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "epoch": 1.0,
+    "eval_logits/chosen": -0.4874803125858307,
+    "eval_logits/rejected": -0.5277639627456665,
+    "eval_logps/chosen": -418.41363525390625,
+    "eval_logps/rejected": -439.39300537109375,
+    "eval_loss": 0.5796785354614258,
+    "eval_rewards/accuracies": 0.716269850730896,
+    "eval_rewards/chosen": -0.7179543972015381,
+    "eval_rewards/margins": 0.5342229008674622,
+    "eval_rewards/rejected": -1.2521772384643555,
+    "eval_runtime": 305.3815,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 6.549,
+    "eval_steps_per_second": 0.206,
+    "train_loss": 0.598351346759896,
+    "train_runtime": 27817.4474,
+    "train_samples": 61135,
+    "train_samples_per_second": 2.198,
+    "train_steps_per_second": 0.069
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 1.0,
+    "eval_logits/chosen": -0.4874803125858307,
+    "eval_logits/rejected": -0.5277639627456665,
+    "eval_logps/chosen": -418.41363525390625,
+    "eval_logps/rejected": -439.39300537109375,
+    "eval_loss": 0.5796785354614258,
+    "eval_rewards/accuracies": 0.716269850730896,
+    "eval_rewards/chosen": -0.7179543972015381,
+    "eval_rewards/margins": 0.5342229008674622,
+    "eval_rewards/rejected": -1.2521772384643555,
+    "eval_runtime": 305.3815,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 6.549,
+    "eval_steps_per_second": 0.206
+}

runs/Mar05_01-38-10_SYS-4029GP-TRT/events.out.tfevents.1709574193.SYS-4029GP-TRT.1942121.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d703d4e50b6102b23c0007085f8f86c7cb562619f8ceffa080d0ead04152084
-size 139982

 version https://git-lfs.github.com/spec/v1
+oid sha256:80bb8c93cc8d3d53d09e64959ecfbc17766cfc34ca6287612953ac6e4988aa62
+size 140970

runs/Mar05_01-38-10_SYS-4029GP-TRT/events.out.tfevents.1709602315.SYS-4029GP-TRT.1942121.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df8d44e29fc382ed0209aeea3f64a6785f45a59a9738e2730ba8a39579183f20
+size 828

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "train_loss": 0.598351346759896,
+    "train_runtime": 27817.4474,
+    "train_samples": 61135,
+    "train_samples_per_second": 2.198,
+    "train_steps_per_second": 0.069
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff