Minbyul committed
Commit: d1bf441
Parent: d8e5459

Model save

README.md CHANGED
@@ -1,15 +1,10 @@
 ---
 base_model: Minbyul/selfbiorag-7b-wo-medication_qa-sft
 tags:
-- alignment-handbook
-- trl
-- dpo
-- generated_from_trainer
 - trl
 - dpo
+- alignment-handbook
 - generated_from_trainer
-datasets:
-- HuggingFaceH4/ultrafeedback_binarized
 model-index:
 - name: selfbiorag-7b-dpo-full-sft-wo-medication_qa
   results: []
@@ -20,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # selfbiorag-7b-dpo-full-sft-wo-medication_qa
 
-This model is a fine-tuned version of [Minbyul/selfbiorag-7b-wo-medication_qa-sft](https://huggingface.co/Minbyul/selfbiorag-7b-wo-medication_qa-sft) on the HuggingFaceH4/ultrafeedback_binarized dataset.
+This model is a fine-tuned version of [Minbyul/selfbiorag-7b-wo-medication_qa-sft](https://huggingface.co/Minbyul/selfbiorag-7b-wo-medication_qa-sft) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2758
-- Rewards/chosen: -1.2291
-- Rewards/rejected: -7.1014
-- Rewards/accuracies: 0.8949
-- Rewards/margins: 5.8722
-- Logps/rejected: -1442.6945
-- Logps/chosen: -679.7597
-- Logits/rejected: -0.3284
-- Logits/chosen: -0.3526
+- Logits/chosen: -0.3523
+- Logits/rejected: -0.3283
+- Logps/chosen: -679.6155
+- Logps/rejected: -1442.4718
+- Loss: 0.2756
+- Rewards/accuracies: 0.8920
+- Rewards/chosen: -1.2277
+- Rewards/margins: 5.8714
+- Rewards/rejected: -7.0991
 
 ## Model description
 
@@ -65,11 +60,11 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
-|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.2249 | 0.32 | 100 | 0.3821 | -0.9339 | -4.5771 | 0.8551 | 3.6432 | -1190.2701 | -650.2339 | -0.0290 | -0.1107 |
-| 0.1549 | 0.65 | 200 | 0.2709 | -0.9607 | -5.7585 | 0.8977 | 4.7978 | -1308.4048 | -652.9113 | -0.3222 | -0.3180 |
-| 0.0946 | 0.97 | 300 | 0.2756 | -1.2277 | -7.0991 | 0.8920 | 5.8714 | -1442.4718 | -679.6155 | -0.3283 | -0.3523 |
+| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
+|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
+| 0.2249 | 0.32 | 100 | -0.1107 | -0.0290 | -650.2339 | -1190.2701 | 0.3821 | 0.8551 | -0.9339 | 3.6432 | -4.5771 |
+| 0.1549 | 0.65 | 200 | -0.3180 | -0.3222 | -652.9113 | -1308.4048 | 0.2709 | 0.8977 | -0.9607 | 4.7978 | -5.7585 |
+| 0.0946 | 0.97 | 300 | -0.3523 | -0.3283 | -679.6155 | -1442.4718 | 0.2756 | 0.8920 | -1.2277 | 5.8714 | -7.0991 |
 
 
 ### Framework versions
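The eval block in the updated card reproduces the step-300 row of the training table, with the columns re-sorted by metric name. As a quick consistency check on these DPO metrics, the reported reward margin should equal the chosen reward minus the rejected reward (that is how TRL's DPOTrainer derives rewards/margins, as far as I can tell). A minimal sketch using only the numbers shown above; it is not part of the repository:

```python
# Consistency check on the DPO eval metrics quoted in the updated card.
# Values are copied verbatim from the eval block above.
eval_metrics = {
    "rewards/chosen": -1.2277,
    "rewards/rejected": -7.0991,
    "rewards/margins": 5.8714,
}

# TRL's DPOTrainer logs rewards/margins as the mean of (chosen - rejected),
# so the three numbers should agree up to rounding.
margin = eval_metrics["rewards/chosen"] - eval_metrics["rewards/rejected"]
assert abs(margin - eval_metrics["rewards/margins"]) < 1e-3
print(f"chosen - rejected = {margin:.4f}, reported margin = {eval_metrics['rewards/margins']}")
```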
all_results.json CHANGED
@@ -13,9 +13,9 @@
   "eval_samples": 1392,
   "eval_samples_per_second": 8.061,
   "eval_steps_per_second": 0.255,
-  "train_loss": 0.23439283587014406,
-  "train_runtime": 5872.9997,
+  "train_loss": 0.0030385508506429234,
+  "train_runtime": 144.6456,
   "train_samples": 19761,
-  "train_samples_per_second": 3.365,
-  "train_steps_per_second": 0.053
+  "train_samples_per_second": 136.617,
+  "train_steps_per_second": 2.136
 }
config.json CHANGED
@@ -23,6 +23,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.39.0.dev0",
-  "use_cache": true,
+  "use_cache": false,
   "vocab_size": 32016
 }
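The only substantive change to config.json is use_cache flipping from true to false, a setting training scripts commonly apply while gradient checkpointing is active. For plain inference you would normally re-enable the KV cache. A minimal sketch under assumptions: the repository id is inferred from the model-index name in the card and is not stated explicitly in this diff.

```python
# Minimal inference sketch. Assumption: the checkpoint lives at the repo id
# implied by the model-index name above; adjust if the actual id differs.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Minbyul/selfbiorag-7b-dpo-full-sft-wo-medication_qa"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)
model.config.use_cache = True  # the saved config now has "use_cache": false

inputs = tokenizer("List common drug interactions of warfarin.", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```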
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e89a2cf022407c47d43554bd2be48b24f21d95171afc83f83b7a5fbc6c1c2091
+oid sha256:34a6842492517bd5147607d339f469a2810aa4ef18c9520443ca11d3c43380f3
 size 4939116424
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9aaf261990d4a881df076ec2e3b70271760eb1d4650860e21e7dac23d6253532
+oid sha256:4d5a9b4e1a52b8244e7ca6922b06cc6e2e89dac0dc0236fb1b472bb2fe3c2d2a
 size 4947390880
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56834f893605fbd9b04fae4860c65ca40ad37951f73d2e824a2edba88a18cc12
+oid sha256:603cb88c567f7f6f78be3d4bef7e5dfd47d99a547b30001d846984fcc555a0d8
 size 3590619888
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
   "epoch": 1.0,
-  "train_loss": 0.23439283587014406,
-  "train_runtime": 5872.9997,
+  "train_loss": 0.0030385508506429234,
+  "train_runtime": 144.6456,
   "train_samples": 19761,
-  "train_samples_per_second": 3.365,
-  "train_steps_per_second": 0.053
+  "train_samples_per_second": 136.617,
+  "train_steps_per_second": 2.136
 }
trainer_state.json CHANGED
@@ -525,10 +525,10 @@
   "epoch": 1.0,
   "step": 309,
   "total_flos": 0.0,
-  "train_loss": 0.23439283587014406,
-  "train_runtime": 5872.9997,
-  "train_samples_per_second": 3.365,
-  "train_steps_per_second": 0.053
+  "train_loss": 0.0030385508506429234,
+  "train_runtime": 144.6456,
+  "train_samples_per_second": 136.617,
+  "train_steps_per_second": 2.136
 }
 ],
 "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b89935d144b21f9eae1842dca6021f49e0972822d5cb86deaca42054492c5e32
+oid sha256:2ea0f89586293f82aba15efa6e181e34eada67564061d2a833ae7eb15e0b550a
 size 6264