yakazimir
/

qwen_cUNL_entropy

@@ -3,15 +3,9 @@ library_name: transformers
 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
-- alignment-handbook
 - trl
 - simpo
 - generated_from_trainer
-- trl
-- simpo
-- generated_from_trainer
-datasets:
-- yakazimir/ultrafeedback_binarized
 model-index:
 - name: qwen_cUNL_entropy
   results: []
@@ -22,18 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 # qwen_cUNL_entropy
-This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5443
-- Rewards/chosen: -10.4573
-- Rewards/rejected: -11.6657
-- Rewards/accuracies: 0.7211
-- Rewards/margins: 1.2083
-- Logps/rejected: -11.6657
-- Logps/chosen: -10.4573
-- Logits/rejected: 1.7006
-- Logits/chosen: 1.6599
-- Semantic Entropy: 0.0009
 ## Model description
@@ -66,22 +59,22 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Semantic Entropy |
-|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------------:|
-| 1.0568        | 0.2141 | 400  | 1.0641          | -2.1120        | -2.3958          | 0.5497             | 0.2838          | -2.3958        | -2.1120      | 0.4624          | 0.3653        | 0.5089           |
-| 0.602         | 0.4282 | 800  | 0.5817          | -7.8264        | -8.3563          | 0.6855             | 0.5299          | -8.3563        | -7.8264      | 1.3572          | 1.3145        | 0.0047           |
-| 0.6143        | 0.6422 | 1200 | 0.5509          | -8.8961        | -9.6072          | 0.6996             | 0.7110          | -9.6072        | -8.8961      | 1.6397          | 1.5818        | 0.0021           |
-| 0.4586        | 0.8563 | 1600 | 0.5386          | -9.1154        | -9.8342          | 0.7166             | 0.7188          | -9.8342        | -9.1154      | 1.6747          | 1.6254        | 0.0017           |
-| 0.5717        | 1.0704 | 2000 | 0.5367          | -9.0664        | -9.8093          | 0.7151             | 0.7429          | -9.8093        | -9.0664      | 1.6674          | 1.6192        | 0.0019           |
-| 0.4984        | 1.2845 | 2400 | 0.5360          | -9.2711        | -10.2641         | 0.7307             | 0.9930          | -10.2641       | -9.2711      | 1.8156          | 1.7480        | 0.0018           |
-| 0.4938        | 1.4986 | 2800 | 0.5318          | -8.7814        | -9.7107          | 0.7255             | 0.9293          | -9.7107        | -8.7814      | 1.5667          | 1.5074        | 0.0028           |
-| 0.5379        | 1.7127 | 3200 | 0.5329          | -9.7086        | -10.6885         | 0.7151             | 0.9799          | -10.6885       | -9.7086      | 1.6242          | 1.5778        | 0.0014           |
-| 0.5434        | 1.9267 | 3600 | 0.5306          | -9.6408        | -10.6193         | 0.7211             | 0.9784          | -10.6193       | -9.6408      | 1.5137          | 1.4731        | 0.0014           |
-| 0.3725        | 2.1408 | 4000 | 0.5395          | -10.2459       | -11.3888         | 0.7240             | 1.1430          | -11.3888       | -10.2459     | 1.6740          | 1.6276        | 0.0011           |
-| 0.4469        | 2.3549 | 4400 | 0.5429          | -10.1108       | -11.3001         | 0.7218             | 1.1893          | -11.3001       | -10.1108     | 1.6134          | 1.5684        | 0.0012           |
-| 0.3893        | 2.5690 | 4800 | 0.5416          | -10.2988       | -11.4914         | 0.7240             | 1.1926          | -11.4914       | -10.2988     | 1.6694          | 1.6281        | 0.0010           |
-| 0.4342        | 2.7831 | 5200 | 0.5442          | -10.3998       | -11.6056         | 0.7226             | 1.2058          | -11.6056       | -10.3998     | 1.7148          | 1.6724        | 0.0010           |
-| 0.3904        | 2.9972 | 5600 | 0.5443          | -10.4574       | -11.6657         | 0.7211             | 1.2083          | -11.6657       | -10.4574     | 1.7006          | 1.6599        | 0.0009           |
 ### Framework versions

 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
 - trl
 - simpo
 - generated_from_trainer
 model-index:
 - name: qwen_cUNL_entropy
   results: []
 # qwen_cUNL_entropy
+This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5195
+- Rewards/chosen: -7.4572
+- Rewards/rejected: -8.6117
+- Rewards/accuracies: 0.7285
+- Rewards/margins: 1.1545
+- Logps/rejected: -8.6117
+- Logps/chosen: -7.4572
+- Logits/rejected: 0.5435
+- Logits/chosen: 0.4914
 ## Model description
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.8222        | 0.2141 | 400  | 0.8285          | -1.7118        | -1.9086          | 0.5534             | 0.1968          | -1.9086        | -1.7118      | 0.3777          | 0.2894        |
+| 0.5698        | 0.4282 | 800  | 0.5834          | -4.3085        | -4.8785          | 0.6899             | 0.5700          | -4.8785        | -4.3085      | 0.4424          | 0.3680        |
+| 0.5645        | 0.6422 | 1200 | 0.5407          | -5.3672        | -6.1336          | 0.7196             | 0.7664          | -6.1336        | -5.3672      | 0.5839          | 0.4867        |
+| 0.4723        | 0.8563 | 1600 | 0.5308          | -6.0239        | -6.7829          | 0.7188             | 0.7590          | -6.7829        | -6.0239      | 0.4449          | 0.3580        |
+| 0.5671        | 1.0704 | 2000 | 0.5245          | -6.1299        | -6.9744          | 0.7270             | 0.8445          | -6.9744        | -6.1299      | 0.5458          | 0.4536        |
+| 0.5184        | 1.2845 | 2400 | 0.5194          | -6.2767        | -7.2502          | 0.7300             | 0.9736          | -7.2502        | -6.2767      | 0.5423          | 0.4595        |
+| 0.4823        | 1.4986 | 2800 | 0.5166          | -6.4303        | -7.3916          | 0.7285             | 0.9613          | -7.3916        | -6.4303      | 0.4681          | 0.4003        |
+| 0.5627        | 1.7127 | 3200 | 0.5134          | -6.6572        | -7.6688          | 0.7352             | 1.0116          | -7.6688        | -6.6572      | 0.5174          | 0.4489        |
+| 0.5355        | 1.9267 | 3600 | 0.5093          | -6.3599        | -7.3630          | 0.7352             | 1.0031          | -7.3630        | -6.3599      | 0.4672          | 0.4010        |
+| 0.3968        | 2.1408 | 4000 | 0.5234          | -7.4930        | -8.6276          | 0.7248             | 1.1346          | -8.6276        | -7.4930      | 0.5678          | 0.5128        |
+| 0.4135        | 2.3549 | 4400 | 0.5203          | -7.4952        | -8.6565          | 0.7240             | 1.1613          | -8.6565        | -7.4952      | 0.4661          | 0.4203        |
+| 0.4277        | 2.5690 | 4800 | 0.5189          | -7.3524        | -8.5007          | 0.7270             | 1.1483          | -8.5007        | -7.3524      | 0.5701          | 0.5143        |
+| 0.3999        | 2.7831 | 5200 | 0.5187          | -7.4281        | -8.5789          | 0.7292             | 1.1507          | -8.5789        | -7.4281      | 0.5522          | 0.4986        |
+| 0.3855        | 2.9972 | 5600 | 0.5195          | -7.4572        | -8.6117          | 0.7285             | 1.1545          | -8.6117        | -7.4572      | 0.5435          | 0.4914        |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,23 +1,9 @@
 {
     "epoch": 2.999297541394882,
-    "eval_logits/chosen": 1.6599200963974,
-    "eval_logits/rejected": 1.7005723714828491,
-    "eval_logps/chosen": -10.457348823547363,
-    "eval_logps/rejected": -11.665694236755371,
-    "eval_loss": 0.5442745685577393,
-    "eval_rewards/accuracies": 0.721068263053894,
-    "eval_rewards/chosen": -10.457348823547363,
-    "eval_rewards/margins": 1.2083450555801392,
-    "eval_rewards/rejected": -11.665694236755371,
-    "eval_runtime": 34.1687,
-    "eval_samples": 1345,
-    "eval_samples_per_second": 39.364,
-    "eval_semantic_entropy": 0.0009483203175477684,
-    "eval_steps_per_second": 9.863,
     "total_flos": 0.0,
-    "train_loss": 0.541753981451236,
-    "train_runtime": 28781.2396,
     "train_samples": 59790,
-    "train_samples_per_second": 6.232,
-    "train_steps_per_second": 0.195
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 0.5211759163684967,
+    "train_runtime": 30222.1049,
     "train_samples": 59790,
+    "train_samples_per_second": 5.935,
+    "train_steps_per_second": 0.185
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
-    "train_loss": 0.541753981451236,
-    "train_runtime": 28781.2396,
     "train_samples": 59790,
-    "train_samples_per_second": 6.232,
-    "train_steps_per_second": 0.195
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 0.5211759163684967,
+    "train_runtime": 30222.1049,
     "train_samples": 59790,
+    "train_samples_per_second": 5.935,
+    "train_steps_per_second": 0.185
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff