End of training

Files changed (5) hide show

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6991
 ## Model description
@@ -50,18 +50,18 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.8356        | 0.33  | 9    | 1.8214          |
-| 1.8011        | 0.67  | 18   | 1.7745          |
-| 1.724         | 1.0   | 27   | 1.6848          |
-| 1.6316        | 1.33  | 36   | 1.5849          |
-| 1.5605        | 1.67  | 45   | 1.4827          |
-| 1.4149        | 2.0   | 54   | 1.3548          |
-| 1.2969        | 2.33  | 63   | 1.1693          |
-| 0.9925        | 2.67  | 72   | 0.9323          |
-| 0.8631        | 3.0   | 81   | 0.8125          |
-| 0.7053        | 3.33  | 90   | 0.7490          |
-| 0.6931        | 3.67  | 99   | 0.7222          |
-| 0.6993        | 4.0   | 108  | 0.6991          |
 ### Framework versions

 This model is a fine-tuned version of [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.6278
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.9423        | 0.33  | 9    | 1.0697          |
+| 1.0127        | 0.67  | 18   | 1.0604          |
+| 0.9779        | 1.0   | 27   | 1.0315          |
+| 0.9518        | 1.33  | 36   | 0.9850          |
+| 0.8728        | 1.67  | 45   | 0.9434          |
+| 0.8215        | 2.0   | 54   | 0.8961          |
+| 0.8151        | 2.33  | 63   | 0.8434          |
+| 0.7027        | 2.67  | 72   | 0.7606          |
+| 0.6277        | 3.0   | 81   | 0.6863          |
+| 0.5266        | 3.33  | 90   | 0.6582          |
+| 0.5282        | 3.67  | 99   | 0.6403          |
+| 0.5709        | 4.0   | 108  | 0.6278          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -9,18 +9,20 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_dropout": 0.7,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 8,
   "lora_dropout": 0.7,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7c31b67e81dab1d346b94f4b28a1bee061949682bcefb20a107a40aefdf613e
-size 27280152

 version https://git-lfs.github.com/spec/v1
+oid sha256:1aa957a0bf0ff31f1cbde437d83345dceff042d86fb5606bae12b908eec27844
+size 13665592

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 1000,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 3600,
     "strategy": "LongestFirst",
     "stride": 0
   },

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e55c5db746c2e70928eff861bd496e6f40226dfd2255ddeaa790f7dab1474f3
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:c05f7c31581197c870dc7944504b6e7211f9d859692dba036917f09c3113bc41
 size 4664