LineLS/shawgpt-ftV2

Browse files

Files changed (7)

README.md +24 -15
adapter_config.json +2 -2
adapter_model.safetensors +2 -2
runs/Oct13_11-56-56_bc68b11829bc/events.out.tfevents.1728821649.bc68b11829bc.3705.1 +3 -0
runs/Oct13_13-04-02_bc68b11829bc/events.out.tfevents.1728824651.bc68b11829bc.3705.2 +3 -0
runs/Oct13_13-04-02_bc68b11829bc/events.out.tfevents.1728826131.bc68b11829bc.3705.3 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [TheBloke/Mistral-7B-Instruct-v0.2-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.7259
 ## Model description
@@ -35,7 +35,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
@@ -44,23 +44,32 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
-- num_epochs: 10
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 4.5932        | 0.9231 | 3    | 3.9725          |
-| 4.0532        | 1.8462 | 6    | 3.4547          |
-| 3.4704        | 2.7692 | 9    | 2.9927          |
-| 2.2395        | 4.0    | 13   | 2.5308          |
-| 2.6051        | 4.9231 | 16   | 2.2441          |
-| 2.2545        | 5.8462 | 19   | 2.0362          |
-| 2.0136        | 6.7692 | 22   | 1.8869          |
-| 1.4182        | 8.0    | 26   | 1.7714          |
-| 1.8025        | 8.9231 | 29   | 1.7306          |
-| 1.2552        | 9.2308 | 30   | 1.7259          |
 ### Framework versions

 This model is a fine-tuned version of [TheBloke/Mistral-7B-Instruct-v0.2-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.3086
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0003
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
+- num_epochs: 20
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 4.562         | 0.9231  | 3    | 3.8136          |
+| 3.7584        | 1.8462  | 6    | 3.0439          |
+| 2.9884        | 2.7692  | 9    | 2.5154          |
+| 1.8103        | 4.0     | 13   | 1.9763          |
+| 1.9109        | 4.9231  | 16   | 1.6535          |
+| 1.543         | 5.8462  | 19   | 1.4627          |
+| 1.352         | 6.7692  | 22   | 1.3896          |
+| 0.9888        | 8.0     | 26   | 1.3458          |
+| 1.2644        | 8.9231  | 29   | 1.3269          |
+| 1.2035        | 9.8462  | 32   | 1.3169          |
+| 1.198         | 10.7692 | 35   | 1.3156          |
+| 0.8476        | 12.0    | 39   | 1.3114          |
+| 1.1265        | 12.9231 | 42   | 1.3086          |
+| 1.0791        | 13.8462 | 45   | 1.3138          |
+| 1.0711        | 14.7692 | 48   | 1.3207          |
+| 0.7968        | 16.0    | 52   | 1.3141          |
+| 1.0279        | 16.9231 | 55   | 1.3216          |
+| 1.0094        | 17.8462 | 58   | 1.3262          |
+| 0.7129        | 18.4615 | 60   | 1.3255          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -16,7 +16,7 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a0b08f5d6246e777f2e54dc03d715ec001af5f28f7f6d6d0dd5981bcb6b9708
-size 8397056

 version https://git-lfs.github.com/spec/v1
+oid sha256:85a09d508b0e378143f60dbcdda2f4742ce3ae1a5e0276ab89a10ad18f5fe552
+size 16786880

runs/Oct13_11-56-56_bc68b11829bc/events.out.tfevents.1728821649.bc68b11829bc.3705.1 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee39184a716e7ed55e1ce73ca0ba4b7bf468ea77a20950c9d76f837130b1098b
+size 354

runs/Oct13_13-04-02_bc68b11829bc/events.out.tfevents.1728824651.bc68b11829bc.3705.2 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f56925f4cefaab758e6d06cc758db3d51e74763d0c936de8ab366e0c443bed4
+size 14905

runs/Oct13_13-04-02_bc68b11829bc/events.out.tfevents.1728826131.bc68b11829bc.3705.3 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9db49c04d7aa45aee004b9d8feaace6c96c1c19449da48308390e4c0deec31f4
+size 354

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0af5f42eb06c9715607ce2f751f94bb5963f86272e684f9d9f815a2912e2403
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea165663aed45dffb895c53d611a4e7e5b39fb6a8e8427b7fa97da0d2adad9c7
 size 5176