Adil1567 committed
Commit 9c4a160 · verified · 1 Parent(s): 6c0cb3c

Model save

README.md CHANGED
@@ -17,8 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral-sft-lora-fsdp
 
 This model is a fine-tuned version of [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct) on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.6089
 
 ## Model description
 
@@ -53,7 +51,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.6126        | 1.0   | 200  | 0.6089          |
+| No log        | 1.0   | 1    | 1.9028          |
 
 
 ### Framework versions
adapter_config.json CHANGED
@@ -12,24 +12,24 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
+  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
     "gate_proj",
-    "v_proj",
     "o_proj",
-    "q_proj",
     "down_proj",
-    "up_proj",
-    "k_proj"
+    "k_proj",
+    "v_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
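The updated adapter_config.json roughly corresponds to a PEFT LoraConfig like the sketch below. This is an illustration only, assuming the standard `peft` Python API; the training script that produced this commit is not part of the diff.

```python
# Minimal sketch of a LoraConfig matching the updated adapter_config.json.
# Assumes the standard `peft` library API; not the author's actual training code.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # rank doubled from 8, roughly doubling the adapter size on disk
    lora_alpha=32,   # scaling factor raised from 16, keeping alpha/r at 2
    lora_dropout=0.1,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    task_type="CAUSAL_LM",
)
```

The doubling of `r` from 8 to 16 is consistent with adapter_model.safetensors growing from about 414 MB to about 828 MB in this commit.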
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac66d622181fcbc5e33f12553515eeaf7cd109b0e3ffcf59bc482977c1c0aac3
-size 414337624
+oid sha256:7179070f2b41fa604f641922b27f7517e3f5e003ce1f6747e8baddf286783a97
+size 828526568
runs/Jan05_08-18-52_gpu-server/events.out.tfevents.1736065310.gpu-server.2641294.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19b64842890997481b14d7320f1185677bf3e6e12eeda6795a93df407beb20c1
-size 5607
+oid sha256:cfa5e0395a973ee2a09dbd53241d77b086f9635a8b8d719be39c8fdadc8abd5e
+size 6221