ysr committed on
Commit
a224372
·
verified ·
1 Parent(s): c0a97ac

End of training

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [deepseek-ai/deepseek-coder-1.3b-base](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.4301
24
 
25
  ## Model description
26
 
@@ -55,22 +55,22 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
- | 0.8205 | 0.3 | 25 | 0.5577 |
59
- | 0.5013 | 0.59 | 50 | 0.4856 |
60
- | 0.4588 | 0.89 | 75 | 0.4663 |
61
- | 0.4503 | 1.18 | 100 | 0.4556 |
62
- | 0.4319 | 1.48 | 125 | 0.4482 |
63
- | 0.4293 | 1.77 | 150 | 0.4429 |
64
- | 0.4272 | 2.07 | 175 | 0.4387 |
65
- | 0.4153 | 2.37 | 200 | 0.4365 |
66
- | 0.4111 | 2.66 | 225 | 0.4343 |
67
- | 0.4031 | 2.96 | 250 | 0.4322 |
68
- | 0.406 | 3.25 | 275 | 0.4317 |
69
- | 0.3996 | 3.55 | 300 | 0.4309 |
70
- | 0.3959 | 3.84 | 325 | 0.4303 |
71
- | 0.3989 | 4.14 | 350 | 0.4302 |
72
- | 0.4035 | 4.43 | 375 | 0.4303 |
73
- | 0.3929 | 4.73 | 400 | 0.4301 |
74
 
75
 
76
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [deepseek-ai/deepseek-coder-1.3b-base](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4247
24
 
25
  ## Model description
26
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 0.7919 | 0.3 | 25 | 0.5285 |
59
+ | 0.4811 | 0.59 | 50 | 0.4738 |
60
+ | 0.4512 | 0.89 | 75 | 0.4567 |
61
+ | 0.4367 | 1.18 | 100 | 0.4465 |
62
+ | 0.4162 | 1.48 | 125 | 0.4399 |
63
+ | 0.4188 | 1.77 | 150 | 0.4352 |
64
+ | 0.4127 | 2.07 | 175 | 0.4318 |
65
+ | 0.3981 | 2.37 | 200 | 0.4296 |
66
+ | 0.3887 | 2.66 | 225 | 0.4281 |
67
+ | 0.3943 | 2.96 | 250 | 0.4258 |
68
+ | 0.3808 | 3.25 | 275 | 0.4263 |
69
+ | 0.3836 | 3.55 | 300 | 0.4251 |
70
+ | 0.3824 | 3.84 | 325 | 0.4247 |
71
+ | 0.3782 | 4.14 | 350 | 0.4246 |
72
+ | 0.377 | 4.43 | 375 | 0.4247 |
73
+ | 0.3725 | 4.73 | 400 | 0.4247 |
74
 
75
 
76
  ### Framework versions
adapter_config.json CHANGED
@@ -10,13 +10,13 @@
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 32,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 32,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 64,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84a723e022024cdfece83514750f0c6efcf4dc33aefc626f199a1553a4a60f8d
3
- size 25191728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f6e54b85b5e1c319ff06364aeaea95c8377744edc2f8c79bacf1b9407e39b8c
3
+ size 50357632
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b08e1878e76116da9d989483eca71f784ccf29c23ca5c4fb9e809222fcba214
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2247bb79d70a2c0061f08825809540083701a72d23993237e18e744b81f6631
3
  size 4920