rkumar1999 committed
Commit 7120ce4 · verified · 1 Parent(s): eafbfe2

End of training 10/12
README.md CHANGED
@@ -17,8 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
 # gpt2-fine-tuned-math
 
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 1.6780
 
 ## Model description
 
@@ -38,36 +36,23 @@
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 2
-- eval_batch_size: 2
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 2
-- mixed_precision_training: Native AMP
+- lr_scheduler_warmup_steps: 100
+- num_epochs: 1
 
 ### Training results
 
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 2.1026        | 0.1481 | 500  | 1.9487          |
-| 2.0579        | 0.2963 | 1000 | 1.8139          |
-| 1.9139        | 0.4444 | 1500 | 1.8102          |
-| 1.9183        | 0.5926 | 2000 | 1.7433          |
-| 1.8343        | 0.7407 | 2500 | 1.7442          |
-| 1.7647        | 0.8889 | 3000 | 1.6976          |
-| 1.8922        | 1.0370 | 3500 | 1.7190          |
-| 1.815         | 1.1852 | 4000 | 1.6881          |
-| 1.717         | 1.3333 | 4500 | 1.6645          |
-| 1.7583        | 1.4815 | 5000 | 1.6477          |
-| 1.7769        | 1.6296 | 5500 | 1.6665          |
-| 1.7574        | 1.7778 | 6000 | 1.6311          |
-| 1.7386        | 1.9259 | 6500 | 1.6780          |
 
 
 ### Framework versions
 
-- PEFT 0.13.0
+- PEFT 0.13.2
 - Transformers 4.44.2
 - Pytorch 2.4.1+cu121
 - Datasets 3.0.1
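The retrained run's hyperparameters translate roughly to the following `transformers.TrainingArguments`. This is a minimal sketch assuming a single device (so 8 × 4 accumulation steps gives the listed total batch size of 32); `output_dir` is a hypothetical name not taken from the repo:

```python
from transformers import TrainingArguments

# Sketch of the hyperparameters listed in the updated README; output_dir is
# hypothetical, and Adam with betas=(0.9, 0.999), eps=1e-08 is the default.
training_args = TrainingArguments(
    output_dir="gpt2-fine-tuned-math",
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,   # 8 x 4 = total_train_batch_size 32
    seed=42,
    lr_scheduler_type="linear",
    warmup_steps=100,
    num_train_epochs=1,
)
```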
adapter_config.json CHANGED
@@ -10,18 +10,18 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
+  "lora_alpha": 16,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
+  "r": 4,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "lora_magnitude_vector",
-    "c_attn"
+    "c_attn",
+    "lora_magnitude_vector"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": true,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aebcbb39f598b7c5b5c7870e8bea1e833f207976c2fd1730c8fafd1ed14c2a1c
-size 1294856
+oid sha256:e146b48f7b14334309eb67332cf6f1d4bff46d94a0b4a9f668a5ae78e0a3e365
+size 705016
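The smaller file is consistent with the rank change: halving `r` halves the LoRA matrices while the DoRA magnitude vectors keep their size. A back-of-the-envelope check, assuming GPT-2 small (12 layers, `c_attn` mapping 768 → 2304) and fp32 storage:

```python
# Rough size check for the adapter weights (assumptions: GPT-2 small,
# c_attn adapted in every layer, fp32; ignores the safetensors header).
n_layers, d_in, d_out = 12, 768, 2304

def adapter_bytes(r: int) -> int:
    lora = n_layers * r * (d_in + d_out)  # lora_A (r x 768) + lora_B (2304 x r)
    dora = n_layers * d_out               # one DoRA magnitude vector per layer
    return (lora + dora) * 4              # 4 bytes per fp32 parameter

print(adapter_bytes(8))  # 1290240 -- old file: 1294856 bytes
print(adapter_bytes(4))  # 700416  -- new file: 705016 bytes
```

The few-kilobyte gap between each estimate and the actual file size is the safetensors metadata header.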
final_checkpoint/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
 
-- PEFT 0.13.0
+- PEFT 0.13.2
final_checkpoint/adapter_config.json CHANGED
@@ -10,18 +10,18 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
+  "lora_alpha": 16,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
+  "r": 4,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "lora_magnitude_vector",
-    "c_attn"
+    "c_attn",
+    "lora_magnitude_vector"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": true,
final_checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aebcbb39f598b7c5b5c7870e8bea1e833f207976c2fd1730c8fafd1ed14c2a1c
-size 1294856
+oid sha256:e146b48f7b14334309eb67332cf6f1d4bff46d94a0b4a9f668a5ae78e0a3e365
+size 705016
final_checkpoint/special_tokens_map.json CHANGED
@@ -1,24 +1,6 @@
 {
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
   "pad_token": "<|endoftext|>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<|endoftext|>"
 }
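Nothing changes functionally here: GPT-2 uses the single `<|endoftext|>` token for BOS, EOS, UNK, and (when assigned) PAD, and the new file simply serializes them as plain strings instead of expanded `AddedToken` objects. A sketch of the usual workflow that produces such a map (assumed, not taken from this repo):

```python
from transformers import AutoTokenizer

# GPT-2 ships without a pad token; reusing eos as pad is the common
# workaround for batched fine-tuning. save_pretrained() then writes all
# four special tokens, each "<|endoftext|>", to special_tokens_map.json.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.save_pretrained("gpt2-fine-tuned-math")  # hypothetical path
```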
final_checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_checkpoint/tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "50256": {
@@ -14,7 +13,6 @@
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
-  "errors": "replace",
   "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
final_checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b4b6344ee4a743bb1f3502197ed1500f10bf38abd56a710b60ad1918fc57d54
-size 5432
+oid sha256:9b58a1ff3671951ea0bcf741ecfd9f47711f5fc8e55900fa0bef9f5600d53399
+size 5368
final_checkpoint/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,24 +1,6 @@
 {
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
   "pad_token": "<|endoftext|>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<|endoftext|>"
 }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "50256": {
@@ -14,7 +13,6 @@
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
-  "errors": "replace",
   "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b4b6344ee4a743bb1f3502197ed1500f10bf38abd56a710b60ad1918fc57d54
-size 5432
+oid sha256:9b58a1ff3671951ea0bcf741ecfd9f47711f5fc8e55900fa0bef9f5600d53399
+size 5368
vocab.json CHANGED
The diff for this file is too large to render. See raw diff