AdamRTomkins committed
Commit 851eecb · verified · 1 Parent(s): f743074

End of training

README.md CHANGED
@@ -16,70 +16,81 @@ should probably proofread and complete it, then remove this comment. -->
 [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
 <details><summary>See axolotl config</summary>
 
-axolotl version: `0.4.1`
+axolotl version: `0.4.0`
 ```yaml
-adam_beta2: 0.95
-adam_epsilon: 1.0e-05
-adapter: qlora
 base_model: microsoft/phi-1_5
-bf16: auto
-dataset_prepared_path: null
-datasets:
-- path: garage-bAInd/Open-Platypus
-  type: alpaca
-debug: null
-deepspeed: null
-early_stopping_patience: null
-evals_per_epoch: 1
-flash_attention: false
-fp16: false
-fsdp: null
-fsdp_config: null
-gradient_accumulation_steps: 1
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-hub_model_id: AdamRTomkins/phi-kal
-hub_strategy: end
-learning_rate: 3.0e-06
-load_in_4bit: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+
 load_in_8bit: false
-local_rank: null
-logging_steps: 1
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: garage-bAInd/Open-Platypus
+    type: alpaca
+
+dataset_prepared_path:
+val_set_size: 0.05
+output_dir: ./outputs/phi-sft-out
+
+sequence_len: 1024
+sample_packing: true
+pad_to_sequence_len: true
+
+adapter: qlora
+lora_model_dir:
+lora_r: 64
 lora_alpha: 32
 lora_dropout: 0.05
-lora_fan_in_fan_out: null
-lora_model_dir: null
-lora_r: 64
 lora_target_linear: true
-lr_scheduler: cosine
-max_grad_norm: 1.0
-max_steps: 2
+lora_fan_in_fan_out:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
 micro_batch_size: 1
-model_type: AutoModelForCausalLM
 num_epochs: 1
 optimizer: adamw_torch
-output_dir: ./outputs/phi-sft-out
-pad_to_sequence_len: true
-resize_token_embeddings_to_32x: true
-resume_from_checkpoint: null
-sample_packing: true
-saves_per_epoch: 1
-sequence_len: 1024
-special_tokens:
-  pad_token: <|endoftext|>
-strict: false
-tokenizer_type: AutoTokenizer
-val_set_size: 0.05
-wandb_entity: null
-wandb_log_model: null
-wandb_name: null
-wandb_project: null
-wandb_watch: null
+adam_beta2: 0.95
+adam_epsilon: 0.00001
+max_grad_norm: 1.0
+lr_scheduler: cosine
+learning_rate: 0.000003
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: True
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: false
+
 warmup_steps: 100
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
 weight_decay: 0.1
-xformers_attention: null
+fsdp:
+fsdp_config:
+resize_token_embeddings_to_32x: true
+special_tokens:
+  pad_token: "<|endoftext|>"
+
+hub_model_id: AdamRTomkins/phi-kal
+hub_strategy: end
+max_steps: 2
 
+# Setting to enable pre-ampere cards!
+bf16: auto
+fp16: false
 ```
 
 </details><br>
@@ -118,15 +129,15 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 5.1862 | 0.0002 | 2 | 2.4120 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 6.3765 | 0.0 | 2 | 2.4120 |
 
 
 ### Framework versions
 
-- PEFT 0.11.1
-- Transformers 4.41.1
-- Pytorch 2.1.2+cu118
-- Datasets 2.19.1
-- Tokenizers 0.19.1
+- PEFT 0.8.2
+- Transformers 4.39.0.dev0
+- Pytorch 2.0.1+cu118
+- Datasets 2.17.1
+- Tokenizers 0.15.0
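
The config above describes a QLoRA run on `microsoft/phi-1_5`, with the resulting adapter pushed to `AdamRTomkins/phi-kal` (`hub_model_id`, `hub_strategy: end`) and Alpaca-formatted prompts from Open-Platypus. As a minimal sketch of pulling that adapter back onto the 4-bit base model for inference, assuming the standard `transformers` + `peft` loading path (this snippet is not part of the committed files):

```python
# Illustrative only: load the published adapter on top of the 4-bit base model.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "microsoft/phi-1_5"        # base_model in the axolotl config
adapter_id = "AdamRTomkins/phi-kal"  # hub_model_id in the axolotl config

bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # mirrors load_in_4bit: true

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)

# Alpaca-style prompt, matching `type: alpaca` in the datasets section.
prompt = "### Instruction:\nExplain what a LoRA adapter is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```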
adapter_config.json CHANGED
@@ -6,7 +6,6 @@
   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
-  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
@@ -20,14 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj",
     "k_proj",
-    "fc1",
     "fc2",
-    "dense"
+    "dense",
+    "q_proj",
+    "fc1",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
-  "use_dora": false,
   "use_rslora": false
 }
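
The only change here is the ordering of `target_modules`; the adapter still wraps the same six attention and MLP linears of phi-1_5 (`q_proj`, `k_proj`, `v_proj`, `dense`, `fc1`, `fc2`). For reference, an equivalent `peft` `LoraConfig` built from these values plus the `lora_r`/`lora_alpha`/`lora_dropout` settings in the axolotl config would look roughly like this (illustrative sketch, not the code that produced the checkpoint):

```python
# Sketch of a LoraConfig matching adapter_config.json; values copied from this repo.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,              # lora_r
    lora_alpha=32,     # lora_alpha
    lora_dropout=0.05, # lora_dropout
    target_modules=["k_proj", "fc2", "dense", "q_proj", "fc1", "v_proj"],
    task_type="CAUSAL_LM",
)
```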
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b37f7035685613b89aecf86f8f9de31a3bdbdc6ab6c7ff0081e0a64de77533a
-size 113349834
+oid sha256:0f17d6ca14d1d23d30ec7ed6aca31e1508bd7bcc52167918d23083304202aef1
+size 226595597
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d8dbcc8bc7ef951b5d4dd3bbf172e58e76dbde1b4ed80aa171da864c7b04270
-size 113284400
+oid sha256:45512c7c500fdaa0cbaa141f934531b482c5af5bfa64602e9103d6addedec079
+size 226530600
config.json CHANGED
@@ -23,7 +23,6 @@
   "_load_in_4bit": true,
   "_load_in_8bit": false,
   "bnb_4bit_compute_dtype": "float32",
-  "bnb_4bit_quant_storage": "bfloat16",
   "bnb_4bit_quant_type": "nf4",
   "bnb_4bit_use_double_quant": true,
   "llm_int8_enable_fp32_cpu_offload": false,
@@ -39,7 +38,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.1",
+  "transformers_version": "4.39.0.dev0",
   "use_cache": false,
   "vocab_size": 51200
 }
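
The `quantization_config` entries retained in `config.json` record how the base model was quantized for this QLoRA run: 4-bit NF4 weights, double quantization, and float32 compute. A hedged reconstruction of the equivalent `BitsAndBytesConfig` (derived from these fields; not a file shipped in the repo):

```python
# Reconstructed from the quantization_config fields in config.json (illustrative).
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # "_load_in_4bit": true
    bnb_4bit_quant_type="nf4",             # "bnb_4bit_quant_type": "nf4"
    bnb_4bit_use_double_quant=True,        # "bnb_4bit_use_double_quant": true
    bnb_4bit_compute_dtype=torch.float32,  # "bnb_4bit_compute_dtype": "float32"
)
```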
runs/Jun21_16-13-03_sky-08ab-atomkins-3e94-head/events.out.tfevents.1718986384.sky-08ab-atomkins-3e94-head.2667.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033cedc32c7cf86d9e798995c8826833fcc926c94373f866b8212175a3869985
+size 6853
tokenizer.json CHANGED
@@ -382,7 +382,6 @@
   "end_of_word_suffix": "",
   "fuse_unk": false,
   "byte_fallback": false,
-  "ignore_merges": false,
   "vocab": {
     "!": 0,
     "\"": 1,
tokenizer_config.json CHANGED
@@ -319,7 +319,6 @@
   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
-  "return_token_type_ids": false,
   "tokenizer_class": "CodeGenTokenizer",
   "unk_token": "<|endoftext|>"
 }
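
As in the upstream phi-1_5 tokenizer, `<|endoftext|>` serves as the EOS, PAD, and UNK token here (the axolotl config sets the same `pad_token`). A quick check of that setup with the standard `AutoTokenizer` path (illustrative snippet, not part of the commit):

```python
# Confirm the special-token setup described in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("AdamRTomkins/phi-kal")
print(tokenizer.eos_token, tokenizer.pad_token, tokenizer.unk_token)  # all "<|endoftext|>"
```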
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3ed7245af6fdfac6574f6643e1814f238a1aa4646a3d4f5bdca3202f0fdfa10
-size 6072
+oid sha256:93506f2af7c4c83e948c7f2e1da0759e701cd6707309e1e91dc6120361cb7229
+size 5179