ramdhanfirdaus commited on
Commit
57bff63
1 Parent(s): 6dea3ed

Training in progress, step 1400, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
  ## Training procedure
202
 
203
 
204
- The following `bitsandbytes` quantization config was used during training:
205
- - quant_method: bitsandbytes
206
- - load_in_8bit: False
207
- - load_in_4bit: True
208
- - llm_int8_threshold: 6.0
209
- - llm_int8_skip_modules: None
210
- - llm_int8_enable_fp32_cpu_offload: False
211
- - llm_int8_has_fp16_weight: False
212
- - bnb_4bit_quant_type: nf4
213
- - bnb_4bit_use_double_quant: True
214
- - bnb_4bit_compute_dtype: float16
215
-
216
  ### Framework versions
217
 
218
 
 
201
  ## Training procedure
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ### Framework versions
205
 
206
 
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfe68bb85d3b885ea16a3a4edf42e142af71a569576fc8d42fbea5987dfac9e2
3
  size 50338848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c21249785ef7e6e4552a0ae643a39e07fce9a53609d606c103117185976fb8
3
  size 50338848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28f8585dc54188d220a469a599d21bb72b0b894236d57f1b8ced70c49177a369
3
- size 100693001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9024574af3c880d2f51faa02b5f241c69b38141a1bc48a7567edf95ef9ab608
3
+ size 100691721
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bce8ad87f00a02b0d2a3d827e73ac036f343d9552caa13a5ae85d423ed6d4b0
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9abb419b80aead609e3d25aa188a530b509237800785868036d93ad22750d1
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60fe2b14e82d752da652953a5bae182d24c7ff34775d2753b9c2e6fa092203d0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e41ec466e37d3e777474b32690d472b15b70497f93b8ea178e488be3afc4f79
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -1,6 +1,24 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
4
  "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
  "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -13,8 +13,12 @@
13
  "bos_token": "<|endoftext|>",
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
 
16
  "model_max_length": 1024,
17
  "pad_token": "<|endoftext|>",
 
18
  "tokenizer_class": "GPT2Tokenizer",
 
 
19
  "unk_token": "<|endoftext|>"
20
  }
 
13
  "bos_token": "<|endoftext|>",
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
16
+ "max_length": 512,
17
  "model_max_length": 1024,
18
  "pad_token": "<|endoftext|>",
19
+ "stride": 0,
20
  "tokenizer_class": "GPT2Tokenizer",
21
+ "truncation_side": "right",
22
+ "truncation_strategy": "longest_first",
23
  "unk_token": "<|endoftext|>"
24
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.72101891040802,
3
- "best_model_checkpoint": "./outputs/checkpoint-1200",
4
- "epoch": 0.8743169398907104,
5
  "eval_steps": 100,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -175,13 +175,41 @@
175
  "eval_samples_per_second": 43.611,
176
  "eval_steps_per_second": 5.457,
177
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  }
179
  ],
180
  "logging_steps": 100,
181
  "max_steps": 4116,
182
  "num_train_epochs": 3,
183
  "save_steps": 100,
184
- "total_flos": 7.145822849964442e+16,
185
  "trial_name": null,
186
  "trial_params": null
187
  }
 
1
  {
2
+ "best_metric": 1.6939107179641724,
3
+ "best_model_checkpoint": "./outputs/checkpoint-1400",
4
+ "epoch": 1.0200364298724955,
5
  "eval_steps": 100,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
175
  "eval_samples_per_second": 43.611,
176
  "eval_steps_per_second": 5.457,
177
  "step": 1200
178
+ },
179
+ {
180
+ "epoch": 0.95,
181
+ "learning_rate": 0.0002,
182
+ "loss": 1.7282,
183
+ "step": 1300
184
+ },
185
+ {
186
+ "epoch": 0.95,
187
+ "eval_loss": 1.6993989944458008,
188
+ "eval_runtime": 143.9159,
189
+ "eval_samples_per_second": 43.595,
190
+ "eval_steps_per_second": 5.455,
191
+ "step": 1300
192
+ },
193
+ {
194
+ "epoch": 1.02,
195
+ "learning_rate": 0.0002,
196
+ "loss": 1.7077,
197
+ "step": 1400
198
+ },
199
+ {
200
+ "epoch": 1.02,
201
+ "eval_loss": 1.6939107179641724,
202
+ "eval_runtime": 132.967,
203
+ "eval_samples_per_second": 47.185,
204
+ "eval_steps_per_second": 5.904,
205
+ "step": 1400
206
  }
207
  ],
208
  "logging_steps": 100,
209
  "max_steps": 4116,
210
  "num_train_epochs": 3,
211
  "save_steps": 100,
212
+ "total_flos": 8.337729485790413e+16,
213
  "trial_name": null,
214
  "trial_params": null
215
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155afbfe76aee38b40cefbe9ac141cafa2613d5a2d500e346b58fc281400fbd7
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33604dab3077b6665a2e7200deca0f62ee7786dcab0d4b4b7e8aa9f973422a89
3
  size 4219