KevinKibe committed
Commit f548523
1 Parent(s): b43c17e

Training in progress, step 1000, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67fb82930389b24ba0208a7a08b33f2abbd2323860716962cf881cf5a16bc6ed
+oid sha256:278f1aabb225e122816504b6177d798795987ef785fda4b587f55348cd0585b1
 size 2751040864
last-checkpoint/pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c84f8cc0dcd4bb5980ea5c4e462292f7fdcfa76bf833ecfba7b6e2fcdd96d3c0
+oid sha256:9f4cd96a02ce728c1249d05ee4257b8306ce70a2dd639029fee17d2ca97a5452
 size 5000078781
last-checkpoint/pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13a55fd318dc6a5d1963163df52119c23c1dac0f939b79c076170d494afe47c8
+oid sha256:34c922012b1355f1888ad44f340cff5338ed2ab1713e9b7b3a98a455edd97f20
 size 482838574
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac1b10d6f7dd9cdf9b5b89778eacbed4b4a8f31412820b85b64a56b3a491d825
+oid sha256:bc8bc7f2726d784a753dc16c23097bea5a23a3d9dc3b5525b04524652050e82f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a573157b5af49c7fd752beb625308fa4d3184af6323528856e81787deb4e252b
+oid sha256:0dc00eb221243876f0f4b45e5adada06467f6b2d30df94cdcaa6d342f8e32768
 size 1064
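
The five binaries above are tracked with Git LFS, so each diff only rewrites the three-line pointer file (version, oid, size): the sha256 oid changes while the payload size stays the same. Below is a minimal sketch of checking a pulled blob against its pointer, assuming the checkpoint was fetched (e.g. with `git lfs pull`) into last-checkpoint/; the helper names are illustrative and not part of this repository.

```python
# Sketch only: verify a pulled LFS blob against the pointer shown in this diff.
# Assumes the real binary exists at last-checkpoint/optimizer.pt after
# `git lfs pull`; parse_lfs_pointer/verify_blob are illustrative helpers.
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    # The pointer is three "key value" lines: version, oid, size.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def verify_blob(blob_path: Path, pointer: dict) -> bool:
    # Stream the file so a multi-GB shard does not have to fit in memory.
    digest, size = hashlib.sha256(), 0
    with blob_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == pointer["size"] and digest.hexdigest() == pointer["oid"]

pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:278f1aabb225e122816504b6177d798795987ef785fda4b587f55348cd0585b1\n"
    "size 2751040864"
)
print(verify_blob(Path("last-checkpoint/optimizer.pt"), pointer))
```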
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8545865416526794,
-  "best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned/checkpoint-700",
-  "epoch": 700.0,
+  "best_metric": 0.7109387516975403,
+  "best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned/checkpoint-800",
+  "epoch": 1000.0,
   "eval_steps": 100,
-  "global_step": 700,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -126,6 +126,57 @@
       "eval_samples_per_second": 1.215,
       "eval_steps_per_second": 0.608,
       "step": 700
+    },
+    {
+      "epoch": 800.0,
+      "grad_norm": 1.7595362663269043,
+      "learning_rate": 4.060000000000001e-06,
+      "loss": 0.1361,
+      "step": 800
+    },
+    {
+      "epoch": 800.0,
+      "eval_gen_len": 59.5,
+      "eval_loss": 0.7109387516975403,
+      "eval_rouge": 0.3649,
+      "eval_runtime": 10.8248,
+      "eval_samples_per_second": 0.185,
+      "eval_steps_per_second": 0.092,
+      "step": 800
+    },
+    {
+      "epoch": 900.0,
+      "grad_norm": 1.1663947105407715,
+      "learning_rate": 2.06e-06,
+      "loss": 0.0764,
+      "step": 900
+    },
+    {
+      "epoch": 900.0,
+      "eval_gen_len": 54.0,
+      "eval_loss": 0.7293275594711304,
+      "eval_rouge": 0.4568,
+      "eval_runtime": 9.9208,
+      "eval_samples_per_second": 0.202,
+      "eval_steps_per_second": 0.101,
+      "step": 900
+    },
+    {
+      "epoch": 1000.0,
+      "grad_norm": 0.9859239459037781,
+      "learning_rate": 6.000000000000001e-08,
+      "loss": 0.0559,
+      "step": 1000
+    },
+    {
+      "epoch": 1000.0,
+      "eval_gen_len": 49.0,
+      "eval_loss": 0.7133963704109192,
+      "eval_rouge": 0.467,
+      "eval_runtime": 9.5903,
+      "eval_samples_per_second": 0.209,
+      "eval_steps_per_second": 0.104,
+      "step": 1000
     }
   ],
   "logging_steps": 100,
@@ -133,7 +184,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1000,
   "save_steps": 100,
-  "total_flos": 1750201132646400.0,
+  "total_flos": 2500287332352000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null