DuongTrongChi
commited on
Training in progress, step 59, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e986d849d397f1dfe71a0947a97c8f30985df37fcc73fade493d2d8d46b1cacb
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675156
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9beab69120b8470ba5993d628bb2552a1d7a60b206e9b9165ea3374b579b5c86
|
3 |
size 50675156
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8da99b8ae5f5d39bfda5424154d9614466b3a6382198e7aa65c1c031599a0314
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -329,6 +329,97 @@
|
|
329 |
"learning_rate": 9.200000000000002e-06,
|
330 |
"loss": 1.9289,
|
331 |
"step": 46
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
}
|
333 |
],
|
334 |
"logging_steps": 1,
|
@@ -348,7 +439,7 @@
|
|
348 |
"attributes": {}
|
349 |
}
|
350 |
},
|
351 |
-
"total_flos":
|
352 |
"train_batch_size": 4,
|
353 |
"trial_name": null,
|
354 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.08620217331750525,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 59,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
329 |
"learning_rate": 9.200000000000002e-06,
|
330 |
"loss": 1.9289,
|
331 |
"step": 46
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.06866952789699571,
|
335 |
+
"grad_norm": 0.24804741144180298,
|
336 |
+
"learning_rate": 9.4e-06,
|
337 |
+
"loss": 1.9128,
|
338 |
+
"step": 47
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.07013058168203817,
|
342 |
+
"grad_norm": 0.2629912197589874,
|
343 |
+
"learning_rate": 9.600000000000001e-06,
|
344 |
+
"loss": 1.8998,
|
345 |
+
"step": 48
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.07159163546708064,
|
349 |
+
"grad_norm": 0.22671160101890564,
|
350 |
+
"learning_rate": 9.800000000000001e-06,
|
351 |
+
"loss": 1.8634,
|
352 |
+
"step": 49
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 0.07305268925212309,
|
356 |
+
"grad_norm": 0.22378858923912048,
|
357 |
+
"learning_rate": 1e-05,
|
358 |
+
"loss": 1.9038,
|
359 |
+
"step": 50
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 0.07451374303716556,
|
363 |
+
"grad_norm": 0.25769534707069397,
|
364 |
+
"learning_rate": 1.02e-05,
|
365 |
+
"loss": 1.8324,
|
366 |
+
"step": 51
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 0.07597479682220802,
|
370 |
+
"grad_norm": 0.22694693505764008,
|
371 |
+
"learning_rate": 1.04e-05,
|
372 |
+
"loss": 1.8374,
|
373 |
+
"step": 52
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 0.07743585060725047,
|
377 |
+
"grad_norm": 0.23865583539009094,
|
378 |
+
"learning_rate": 1.0600000000000002e-05,
|
379 |
+
"loss": 1.9129,
|
380 |
+
"step": 53
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"epoch": 0.07889690439229294,
|
384 |
+
"grad_norm": 0.23314256966114044,
|
385 |
+
"learning_rate": 1.0800000000000002e-05,
|
386 |
+
"loss": 1.8512,
|
387 |
+
"step": 54
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 0.0803579581773354,
|
391 |
+
"grad_norm": 0.2111833393573761,
|
392 |
+
"learning_rate": 1.1000000000000001e-05,
|
393 |
+
"loss": 1.8608,
|
394 |
+
"step": 55
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"epoch": 0.08181901196237787,
|
398 |
+
"grad_norm": 0.24742205440998077,
|
399 |
+
"learning_rate": 1.1200000000000001e-05,
|
400 |
+
"loss": 1.9129,
|
401 |
+
"step": 56
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"epoch": 0.08328006574742032,
|
405 |
+
"grad_norm": 0.2268109768629074,
|
406 |
+
"learning_rate": 1.14e-05,
|
407 |
+
"loss": 1.7996,
|
408 |
+
"step": 57
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"epoch": 0.08474111953246279,
|
412 |
+
"grad_norm": 0.24065515398979187,
|
413 |
+
"learning_rate": 1.16e-05,
|
414 |
+
"loss": 1.9203,
|
415 |
+
"step": 58
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"epoch": 0.08620217331750525,
|
419 |
+
"grad_norm": 0.23937389254570007,
|
420 |
+
"learning_rate": 1.18e-05,
|
421 |
+
"loss": 1.8845,
|
422 |
+
"step": 59
|
423 |
}
|
424 |
],
|
425 |
"logging_steps": 1,
|
|
|
439 |
"attributes": {}
|
440 |
}
|
441 |
},
|
442 |
+
"total_flos": 6.568072173368525e+16,
|
443 |
"train_batch_size": 4,
|
444 |
"trial_name": null,
|
445 |
"trial_params": null
|