Training in progress, epoch 2
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 236491077
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fe7b93abf02af995010aa8e52e985082cb35427475535754ba46e0f79bfe6b7
|
3 |
size 236491077
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118253458
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
|
3 |
size 118253458
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15597
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fa70ecb25666f6769a2077ed150c9e52861a4143626716aebc146c3d3d8cd65
|
3 |
size 15597
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27f16da81f42c208591e42d4a624accc6adcfabf4b156667c067c8a5a08012ca
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4e814a01103e7d042492e6a27b7700dc34e66d68095fbabd7c929b5bd6b2625
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -285,11 +285,296 @@
|
|
285 |
"eval_samples_per_second": 603.699,
|
286 |
"eval_steps_per_second": 37.731,
|
287 |
"step": 22940
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
}
|
289 |
],
|
290 |
"max_steps": 321160,
|
291 |
"num_train_epochs": 14,
|
292 |
-
"total_flos":
|
293 |
"trial_name": null,
|
294 |
"trial_params": null
|
295 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"global_step": 45880,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
285 |
"eval_samples_per_second": 603.699,
|
286 |
"eval_steps_per_second": 37.731,
|
287 |
"step": 22940
|
288 |
+
},
|
289 |
+
{
|
290 |
+
"epoch": 1.0,
|
291 |
+
"learning_rate": 9.377822788632103e-05,
|
292 |
+
"loss": 2.1305,
|
293 |
+
"step": 23000
|
294 |
+
},
|
295 |
+
{
|
296 |
+
"epoch": 1.02,
|
297 |
+
"learning_rate": 9.362096946670525e-05,
|
298 |
+
"loss": 2.1245,
|
299 |
+
"step": 23500
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"epoch": 1.05,
|
303 |
+
"learning_rate": 9.346371104708946e-05,
|
304 |
+
"loss": 2.1178,
|
305 |
+
"step": 24000
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"epoch": 1.07,
|
309 |
+
"learning_rate": 9.330676714431291e-05,
|
310 |
+
"loss": 2.1102,
|
311 |
+
"step": 24500
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"epoch": 1.09,
|
315 |
+
"learning_rate": 9.314950872469712e-05,
|
316 |
+
"loss": 2.1007,
|
317 |
+
"step": 25000
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 1.11,
|
321 |
+
"learning_rate": 9.299225030508134e-05,
|
322 |
+
"loss": 2.0966,
|
323 |
+
"step": 25500
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 1.13,
|
327 |
+
"learning_rate": 9.283499188546555e-05,
|
328 |
+
"loss": 2.0878,
|
329 |
+
"step": 26000
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 1.16,
|
333 |
+
"learning_rate": 9.2678047982689e-05,
|
334 |
+
"loss": 2.0814,
|
335 |
+
"step": 26500
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"epoch": 1.18,
|
339 |
+
"learning_rate": 9.252078956307321e-05,
|
340 |
+
"loss": 2.0756,
|
341 |
+
"step": 27000
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"epoch": 1.2,
|
345 |
+
"learning_rate": 9.236353114345743e-05,
|
346 |
+
"loss": 2.0685,
|
347 |
+
"step": 27500
|
348 |
+
},
|
349 |
+
{
|
350 |
+
"epoch": 1.22,
|
351 |
+
"learning_rate": 9.220627272384163e-05,
|
352 |
+
"loss": 2.065,
|
353 |
+
"step": 28000
|
354 |
+
},
|
355 |
+
{
|
356 |
+
"epoch": 1.24,
|
357 |
+
"learning_rate": 9.204932882106509e-05,
|
358 |
+
"loss": 2.0592,
|
359 |
+
"step": 28500
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.26,
|
363 |
+
"learning_rate": 9.18920704014493e-05,
|
364 |
+
"loss": 2.0527,
|
365 |
+
"step": 29000
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"epoch": 1.29,
|
369 |
+
"learning_rate": 9.173481198183351e-05,
|
370 |
+
"loss": 2.0459,
|
371 |
+
"step": 29500
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.31,
|
375 |
+
"learning_rate": 9.157755356221772e-05,
|
376 |
+
"loss": 2.0443,
|
377 |
+
"step": 30000
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 1.33,
|
381 |
+
"learning_rate": 9.142060965944116e-05,
|
382 |
+
"loss": 2.0367,
|
383 |
+
"step": 30500
|
384 |
+
},
|
385 |
+
{
|
386 |
+
"epoch": 1.35,
|
387 |
+
"learning_rate": 9.126366575666462e-05,
|
388 |
+
"loss": 2.0322,
|
389 |
+
"step": 31000
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"epoch": 1.37,
|
393 |
+
"learning_rate": 9.110640733704882e-05,
|
394 |
+
"loss": 2.0228,
|
395 |
+
"step": 31500
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"epoch": 1.39,
|
399 |
+
"learning_rate": 9.094914891743304e-05,
|
400 |
+
"loss": 2.0209,
|
401 |
+
"step": 32000
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"epoch": 1.42,
|
405 |
+
"learning_rate": 9.079189049781725e-05,
|
406 |
+
"loss": 2.0156,
|
407 |
+
"step": 32500
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"epoch": 1.44,
|
411 |
+
"learning_rate": 9.063463207820148e-05,
|
412 |
+
"loss": 2.0141,
|
413 |
+
"step": 33000
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"epoch": 1.46,
|
417 |
+
"learning_rate": 9.047737365858569e-05,
|
418 |
+
"loss": 2.0093,
|
419 |
+
"step": 33500
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"epoch": 1.48,
|
423 |
+
"learning_rate": 9.032042975580913e-05,
|
424 |
+
"loss": 2.0052,
|
425 |
+
"step": 34000
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"epoch": 1.5,
|
429 |
+
"learning_rate": 9.016317133619334e-05,
|
430 |
+
"loss": 1.9973,
|
431 |
+
"step": 34500
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"epoch": 1.53,
|
435 |
+
"learning_rate": 9.000591291657756e-05,
|
436 |
+
"loss": 1.995,
|
437 |
+
"step": 35000
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.55,
|
441 |
+
"learning_rate": 8.984865449696177e-05,
|
442 |
+
"loss": 1.9908,
|
443 |
+
"step": 35500
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"epoch": 1.57,
|
447 |
+
"learning_rate": 8.969139607734599e-05,
|
448 |
+
"loss": 1.986,
|
449 |
+
"step": 36000
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"epoch": 1.59,
|
453 |
+
"learning_rate": 8.95341376577302e-05,
|
454 |
+
"loss": 1.9825,
|
455 |
+
"step": 36500
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"epoch": 1.61,
|
459 |
+
"learning_rate": 8.937687923811441e-05,
|
460 |
+
"loss": 1.9754,
|
461 |
+
"step": 37000
|
462 |
+
},
|
463 |
+
{
|
464 |
+
"epoch": 1.63,
|
465 |
+
"learning_rate": 8.921993533533786e-05,
|
466 |
+
"loss": 1.9733,
|
467 |
+
"step": 37500
|
468 |
+
},
|
469 |
+
{
|
470 |
+
"epoch": 1.66,
|
471 |
+
"learning_rate": 8.906267691572208e-05,
|
472 |
+
"loss": 1.9679,
|
473 |
+
"step": 38000
|
474 |
+
},
|
475 |
+
{
|
476 |
+
"epoch": 1.68,
|
477 |
+
"learning_rate": 8.890541849610629e-05,
|
478 |
+
"loss": 1.9611,
|
479 |
+
"step": 38500
|
480 |
+
},
|
481 |
+
{
|
482 |
+
"epoch": 1.7,
|
483 |
+
"learning_rate": 8.87481600764905e-05,
|
484 |
+
"loss": 1.961,
|
485 |
+
"step": 39000
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 1.72,
|
489 |
+
"learning_rate": 8.859121617371395e-05,
|
490 |
+
"loss": 1.9588,
|
491 |
+
"step": 39500
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"epoch": 1.74,
|
495 |
+
"learning_rate": 8.843395775409816e-05,
|
496 |
+
"loss": 1.9578,
|
497 |
+
"step": 40000
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"epoch": 1.77,
|
501 |
+
"learning_rate": 8.827669933448237e-05,
|
502 |
+
"loss": 1.9528,
|
503 |
+
"step": 40500
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 1.79,
|
507 |
+
"learning_rate": 8.811944091486659e-05,
|
508 |
+
"loss": 1.9486,
|
509 |
+
"step": 41000
|
510 |
+
},
|
511 |
+
{
|
512 |
+
"epoch": 1.81,
|
513 |
+
"learning_rate": 8.796249701209002e-05,
|
514 |
+
"loss": 1.9443,
|
515 |
+
"step": 41500
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"epoch": 1.83,
|
519 |
+
"learning_rate": 8.780523859247425e-05,
|
520 |
+
"loss": 1.9406,
|
521 |
+
"step": 42000
|
522 |
+
},
|
523 |
+
{
|
524 |
+
"epoch": 1.85,
|
525 |
+
"learning_rate": 8.764798017285846e-05,
|
526 |
+
"loss": 1.9372,
|
527 |
+
"step": 42500
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 1.87,
|
531 |
+
"learning_rate": 8.749072175324268e-05,
|
532 |
+
"loss": 1.9351,
|
533 |
+
"step": 43000
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"epoch": 1.9,
|
537 |
+
"learning_rate": 8.733346333362689e-05,
|
538 |
+
"loss": 1.9265,
|
539 |
+
"step": 43500
|
540 |
+
},
|
541 |
+
{
|
542 |
+
"epoch": 1.92,
|
543 |
+
"learning_rate": 8.717651943085034e-05,
|
544 |
+
"loss": 1.9272,
|
545 |
+
"step": 44000
|
546 |
+
},
|
547 |
+
{
|
548 |
+
"epoch": 1.94,
|
549 |
+
"learning_rate": 8.701926101123455e-05,
|
550 |
+
"loss": 1.9268,
|
551 |
+
"step": 44500
|
552 |
+
},
|
553 |
+
{
|
554 |
+
"epoch": 1.96,
|
555 |
+
"learning_rate": 8.686200259161877e-05,
|
556 |
+
"loss": 1.9242,
|
557 |
+
"step": 45000
|
558 |
+
},
|
559 |
+
{
|
560 |
+
"epoch": 1.98,
|
561 |
+
"learning_rate": 8.670474417200297e-05,
|
562 |
+
"loss": 1.9234,
|
563 |
+
"step": 45500
|
564 |
+
},
|
565 |
+
{
|
566 |
+
"epoch": 2.0,
|
567 |
+
"eval_accuracy": 0.6411638490813204,
|
568 |
+
"eval_loss": 1.785447597503662,
|
569 |
+
"eval_runtime": 294.391,
|
570 |
+
"eval_samples_per_second": 604.475,
|
571 |
+
"eval_steps_per_second": 37.78,
|
572 |
+
"step": 45880
|
573 |
}
|
574 |
],
|
575 |
"max_steps": 321160,
|
576 |
"num_train_epochs": 14,
|
577 |
+
"total_flos": 3.488545026395035e+17,
|
578 |
"trial_name": null,
|
579 |
"trial_params": null
|
580 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118253458
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
|
3 |
size 118253458
|
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:666ec60d0078a18e9ab7268ca3acc069e6bcfd168d1a61b253913169617035c1
|
3 |
+
size 18890
|