learn3r committed on
Commit
594471a
·
1 Parent(s): 0eb410f

Upload 14 files

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6d305492a4ee15bb1a84c2f6eeb8c3b759479f1bc8791a729e8d4570609cd2
3
+ size 10772000
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70991c5a25b8553ad840c404001cb30a7b808da8b7e2b0f193f5b1d543763ad4
3
  size 9970100058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb972f49b7a271fa93df113ab42d1c21f4af82125d844e1cf380bcd176755157
3
  size 9970100058
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9de6c7babff251bade48a27ea9835217f92feceb551d1d85e009d06d8a17e073
3
  size 1429345899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4044cb349ca9dd629cac1d35b7a3628509b06fa292942c7e0b4d31496e88460
3
  size 1429345899
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b704c66cdb4c1be4f6c6cb2ea25762c33cb73b9c5eace3867362c6c0d0a1643
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8767f9d2bdc2a07a5a1f17637f8c37b2de2e1cba9fc0256a7e1f3de857a47dd
3
+ size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 3.1917154788970947,
3
  "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_20/checkpoint-28",
4
- "epoch": 9.73913043478261,
5
  "eval_steps": 500,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -398,181 +398,13 @@
398
  "eval_samples_per_second": 0.225,
399
  "eval_steps_per_second": 0.112,
400
  "step": 100
401
- },
402
- {
403
- "epoch": 7.1,
404
- "learning_rate": 0.001,
405
- "loss": 0.0724,
406
- "step": 102
407
- },
408
- {
409
- "epoch": 7.23,
410
- "learning_rate": 0.001,
411
- "loss": 0.0638,
412
- "step": 104
413
- },
414
- {
415
- "epoch": 7.37,
416
- "learning_rate": 0.001,
417
- "loss": 0.0649,
418
- "step": 106
419
- },
420
- {
421
- "epoch": 7.51,
422
- "learning_rate": 0.001,
423
- "loss": 0.0743,
424
- "step": 108
425
- },
426
- {
427
- "epoch": 7.65,
428
- "learning_rate": 0.001,
429
- "loss": 0.0754,
430
- "step": 110
431
- },
432
- {
433
- "epoch": 7.79,
434
- "learning_rate": 0.001,
435
- "loss": 0.0865,
436
- "step": 112
437
- },
438
- {
439
- "epoch": 7.93,
440
- "learning_rate": 0.001,
441
- "loss": 0.1572,
442
- "step": 114
443
- },
444
- {
445
- "epoch": 8.0,
446
- "eval_gen_len": 121.01775147928994,
447
- "eval_loss": 4.938564300537109,
448
- "eval_rouge1": 31.4658,
449
- "eval_rouge2": 7.2592,
450
- "eval_rougeL": 18.4796,
451
- "eval_rougeLsum": 27.6047,
452
- "eval_runtime": 1185.8984,
453
- "eval_samples_per_second": 0.285,
454
- "eval_steps_per_second": 0.143,
455
- "step": 115
456
- },
457
- {
458
- "epoch": 8.07,
459
- "learning_rate": 0.001,
460
- "loss": 0.0862,
461
- "step": 116
462
- },
463
- {
464
- "epoch": 8.21,
465
- "learning_rate": 0.001,
466
- "loss": 0.0607,
467
- "step": 118
468
- },
469
- {
470
- "epoch": 8.35,
471
- "learning_rate": 0.001,
472
- "loss": 0.0692,
473
- "step": 120
474
- },
475
- {
476
- "epoch": 8.49,
477
- "learning_rate": 0.001,
478
- "loss": 0.0916,
479
- "step": 122
480
- },
481
- {
482
- "epoch": 8.63,
483
- "learning_rate": 0.001,
484
- "loss": 0.0847,
485
- "step": 124
486
- },
487
- {
488
- "epoch": 8.77,
489
- "learning_rate": 0.001,
490
- "loss": 0.089,
491
- "step": 126
492
- },
493
- {
494
- "epoch": 8.9,
495
- "learning_rate": 0.001,
496
- "loss": 0.0867,
497
- "step": 128
498
- },
499
- {
500
- "epoch": 8.97,
501
- "eval_gen_len": 160.4792899408284,
502
- "eval_loss": 4.556480884552002,
503
- "eval_rouge1": 32.0531,
504
- "eval_rouge2": 7.0692,
505
- "eval_rougeL": 18.5551,
506
- "eval_rougeLsum": 27.3373,
507
- "eval_runtime": 1462.551,
508
- "eval_samples_per_second": 0.231,
509
- "eval_steps_per_second": 0.116,
510
- "step": 129
511
- },
512
- {
513
- "epoch": 9.04,
514
- "learning_rate": 0.001,
515
- "loss": 0.1022,
516
- "step": 130
517
- },
518
- {
519
- "epoch": 9.18,
520
- "learning_rate": 0.001,
521
- "loss": 0.067,
522
- "step": 132
523
- },
524
- {
525
- "epoch": 9.32,
526
- "learning_rate": 0.001,
527
- "loss": 0.0746,
528
- "step": 134
529
- },
530
- {
531
- "epoch": 9.46,
532
- "learning_rate": 0.001,
533
- "loss": 0.0888,
534
- "step": 136
535
- },
536
- {
537
- "epoch": 9.6,
538
- "learning_rate": 0.001,
539
- "loss": 0.1111,
540
- "step": 138
541
- },
542
- {
543
- "epoch": 9.74,
544
- "learning_rate": 0.001,
545
- "loss": 0.0748,
546
- "step": 140
547
- },
548
- {
549
- "epoch": 9.74,
550
- "eval_gen_len": 124.18934911242603,
551
- "eval_loss": 5.086633682250977,
552
- "eval_rouge1": 32.2717,
553
- "eval_rouge2": 7.7004,
554
- "eval_rougeL": 18.9107,
555
- "eval_rougeLsum": 28.3874,
556
- "eval_runtime": 1232.2532,
557
- "eval_samples_per_second": 0.274,
558
- "eval_steps_per_second": 0.137,
559
- "step": 140
560
- },
561
- {
562
- "epoch": 9.74,
563
- "step": 140,
564
- "total_flos": 2.447850236380324e+18,
565
- "train_loss": 0.18376290196818965,
566
- "train_runtime": 47972.5065,
567
- "train_samples_per_second": 0.766,
568
- "train_steps_per_second": 0.003
569
  }
570
  ],
571
  "logging_steps": 2,
572
  "max_steps": 140,
573
  "num_train_epochs": 10,
574
  "save_steps": 500,
575
- "total_flos": 2.447850236380324e+18,
576
  "trial_name": null,
577
  "trial_params": null
578
  }
 
1
  {
2
  "best_metric": 3.1917154788970947,
3
  "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_20/checkpoint-28",
4
+ "epoch": 6.956521739130435,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
398
  "eval_samples_per_second": 0.225,
399
  "eval_steps_per_second": 0.112,
400
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  }
402
  ],
403
  "logging_steps": 2,
404
  "max_steps": 140,
405
  "num_train_epochs": 10,
406
  "save_steps": 500,
407
+ "total_flos": 1.7591398064550052e+18,
408
  "trial_name": null,
409
  "trial_params": null
410
  }