plip committed on
Commit
e238422
·
1 Parent(s): aad1369

Training in progress, step 190000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d649779ba95c621015cb53780f5701aa302c759b040cf454f46acc31f4706b5
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba65516926b4ff6cdfb50443e11a434cb503c7c65e74dc6127694de917095dae
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c619af36af772fff3b27134f866199a2501f2804d36e9fef52fa198a4bf9feb5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40af1c76beae3fcf68a183dcf22a7c21a89e9eb4f6548fe295f14d1acc603f3a
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae7e48b658f6388c6c044e6d37239970a21307494d626979f7e10630dfa93207
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196e9b55e2db27c384076a5416088da2b3e045d13b4c3f579694349cd8bb530e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.029411764705882,
5
- "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3606,11 +3606,211 @@
3606
  "eval_samples_per_second": 745.664,
3607
  "eval_steps_per_second": 11.931,
3608
  "step": 180000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3609
  }
3610
  ],
3611
  "max_steps": 250000,
3612
  "num_train_epochs": 16,
3613
- "total_flos": 2.8829139360447075e+21,
3614
  "trial_name": null,
3615
  "trial_params": null
3616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.642156862745098,
5
+ "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3606
  "eval_samples_per_second": 745.664,
3607
  "eval_steps_per_second": 11.931,
3608
  "step": 180000
3609
+ },
3610
+ {
3611
+ "epoch": 11.06,
3612
+ "learning_rate": 0.00012612565995880976,
3613
+ "loss": 0.4467,
3614
+ "step": 180500
3615
+ },
3616
+ {
3617
+ "epoch": 11.09,
3618
+ "learning_rate": 0.00012457808290921774,
3619
+ "loss": 0.4467,
3620
+ "step": 181000
3621
+ },
3622
+ {
3623
+ "epoch": 11.09,
3624
+ "eval_loss": 0.8010080456733704,
3625
+ "eval_runtime": 1.2919,
3626
+ "eval_samples_per_second": 774.08,
3627
+ "eval_steps_per_second": 12.385,
3628
+ "step": 181000
3629
+ },
3630
+ {
3631
+ "epoch": 11.12,
3632
+ "learning_rate": 0.00012303839809876525,
3633
+ "loss": 0.4466,
3634
+ "step": 181500
3635
+ },
3636
+ {
3637
+ "epoch": 11.15,
3638
+ "learning_rate": 0.00012150667287825382,
3639
+ "loss": 0.4463,
3640
+ "step": 182000
3641
+ },
3642
+ {
3643
+ "epoch": 11.15,
3644
+ "eval_loss": 0.8034773468971252,
3645
+ "eval_runtime": 1.2819,
3646
+ "eval_samples_per_second": 780.103,
3647
+ "eval_steps_per_second": 12.482,
3648
+ "step": 182000
3649
+ },
3650
+ {
3651
+ "epoch": 11.18,
3652
+ "learning_rate": 0.00011998297425030656,
3653
+ "loss": 0.4464,
3654
+ "step": 182500
3655
+ },
3656
+ {
3657
+ "epoch": 11.21,
3658
+ "learning_rate": 0.00011846736886643775,
3659
+ "loss": 0.4463,
3660
+ "step": 183000
3661
+ },
3662
+ {
3663
+ "epoch": 11.21,
3664
+ "eval_loss": 0.8048831820487976,
3665
+ "eval_runtime": 1.3614,
3666
+ "eval_samples_per_second": 734.528,
3667
+ "eval_steps_per_second": 11.752,
3668
+ "step": 183000
3669
+ },
3670
+ {
3671
+ "epoch": 11.24,
3672
+ "learning_rate": 0.00011695992302413651,
3673
+ "loss": 0.4462,
3674
+ "step": 183500
3675
+ },
3676
+ {
3677
+ "epoch": 11.27,
3678
+ "learning_rate": 0.00011546070266396771,
3679
+ "loss": 0.4462,
3680
+ "step": 184000
3681
+ },
3682
+ {
3683
+ "epoch": 11.27,
3684
+ "eval_loss": 0.7998443841934204,
3685
+ "eval_runtime": 1.3341,
3686
+ "eval_samples_per_second": 749.594,
3687
+ "eval_steps_per_second": 11.994,
3688
+ "step": 184000
3689
+ },
3690
+ {
3691
+ "epoch": 11.31,
3692
+ "learning_rate": 0.00011396977336668645,
3693
+ "loss": 0.4459,
3694
+ "step": 184500
3695
+ },
3696
+ {
3697
+ "epoch": 11.34,
3698
+ "learning_rate": 0.00011248720035037021,
3699
+ "loss": 0.4459,
3700
+ "step": 185000
3701
+ },
3702
+ {
3703
+ "epoch": 11.34,
3704
+ "eval_loss": 0.7987710237503052,
3705
+ "eval_runtime": 1.3437,
3706
+ "eval_samples_per_second": 744.203,
3707
+ "eval_steps_per_second": 11.907,
3708
+ "step": 185000
3709
+ },
3710
+ {
3711
+ "epoch": 11.37,
3712
+ "learning_rate": 0.00011101304846756577,
3713
+ "loss": 0.4458,
3714
+ "step": 185500
3715
+ },
3716
+ {
3717
+ "epoch": 11.4,
3718
+ "learning_rate": 0.00010954738220245183,
3719
+ "loss": 0.4457,
3720
+ "step": 186000
3721
+ },
3722
+ {
3723
+ "epoch": 11.4,
3724
+ "eval_loss": 0.8063639402389526,
3725
+ "eval_runtime": 1.2501,
3726
+ "eval_samples_per_second": 799.917,
3727
+ "eval_steps_per_second": 12.799,
3728
+ "step": 186000
3729
+ },
3730
+ {
3731
+ "epoch": 11.43,
3732
+ "learning_rate": 0.00010809026566801912,
3733
+ "loss": 0.4457,
3734
+ "step": 186500
3735
+ },
3736
+ {
3737
+ "epoch": 11.46,
3738
+ "learning_rate": 0.00010664176260326507,
3739
+ "loss": 0.4456,
3740
+ "step": 187000
3741
+ },
3742
+ {
3743
+ "epoch": 11.46,
3744
+ "eval_loss": 0.8042049407958984,
3745
+ "eval_runtime": 1.3155,
3746
+ "eval_samples_per_second": 760.186,
3747
+ "eval_steps_per_second": 12.163,
3748
+ "step": 187000
3749
+ },
3750
+ {
3751
+ "epoch": 11.49,
3752
+ "learning_rate": 0.00010520193637040641,
3753
+ "loss": 0.4454,
3754
+ "step": 187500
3755
+ },
3756
+ {
3757
+ "epoch": 11.52,
3758
+ "learning_rate": 0.00010377084995210682,
3759
+ "loss": 0.4454,
3760
+ "step": 188000
3761
+ },
3762
+ {
3763
+ "epoch": 11.52,
3764
+ "eval_loss": 0.7998358607292175,
3765
+ "eval_runtime": 1.3257,
3766
+ "eval_samples_per_second": 754.322,
3767
+ "eval_steps_per_second": 12.069,
3768
+ "step": 188000
3769
+ },
3770
+ {
3771
+ "epoch": 11.55,
3772
+ "learning_rate": 0.00010234856594872234,
3773
+ "loss": 0.4452,
3774
+ "step": 188500
3775
+ },
3776
+ {
3777
+ "epoch": 11.58,
3778
+ "learning_rate": 0.00010093514657556295,
3779
+ "loss": 0.4453,
3780
+ "step": 189000
3781
+ },
3782
+ {
3783
+ "epoch": 11.58,
3784
+ "eval_loss": 0.8026143908500671,
3785
+ "eval_runtime": 1.2929,
3786
+ "eval_samples_per_second": 773.468,
3787
+ "eval_steps_per_second": 12.375,
3788
+ "step": 189000
3789
+ },
3790
+ {
3791
+ "epoch": 11.61,
3792
+ "learning_rate": 9.953065366017073e-05,
3793
+ "loss": 0.4451,
3794
+ "step": 189500
3795
+ },
3796
+ {
3797
+ "epoch": 11.64,
3798
+ "learning_rate": 9.813514863961586e-05,
3799
+ "loss": 0.4449,
3800
+ "step": 190000
3801
+ },
3802
+ {
3803
+ "epoch": 11.64,
3804
+ "eval_loss": 0.7992528080940247,
3805
+ "eval_runtime": 1.2891,
3806
+ "eval_samples_per_second": 775.752,
3807
+ "eval_steps_per_second": 12.412,
3808
+ "step": 190000
3809
  }
3810
  ],
3811
  "max_steps": 250000,
3812
  "num_train_epochs": 16,
3813
+ "total_flos": 3.0430819389105116e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c619af36af772fff3b27134f866199a2501f2804d36e9fef52fa198a4bf9feb5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40af1c76beae3fcf68a183dcf22a7c21a89e9eb4f6548fe295f14d1acc603f3a
3
  size 25761253