abdiharyadi commited on
Commit
e148429
1 Parent(s): d6c3dd1

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a32de7d5357341b365094d4b0bdeccdb3aae0d77fcd2fe97c3e7e0b29c7aab4e
3
  size 1575259780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5cb2cfc5117f3531c8bd4177812ffc142572d96d99c17ff8e99066dae40dc25
3
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65ad8a4d4408d8ea2d47cfca03bb0202226b3d1773d79cdbcc48402654bb48a
3
  size 3150397656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e076027da10369decf3426e665cdd5f8a63464a96851c9cfa686d1bf2b95c3a
3
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d0982c65fadbf6bf7482db7062c965549f8158635957f4bdc41338317eca289
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65256b72b395fe9c8d3f39501b23485eb6c1ae7d0e4d1f4dbbccadf77e8bf228
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe116f764987ad933a40001607fac90b24f963059ef9f0559216685859936f0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a39dca76f7733101110373dca301368b725f4d4854876ebc9e72e6ecc87f8875
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0795,
3
- "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-11608",
4
- "epoch": 2.9998707843390617,
5
  "eval_steps": 500,
6
- "global_step": 11608,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3523,6 +3523,1174 @@
3523
  "eval_samples_per_second": 0.887,
3524
  "eval_steps_per_second": 0.444,
3525
  "step": 11608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3526
  }
3527
  ],
3528
  "logging_steps": 20,
@@ -3542,7 +4710,7 @@
3542
  "attributes": {}
3543
  }
3544
  },
3545
- "total_flos": 7.619114628911923e+16,
3546
  "train_batch_size": 2,
3547
  "trial_name": null,
3548
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.1685,
3
+ "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-15478",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 15478,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3523
  "eval_samples_per_second": 0.887,
3524
  "eval_steps_per_second": 0.444,
3525
  "step": 11608
3526
+ },
3527
+ {
3528
+ "epoch": 3.0029719602015765,
3529
+ "learning_rate": 1.8149228575132891e-07,
3530
+ "loss": 2.8225,
3531
+ "step": 11620
3532
+ },
3533
+ {
3534
+ "epoch": 3.0081405866391004,
3535
+ "learning_rate": 1.814598729417866e-07,
3536
+ "loss": 2.7593,
3537
+ "step": 11640
3538
+ },
3539
+ {
3540
+ "epoch": 3.013309213076625,
3541
+ "learning_rate": 1.8142746013224426e-07,
3542
+ "loss": 2.7975,
3543
+ "step": 11660
3544
+ },
3545
+ {
3546
+ "epoch": 3.018477839514149,
3547
+ "learning_rate": 1.8139504732270193e-07,
3548
+ "loss": 2.8483,
3549
+ "step": 11680
3550
+ },
3551
+ {
3552
+ "epoch": 3.0236464659516735,
3553
+ "learning_rate": 1.8136263451315961e-07,
3554
+ "loss": 2.8307,
3555
+ "step": 11700
3556
+ },
3557
+ {
3558
+ "epoch": 3.0288150923891974,
3559
+ "learning_rate": 1.8133022170361725e-07,
3560
+ "loss": 2.7256,
3561
+ "step": 11720
3562
+ },
3563
+ {
3564
+ "epoch": 3.033983718826722,
3565
+ "learning_rate": 1.8129780889407494e-07,
3566
+ "loss": 2.8067,
3567
+ "step": 11740
3568
+ },
3569
+ {
3570
+ "epoch": 3.039152345264246,
3571
+ "learning_rate": 1.812653960845326e-07,
3572
+ "loss": 2.7359,
3573
+ "step": 11760
3574
+ },
3575
+ {
3576
+ "epoch": 3.04432097170177,
3577
+ "learning_rate": 1.8123298327499026e-07,
3578
+ "loss": 2.8118,
3579
+ "step": 11780
3580
+ },
3581
+ {
3582
+ "epoch": 3.0494895981392944,
3583
+ "learning_rate": 1.8120057046544795e-07,
3584
+ "loss": 2.8233,
3585
+ "step": 11800
3586
+ },
3587
+ {
3588
+ "epoch": 3.054658224576819,
3589
+ "learning_rate": 1.811681576559056e-07,
3590
+ "loss": 2.8124,
3591
+ "step": 11820
3592
+ },
3593
+ {
3594
+ "epoch": 3.059826851014343,
3595
+ "learning_rate": 1.8113574484636327e-07,
3596
+ "loss": 2.7403,
3597
+ "step": 11840
3598
+ },
3599
+ {
3600
+ "epoch": 3.064995477451867,
3601
+ "learning_rate": 1.8110333203682096e-07,
3602
+ "loss": 2.8573,
3603
+ "step": 11860
3604
+ },
3605
+ {
3606
+ "epoch": 3.0701641038893914,
3607
+ "learning_rate": 1.810709192272786e-07,
3608
+ "loss": 2.8055,
3609
+ "step": 11880
3610
+ },
3611
+ {
3612
+ "epoch": 3.075332730326916,
3613
+ "learning_rate": 1.8103850641773629e-07,
3614
+ "loss": 2.7713,
3615
+ "step": 11900
3616
+ },
3617
+ {
3618
+ "epoch": 3.0805013567644397,
3619
+ "learning_rate": 1.8100609360819395e-07,
3620
+ "loss": 2.7659,
3621
+ "step": 11920
3622
+ },
3623
+ {
3624
+ "epoch": 3.085669983201964,
3625
+ "learning_rate": 1.809736807986516e-07,
3626
+ "loss": 2.8172,
3627
+ "step": 11940
3628
+ },
3629
+ {
3630
+ "epoch": 3.0908386096394884,
3631
+ "learning_rate": 1.809412679891093e-07,
3632
+ "loss": 2.8002,
3633
+ "step": 11960
3634
+ },
3635
+ {
3636
+ "epoch": 3.0960072360770123,
3637
+ "learning_rate": 1.8090885517956696e-07,
3638
+ "loss": 2.8898,
3639
+ "step": 11980
3640
+ },
3641
+ {
3642
+ "epoch": 3.1011758625145367,
3643
+ "learning_rate": 1.8087644237002462e-07,
3644
+ "loss": 2.8703,
3645
+ "step": 12000
3646
+ },
3647
+ {
3648
+ "epoch": 3.106344488952061,
3649
+ "learning_rate": 1.808440295604823e-07,
3650
+ "loss": 2.7963,
3651
+ "step": 12020
3652
+ },
3653
+ {
3654
+ "epoch": 3.1115131153895854,
3655
+ "learning_rate": 1.8081161675093997e-07,
3656
+ "loss": 2.7938,
3657
+ "step": 12040
3658
+ },
3659
+ {
3660
+ "epoch": 3.1166817418271093,
3661
+ "learning_rate": 1.8077920394139763e-07,
3662
+ "loss": 2.7748,
3663
+ "step": 12060
3664
+ },
3665
+ {
3666
+ "epoch": 3.1218503682646337,
3667
+ "learning_rate": 1.8074679113185532e-07,
3668
+ "loss": 2.7813,
3669
+ "step": 12080
3670
+ },
3671
+ {
3672
+ "epoch": 3.127018994702158,
3673
+ "learning_rate": 1.8071437832231296e-07,
3674
+ "loss": 2.8148,
3675
+ "step": 12100
3676
+ },
3677
+ {
3678
+ "epoch": 3.132187621139682,
3679
+ "learning_rate": 1.8068196551277065e-07,
3680
+ "loss": 2.7859,
3681
+ "step": 12120
3682
+ },
3683
+ {
3684
+ "epoch": 3.1373562475772063,
3685
+ "learning_rate": 1.806495527032283e-07,
3686
+ "loss": 2.7769,
3687
+ "step": 12140
3688
+ },
3689
+ {
3690
+ "epoch": 3.1425248740147307,
3691
+ "learning_rate": 1.8061713989368597e-07,
3692
+ "loss": 2.8138,
3693
+ "step": 12160
3694
+ },
3695
+ {
3696
+ "epoch": 3.147693500452255,
3697
+ "learning_rate": 1.8058472708414366e-07,
3698
+ "loss": 2.8262,
3699
+ "step": 12180
3700
+ },
3701
+ {
3702
+ "epoch": 3.152862126889779,
3703
+ "learning_rate": 1.8055231427460132e-07,
3704
+ "loss": 2.7442,
3705
+ "step": 12200
3706
+ },
3707
+ {
3708
+ "epoch": 3.1580307533273033,
3709
+ "learning_rate": 1.8051990146505898e-07,
3710
+ "loss": 2.7795,
3711
+ "step": 12220
3712
+ },
3713
+ {
3714
+ "epoch": 3.1631993797648277,
3715
+ "learning_rate": 1.8048748865551667e-07,
3716
+ "loss": 2.8031,
3717
+ "step": 12240
3718
+ },
3719
+ {
3720
+ "epoch": 3.1683680062023516,
3721
+ "learning_rate": 1.804550758459743e-07,
3722
+ "loss": 2.7879,
3723
+ "step": 12260
3724
+ },
3725
+ {
3726
+ "epoch": 3.173536632639876,
3727
+ "learning_rate": 1.80422663036432e-07,
3728
+ "loss": 2.8254,
3729
+ "step": 12280
3730
+ },
3731
+ {
3732
+ "epoch": 3.1787052590774003,
3733
+ "learning_rate": 1.8039025022688966e-07,
3734
+ "loss": 2.7154,
3735
+ "step": 12300
3736
+ },
3737
+ {
3738
+ "epoch": 3.1838738855149242,
3739
+ "learning_rate": 1.8035783741734732e-07,
3740
+ "loss": 2.8229,
3741
+ "step": 12320
3742
+ },
3743
+ {
3744
+ "epoch": 3.1890425119524486,
3745
+ "learning_rate": 1.80325424607805e-07,
3746
+ "loss": 2.7942,
3747
+ "step": 12340
3748
+ },
3749
+ {
3750
+ "epoch": 3.194211138389973,
3751
+ "learning_rate": 1.8029301179826267e-07,
3752
+ "loss": 2.7733,
3753
+ "step": 12360
3754
+ },
3755
+ {
3756
+ "epoch": 3.199379764827497,
3757
+ "learning_rate": 1.8026059898872033e-07,
3758
+ "loss": 2.775,
3759
+ "step": 12380
3760
+ },
3761
+ {
3762
+ "epoch": 3.2045483912650212,
3763
+ "learning_rate": 1.8022818617917802e-07,
3764
+ "loss": 2.7004,
3765
+ "step": 12400
3766
+ },
3767
+ {
3768
+ "epoch": 3.2097170177025456,
3769
+ "learning_rate": 1.8019577336963565e-07,
3770
+ "loss": 2.8086,
3771
+ "step": 12420
3772
+ },
3773
+ {
3774
+ "epoch": 3.21488564414007,
3775
+ "learning_rate": 1.8016336056009334e-07,
3776
+ "loss": 2.7877,
3777
+ "step": 12440
3778
+ },
3779
+ {
3780
+ "epoch": 3.220054270577594,
3781
+ "learning_rate": 1.80130947750551e-07,
3782
+ "loss": 2.8041,
3783
+ "step": 12460
3784
+ },
3785
+ {
3786
+ "epoch": 3.225222897015118,
3787
+ "learning_rate": 1.8009853494100867e-07,
3788
+ "loss": 2.8454,
3789
+ "step": 12480
3790
+ },
3791
+ {
3792
+ "epoch": 3.2303915234526426,
3793
+ "learning_rate": 1.8006612213146635e-07,
3794
+ "loss": 2.8097,
3795
+ "step": 12500
3796
+ },
3797
+ {
3798
+ "epoch": 3.2355601498901665,
3799
+ "learning_rate": 1.8003370932192402e-07,
3800
+ "loss": 2.8309,
3801
+ "step": 12520
3802
+ },
3803
+ {
3804
+ "epoch": 3.240728776327691,
3805
+ "learning_rate": 1.8000129651238168e-07,
3806
+ "loss": 2.7646,
3807
+ "step": 12540
3808
+ },
3809
+ {
3810
+ "epoch": 3.245897402765215,
3811
+ "learning_rate": 1.7996888370283937e-07,
3812
+ "loss": 2.7901,
3813
+ "step": 12560
3814
+ },
3815
+ {
3816
+ "epoch": 3.2510660292027396,
3817
+ "learning_rate": 1.79936470893297e-07,
3818
+ "loss": 2.7912,
3819
+ "step": 12580
3820
+ },
3821
+ {
3822
+ "epoch": 3.2562346556402635,
3823
+ "learning_rate": 1.799040580837547e-07,
3824
+ "loss": 2.8004,
3825
+ "step": 12600
3826
+ },
3827
+ {
3828
+ "epoch": 3.261403282077788,
3829
+ "learning_rate": 1.7987164527421235e-07,
3830
+ "loss": 2.7432,
3831
+ "step": 12620
3832
+ },
3833
+ {
3834
+ "epoch": 3.266571908515312,
3835
+ "learning_rate": 1.7983923246467001e-07,
3836
+ "loss": 2.7917,
3837
+ "step": 12640
3838
+ },
3839
+ {
3840
+ "epoch": 3.271740534952836,
3841
+ "learning_rate": 1.798068196551277e-07,
3842
+ "loss": 2.8395,
3843
+ "step": 12660
3844
+ },
3845
+ {
3846
+ "epoch": 3.2769091613903605,
3847
+ "learning_rate": 1.7977440684558536e-07,
3848
+ "loss": 2.7566,
3849
+ "step": 12680
3850
+ },
3851
+ {
3852
+ "epoch": 3.282077787827885,
3853
+ "learning_rate": 1.7974199403604303e-07,
3854
+ "loss": 2.7603,
3855
+ "step": 12700
3856
+ },
3857
+ {
3858
+ "epoch": 3.287246414265409,
3859
+ "learning_rate": 1.7970958122650071e-07,
3860
+ "loss": 2.7443,
3861
+ "step": 12720
3862
+ },
3863
+ {
3864
+ "epoch": 3.292415040702933,
3865
+ "learning_rate": 1.7967716841695835e-07,
3866
+ "loss": 2.7244,
3867
+ "step": 12740
3868
+ },
3869
+ {
3870
+ "epoch": 3.2975836671404575,
3871
+ "learning_rate": 1.7964475560741604e-07,
3872
+ "loss": 2.7736,
3873
+ "step": 12760
3874
+ },
3875
+ {
3876
+ "epoch": 3.302752293577982,
3877
+ "learning_rate": 1.7961234279787373e-07,
3878
+ "loss": 2.7509,
3879
+ "step": 12780
3880
+ },
3881
+ {
3882
+ "epoch": 3.3079209200155058,
3883
+ "learning_rate": 1.7957992998833136e-07,
3884
+ "loss": 2.8058,
3885
+ "step": 12800
3886
+ },
3887
+ {
3888
+ "epoch": 3.31308954645303,
3889
+ "learning_rate": 1.7954751717878905e-07,
3890
+ "loss": 2.7647,
3891
+ "step": 12820
3892
+ },
3893
+ {
3894
+ "epoch": 3.3182581728905545,
3895
+ "learning_rate": 1.795151043692467e-07,
3896
+ "loss": 2.7883,
3897
+ "step": 12840
3898
+ },
3899
+ {
3900
+ "epoch": 3.3234267993280784,
3901
+ "learning_rate": 1.7948269155970437e-07,
3902
+ "loss": 2.8089,
3903
+ "step": 12860
3904
+ },
3905
+ {
3906
+ "epoch": 3.3285954257656027,
3907
+ "learning_rate": 1.7945027875016206e-07,
3908
+ "loss": 2.7858,
3909
+ "step": 12880
3910
+ },
3911
+ {
3912
+ "epoch": 3.333764052203127,
3913
+ "learning_rate": 1.7941786594061972e-07,
3914
+ "loss": 2.7449,
3915
+ "step": 12900
3916
+ },
3917
+ {
3918
+ "epoch": 3.338932678640651,
3919
+ "learning_rate": 1.7938545313107739e-07,
3920
+ "loss": 2.7406,
3921
+ "step": 12920
3922
+ },
3923
+ {
3924
+ "epoch": 3.3441013050781754,
3925
+ "learning_rate": 1.7935304032153507e-07,
3926
+ "loss": 2.8112,
3927
+ "step": 12940
3928
+ },
3929
+ {
3930
+ "epoch": 3.3492699315156997,
3931
+ "learning_rate": 1.793206275119927e-07,
3932
+ "loss": 2.7738,
3933
+ "step": 12960
3934
+ },
3935
+ {
3936
+ "epoch": 3.354438557953224,
3937
+ "learning_rate": 1.792882147024504e-07,
3938
+ "loss": 2.7914,
3939
+ "step": 12980
3940
+ },
3941
+ {
3942
+ "epoch": 3.359607184390748,
3943
+ "learning_rate": 1.7925580189290806e-07,
3944
+ "loss": 2.8128,
3945
+ "step": 13000
3946
+ },
3947
+ {
3948
+ "epoch": 3.3647758108282724,
3949
+ "learning_rate": 1.7922338908336572e-07,
3950
+ "loss": 2.8135,
3951
+ "step": 13020
3952
+ },
3953
+ {
3954
+ "epoch": 3.3699444372657967,
3955
+ "learning_rate": 1.791909762738234e-07,
3956
+ "loss": 2.7928,
3957
+ "step": 13040
3958
+ },
3959
+ {
3960
+ "epoch": 3.3751130637033206,
3961
+ "learning_rate": 1.7915856346428107e-07,
3962
+ "loss": 2.7973,
3963
+ "step": 13060
3964
+ },
3965
+ {
3966
+ "epoch": 3.380281690140845,
3967
+ "learning_rate": 1.7912615065473873e-07,
3968
+ "loss": 2.8088,
3969
+ "step": 13080
3970
+ },
3971
+ {
3972
+ "epoch": 3.3854503165783694,
3973
+ "learning_rate": 1.7909373784519642e-07,
3974
+ "loss": 2.7603,
3975
+ "step": 13100
3976
+ },
3977
+ {
3978
+ "epoch": 3.3906189430158937,
3979
+ "learning_rate": 1.7906132503565406e-07,
3980
+ "loss": 2.7204,
3981
+ "step": 13120
3982
+ },
3983
+ {
3984
+ "epoch": 3.3957875694534176,
3985
+ "learning_rate": 1.7902891222611175e-07,
3986
+ "loss": 2.7494,
3987
+ "step": 13140
3988
+ },
3989
+ {
3990
+ "epoch": 3.400956195890942,
3991
+ "learning_rate": 1.7899649941656943e-07,
3992
+ "loss": 2.7304,
3993
+ "step": 13160
3994
+ },
3995
+ {
3996
+ "epoch": 3.4061248223284664,
3997
+ "learning_rate": 1.7896408660702707e-07,
3998
+ "loss": 2.7921,
3999
+ "step": 13180
4000
+ },
4001
+ {
4002
+ "epoch": 3.4112934487659903,
4003
+ "learning_rate": 1.7893167379748476e-07,
4004
+ "loss": 2.7574,
4005
+ "step": 13200
4006
+ },
4007
+ {
4008
+ "epoch": 3.4164620752035146,
4009
+ "learning_rate": 1.7889926098794242e-07,
4010
+ "loss": 2.7649,
4011
+ "step": 13220
4012
+ },
4013
+ {
4014
+ "epoch": 3.421630701641039,
4015
+ "learning_rate": 1.7886684817840008e-07,
4016
+ "loss": 2.7618,
4017
+ "step": 13240
4018
+ },
4019
+ {
4020
+ "epoch": 3.4267993280785634,
4021
+ "learning_rate": 1.7883443536885777e-07,
4022
+ "loss": 2.7628,
4023
+ "step": 13260
4024
+ },
4025
+ {
4026
+ "epoch": 3.4319679545160873,
4027
+ "learning_rate": 1.7880202255931543e-07,
4028
+ "loss": 2.7988,
4029
+ "step": 13280
4030
+ },
4031
+ {
4032
+ "epoch": 3.4371365809536116,
4033
+ "learning_rate": 1.787696097497731e-07,
4034
+ "loss": 2.7147,
4035
+ "step": 13300
4036
+ },
4037
+ {
4038
+ "epoch": 3.442305207391136,
4039
+ "learning_rate": 1.7873719694023078e-07,
4040
+ "loss": 2.8114,
4041
+ "step": 13320
4042
+ },
4043
+ {
4044
+ "epoch": 3.44747383382866,
4045
+ "learning_rate": 1.7870478413068842e-07,
4046
+ "loss": 2.7474,
4047
+ "step": 13340
4048
+ },
4049
+ {
4050
+ "epoch": 3.4526424602661843,
4051
+ "learning_rate": 1.786723713211461e-07,
4052
+ "loss": 2.7718,
4053
+ "step": 13360
4054
+ },
4055
+ {
4056
+ "epoch": 3.4578110867037086,
4057
+ "learning_rate": 1.786399585116038e-07,
4058
+ "loss": 2.821,
4059
+ "step": 13380
4060
+ },
4061
+ {
4062
+ "epoch": 3.4629797131412325,
4063
+ "learning_rate": 1.7860754570206143e-07,
4064
+ "loss": 2.706,
4065
+ "step": 13400
4066
+ },
4067
+ {
4068
+ "epoch": 3.468148339578757,
4069
+ "learning_rate": 1.7857513289251912e-07,
4070
+ "loss": 2.7982,
4071
+ "step": 13420
4072
+ },
4073
+ {
4074
+ "epoch": 3.4733169660162813,
4075
+ "learning_rate": 1.7854272008297678e-07,
4076
+ "loss": 2.7446,
4077
+ "step": 13440
4078
+ },
4079
+ {
4080
+ "epoch": 3.478485592453805,
4081
+ "learning_rate": 1.7851030727343444e-07,
4082
+ "loss": 2.749,
4083
+ "step": 13460
4084
+ },
4085
+ {
4086
+ "epoch": 3.4836542188913295,
4087
+ "learning_rate": 1.7847789446389213e-07,
4088
+ "loss": 2.7231,
4089
+ "step": 13480
4090
+ },
4091
+ {
4092
+ "epoch": 3.488822845328854,
4093
+ "learning_rate": 1.784454816543498e-07,
4094
+ "loss": 2.7026,
4095
+ "step": 13500
4096
+ },
4097
+ {
4098
+ "epoch": 3.4939914717663783,
4099
+ "learning_rate": 1.7841306884480745e-07,
4100
+ "loss": 2.7534,
4101
+ "step": 13520
4102
+ },
4103
+ {
4104
+ "epoch": 3.499160098203902,
4105
+ "learning_rate": 1.7838065603526514e-07,
4106
+ "loss": 2.8366,
4107
+ "step": 13540
4108
+ },
4109
+ {
4110
+ "epoch": 3.5043287246414265,
4111
+ "learning_rate": 1.7834824322572278e-07,
4112
+ "loss": 2.7885,
4113
+ "step": 13560
4114
+ },
4115
+ {
4116
+ "epoch": 3.509497351078951,
4117
+ "learning_rate": 1.7831583041618047e-07,
4118
+ "loss": 2.7273,
4119
+ "step": 13580
4120
+ },
4121
+ {
4122
+ "epoch": 3.514665977516475,
4123
+ "learning_rate": 1.7828341760663813e-07,
4124
+ "loss": 2.7864,
4125
+ "step": 13600
4126
+ },
4127
+ {
4128
+ "epoch": 3.519834603953999,
4129
+ "learning_rate": 1.782510047970958e-07,
4130
+ "loss": 2.7448,
4131
+ "step": 13620
4132
+ },
4133
+ {
4134
+ "epoch": 3.5250032303915235,
4135
+ "learning_rate": 1.7821859198755348e-07,
4136
+ "loss": 2.7384,
4137
+ "step": 13640
4138
+ },
4139
+ {
4140
+ "epoch": 3.530171856829048,
4141
+ "learning_rate": 1.7818617917801114e-07,
4142
+ "loss": 2.7694,
4143
+ "step": 13660
4144
+ },
4145
+ {
4146
+ "epoch": 3.535340483266572,
4147
+ "learning_rate": 1.781537663684688e-07,
4148
+ "loss": 2.7501,
4149
+ "step": 13680
4150
+ },
4151
+ {
4152
+ "epoch": 3.540509109704096,
4153
+ "learning_rate": 1.781213535589265e-07,
4154
+ "loss": 2.8046,
4155
+ "step": 13700
4156
+ },
4157
+ {
4158
+ "epoch": 3.5456777361416205,
4159
+ "learning_rate": 1.7808894074938413e-07,
4160
+ "loss": 2.752,
4161
+ "step": 13720
4162
+ },
4163
+ {
4164
+ "epoch": 3.5508463625791444,
4165
+ "learning_rate": 1.7805652793984181e-07,
4166
+ "loss": 2.771,
4167
+ "step": 13740
4168
+ },
4169
+ {
4170
+ "epoch": 3.556014989016669,
4171
+ "learning_rate": 1.780241151302995e-07,
4172
+ "loss": 2.7431,
4173
+ "step": 13760
4174
+ },
4175
+ {
4176
+ "epoch": 3.561183615454193,
4177
+ "learning_rate": 1.7799170232075714e-07,
4178
+ "loss": 2.7601,
4179
+ "step": 13780
4180
+ },
4181
+ {
4182
+ "epoch": 3.5663522418917175,
4183
+ "learning_rate": 1.7795928951121483e-07,
4184
+ "loss": 2.8301,
4185
+ "step": 13800
4186
+ },
4187
+ {
4188
+ "epoch": 3.5715208683292414,
4189
+ "learning_rate": 1.779268767016725e-07,
4190
+ "loss": 2.7157,
4191
+ "step": 13820
4192
+ },
4193
+ {
4194
+ "epoch": 3.576689494766766,
4195
+ "learning_rate": 1.7789446389213015e-07,
4196
+ "loss": 2.7602,
4197
+ "step": 13840
4198
+ },
4199
+ {
4200
+ "epoch": 3.5818581212042897,
4201
+ "learning_rate": 1.7786205108258784e-07,
4202
+ "loss": 2.7926,
4203
+ "step": 13860
4204
+ },
4205
+ {
4206
+ "epoch": 3.587026747641814,
4207
+ "learning_rate": 1.778296382730455e-07,
4208
+ "loss": 2.76,
4209
+ "step": 13880
4210
+ },
4211
+ {
4212
+ "epoch": 3.5921953740793384,
4213
+ "learning_rate": 1.7779722546350316e-07,
4214
+ "loss": 2.735,
4215
+ "step": 13900
4216
+ },
4217
+ {
4218
+ "epoch": 3.597364000516863,
4219
+ "learning_rate": 1.7776481265396085e-07,
4220
+ "loss": 2.7026,
4221
+ "step": 13920
4222
+ },
4223
+ {
4224
+ "epoch": 3.602532626954387,
4225
+ "learning_rate": 1.7773239984441849e-07,
4226
+ "loss": 2.7219,
4227
+ "step": 13940
4228
+ },
4229
+ {
4230
+ "epoch": 3.607701253391911,
4231
+ "learning_rate": 1.7769998703487617e-07,
4232
+ "loss": 2.821,
4233
+ "step": 13960
4234
+ },
4235
+ {
4236
+ "epoch": 3.6128698798294354,
4237
+ "learning_rate": 1.7766757422533386e-07,
4238
+ "loss": 2.6779,
4239
+ "step": 13980
4240
+ },
4241
+ {
4242
+ "epoch": 3.6180385062669593,
4243
+ "learning_rate": 1.776351614157915e-07,
4244
+ "loss": 2.7756,
4245
+ "step": 14000
4246
+ },
4247
+ {
4248
+ "epoch": 3.6232071327044837,
4249
+ "learning_rate": 1.7760274860624919e-07,
4250
+ "loss": 2.7093,
4251
+ "step": 14020
4252
+ },
4253
+ {
4254
+ "epoch": 3.628375759142008,
4255
+ "learning_rate": 1.7757033579670685e-07,
4256
+ "loss": 2.8079,
4257
+ "step": 14040
4258
+ },
4259
+ {
4260
+ "epoch": 3.6335443855795324,
4261
+ "learning_rate": 1.775379229871645e-07,
4262
+ "loss": 2.7358,
4263
+ "step": 14060
4264
+ },
4265
+ {
4266
+ "epoch": 3.6387130120170563,
4267
+ "learning_rate": 1.775055101776222e-07,
4268
+ "loss": 2.7401,
4269
+ "step": 14080
4270
+ },
4271
+ {
4272
+ "epoch": 3.6438816384545807,
4273
+ "learning_rate": 1.7747309736807986e-07,
4274
+ "loss": 2.7088,
4275
+ "step": 14100
4276
+ },
4277
+ {
4278
+ "epoch": 3.649050264892105,
4279
+ "learning_rate": 1.7744068455853752e-07,
4280
+ "loss": 2.6936,
4281
+ "step": 14120
4282
+ },
4283
+ {
4284
+ "epoch": 3.654218891329629,
4285
+ "learning_rate": 1.774082717489952e-07,
4286
+ "loss": 2.7538,
4287
+ "step": 14140
4288
+ },
4289
+ {
4290
+ "epoch": 3.6593875177671533,
4291
+ "learning_rate": 1.7737585893945285e-07,
4292
+ "loss": 2.7446,
4293
+ "step": 14160
4294
+ },
4295
+ {
4296
+ "epoch": 3.6645561442046777,
4297
+ "learning_rate": 1.7734344612991053e-07,
4298
+ "loss": 2.7566,
4299
+ "step": 14180
4300
+ },
4301
+ {
4302
+ "epoch": 3.669724770642202,
4303
+ "learning_rate": 1.773110333203682e-07,
4304
+ "loss": 2.7146,
4305
+ "step": 14200
4306
+ },
4307
+ {
4308
+ "epoch": 3.674893397079726,
4309
+ "learning_rate": 1.7727862051082586e-07,
4310
+ "loss": 2.7953,
4311
+ "step": 14220
4312
+ },
4313
+ {
4314
+ "epoch": 3.6800620235172503,
4315
+ "learning_rate": 1.7724620770128355e-07,
4316
+ "loss": 2.7185,
4317
+ "step": 14240
4318
+ },
4319
+ {
4320
+ "epoch": 3.6852306499547747,
4321
+ "learning_rate": 1.772137948917412e-07,
4322
+ "loss": 2.7957,
4323
+ "step": 14260
4324
+ },
4325
+ {
4326
+ "epoch": 3.6903992763922986,
4327
+ "learning_rate": 1.7718138208219887e-07,
4328
+ "loss": 2.7017,
4329
+ "step": 14280
4330
+ },
4331
+ {
4332
+ "epoch": 3.695567902829823,
4333
+ "learning_rate": 1.7714896927265656e-07,
4334
+ "loss": 2.7374,
4335
+ "step": 14300
4336
+ },
4337
+ {
4338
+ "epoch": 3.7007365292673473,
4339
+ "learning_rate": 1.771165564631142e-07,
4340
+ "loss": 2.7442,
4341
+ "step": 14320
4342
+ },
4343
+ {
4344
+ "epoch": 3.7059051557048717,
4345
+ "learning_rate": 1.7708414365357188e-07,
4346
+ "loss": 2.7792,
4347
+ "step": 14340
4348
+ },
4349
+ {
4350
+ "epoch": 3.7110737821423956,
4351
+ "learning_rate": 1.7705173084402957e-07,
4352
+ "loss": 2.7355,
4353
+ "step": 14360
4354
+ },
4355
+ {
4356
+ "epoch": 3.71624240857992,
4357
+ "learning_rate": 1.770193180344872e-07,
4358
+ "loss": 2.7571,
4359
+ "step": 14380
4360
+ },
4361
+ {
4362
+ "epoch": 3.721411035017444,
4363
+ "learning_rate": 1.769869052249449e-07,
4364
+ "loss": 2.6742,
4365
+ "step": 14400
4366
+ },
4367
+ {
4368
+ "epoch": 3.726579661454968,
4369
+ "learning_rate": 1.7695449241540256e-07,
4370
+ "loss": 2.7332,
4371
+ "step": 14420
4372
+ },
4373
+ {
4374
+ "epoch": 3.7317482878924926,
4375
+ "learning_rate": 1.7692207960586022e-07,
4376
+ "loss": 2.7142,
4377
+ "step": 14440
4378
+ },
4379
+ {
4380
+ "epoch": 3.736916914330017,
4381
+ "learning_rate": 1.768896667963179e-07,
4382
+ "loss": 2.7682,
4383
+ "step": 14460
4384
+ },
4385
+ {
4386
+ "epoch": 3.7420855407675413,
4387
+ "learning_rate": 1.7685725398677557e-07,
4388
+ "loss": 2.8006,
4389
+ "step": 14480
4390
+ },
4391
+ {
4392
+ "epoch": 3.747254167205065,
4393
+ "learning_rate": 1.7682484117723323e-07,
4394
+ "loss": 2.7649,
4395
+ "step": 14500
4396
+ },
4397
+ {
4398
+ "epoch": 3.7524227936425896,
4399
+ "learning_rate": 1.7679242836769092e-07,
4400
+ "loss": 2.7049,
4401
+ "step": 14520
4402
+ },
4403
+ {
4404
+ "epoch": 3.7575914200801135,
4405
+ "learning_rate": 1.7676001555814855e-07,
4406
+ "loss": 2.7923,
4407
+ "step": 14540
4408
+ },
4409
+ {
4410
+ "epoch": 3.762760046517638,
4411
+ "learning_rate": 1.7672760274860624e-07,
4412
+ "loss": 2.6909,
4413
+ "step": 14560
4414
+ },
4415
+ {
4416
+ "epoch": 3.767928672955162,
4417
+ "learning_rate": 1.7669518993906393e-07,
4418
+ "loss": 2.7651,
4419
+ "step": 14580
4420
+ },
4421
+ {
4422
+ "epoch": 3.7730972993926866,
4423
+ "learning_rate": 1.7666277712952157e-07,
4424
+ "loss": 2.6994,
4425
+ "step": 14600
4426
+ },
4427
+ {
4428
+ "epoch": 3.7782659258302105,
4429
+ "learning_rate": 1.7663036431997925e-07,
4430
+ "loss": 2.7794,
4431
+ "step": 14620
4432
+ },
4433
+ {
4434
+ "epoch": 3.783434552267735,
4435
+ "learning_rate": 1.7659795151043692e-07,
4436
+ "loss": 2.712,
4437
+ "step": 14640
4438
+ },
4439
+ {
4440
+ "epoch": 3.788603178705259,
4441
+ "learning_rate": 1.7656553870089458e-07,
4442
+ "loss": 2.698,
4443
+ "step": 14660
4444
+ },
4445
+ {
4446
+ "epoch": 3.793771805142783,
4447
+ "learning_rate": 1.7653312589135227e-07,
4448
+ "loss": 2.6473,
4449
+ "step": 14680
4450
+ },
4451
+ {
4452
+ "epoch": 3.7989404315803075,
4453
+ "learning_rate": 1.7650071308180993e-07,
4454
+ "loss": 2.7231,
4455
+ "step": 14700
4456
+ },
4457
+ {
4458
+ "epoch": 3.804109058017832,
4459
+ "learning_rate": 1.764683002722676e-07,
4460
+ "loss": 2.7367,
4461
+ "step": 14720
4462
+ },
4463
+ {
4464
+ "epoch": 3.809277684455356,
4465
+ "learning_rate": 1.7643588746272528e-07,
4466
+ "loss": 2.7298,
4467
+ "step": 14740
4468
+ },
4469
+ {
4470
+ "epoch": 3.81444631089288,
4471
+ "learning_rate": 1.764034746531829e-07,
4472
+ "loss": 2.7584,
4473
+ "step": 14760
4474
+ },
4475
+ {
4476
+ "epoch": 3.8196149373304045,
4477
+ "learning_rate": 1.763710618436406e-07,
4478
+ "loss": 2.7566,
4479
+ "step": 14780
4480
+ },
4481
+ {
4482
+ "epoch": 3.824783563767929,
4483
+ "learning_rate": 1.7633864903409826e-07,
4484
+ "loss": 2.753,
4485
+ "step": 14800
4486
+ },
4487
+ {
4488
+ "epoch": 3.8299521902054527,
4489
+ "learning_rate": 1.7630623622455592e-07,
4490
+ "loss": 2.7702,
4491
+ "step": 14820
4492
+ },
4493
+ {
4494
+ "epoch": 3.835120816642977,
4495
+ "learning_rate": 1.7627382341501361e-07,
4496
+ "loss": 2.7236,
4497
+ "step": 14840
4498
+ },
4499
+ {
4500
+ "epoch": 3.8402894430805015,
4501
+ "learning_rate": 1.7624141060547128e-07,
4502
+ "loss": 2.8067,
4503
+ "step": 14860
4504
+ },
4505
+ {
4506
+ "epoch": 3.845458069518026,
4507
+ "learning_rate": 1.7620899779592894e-07,
4508
+ "loss": 2.723,
4509
+ "step": 14880
4510
+ },
4511
+ {
4512
+ "epoch": 3.8506266959555497,
4513
+ "learning_rate": 1.7617658498638663e-07,
4514
+ "loss": 2.7566,
4515
+ "step": 14900
4516
+ },
4517
+ {
4518
+ "epoch": 3.855795322393074,
4519
+ "learning_rate": 1.7614417217684426e-07,
4520
+ "loss": 2.6923,
4521
+ "step": 14920
4522
+ },
4523
+ {
4524
+ "epoch": 3.860963948830598,
4525
+ "learning_rate": 1.7611175936730195e-07,
4526
+ "loss": 2.7525,
4527
+ "step": 14940
4528
+ },
4529
+ {
4530
+ "epoch": 3.8661325752681224,
4531
+ "learning_rate": 1.7607934655775964e-07,
4532
+ "loss": 2.7261,
4533
+ "step": 14960
4534
+ },
4535
+ {
4536
+ "epoch": 3.8713012017056467,
4537
+ "learning_rate": 1.7604693374821727e-07,
4538
+ "loss": 2.7879,
4539
+ "step": 14980
4540
+ },
4541
+ {
4542
+ "epoch": 3.876469828143171,
4543
+ "learning_rate": 1.7601452093867496e-07,
4544
+ "loss": 2.7061,
4545
+ "step": 15000
4546
+ },
4547
+ {
4548
+ "epoch": 3.8816384545806955,
4549
+ "learning_rate": 1.7598210812913262e-07,
4550
+ "loss": 2.6878,
4551
+ "step": 15020
4552
+ },
4553
+ {
4554
+ "epoch": 3.8868070810182194,
4555
+ "learning_rate": 1.7594969531959028e-07,
4556
+ "loss": 2.7831,
4557
+ "step": 15040
4558
+ },
4559
+ {
4560
+ "epoch": 3.8919757074557437,
4561
+ "learning_rate": 1.7591728251004797e-07,
4562
+ "loss": 2.751,
4563
+ "step": 15060
4564
+ },
4565
+ {
4566
+ "epoch": 3.8971443338932676,
4567
+ "learning_rate": 1.7588486970050564e-07,
4568
+ "loss": 2.7191,
4569
+ "step": 15080
4570
+ },
4571
+ {
4572
+ "epoch": 3.902312960330792,
4573
+ "learning_rate": 1.758524568909633e-07,
4574
+ "loss": 2.7581,
4575
+ "step": 15100
4576
+ },
4577
+ {
4578
+ "epoch": 3.9074815867683164,
4579
+ "learning_rate": 1.7582004408142099e-07,
4580
+ "loss": 2.7016,
4581
+ "step": 15120
4582
+ },
4583
+ {
4584
+ "epoch": 3.9126502132058407,
4585
+ "learning_rate": 1.7578763127187862e-07,
4586
+ "loss": 2.7306,
4587
+ "step": 15140
4588
+ },
4589
+ {
4590
+ "epoch": 3.9178188396433646,
4591
+ "learning_rate": 1.757552184623363e-07,
4592
+ "loss": 2.7835,
4593
+ "step": 15160
4594
+ },
4595
+ {
4596
+ "epoch": 3.922987466080889,
4597
+ "learning_rate": 1.75722805652794e-07,
4598
+ "loss": 2.7854,
4599
+ "step": 15180
4600
+ },
4601
+ {
4602
+ "epoch": 3.9281560925184134,
4603
+ "learning_rate": 1.7569039284325163e-07,
4604
+ "loss": 2.7345,
4605
+ "step": 15200
4606
+ },
4607
+ {
4608
+ "epoch": 3.9333247189559373,
4609
+ "learning_rate": 1.7565798003370932e-07,
4610
+ "loss": 2.758,
4611
+ "step": 15220
4612
+ },
4613
+ {
4614
+ "epoch": 3.9384933453934616,
4615
+ "learning_rate": 1.7562556722416698e-07,
4616
+ "loss": 2.7777,
4617
+ "step": 15240
4618
+ },
4619
+ {
4620
+ "epoch": 3.943661971830986,
4621
+ "learning_rate": 1.7559315441462464e-07,
4622
+ "loss": 2.8035,
4623
+ "step": 15260
4624
+ },
4625
+ {
4626
+ "epoch": 3.9488305982685104,
4627
+ "learning_rate": 1.7556074160508233e-07,
4628
+ "loss": 2.7233,
4629
+ "step": 15280
4630
+ },
4631
+ {
4632
+ "epoch": 3.9539992247060343,
4633
+ "learning_rate": 1.7552832879554e-07,
4634
+ "loss": 2.7346,
4635
+ "step": 15300
4636
+ },
4637
+ {
4638
+ "epoch": 3.9591678511435586,
4639
+ "learning_rate": 1.7549591598599766e-07,
4640
+ "loss": 2.7189,
4641
+ "step": 15320
4642
+ },
4643
+ {
4644
+ "epoch": 3.964336477581083,
4645
+ "learning_rate": 1.7546350317645535e-07,
4646
+ "loss": 2.69,
4647
+ "step": 15340
4648
+ },
4649
+ {
4650
+ "epoch": 3.969505104018607,
4651
+ "learning_rate": 1.7543109036691298e-07,
4652
+ "loss": 2.7707,
4653
+ "step": 15360
4654
+ },
4655
+ {
4656
+ "epoch": 3.9746737304561313,
4657
+ "learning_rate": 1.7539867755737067e-07,
4658
+ "loss": 2.7585,
4659
+ "step": 15380
4660
+ },
4661
+ {
4662
+ "epoch": 3.9798423568936556,
4663
+ "learning_rate": 1.7536626474782833e-07,
4664
+ "loss": 2.6792,
4665
+ "step": 15400
4666
+ },
4667
+ {
4668
+ "epoch": 3.98501098333118,
4669
+ "learning_rate": 1.75333851938286e-07,
4670
+ "loss": 2.7379,
4671
+ "step": 15420
4672
+ },
4673
+ {
4674
+ "epoch": 3.990179609768704,
4675
+ "learning_rate": 1.7530143912874368e-07,
4676
+ "loss": 2.6914,
4677
+ "step": 15440
4678
+ },
4679
+ {
4680
+ "epoch": 3.9953482362062283,
4681
+ "learning_rate": 1.7526902631920134e-07,
4682
+ "loss": 2.7623,
4683
+ "step": 15460
4684
+ },
4685
+ {
4686
+ "epoch": 4.0,
4687
+ "eval_bleu": 0.1685,
4688
+ "eval_gen_len": 105.7747,
4689
+ "eval_loss": 2.695643186569214,
4690
+ "eval_runtime": 1862.1701,
4691
+ "eval_samples_per_second": 0.925,
4692
+ "eval_steps_per_second": 0.462,
4693
+ "step": 15478
4694
  }
4695
  ],
4696
  "logging_steps": 20,
 
4710
  "attributes": {}
4711
  }
4712
  },
4713
+ "total_flos": 1.0160427286939238e+17,
4714
  "train_batch_size": 2,
4715
  "trial_name": null,
4716
  "trial_params": null