da-xlarge / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.850746268656717,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15,
"learning_rate": 2.7034552830322406e-05,
"loss": 4.2127,
"step": 10
},
{
"epoch": 0.3,
"learning_rate": 3.5172764151612024e-05,
"loss": 3.3881,
"step": 20
},
{
"epoch": 0.45,
"learning_rate": 3.993331259751083e-05,
"loss": 3.0315,
"step": 30
},
{
"epoch": 0.6,
"learning_rate": 4.331097547290165e-05,
"loss": 2.8793,
"step": 40
},
{
"epoch": 0.75,
"learning_rate": 4.5930894339355186e-05,
"loss": 2.7883,
"step": 50
},
{
"epoch": 0.9,
"learning_rate": 4.8071523918800455e-05,
"loss": 2.7364,
"step": 60
},
{
"epoch": 1.04,
"learning_rate": 4.9881400439889756e-05,
"loss": 2.6109,
"step": 70
},
{
"epoch": 1.19,
"learning_rate": 5.1449186794191275e-05,
"loss": 2.5014,
"step": 80
},
{
"epoch": 1.34,
"learning_rate": 5.283207236469926e-05,
"loss": 2.4571,
"step": 90
},
{
"epoch": 1.49,
"learning_rate": 5.406910566064481e-05,
"loss": 2.5222,
"step": 100
},
{
"epoch": 1.64,
"learning_rate": 5.518813839434375e-05,
"loss": 2.4918,
"step": 110
},
{
"epoch": 1.79,
"learning_rate": 5.620973524009008e-05,
"loss": 2.4835,
"step": 120
},
{
"epoch": 1.94,
"learning_rate": 5.714951323824802e-05,
"loss": 2.4628,
"step": 130
},
{
"epoch": 2.09,
"learning_rate": 5.8019611761179374e-05,
"loss": 2.3972,
"step": 140
},
{
"epoch": 2.24,
"learning_rate": 5.882965410654361e-05,
"loss": 2.3233,
"step": 150
},
{
"epoch": 2.39,
"learning_rate": 5.95873981154809e-05,
"loss": 2.3075,
"step": 160
},
{
"epoch": 2.54,
"learning_rate": 6.029918920033657e-05,
"loss": 2.321,
"step": 170
},
{
"epoch": 2.69,
"learning_rate": 6.0970283685988885e-05,
"loss": 2.2946,
"step": 180
},
{
"epoch": 2.84,
"learning_rate": 6.160508461224668e-05,
"loss": 2.2593,
"step": 190
},
{
"epoch": 2.99,
"learning_rate": 6.220731698193443e-05,
"loss": 2.2308,
"step": 200
},
{
"epoch": 3.13,
"learning_rate": 6.278016020707817e-05,
"loss": 2.0929,
"step": 210
},
{
"epoch": 3.28,
"learning_rate": 6.332634971563337e-05,
"loss": 2.156,
"step": 220
},
{
"epoch": 3.43,
"learning_rate": 6.384825595366063e-05,
"loss": 2.1504,
"step": 230
},
{
"epoch": 3.58,
"learning_rate": 6.43479465613797e-05,
"loss": 2.182,
"step": 240
},
{
"epoch": 3.73,
"learning_rate": 6.482723584838796e-05,
"loss": 2.1635,
"step": 250
},
{
"epoch": 3.88,
"learning_rate": 6.528772455953764e-05,
"loss": 2.1302,
"step": 260
},
{
"epoch": 4.03,
"learning_rate": 6.573083213188768e-05,
"loss": 2.1364,
"step": 270
},
{
"epoch": 4.18,
"learning_rate": 6.6157823082469e-05,
"loss": 2.015,
"step": 280
},
{
"epoch": 4.33,
"learning_rate": 6.656982876347945e-05,
"loss": 2.0479,
"step": 290
},
{
"epoch": 4.48,
"learning_rate": 6.696786542783324e-05,
"loss": 1.9968,
"step": 300
},
{
"epoch": 4.63,
"learning_rate": 6.735284933140416e-05,
"loss": 2.012,
"step": 310
},
{
"epoch": 4.78,
"learning_rate": 6.772560943677052e-05,
"loss": 2.0458,
"step": 320
},
{
"epoch": 4.93,
"learning_rate": 6.808689816153217e-05,
"loss": 2.0991,
"step": 330
},
{
"epoch": 5.07,
"learning_rate": 6.84374005216262e-05,
"loss": 1.9845,
"step": 340
},
{
"epoch": 5.22,
"learning_rate": 6.877774194892253e-05,
"loss": 1.9325,
"step": 350
},
{
"epoch": 5.37,
"learning_rate": 6.910849500727851e-05,
"loss": 1.9603,
"step": 360
},
{
"epoch": 5.52,
"learning_rate": 6.943018518821426e-05,
"loss": 1.9095,
"step": 370
},
{
"epoch": 5.67,
"learning_rate": 6.97432959335363e-05,
"loss": 1.9443,
"step": 380
},
{
"epoch": 5.82,
"learning_rate": 7.004827300543644e-05,
"loss": 1.9461,
"step": 390
},
{
"epoch": 5.97,
"learning_rate": 7.034552830322405e-05,
"loss": 1.9462,
"step": 400
},
{
"epoch": 6.12,
"learning_rate": 7.063544320870321e-05,
"loss": 1.8685,
"step": 410
},
{
"epoch": 6.27,
"learning_rate": 7.09183715283678e-05,
"loss": 1.8694,
"step": 420
},
{
"epoch": 6.42,
"learning_rate": 7.119464208935388e-05,
"loss": 1.8429,
"step": 430
},
{
"epoch": 6.57,
"learning_rate": 7.146456103692298e-05,
"loss": 1.8458,
"step": 440
},
{
"epoch": 6.72,
"learning_rate": 7.172841387373204e-05,
"loss": 1.9065,
"step": 450
},
{
"epoch": 6.87,
"learning_rate": 7.198646727495026e-05,
"loss": 1.911,
"step": 460
},
{
"epoch": 7.01,
"learning_rate": 7.223897070815449e-05,
"loss": 1.8694,
"step": 470
},
{
"epoch": 7.16,
"learning_rate": 7.248615788266932e-05,
"loss": 1.7973,
"step": 480
},
{
"epoch": 7.31,
"learning_rate": 7.272824804945709e-05,
"loss": 1.8114,
"step": 490
},
{
"epoch": 7.46,
"learning_rate": 7.296544716967758e-05,
"loss": 1.7718,
"step": 500
},
{
"epoch": 7.46,
"eval_loss": 2.8234732151031494,
"eval_runtime": 35.5202,
"eval_samples_per_second": 18.328,
"eval_steps_per_second": 0.253,
"step": 500
},
{
"epoch": 7.61,
"learning_rate": 7.319794896752499e-05,
"loss": 1.7947,
"step": 510
},
{
"epoch": 7.76,
"learning_rate": 7.342593588082727e-05,
"loss": 1.8117,
"step": 520
},
{
"epoch": 7.91,
"learning_rate": 7.364957992109503e-05,
"loss": 1.8188,
"step": 530
},
{
"epoch": 8.06,
"learning_rate": 7.386904345317732e-05,
"loss": 1.8015,
"step": 540
},
{
"epoch": 8.21,
"learning_rate": 7.408447990337652e-05,
"loss": 1.734,
"step": 550
},
{
"epoch": 8.36,
"learning_rate": 7.429603440375862e-05,
"loss": 1.7217,
"step": 560
},
{
"epoch": 8.51,
"learning_rate": 7.450384437943511e-05,
"loss": 1.7398,
"step": 570
},
{
"epoch": 8.66,
"learning_rate": 7.470804008476907e-05,
"loss": 1.7452,
"step": 580
},
{
"epoch": 8.81,
"learning_rate": 7.490874509374465e-05,
"loss": 1.778,
"step": 590
},
{
"epoch": 8.96,
"learning_rate": 7.510607674912285e-05,
"loss": 1.7332,
"step": 600
},
{
"epoch": 9.1,
"learning_rate": 7.530014657447177e-05,
"loss": 1.6922,
"step": 610
},
{
"epoch": 9.25,
"learning_rate": 7.549106065269378e-05,
"loss": 1.6818,
"step": 620
},
{
"epoch": 9.4,
"learning_rate": 7.567891997426661e-05,
"loss": 1.6757,
"step": 630
},
{
"epoch": 9.55,
"learning_rate": 7.586382075806015e-05,
"loss": 1.752,
"step": 640
},
{
"epoch": 9.7,
"learning_rate": 7.604585474728082e-05,
"loss": 1.7074,
"step": 650
},
{
"epoch": 9.85,
"learning_rate": 7.62251094828218e-05,
"loss": 1.7052,
"step": 660
},
{
"epoch": 10.0,
"learning_rate": 7.640166855605846e-05,
"loss": 1.742,
"step": 670
},
{
"epoch": 10.15,
"learning_rate": 7.65756118429158e-05,
"loss": 1.6759,
"step": 680
},
{
"epoch": 10.3,
"learning_rate": 7.674701572084905e-05,
"loss": 1.6935,
"step": 690
},
{
"epoch": 10.45,
"learning_rate": 7.691595327021215e-05,
"loss": 1.6563,
"step": 700
},
{
"epoch": 10.6,
"learning_rate": 7.708249446134367e-05,
"loss": 1.6941,
"step": 710
},
{
"epoch": 10.75,
"learning_rate": 7.724670632856813e-05,
"loss": 1.676,
"step": 720
},
{
"epoch": 10.9,
"learning_rate": 7.740865313219632e-05,
"loss": 1.6948,
"step": 730
},
{
"epoch": 11.04,
"learning_rate": 7.756839650950389e-05,
"loss": 1.6687,
"step": 740
},
{
"epoch": 11.19,
"learning_rate": 7.772599561557638e-05,
"loss": 1.6469,
"step": 750
},
{
"epoch": 11.34,
"learning_rate": 7.788150725482592e-05,
"loss": 1.6783,
"step": 760
},
{
"epoch": 11.49,
"learning_rate": 7.803498600391108e-05,
"loss": 1.6408,
"step": 770
},
{
"epoch": 11.64,
"learning_rate": 7.818648432672608e-05,
"loss": 1.6521,
"step": 780
},
{
"epoch": 11.79,
"learning_rate": 7.833605268206489e-05,
"loss": 1.6451,
"step": 790
},
{
"epoch": 11.94,
"learning_rate": 7.848373962451368e-05,
"loss": 1.6504,
"step": 800
},
{
"epoch": 12.09,
"learning_rate": 7.862959189907611e-05,
"loss": 1.6431,
"step": 810
},
{
"epoch": 12.24,
"learning_rate": 7.877365452999284e-05,
"loss": 1.6131,
"step": 820
},
{
"epoch": 12.39,
"learning_rate": 7.89159709041777e-05,
"loss": 1.6256,
"step": 830
},
{
"epoch": 12.54,
"learning_rate": 7.905658284965742e-05,
"loss": 1.6257,
"step": 840
},
{
"epoch": 12.69,
"learning_rate": 7.919553070936936e-05,
"loss": 1.6143,
"step": 850
},
{
"epoch": 12.84,
"learning_rate": 7.933285341064351e-05,
"loss": 1.6383,
"step": 860
},
{
"epoch": 12.99,
"learning_rate": 7.946858853066788e-05,
"loss": 1.6234,
"step": 870
},
{
"epoch": 13.13,
"learning_rate": 7.960277235821263e-05,
"loss": 1.5871,
"step": 880
},
{
"epoch": 13.28,
"learning_rate": 7.973543995186684e-05,
"loss": 1.6028,
"step": 890
},
{
"epoch": 13.43,
"learning_rate": 7.986662519502166e-05,
"loss": 1.5723,
"step": 900
},
{
"epoch": 13.58,
"learning_rate": 7.999636084781537e-05,
"loss": 1.5936,
"step": 910
},
{
"epoch": 13.73,
"learning_rate": 8.012467859623988e-05,
"loss": 1.5869,
"step": 920
},
{
"epoch": 13.88,
"learning_rate": 8.025160909859258e-05,
"loss": 1.6018,
"step": 930
},
{
"epoch": 14.03,
"learning_rate": 8.037718202944411e-05,
"loss": 1.5926,
"step": 940
},
{
"epoch": 14.18,
"learning_rate": 8.050142612127945e-05,
"loss": 1.5546,
"step": 950
},
{
"epoch": 14.33,
"learning_rate": 8.062436920395896e-05,
"loss": 1.5601,
"step": 960
},
{
"epoch": 14.48,
"learning_rate": 8.074603824213446e-05,
"loss": 1.5668,
"step": 970
},
{
"epoch": 14.63,
"learning_rate": 8.086645937074672e-05,
"loss": 1.5623,
"step": 980
},
{
"epoch": 14.78,
"learning_rate": 8.09856579287206e-05,
"loss": 1.579,
"step": 990
},
{
"epoch": 14.93,
"learning_rate": 8.110365849096721e-05,
"loss": 1.5637,
"step": 1000
},
{
"epoch": 14.93,
"eval_loss": 3.1092050075531006,
"eval_runtime": 35.1461,
"eval_samples_per_second": 18.523,
"eval_steps_per_second": 0.256,
"step": 1000
},
{
"epoch": 15.07,
"learning_rate": 8.122048489879363e-05,
"loss": 1.5647,
"step": 1010
},
{
"epoch": 15.22,
"learning_rate": 8.133616028881462e-05,
"loss": 1.5349,
"step": 1020
},
{
"epoch": 15.37,
"learning_rate": 8.145070712045392e-05,
"loss": 1.542,
"step": 1030
},
{
"epoch": 15.52,
"learning_rate": 8.15641472021169e-05,
"loss": 1.5345,
"step": 1040
},
{
"epoch": 15.67,
"learning_rate": 8.167650171611095e-05,
"loss": 1.5491,
"step": 1050
},
{
"epoch": 15.82,
"learning_rate": 8.178779124238466e-05,
"loss": 1.5469,
"step": 1060
},
{
"epoch": 15.97,
"learning_rate": 8.189803578115246e-05,
"loss": 1.5825,
"step": 1070
},
{
"epoch": 16.12,
"learning_rate": 8.200725477446693e-05,
"loss": 1.5314,
"step": 1080
},
{
"epoch": 16.27,
"learning_rate": 8.211546712679696e-05,
"loss": 1.5126,
"step": 1090
},
{
"epoch": 16.42,
"learning_rate": 8.222269122466616e-05,
"loss": 1.5194,
"step": 1100
},
{
"epoch": 16.57,
"learning_rate": 8.232894495540269e-05,
"loss": 1.5276,
"step": 1110
},
{
"epoch": 16.72,
"learning_rate": 8.243424572504824e-05,
"loss": 1.5376,
"step": 1120
},
{
"epoch": 16.87,
"learning_rate": 8.2538610475471e-05,
"loss": 1.5393,
"step": 1130
},
{
"epoch": 17.01,
"learning_rate": 8.264205570072473e-05,
"loss": 1.5298,
"step": 1140
},
{
"epoch": 17.16,
"learning_rate": 8.27445974626934e-05,
"loss": 1.5135,
"step": 1150
},
{
"epoch": 17.31,
"learning_rate": 8.284625140605869e-05,
"loss": 1.5175,
"step": 1160
},
{
"epoch": 17.46,
"learning_rate": 8.294703277262488e-05,
"loss": 1.5106,
"step": 1170
},
{
"epoch": 17.61,
"learning_rate": 8.304695641503428e-05,
"loss": 1.5276,
"step": 1180
},
{
"epoch": 17.76,
"learning_rate": 8.31460368099039e-05,
"loss": 1.5227,
"step": 1190
},
{
"epoch": 17.91,
"learning_rate": 8.324428807041249e-05,
"loss": 1.5241,
"step": 1200
},
{
"epoch": 18.06,
"learning_rate": 8.334172395836509e-05,
"loss": 1.5187,
"step": 1210
},
{
"epoch": 18.21,
"learning_rate": 8.34383578957614e-05,
"loss": 1.4929,
"step": 1220
},
{
"epoch": 18.36,
"learning_rate": 8.353420297589165e-05,
"loss": 1.4934,
"step": 1230
},
{
"epoch": 18.51,
"learning_rate": 8.362927197398341e-05,
"loss": 1.5061,
"step": 1240
},
{
"epoch": 18.66,
"learning_rate": 8.372357735742074e-05,
"loss": 1.5068,
"step": 1250
},
{
"epoch": 18.81,
"learning_rate": 8.381713129555623e-05,
"loss": 1.5058,
"step": 1260
},
{
"epoch": 18.96,
"learning_rate": 8.390994566913507e-05,
"loss": 1.4944,
"step": 1270
},
{
"epoch": 19.1,
"learning_rate": 8.400203207934977e-05,
"loss": 1.4905,
"step": 1280
},
{
"epoch": 19.25,
"learning_rate": 8.409340185654231e-05,
"loss": 1.4908,
"step": 1290
},
{
"epoch": 19.4,
"learning_rate": 8.418406606857043e-05,
"loss": 1.4788,
"step": 1300
},
{
"epoch": 19.55,
"learning_rate": 8.427403552885332e-05,
"loss": 1.4851,
"step": 1310
},
{
"epoch": 19.7,
"learning_rate": 8.436332080411142e-05,
"loss": 1.4934,
"step": 1320
},
{
"epoch": 19.85,
"learning_rate": 8.445193222181402e-05,
"loss": 1.4862,
"step": 1330
},
{
"epoch": 20.0,
"learning_rate": 8.453987987734808e-05,
"loss": 1.4922,
"step": 1340
},
{
"epoch": 20.15,
"learning_rate": 8.462717364092046e-05,
"loss": 1.48,
"step": 1350
},
{
"epoch": 20.3,
"learning_rate": 8.471382316420545e-05,
"loss": 1.4731,
"step": 1360
},
{
"epoch": 20.45,
"learning_rate": 8.479983788674874e-05,
"loss": 1.4746,
"step": 1370
},
{
"epoch": 20.6,
"learning_rate": 8.488522704213867e-05,
"loss": 1.48,
"step": 1380
},
{
"epoch": 20.75,
"learning_rate": 8.496999966395455e-05,
"loss": 1.4743,
"step": 1390
},
{
"epoch": 20.9,
"learning_rate": 8.505416459150177e-05,
"loss": 1.4758,
"step": 1400
},
{
"epoch": 21.04,
"learning_rate": 8.513773047534291e-05,
"loss": 1.4738,
"step": 1410
},
{
"epoch": 21.19,
"learning_rate": 8.522070578263329e-05,
"loss": 1.4589,
"step": 1420
},
{
"epoch": 21.34,
"learning_rate": 8.530309880226936e-05,
"loss": 1.4783,
"step": 1430
},
{
"epoch": 21.49,
"learning_rate": 8.538491764985775e-05,
"loss": 1.4656,
"step": 1440
},
{
"epoch": 21.64,
"learning_rate": 8.546617027251222e-05,
"loss": 1.4702,
"step": 1450
},
{
"epoch": 21.79,
"learning_rate": 8.554686445348594e-05,
"loss": 1.4768,
"step": 1460
},
{
"epoch": 21.94,
"learning_rate": 8.562700781664552e-05,
"loss": 1.4802,
"step": 1470
},
{
"epoch": 22.09,
"learning_rate": 8.57066078307935e-05,
"loss": 1.463,
"step": 1480
},
{
"epoch": 22.24,
"learning_rate": 8.578567181384524e-05,
"loss": 1.4582,
"step": 1490
},
{
"epoch": 22.39,
"learning_rate": 8.586420693686602e-05,
"loss": 1.4588,
"step": 1500
},
{
"epoch": 22.39,
"eval_loss": 3.2750725746154785,
"eval_runtime": 34.9007,
"eval_samples_per_second": 18.653,
"eval_steps_per_second": 0.258,
"step": 1500
},
{
"epoch": 22.54,
"learning_rate": 8.594222022797423e-05,
"loss": 1.462,
"step": 1510
},
{
"epoch": 22.69,
"learning_rate": 8.601971857611555e-05,
"loss": 1.4671,
"step": 1520
},
{
"epoch": 22.84,
"learning_rate": 8.609670873471342e-05,
"loss": 1.4637,
"step": 1530
},
{
"epoch": 22.99,
"learning_rate": 8.617319732520071e-05,
"loss": 1.4661,
"step": 1540
},
{
"epoch": 23.13,
"learning_rate": 8.624919084043694e-05,
"loss": 1.4601,
"step": 1550
},
{
"epoch": 23.28,
"learning_rate": 8.632469564801571e-05,
"loss": 1.4553,
"step": 1560
},
{
"epoch": 23.43,
"learning_rate": 8.639971799346644e-05,
"loss": 1.4543,
"step": 1570
},
{
"epoch": 23.58,
"learning_rate": 8.647426400335451e-05,
"loss": 1.4667,
"step": 1580
},
{
"epoch": 23.73,
"learning_rate": 8.654833968828348e-05,
"loss": 1.4622,
"step": 1590
},
{
"epoch": 23.88,
"learning_rate": 8.66219509458033e-05,
"loss": 1.4654,
"step": 1600
},
{
"epoch": 24.03,
"learning_rate": 8.669510356322798e-05,
"loss": 1.4532,
"step": 1610
},
{
"epoch": 24.18,
"learning_rate": 8.676780322036573e-05,
"loss": 1.4525,
"step": 1620
},
{
"epoch": 24.33,
"learning_rate": 8.684005549216557e-05,
"loss": 1.4508,
"step": 1630
},
{
"epoch": 24.48,
"learning_rate": 8.691186585128246e-05,
"loss": 1.4526,
"step": 1640
},
{
"epoch": 24.63,
"learning_rate": 8.698323967056495e-05,
"loss": 1.4499,
"step": 1650
},
{
"epoch": 24.78,
"learning_rate": 8.705418222546732e-05,
"loss": 1.4633,
"step": 1660
},
{
"epoch": 24.93,
"learning_rate": 8.712469869638952e-05,
"loss": 1.4513,
"step": 1670
},
{
"epoch": 25.07,
"learning_rate": 8.719479417094704e-05,
"loss": 1.4543,
"step": 1680
},
{
"epoch": 25.22,
"learning_rate": 8.726447364617366e-05,
"loss": 1.4454,
"step": 1690
},
{
"epoch": 25.37,
"learning_rate": 8.733374203065898e-05,
"loss": 1.4462,
"step": 1700
},
{
"epoch": 25.52,
"learning_rate": 8.740260414662352e-05,
"loss": 1.4561,
"step": 1710
},
{
"epoch": 25.67,
"learning_rate": 8.747106473193313e-05,
"loss": 1.4503,
"step": 1720
},
{
"epoch": 25.82,
"learning_rate": 8.753912844205501e-05,
"loss": 1.453,
"step": 1730
},
{
"epoch": 25.97,
"learning_rate": 8.76067998519575e-05,
"loss": 1.4593,
"step": 1740
},
{
"epoch": 26.12,
"learning_rate": 8.76740834579553e-05,
"loss": 1.4412,
"step": 1750
},
{
"epoch": 26.27,
"learning_rate": 8.774098367950224e-05,
"loss": 1.4476,
"step": 1760
},
{
"epoch": 26.42,
"learning_rate": 8.780750486093308e-05,
"loss": 1.4412,
"step": 1770
},
{
"epoch": 26.57,
"learning_rate": 8.787365127315646e-05,
"loss": 1.4481,
"step": 1780
},
{
"epoch": 26.72,
"learning_rate": 8.79394271153003e-05,
"loss": 1.4471,
"step": 1790
},
{
"epoch": 26.87,
"learning_rate": 8.800483651631128e-05,
"loss": 1.447,
"step": 1800
},
{
"epoch": 27.01,
"learning_rate": 8.806988353651037e-05,
"loss": 1.4507,
"step": 1810
},
{
"epoch": 27.16,
"learning_rate": 8.813457216910499e-05,
"loss": 1.435,
"step": 1820
},
{
"epoch": 27.31,
"learning_rate": 8.81989063416602e-05,
"loss": 1.4361,
"step": 1830
},
{
"epoch": 27.46,
"learning_rate": 8.82628899175295e-05,
"loss": 1.4359,
"step": 1840
},
{
"epoch": 27.61,
"learning_rate": 8.832652669724704e-05,
"loss": 1.4379,
"step": 1850
},
{
"epoch": 27.76,
"learning_rate": 8.838982041988221e-05,
"loss": 1.4476,
"step": 1860
},
{
"epoch": 27.91,
"learning_rate": 8.845277476435792e-05,
"loss": 1.4395,
"step": 1870
},
{
"epoch": 28.06,
"learning_rate": 8.851539335073373e-05,
"loss": 1.4403,
"step": 1880
},
{
"epoch": 28.21,
"learning_rate": 8.857767974145503e-05,
"loss": 1.4387,
"step": 1890
},
{
"epoch": 28.36,
"learning_rate": 8.863963744256908e-05,
"loss": 1.4388,
"step": 1900
},
{
"epoch": 28.51,
"learning_rate": 8.87012699049093e-05,
"loss": 1.4377,
"step": 1910
},
{
"epoch": 28.66,
"learning_rate": 8.876258052524857e-05,
"loss": 1.4367,
"step": 1920
},
{
"epoch": 28.81,
"learning_rate": 8.882357264742258e-05,
"loss": 1.4482,
"step": 1930
},
{
"epoch": 28.96,
"learning_rate": 8.88842495634241e-05,
"loss": 1.4354,
"step": 1940
},
{
"epoch": 29.1,
"learning_rate": 8.894461451446924e-05,
"loss": 1.4333,
"step": 1950
},
{
"epoch": 29.25,
"learning_rate": 8.900467069203634e-05,
"loss": 1.4334,
"step": 1960
},
{
"epoch": 29.4,
"learning_rate": 8.906442123887845e-05,
"loss": 1.4454,
"step": 1970
},
{
"epoch": 29.55,
"learning_rate": 8.912386925001022e-05,
"loss": 1.4368,
"step": 1980
},
{
"epoch": 29.7,
"learning_rate": 8.918301777366981e-05,
"loss": 1.4319,
"step": 1990
},
{
"epoch": 29.85,
"learning_rate": 8.924186981225684e-05,
"loss": 1.4337,
"step": 2000
},
{
"epoch": 29.85,
"eval_loss": 3.362933874130249,
"eval_runtime": 35.3655,
"eval_samples_per_second": 18.408,
"eval_steps_per_second": 0.254,
"step": 2000
}
],
"max_steps": 50000,
"num_train_epochs": 747,
"total_flos": 348961395840.0,
"trial_name": null,
"trial_params": null
}