system's picture
system HF staff
Update trainer_state.json
b083460
{
"best_metric": 0.88762098828324,
"best_model_checkpoint": "test-glue/checkpoint-98176",
"epoch": 4.0,
"global_step": 98176,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020371577574967405,
"learning_rate": 1.9898142112125166e-05,
"loss": 0.659133544921875,
"step": 500
},
{
"epoch": 0.04074315514993481,
"learning_rate": 1.9796284224250328e-05,
"loss": 0.5110537109375,
"step": 1000
},
{
"epoch": 0.06111473272490222,
"learning_rate": 1.969442633637549e-05,
"loss": 0.485730224609375,
"step": 1500
},
{
"epoch": 0.08148631029986962,
"learning_rate": 1.9592568448500654e-05,
"loss": 0.48240771484375,
"step": 2000
},
{
"epoch": 0.10185788787483703,
"learning_rate": 1.9490710560625816e-05,
"loss": 0.46434228515625,
"step": 2500
},
{
"epoch": 0.12222946544980444,
"learning_rate": 1.938885267275098e-05,
"loss": 0.449291259765625,
"step": 3000
},
{
"epoch": 0.14260104302477183,
"learning_rate": 1.9286994784876142e-05,
"loss": 0.4585693359375,
"step": 3500
},
{
"epoch": 0.16297262059973924,
"learning_rate": 1.9185136897001307e-05,
"loss": 0.44105908203125,
"step": 4000
},
{
"epoch": 0.18334419817470665,
"learning_rate": 1.9083279009126468e-05,
"loss": 0.444558837890625,
"step": 4500
},
{
"epoch": 0.20371577574967406,
"learning_rate": 1.898142112125163e-05,
"loss": 0.45100146484375,
"step": 5000
},
{
"epoch": 0.22408735332464147,
"learning_rate": 1.8879563233376795e-05,
"loss": 0.448494140625,
"step": 5500
},
{
"epoch": 0.24445893089960888,
"learning_rate": 1.8777705345501956e-05,
"loss": 0.4301044921875,
"step": 6000
},
{
"epoch": 0.2648305084745763,
"learning_rate": 1.867584745762712e-05,
"loss": 0.42643359375,
"step": 6500
},
{
"epoch": 0.28520208604954367,
"learning_rate": 1.8573989569752282e-05,
"loss": 0.41756689453125,
"step": 7000
},
{
"epoch": 0.3055736636245111,
"learning_rate": 1.8472131681877447e-05,
"loss": 0.4308828125,
"step": 7500
},
{
"epoch": 0.3259452411994785,
"learning_rate": 1.837027379400261e-05,
"loss": 0.41683154296875,
"step": 8000
},
{
"epoch": 0.34631681877444587,
"learning_rate": 1.8268415906127773e-05,
"loss": 0.413720703125,
"step": 8500
},
{
"epoch": 0.3666883963494133,
"learning_rate": 1.8166558018252935e-05,
"loss": 0.425869140625,
"step": 9000
},
{
"epoch": 0.3870599739243807,
"learning_rate": 1.80647001303781e-05,
"loss": 0.42243359375,
"step": 9500
},
{
"epoch": 0.4074315514993481,
"learning_rate": 1.796284224250326e-05,
"loss": 0.415326171875,
"step": 10000
},
{
"epoch": 0.4278031290743155,
"learning_rate": 1.7860984354628423e-05,
"loss": 0.4072890625,
"step": 10500
},
{
"epoch": 0.44817470664928294,
"learning_rate": 1.7759126466753587e-05,
"loss": 0.4062841796875,
"step": 11000
},
{
"epoch": 0.4685462842242503,
"learning_rate": 1.765726857887875e-05,
"loss": 0.3890205078125,
"step": 11500
},
{
"epoch": 0.48891786179921776,
"learning_rate": 1.7555410691003914e-05,
"loss": 0.3969296875,
"step": 12000
},
{
"epoch": 0.5092894393741851,
"learning_rate": 1.7453552803129075e-05,
"loss": 0.3952880859375,
"step": 12500
},
{
"epoch": 0.5296610169491526,
"learning_rate": 1.7351694915254237e-05,
"loss": 0.41177734375,
"step": 13000
},
{
"epoch": 0.5500325945241199,
"learning_rate": 1.72498370273794e-05,
"loss": 0.3807109375,
"step": 13500
},
{
"epoch": 0.5704041720990873,
"learning_rate": 1.7147979139504566e-05,
"loss": 0.403853515625,
"step": 14000
},
{
"epoch": 0.5907757496740548,
"learning_rate": 1.7046121251629728e-05,
"loss": 0.411607421875,
"step": 14500
},
{
"epoch": 0.6111473272490222,
"learning_rate": 1.6944263363754893e-05,
"loss": 0.3996162109375,
"step": 15000
},
{
"epoch": 0.6315189048239895,
"learning_rate": 1.6842405475880054e-05,
"loss": 0.3948984375,
"step": 15500
},
{
"epoch": 0.651890482398957,
"learning_rate": 1.6740547588005215e-05,
"loss": 0.389115234375,
"step": 16000
},
{
"epoch": 0.6722620599739244,
"learning_rate": 1.663868970013038e-05,
"loss": 0.403466796875,
"step": 16500
},
{
"epoch": 0.6926336375488917,
"learning_rate": 1.6536831812255542e-05,
"loss": 0.4059140625,
"step": 17000
},
{
"epoch": 0.7130052151238592,
"learning_rate": 1.6434973924380707e-05,
"loss": 0.3873095703125,
"step": 17500
},
{
"epoch": 0.7333767926988266,
"learning_rate": 1.6333116036505868e-05,
"loss": 0.3887783203125,
"step": 18000
},
{
"epoch": 0.753748370273794,
"learning_rate": 1.623125814863103e-05,
"loss": 0.395041015625,
"step": 18500
},
{
"epoch": 0.7741199478487614,
"learning_rate": 1.6129400260756194e-05,
"loss": 0.39855078125,
"step": 19000
},
{
"epoch": 0.7944915254237288,
"learning_rate": 1.6027542372881356e-05,
"loss": 0.390134765625,
"step": 19500
},
{
"epoch": 0.8148631029986962,
"learning_rate": 1.592568448500652e-05,
"loss": 0.3717578125,
"step": 20000
},
{
"epoch": 0.8352346805736637,
"learning_rate": 1.5823826597131685e-05,
"loss": 0.387556640625,
"step": 20500
},
{
"epoch": 0.855606258148631,
"learning_rate": 1.5721968709256844e-05,
"loss": 0.389892578125,
"step": 21000
},
{
"epoch": 0.8759778357235984,
"learning_rate": 1.562011082138201e-05,
"loss": 0.38671875,
"step": 21500
},
{
"epoch": 0.8963494132985659,
"learning_rate": 1.5518252933507173e-05,
"loss": 0.36665234375,
"step": 22000
},
{
"epoch": 0.9167209908735332,
"learning_rate": 1.5416395045632335e-05,
"loss": 0.3940859375,
"step": 22500
},
{
"epoch": 0.9370925684485006,
"learning_rate": 1.53145371577575e-05,
"loss": 0.382552734375,
"step": 23000
},
{
"epoch": 0.9574641460234681,
"learning_rate": 1.5212679269882663e-05,
"loss": 0.38312109375,
"step": 23500
},
{
"epoch": 0.9778357235984355,
"learning_rate": 1.5110821382007822e-05,
"loss": 0.3804765625,
"step": 24000
},
{
"epoch": 0.9982073011734028,
"learning_rate": 1.5008963494132987e-05,
"loss": 0.383294921875,
"step": 24500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8807947019867549,
"eval_loss": 0.3417690396308899,
"step": 24544
},
{
"epoch": 1.0185788787483703,
"learning_rate": 1.490710560625815e-05,
"loss": 0.293595703125,
"step": 25000
},
{
"epoch": 1.0389504563233376,
"learning_rate": 1.4805247718383314e-05,
"loss": 0.29225,
"step": 25500
},
{
"epoch": 1.0593220338983051,
"learning_rate": 1.4703389830508477e-05,
"loss": 0.294130859375,
"step": 26000
},
{
"epoch": 1.0796936114732725,
"learning_rate": 1.4601531942633638e-05,
"loss": 0.28494921875,
"step": 26500
},
{
"epoch": 1.1000651890482398,
"learning_rate": 1.4499674054758801e-05,
"loss": 0.28199609375,
"step": 27000
},
{
"epoch": 1.1204367666232073,
"learning_rate": 1.4397816166883964e-05,
"loss": 0.28830078125,
"step": 27500
},
{
"epoch": 1.1408083441981747,
"learning_rate": 1.4295958279009128e-05,
"loss": 0.283265625,
"step": 28000
},
{
"epoch": 1.161179921773142,
"learning_rate": 1.419410039113429e-05,
"loss": 0.284984375,
"step": 28500
},
{
"epoch": 1.1815514993481095,
"learning_rate": 1.4092242503259454e-05,
"loss": 0.2741875,
"step": 29000
},
{
"epoch": 1.2019230769230769,
"learning_rate": 1.3990384615384615e-05,
"loss": 0.293828125,
"step": 29500
},
{
"epoch": 1.2222946544980444,
"learning_rate": 1.3888526727509778e-05,
"loss": 0.291970703125,
"step": 30000
},
{
"epoch": 1.2426662320730117,
"learning_rate": 1.3786668839634942e-05,
"loss": 0.2823515625,
"step": 30500
},
{
"epoch": 1.263037809647979,
"learning_rate": 1.3684810951760106e-05,
"loss": 0.2898671875,
"step": 31000
},
{
"epoch": 1.2834093872229466,
"learning_rate": 1.358295306388527e-05,
"loss": 0.291859375,
"step": 31500
},
{
"epoch": 1.303780964797914,
"learning_rate": 1.3481095176010431e-05,
"loss": 0.292462890625,
"step": 32000
},
{
"epoch": 1.3241525423728815,
"learning_rate": 1.3379237288135594e-05,
"loss": 0.29555859375,
"step": 32500
},
{
"epoch": 1.3445241199478488,
"learning_rate": 1.3277379400260757e-05,
"loss": 0.288212890625,
"step": 33000
},
{
"epoch": 1.3648956975228161,
"learning_rate": 1.317552151238592e-05,
"loss": 0.294849609375,
"step": 33500
},
{
"epoch": 1.3852672750977835,
"learning_rate": 1.3073663624511084e-05,
"loss": 0.289576171875,
"step": 34000
},
{
"epoch": 1.405638852672751,
"learning_rate": 1.2971805736636247e-05,
"loss": 0.289185546875,
"step": 34500
},
{
"epoch": 1.4260104302477183,
"learning_rate": 1.2869947848761408e-05,
"loss": 0.283068359375,
"step": 35000
},
{
"epoch": 1.4463820078226859,
"learning_rate": 1.2768089960886571e-05,
"loss": 0.28625390625,
"step": 35500
},
{
"epoch": 1.4667535853976532,
"learning_rate": 1.2666232073011735e-05,
"loss": 0.282048828125,
"step": 36000
},
{
"epoch": 1.4871251629726205,
"learning_rate": 1.2564374185136898e-05,
"loss": 0.28896484375,
"step": 36500
},
{
"epoch": 1.5074967405475879,
"learning_rate": 1.2462516297262063e-05,
"loss": 0.276443359375,
"step": 37000
},
{
"epoch": 1.5278683181225554,
"learning_rate": 1.2360658409387226e-05,
"loss": 0.310044921875,
"step": 37500
},
{
"epoch": 1.548239895697523,
"learning_rate": 1.2258800521512385e-05,
"loss": 0.285341796875,
"step": 38000
},
{
"epoch": 1.5686114732724903,
"learning_rate": 1.215694263363755e-05,
"loss": 0.282486328125,
"step": 38500
},
{
"epoch": 1.5889830508474576,
"learning_rate": 1.2055084745762713e-05,
"loss": 0.295353515625,
"step": 39000
},
{
"epoch": 1.609354628422425,
"learning_rate": 1.1953226857887877e-05,
"loss": 0.284724609375,
"step": 39500
},
{
"epoch": 1.6297262059973925,
"learning_rate": 1.185136897001304e-05,
"loss": 0.286890625,
"step": 40000
},
{
"epoch": 1.6500977835723598,
"learning_rate": 1.1749511082138201e-05,
"loss": 0.278595703125,
"step": 40500
},
{
"epoch": 1.6704693611473274,
"learning_rate": 1.1647653194263364e-05,
"loss": 0.30148046875,
"step": 41000
},
{
"epoch": 1.6908409387222947,
"learning_rate": 1.1545795306388527e-05,
"loss": 0.274353515625,
"step": 41500
},
{
"epoch": 1.711212516297262,
"learning_rate": 1.144393741851369e-05,
"loss": 0.29472265625,
"step": 42000
},
{
"epoch": 1.7315840938722293,
"learning_rate": 1.1342079530638854e-05,
"loss": 0.2853359375,
"step": 42500
},
{
"epoch": 1.7519556714471969,
"learning_rate": 1.1240221642764017e-05,
"loss": 0.286759765625,
"step": 43000
},
{
"epoch": 1.7723272490221644,
"learning_rate": 1.1138363754889178e-05,
"loss": 0.28123046875,
"step": 43500
},
{
"epoch": 1.7926988265971318,
"learning_rate": 1.1036505867014341e-05,
"loss": 0.2833515625,
"step": 44000
},
{
"epoch": 1.813070404172099,
"learning_rate": 1.0934647979139506e-05,
"loss": 0.281861328125,
"step": 44500
},
{
"epoch": 1.8334419817470664,
"learning_rate": 1.083279009126467e-05,
"loss": 0.2866171875,
"step": 45000
},
{
"epoch": 1.8538135593220337,
"learning_rate": 1.0730932203389833e-05,
"loss": 0.28151953125,
"step": 45500
},
{
"epoch": 1.8741851368970013,
"learning_rate": 1.0629074315514994e-05,
"loss": 0.285787109375,
"step": 46000
},
{
"epoch": 1.8945567144719688,
"learning_rate": 1.0527216427640157e-05,
"loss": 0.28494921875,
"step": 46500
},
{
"epoch": 1.9149282920469362,
"learning_rate": 1.042535853976532e-05,
"loss": 0.281390625,
"step": 47000
},
{
"epoch": 1.9352998696219035,
"learning_rate": 1.0323500651890483e-05,
"loss": 0.2822734375,
"step": 47500
},
{
"epoch": 1.9556714471968708,
"learning_rate": 1.0221642764015647e-05,
"loss": 0.2885546875,
"step": 48000
},
{
"epoch": 1.9760430247718384,
"learning_rate": 1.011978487614081e-05,
"loss": 0.2759296875,
"step": 48500
},
{
"epoch": 1.996414602346806,
"learning_rate": 1.0017926988265971e-05,
"loss": 0.27166796875,
"step": 49000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8865002547121752,
"eval_loss": 0.34813082218170166,
"step": 49088
},
{
"epoch": 2.0167861799217732,
"learning_rate": 9.916069100391134e-06,
"loss": 0.2067265625,
"step": 49500
},
{
"epoch": 2.0371577574967406,
"learning_rate": 9.814211212516298e-06,
"loss": 0.18627734375,
"step": 50000
},
{
"epoch": 2.057529335071708,
"learning_rate": 9.71235332464146e-06,
"loss": 0.20591796875,
"step": 50500
},
{
"epoch": 2.077900912646675,
"learning_rate": 9.610495436766624e-06,
"loss": 0.207765625,
"step": 51000
},
{
"epoch": 2.098272490221643,
"learning_rate": 9.508637548891787e-06,
"loss": 0.200046875,
"step": 51500
},
{
"epoch": 2.1186440677966103,
"learning_rate": 9.40677966101695e-06,
"loss": 0.20955078125,
"step": 52000
},
{
"epoch": 2.1390156453715776,
"learning_rate": 9.304921773142113e-06,
"loss": 0.20408203125,
"step": 52500
},
{
"epoch": 2.159387222946545,
"learning_rate": 9.203063885267276e-06,
"loss": 0.1967578125,
"step": 53000
},
{
"epoch": 2.1797588005215123,
"learning_rate": 9.101205997392438e-06,
"loss": 0.2086171875,
"step": 53500
},
{
"epoch": 2.2001303780964796,
"learning_rate": 8.999348109517601e-06,
"loss": 0.19387890625,
"step": 54000
},
{
"epoch": 2.2205019556714474,
"learning_rate": 8.897490221642766e-06,
"loss": 0.2149140625,
"step": 54500
},
{
"epoch": 2.2408735332464147,
"learning_rate": 8.795632333767927e-06,
"loss": 0.20816015625,
"step": 55000
},
{
"epoch": 2.261245110821382,
"learning_rate": 8.69377444589309e-06,
"loss": 0.1981796875,
"step": 55500
},
{
"epoch": 2.2816166883963493,
"learning_rate": 8.591916558018254e-06,
"loss": 0.20524609375,
"step": 56000
},
{
"epoch": 2.3019882659713167,
"learning_rate": 8.490058670143417e-06,
"loss": 0.20031640625,
"step": 56500
},
{
"epoch": 2.322359843546284,
"learning_rate": 8.38820078226858e-06,
"loss": 0.2170234375,
"step": 57000
},
{
"epoch": 2.3427314211212518,
"learning_rate": 8.286342894393743e-06,
"loss": 0.202625,
"step": 57500
},
{
"epoch": 2.363102998696219,
"learning_rate": 8.184485006518904e-06,
"loss": 0.21112890625,
"step": 58000
},
{
"epoch": 2.3834745762711864,
"learning_rate": 8.08262711864407e-06,
"loss": 0.19984375,
"step": 58500
},
{
"epoch": 2.4038461538461537,
"learning_rate": 7.980769230769232e-06,
"loss": 0.2016015625,
"step": 59000
},
{
"epoch": 2.424217731421121,
"learning_rate": 7.878911342894394e-06,
"loss": 0.20296875,
"step": 59500
},
{
"epoch": 2.444589308996089,
"learning_rate": 7.777053455019557e-06,
"loss": 0.20494140625,
"step": 60000
},
{
"epoch": 2.464960886571056,
"learning_rate": 7.67519556714472e-06,
"loss": 0.19887890625,
"step": 60500
},
{
"epoch": 2.4853324641460235,
"learning_rate": 7.573337679269883e-06,
"loss": 0.2058984375,
"step": 61000
},
{
"epoch": 2.505704041720991,
"learning_rate": 7.4714797913950464e-06,
"loss": 0.2057265625,
"step": 61500
},
{
"epoch": 2.526075619295958,
"learning_rate": 7.369621903520209e-06,
"loss": 0.1991484375,
"step": 62000
},
{
"epoch": 2.5464471968709255,
"learning_rate": 7.267764015645372e-06,
"loss": 0.2016328125,
"step": 62500
},
{
"epoch": 2.5668187744458932,
"learning_rate": 7.165906127770536e-06,
"loss": 0.2006640625,
"step": 63000
},
{
"epoch": 2.5871903520208606,
"learning_rate": 7.064048239895698e-06,
"loss": 0.20136328125,
"step": 63500
},
{
"epoch": 2.607561929595828,
"learning_rate": 6.962190352020861e-06,
"loss": 0.210953125,
"step": 64000
},
{
"epoch": 2.627933507170795,
"learning_rate": 6.8603324641460245e-06,
"loss": 0.20728515625,
"step": 64500
},
{
"epoch": 2.648305084745763,
"learning_rate": 6.758474576271187e-06,
"loss": 0.19809765625,
"step": 65000
},
{
"epoch": 2.6686766623207303,
"learning_rate": 6.65661668839635e-06,
"loss": 0.20791796875,
"step": 65500
},
{
"epoch": 2.6890482398956976,
"learning_rate": 6.554758800521513e-06,
"loss": 0.1968359375,
"step": 66000
},
{
"epoch": 2.709419817470665,
"learning_rate": 6.452900912646675e-06,
"loss": 0.19569140625,
"step": 66500
},
{
"epoch": 2.7297913950456323,
"learning_rate": 6.351043024771839e-06,
"loss": 0.21024609375,
"step": 67000
},
{
"epoch": 2.7501629726205996,
"learning_rate": 6.249185136897002e-06,
"loss": 0.20680859375,
"step": 67500
},
{
"epoch": 2.770534550195567,
"learning_rate": 6.147327249022165e-06,
"loss": 0.206203125,
"step": 68000
},
{
"epoch": 2.7909061277705347,
"learning_rate": 6.045469361147328e-06,
"loss": 0.1981015625,
"step": 68500
},
{
"epoch": 2.811277705345502,
"learning_rate": 5.94361147327249e-06,
"loss": 0.20408984375,
"step": 69000
},
{
"epoch": 2.8316492829204694,
"learning_rate": 5.841753585397653e-06,
"loss": 0.20057421875,
"step": 69500
},
{
"epoch": 2.8520208604954367,
"learning_rate": 5.739895697522817e-06,
"loss": 0.19898046875,
"step": 70000
},
{
"epoch": 2.872392438070404,
"learning_rate": 5.63803780964798e-06,
"loss": 0.19495703125,
"step": 70500
},
{
"epoch": 2.8927640156453718,
"learning_rate": 5.536179921773143e-06,
"loss": 0.1974296875,
"step": 71000
},
{
"epoch": 2.913135593220339,
"learning_rate": 5.434322033898306e-06,
"loss": 0.20246484375,
"step": 71500
},
{
"epoch": 2.9335071707953064,
"learning_rate": 5.332464146023468e-06,
"loss": 0.19712109375,
"step": 72000
},
{
"epoch": 2.9538787483702738,
"learning_rate": 5.230606258148631e-06,
"loss": 0.202109375,
"step": 72500
},
{
"epoch": 2.974250325945241,
"learning_rate": 5.128748370273794e-06,
"loss": 0.2054140625,
"step": 73000
},
{
"epoch": 2.9946219035202084,
"learning_rate": 5.026890482398958e-06,
"loss": 0.202515625,
"step": 73500
},
{
"epoch": 3.0,
"eval_accuracy": 0.884666327050433,
"eval_loss": 0.4447501301765442,
"step": 73632
},
{
"epoch": 3.014993481095176,
"learning_rate": 4.92503259452412e-06,
"loss": 0.13929296875,
"step": 74000
},
{
"epoch": 3.0353650586701435,
"learning_rate": 4.823174706649283e-06,
"loss": 0.14209765625,
"step": 74500
},
{
"epoch": 3.055736636245111,
"learning_rate": 4.721316818774446e-06,
"loss": 0.14396875,
"step": 75000
},
{
"epoch": 3.076108213820078,
"learning_rate": 4.6194589308996094e-06,
"loss": 0.13436328125,
"step": 75500
},
{
"epoch": 3.0964797913950455,
"learning_rate": 4.5176010430247726e-06,
"loss": 0.13274609375,
"step": 76000
},
{
"epoch": 3.1168513689700132,
"learning_rate": 4.415743155149935e-06,
"loss": 0.1425546875,
"step": 76500
},
{
"epoch": 3.1372229465449806,
"learning_rate": 4.313885267275098e-06,
"loss": 0.1465,
"step": 77000
},
{
"epoch": 3.157594524119948,
"learning_rate": 4.212027379400261e-06,
"loss": 0.13299609375,
"step": 77500
},
{
"epoch": 3.1779661016949152,
"learning_rate": 4.110169491525424e-06,
"loss": 0.146578125,
"step": 78000
},
{
"epoch": 3.1983376792698825,
"learning_rate": 4.0083116036505874e-06,
"loss": 0.1382421875,
"step": 78500
},
{
"epoch": 3.21870925684485,
"learning_rate": 3.90645371577575e-06,
"loss": 0.14358984375,
"step": 79000
},
{
"epoch": 3.2390808344198176,
"learning_rate": 3.8045958279009133e-06,
"loss": 0.1374140625,
"step": 79500
},
{
"epoch": 3.259452411994785,
"learning_rate": 3.702737940026076e-06,
"loss": 0.14011328125,
"step": 80000
},
{
"epoch": 3.2798239895697523,
"learning_rate": 3.6008800521512388e-06,
"loss": 0.14218359375,
"step": 80500
},
{
"epoch": 3.3001955671447196,
"learning_rate": 3.4990221642764015e-06,
"loss": 0.14026953125,
"step": 81000
},
{
"epoch": 3.320567144719687,
"learning_rate": 3.397164276401565e-06,
"loss": 0.1326171875,
"step": 81500
},
{
"epoch": 3.3409387222946547,
"learning_rate": 3.2953063885267278e-06,
"loss": 0.124484375,
"step": 82000
},
{
"epoch": 3.361310299869622,
"learning_rate": 3.1934485006518905e-06,
"loss": 0.13862890625,
"step": 82500
},
{
"epoch": 3.3816818774445894,
"learning_rate": 3.091590612777054e-06,
"loss": 0.14094140625,
"step": 83000
},
{
"epoch": 3.4020534550195567,
"learning_rate": 2.9897327249022168e-06,
"loss": 0.13262890625,
"step": 83500
},
{
"epoch": 3.422425032594524,
"learning_rate": 2.8878748370273795e-06,
"loss": 0.14966015625,
"step": 84000
},
{
"epoch": 3.4427966101694913,
"learning_rate": 2.7860169491525422e-06,
"loss": 0.136,
"step": 84500
},
{
"epoch": 3.463168187744459,
"learning_rate": 2.684159061277706e-06,
"loss": 0.15666796875,
"step": 85000
},
{
"epoch": 3.4835397653194264,
"learning_rate": 2.5823011734028685e-06,
"loss": 0.12829296875,
"step": 85500
},
{
"epoch": 3.5039113428943938,
"learning_rate": 2.4804432855280312e-06,
"loss": 0.134625,
"step": 86000
},
{
"epoch": 3.524282920469361,
"learning_rate": 2.3785853976531944e-06,
"loss": 0.1383125,
"step": 86500
},
{
"epoch": 3.5446544980443284,
"learning_rate": 2.2767275097783575e-06,
"loss": 0.145578125,
"step": 87000
},
{
"epoch": 3.565026075619296,
"learning_rate": 2.1748696219035202e-06,
"loss": 0.13809765625,
"step": 87500
},
{
"epoch": 3.5853976531942635,
"learning_rate": 2.0730117340286834e-06,
"loss": 0.131578125,
"step": 88000
},
{
"epoch": 3.605769230769231,
"learning_rate": 1.971153846153846e-06,
"loss": 0.128625,
"step": 88500
},
{
"epoch": 3.626140808344198,
"learning_rate": 1.8692959582790093e-06,
"loss": 0.1566328125,
"step": 89000
},
{
"epoch": 3.6465123859191655,
"learning_rate": 1.7674380704041722e-06,
"loss": 0.13289453125,
"step": 89500
},
{
"epoch": 3.666883963494133,
"learning_rate": 1.6655801825293353e-06,
"loss": 0.13489453125,
"step": 90000
},
{
"epoch": 3.6872555410691,
"learning_rate": 1.563722294654498e-06,
"loss": 0.1342890625,
"step": 90500
},
{
"epoch": 3.707627118644068,
"learning_rate": 1.4618644067796612e-06,
"loss": 0.13596875,
"step": 91000
},
{
"epoch": 3.7279986962190352,
"learning_rate": 1.3600065189048241e-06,
"loss": 0.12205859375,
"step": 91500
},
{
"epoch": 3.7483702737940026,
"learning_rate": 1.258148631029987e-06,
"loss": 0.136265625,
"step": 92000
},
{
"epoch": 3.76874185136897,
"learning_rate": 1.15629074315515e-06,
"loss": 0.1251875,
"step": 92500
},
{
"epoch": 3.7891134289439377,
"learning_rate": 1.0544328552803131e-06,
"loss": 0.1440859375,
"step": 93000
},
{
"epoch": 3.809485006518905,
"learning_rate": 9.52574967405476e-07,
"loss": 0.14747265625,
"step": 93500
},
{
"epoch": 3.8298565840938723,
"learning_rate": 8.507170795306389e-07,
"loss": 0.12144921875,
"step": 94000
},
{
"epoch": 3.8502281616688396,
"learning_rate": 7.488591916558019e-07,
"loss": 0.138546875,
"step": 94500
},
{
"epoch": 3.870599739243807,
"learning_rate": 6.470013037809649e-07,
"loss": 0.1290390625,
"step": 95000
},
{
"epoch": 3.8909713168187743,
"learning_rate": 5.451434159061278e-07,
"loss": 0.13651953125,
"step": 95500
},
{
"epoch": 3.9113428943937416,
"learning_rate": 4.432855280312908e-07,
"loss": 0.14065234375,
"step": 96000
},
{
"epoch": 3.9317144719687094,
"learning_rate": 3.4142764015645373e-07,
"loss": 0.129953125,
"step": 96500
},
{
"epoch": 3.9520860495436767,
"learning_rate": 2.395697522816167e-07,
"loss": 0.127765625,
"step": 97000
},
{
"epoch": 3.972457627118644,
"learning_rate": 1.3771186440677968e-07,
"loss": 0.12146875,
"step": 97500
},
{
"epoch": 3.9928292046936114,
"learning_rate": 3.585397653194264e-08,
"loss": 0.132578125,
"step": 98000
},
{
"epoch": 4.0,
"eval_accuracy": 0.88762098828324,
"eval_loss": 0.5551679134368896,
"step": 98176
}
],
"max_steps": 98176,
"num_train_epochs": 4,
"total_flos": 124136967570323400,
"trial_name": null,
"trial_params": null
}