{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0019788801578772787, "global_step": 23392, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.999987310478829e-05, "loss": 0.0134, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.999961931436487e-05, "loss": 0.0121, "step": 200 }, { "epoch": 0.0, "learning_rate": 2.999936552394145e-05, "loss": 0.016, "step": 300 }, { "epoch": 0.0, "learning_rate": 2.9999111733518026e-05, "loss": 0.0097, "step": 400 }, { "epoch": 0.0, "learning_rate": 2.9998857943094606e-05, "loss": 0.0058, "step": 500 }, { "epoch": 0.0, "learning_rate": 2.9998604152671182e-05, "loss": 0.0074, "step": 600 }, { "epoch": 0.0, "learning_rate": 2.9998350362247762e-05, "loss": 0.0122, "step": 700 }, { "epoch": 0.0, "learning_rate": 2.9998096571824342e-05, "loss": 0.0071, "step": 800 }, { "epoch": 0.0, "learning_rate": 2.999784278140092e-05, "loss": 0.0152, "step": 900 }, { "epoch": 0.0, "learning_rate": 2.99975889909775e-05, "loss": 0.0136, "step": 1000 }, { "epoch": 0.0, "learning_rate": 2.9997335200554075e-05, "loss": 0.0093, "step": 1100 }, { "epoch": 0.0, "learning_rate": 2.9997081410130655e-05, "loss": 0.0103, "step": 1200 }, { "epoch": 0.0, "learning_rate": 2.9996827619707235e-05, "loss": 0.0095, "step": 1300 }, { "epoch": 0.0, "learning_rate": 2.9996573829283812e-05, "loss": 0.013, "step": 1400 }, { "epoch": 0.0, "learning_rate": 2.9996320038860392e-05, "loss": 0.0222, "step": 1500 }, { "epoch": 0.0, "learning_rate": 2.999606624843697e-05, "loss": 0.0151, "step": 1600 }, { "epoch": 0.0, "learning_rate": 2.999581245801355e-05, "loss": 0.0289, "step": 1700 }, { "epoch": 0.0, "learning_rate": 2.999555866759013e-05, "loss": 0.0075, "step": 1800 }, { "epoch": 0.0, "learning_rate": 2.9995304877166705e-05, "loss": 0.0073, "step": 1900 }, { "epoch": 0.0, "learning_rate": 2.9995051086743285e-05, "loss": 0.0069, "step": 2000 }, { "epoch": 0.0, "learning_rate": 2.9994797296319865e-05, "loss": 0.016, "step": 2100 }, { "epoch": 0.0, "learning_rate": 2.999454350589644e-05, "loss": 0.0077, "step": 2200 }, { "epoch": 0.0, "learning_rate": 2.999428971547302e-05, "loss": 0.0107, "step": 2300 }, { "epoch": 0.0, "learning_rate": 2.9994035925049598e-05, "loss": 0.03, "step": 2400 }, { "epoch": 0.0, "learning_rate": 2.9993782134626178e-05, "loss": 0.0203, "step": 2500 }, { "epoch": 0.0, "learning_rate": 2.9993528344202758e-05, "loss": 0.0128, "step": 2600 }, { "epoch": 0.0, "learning_rate": 2.999327455377933e-05, "loss": 0.0152, "step": 2700 }, { "epoch": 0.0, "learning_rate": 2.999302076335591e-05, "loss": 0.0149, "step": 2800 }, { "epoch": 0.0, "learning_rate": 2.9992766972932488e-05, "loss": 0.0185, "step": 2900 }, { "epoch": 0.0, "learning_rate": 2.9992513182509068e-05, "loss": 0.0142, "step": 3000 }, { "epoch": 0.0, "learning_rate": 2.9992259392085648e-05, "loss": 0.0158, "step": 3100 }, { "epoch": 0.0, "learning_rate": 2.9992005601662224e-05, "loss": 0.022, "step": 3200 }, { "epoch": 0.0, "learning_rate": 2.9991751811238804e-05, "loss": 0.0072, "step": 3300 }, { "epoch": 0.0, "learning_rate": 2.999149802081538e-05, "loss": 0.0094, "step": 3400 }, { "epoch": 0.0, "learning_rate": 2.999124423039196e-05, "loss": 0.0118, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2.999099043996854e-05, "loss": 0.0285, "step": 3600 }, { "epoch": 0.0, "learning_rate": 2.9990736649545117e-05, "loss": 0.0151, "step": 3700 }, { "epoch": 0.0, "learning_rate": 2.9990482859121697e-05, "loss": 0.0112, "step": 3800 }, { "epoch": 0.0, "learning_rate": 2.9990229068698277e-05, "loss": 0.013, "step": 3900 }, { "epoch": 0.0, "learning_rate": 2.9989975278274854e-05, "loss": 0.0123, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2.9989721487851434e-05, "loss": 0.0242, "step": 4100 }, { "epoch": 0.0, "learning_rate": 2.998946769742801e-05, "loss": 0.0162, "step": 4200 }, { "epoch": 0.0, "learning_rate": 2.998921390700459e-05, "loss": 0.0138, "step": 4300 }, { "epoch": 0.0, "learning_rate": 2.998896011658117e-05, "loss": 0.007, "step": 4400 }, { "epoch": 0.0, "learning_rate": 2.9988706326157747e-05, "loss": 0.0076, "step": 4500 }, { "epoch": 0.0, "learning_rate": 2.9988452535734327e-05, "loss": 0.0386, "step": 4600 }, { "epoch": 0.0, "learning_rate": 2.9988198745310904e-05, "loss": 0.0165, "step": 4700 }, { "epoch": 0.0, "learning_rate": 2.9987944954887484e-05, "loss": 0.0156, "step": 4800 }, { "epoch": 0.0, "learning_rate": 2.9987691164464064e-05, "loss": 0.0292, "step": 4900 }, { "epoch": 0.0, "learning_rate": 2.998743737404064e-05, "loss": 0.0485, "step": 5000 }, { "epoch": 0.0, "learning_rate": 2.998718358361722e-05, "loss": 0.0325, "step": 5100 }, { "epoch": 0.0, "learning_rate": 2.9986929793193797e-05, "loss": 0.0172, "step": 5200 }, { "epoch": 0.0, "learning_rate": 2.9986676002770377e-05, "loss": 0.0139, "step": 5300 }, { "epoch": 0.0, "learning_rate": 2.9986422212346957e-05, "loss": 0.0165, "step": 5400 }, { "epoch": 0.0, "learning_rate": 2.998617095982777e-05, "loss": 0.0344, "step": 5500 }, { "epoch": 0.0, "learning_rate": 2.9985917169404347e-05, "loss": 0.0085, "step": 5600 }, { "epoch": 0.0, "learning_rate": 2.9985663378980927e-05, "loss": 0.0337, "step": 5700 }, { "epoch": 0.0, "learning_rate": 2.9985409588557503e-05, "loss": 0.0106, "step": 5800 }, { "epoch": 0.0, "learning_rate": 2.9985155798134083e-05, "loss": 0.0097, "step": 5900 }, { "epoch": 0.0, "learning_rate": 2.9984902007710663e-05, "loss": 0.0128, "step": 6000 }, { "epoch": 0.0, "learning_rate": 2.998464821728724e-05, "loss": 0.0137, "step": 6100 }, { "epoch": 0.0, "learning_rate": 2.998439442686382e-05, "loss": 0.017, "step": 6200 }, { "epoch": 0.0, "learning_rate": 2.9984140636440396e-05, "loss": 0.0324, "step": 6300 }, { "epoch": 0.0, "learning_rate": 2.998388938392121e-05, "loss": 0.0165, "step": 6400 }, { "epoch": 0.0, "learning_rate": 2.998363559349779e-05, "loss": 0.0132, "step": 6500 }, { "epoch": 0.0, "learning_rate": 2.9983381803074366e-05, "loss": 0.0155, "step": 6600 }, { "epoch": 0.0, "learning_rate": 2.9983128012650946e-05, "loss": 0.0121, "step": 6700 }, { "epoch": 0.0, "learning_rate": 2.9982874222227526e-05, "loss": 0.0115, "step": 6800 }, { "epoch": 0.0, "learning_rate": 2.9982620431804103e-05, "loss": 0.0088, "step": 6900 }, { "epoch": 0.0, "learning_rate": 2.9982366641380683e-05, "loss": 0.0132, "step": 7000 }, { "epoch": 0.0, "learning_rate": 2.9982112850957263e-05, "loss": 0.0078, "step": 7100 }, { "epoch": 0.0, "learning_rate": 2.998185906053384e-05, "loss": 0.0068, "step": 7200 }, { "epoch": 0.0, "learning_rate": 2.998160527011042e-05, "loss": 0.0092, "step": 7300 }, { "epoch": 0.0, "learning_rate": 2.9981351479686996e-05, "loss": 0.0175, "step": 7400 }, { "epoch": 0.0, "learning_rate": 2.9981097689263576e-05, "loss": 0.0096, "step": 7500 }, { "epoch": 0.0, "learning_rate": 2.9980843898840156e-05, "loss": 0.0097, "step": 7600 }, { "epoch": 0.0, "learning_rate": 2.9980590108416732e-05, "loss": 0.012, "step": 7700 }, { "epoch": 0.0, "learning_rate": 2.9980336317993312e-05, "loss": 0.013, "step": 7800 }, { "epoch": 0.0, "learning_rate": 2.998008252756989e-05, "loss": 0.0153, "step": 7900 }, { "epoch": 0.0, "learning_rate": 2.997982873714647e-05, "loss": 0.0119, "step": 8000 }, { "epoch": 0.0, "learning_rate": 2.997957494672305e-05, "loss": 0.0088, "step": 8100 }, { "epoch": 0.0, "learning_rate": 2.9979321156299626e-05, "loss": 0.0077, "step": 8200 }, { "epoch": 0.0, "learning_rate": 2.9979067365876202e-05, "loss": 0.0107, "step": 8300 }, { "epoch": 0.0, "learning_rate": 2.997881357545278e-05, "loss": 0.0231, "step": 8400 }, { "epoch": 0.0, "learning_rate": 2.997855978502936e-05, "loss": 0.0119, "step": 8500 }, { "epoch": 0.0, "learning_rate": 2.997830599460594e-05, "loss": 0.0094, "step": 8600 }, { "epoch": 0.0, "learning_rate": 2.9978054742086752e-05, "loss": 0.0208, "step": 8700 }, { "epoch": 0.0, "learning_rate": 2.997780095166333e-05, "loss": 0.0096, "step": 8800 }, { "epoch": 0.0, "learning_rate": 2.997754716123991e-05, "loss": 0.0075, "step": 8900 }, { "epoch": 0.0, "learning_rate": 2.9977293370816485e-05, "loss": 0.0066, "step": 9000 }, { "epoch": 0.0, "learning_rate": 2.9977039580393065e-05, "loss": 0.0106, "step": 9100 }, { "epoch": 0.0, "learning_rate": 2.9976785789969645e-05, "loss": 0.0351, "step": 9200 }, { "epoch": 0.0, "learning_rate": 2.997653199954622e-05, "loss": 0.0923, "step": 9300 }, { "epoch": 0.0, "learning_rate": 2.99762782091228e-05, "loss": 0.0104, "step": 9400 }, { "epoch": 0.0, "learning_rate": 2.9976024418699378e-05, "loss": 0.0314, "step": 9500 }, { "epoch": 0.0, "learning_rate": 2.9975770628275958e-05, "loss": 0.0099, "step": 9600 }, { "epoch": 0.0, "learning_rate": 2.9975516837852538e-05, "loss": 0.008, "step": 9700 }, { "epoch": 0.0, "learning_rate": 2.9975263047429115e-05, "loss": 0.0105, "step": 9800 }, { "epoch": 0.0, "learning_rate": 2.9975009257005695e-05, "loss": 0.0104, "step": 9900 }, { "epoch": 0.0, "learning_rate": 2.997475546658227e-05, "loss": 0.0172, "step": 10000 }, { "epoch": 0.0, "learning_rate": 2.997450167615885e-05, "loss": 0.0107, "step": 10100 }, { "epoch": 0.0, "learning_rate": 2.997424788573543e-05, "loss": 0.0123, "step": 10200 }, { "epoch": 0.0, "learning_rate": 2.9973994095312008e-05, "loss": 0.0085, "step": 10300 }, { "epoch": 0.0, "learning_rate": 2.9973740304888588e-05, "loss": 0.0205, "step": 10400 }, { "epoch": 0.0, "learning_rate": 2.9973486514465168e-05, "loss": 0.0086, "step": 10500 }, { "epoch": 0.0, "learning_rate": 2.9973232724041744e-05, "loss": 0.0142, "step": 10600 }, { "epoch": 0.0, "learning_rate": 2.9972978933618324e-05, "loss": 0.0154, "step": 10700 }, { "epoch": 0.0, "learning_rate": 2.99727251431949e-05, "loss": 0.0158, "step": 10800 }, { "epoch": 0.0, "learning_rate": 2.997247135277148e-05, "loss": 0.0128, "step": 10900 }, { "epoch": 0.0, "learning_rate": 2.997221756234806e-05, "loss": 0.0156, "step": 11000 }, { "epoch": 0.0, "learning_rate": 2.9971963771924638e-05, "loss": 0.0088, "step": 11100 }, { "epoch": 0.0, "learning_rate": 2.9971709981501218e-05, "loss": 0.0256, "step": 11200 }, { "epoch": 0.0, "learning_rate": 2.9971456191077794e-05, "loss": 0.0208, "step": 11300 }, { "epoch": 0.0, "learning_rate": 2.9971202400654374e-05, "loss": 0.0218, "step": 11400 }, { "epoch": 0.0, "learning_rate": 2.9970948610230954e-05, "loss": 0.0116, "step": 11500 }, { "epoch": 0.0, "learning_rate": 2.997069481980753e-05, "loss": 0.0195, "step": 11600 }, { "epoch": 0.0, "learning_rate": 2.997044102938411e-05, "loss": 0.0142, "step": 11700 }, { "epoch": 0.0, "learning_rate": 2.9970187238960687e-05, "loss": 0.0114, "step": 11800 }, { "epoch": 0.0, "learning_rate": 2.9969933448537267e-05, "loss": 0.0246, "step": 11900 }, { "epoch": 0.0, "learning_rate": 2.9969679658113847e-05, "loss": 0.0187, "step": 12000 }, { "epoch": 0.0, "learning_rate": 2.9969425867690424e-05, "loss": 0.011, "step": 12100 }, { "epoch": 0.0, "learning_rate": 2.9969172077267004e-05, "loss": 0.0094, "step": 12200 }, { "epoch": 0.0, "learning_rate": 2.9968918286843584e-05, "loss": 0.0129, "step": 12300 }, { "epoch": 0.0, "learning_rate": 2.996866449642016e-05, "loss": 0.0397, "step": 12400 }, { "epoch": 0.0, "learning_rate": 2.996841070599674e-05, "loss": 0.0174, "step": 12500 }, { "epoch": 0.0, "learning_rate": 2.9968156915573317e-05, "loss": 0.0355, "step": 12600 }, { "epoch": 0.0, "learning_rate": 2.9967903125149897e-05, "loss": 0.0124, "step": 12700 }, { "epoch": 0.0, "learning_rate": 2.9967649334726477e-05, "loss": 0.0164, "step": 12800 }, { "epoch": 0.0, "learning_rate": 2.9967395544303053e-05, "loss": 0.0311, "step": 12900 }, { "epoch": 0.0, "learning_rate": 2.9967141753879633e-05, "loss": 0.0224, "step": 13000 }, { "epoch": 0.0, "learning_rate": 2.996688796345621e-05, "loss": 0.0171, "step": 13100 }, { "epoch": 0.0, "learning_rate": 2.996663417303279e-05, "loss": 0.0185, "step": 13200 }, { "epoch": 0.0, "learning_rate": 2.996638038260937e-05, "loss": 0.0136, "step": 13300 }, { "epoch": 0.0, "learning_rate": 2.9966126592185947e-05, "loss": 0.0101, "step": 13400 }, { "epoch": 0.0, "learning_rate": 2.9965872801762527e-05, "loss": 0.0362, "step": 13500 }, { "epoch": 0.0, "learning_rate": 2.9965619011339103e-05, "loss": 0.0467, "step": 13600 }, { "epoch": 0.0, "learning_rate": 2.996536522091568e-05, "loss": 0.0128, "step": 13700 }, { "epoch": 0.0, "learning_rate": 2.996511143049226e-05, "loss": 0.0112, "step": 13800 }, { "epoch": 0.0, "learning_rate": 2.9964857640068836e-05, "loss": 0.0109, "step": 13900 }, { "epoch": 0.0, "learning_rate": 2.9964603849645416e-05, "loss": 0.0159, "step": 14000 }, { "epoch": 0.0, "learning_rate": 2.9964350059221996e-05, "loss": 0.019, "step": 14100 }, { "epoch": 0.0, "learning_rate": 2.9964098806702806e-05, "loss": 0.0106, "step": 14200 }, { "epoch": 0.0, "learning_rate": 2.9963845016279386e-05, "loss": 0.0207, "step": 14300 }, { "epoch": 0.0, "learning_rate": 2.9963591225855966e-05, "loss": 0.0184, "step": 14400 }, { "epoch": 0.0, "learning_rate": 2.9963337435432543e-05, "loss": 0.0129, "step": 14500 }, { "epoch": 0.0, "learning_rate": 2.9963083645009123e-05, "loss": 0.0105, "step": 14600 }, { "epoch": 0.0, "learning_rate": 2.99628298545857e-05, "loss": 0.0169, "step": 14700 }, { "epoch": 0.0, "learning_rate": 2.996257606416228e-05, "loss": 0.0108, "step": 14800 }, { "epoch": 0.0, "learning_rate": 2.996232227373886e-05, "loss": 0.0163, "step": 14900 }, { "epoch": 0.0, "learning_rate": 2.9962068483315436e-05, "loss": 0.0095, "step": 15000 }, { "epoch": 0.0, "learning_rate": 2.9961814692892016e-05, "loss": 0.0099, "step": 15100 }, { "epoch": 0.0, "learning_rate": 2.9961560902468592e-05, "loss": 0.0201, "step": 15200 }, { "epoch": 0.0, "learning_rate": 2.9961307112045172e-05, "loss": 0.0164, "step": 15300 }, { "epoch": 0.0, "learning_rate": 2.9961053321621752e-05, "loss": 0.021, "step": 15400 }, { "epoch": 0.0, "learning_rate": 2.996079953119833e-05, "loss": 0.0148, "step": 15500 }, { "epoch": 0.0, "learning_rate": 2.996054574077491e-05, "loss": 0.0156, "step": 15600 }, { "epoch": 0.0, "learning_rate": 2.996029195035149e-05, "loss": 0.0124, "step": 15700 }, { "epoch": 0.0, "learning_rate": 2.9960038159928066e-05, "loss": 0.0163, "step": 15800 }, { "epoch": 0.0, "learning_rate": 2.9959784369504646e-05, "loss": 0.0113, "step": 15900 }, { "epoch": 0.0, "learning_rate": 2.9959530579081222e-05, "loss": 0.0097, "step": 16000 }, { "epoch": 0.0, "learning_rate": 2.9959276788657802e-05, "loss": 0.0133, "step": 16100 }, { "epoch": 0.0, "learning_rate": 2.9959022998234382e-05, "loss": 0.0122, "step": 16200 }, { "epoch": 0.0, "learning_rate": 2.995876920781096e-05, "loss": 0.0608, "step": 16300 }, { "epoch": 0.0, "learning_rate": 2.995851541738754e-05, "loss": 0.0181, "step": 16400 }, { "epoch": 0.0, "learning_rate": 2.9958261626964115e-05, "loss": 0.0464, "step": 16500 }, { "epoch": 0.0, "learning_rate": 2.9958007836540695e-05, "loss": 0.0163, "step": 16600 }, { "epoch": 0.0, "learning_rate": 2.9957754046117275e-05, "loss": 0.0095, "step": 16700 }, { "epoch": 0.0, "learning_rate": 2.9957500255693852e-05, "loss": 0.01, "step": 16800 }, { "epoch": 0.0, "learning_rate": 2.9957246465270432e-05, "loss": 0.038, "step": 16900 }, { "epoch": 0.0, "learning_rate": 2.995699267484701e-05, "loss": 0.0149, "step": 17000 }, { "epoch": 0.0, "learning_rate": 2.995673888442359e-05, "loss": 0.0207, "step": 17100 }, { "epoch": 0.0, "learning_rate": 2.995648509400017e-05, "loss": 0.0194, "step": 17200 }, { "epoch": 0.0, "learning_rate": 2.9956231303576745e-05, "loss": 0.016, "step": 17300 }, { "epoch": 0.0, "learning_rate": 2.9955977513153325e-05, "loss": 0.0162, "step": 17400 }, { "epoch": 0.0, "learning_rate": 2.9955723722729905e-05, "loss": 0.0135, "step": 17500 }, { "epoch": 0.0, "learning_rate": 2.995546993230648e-05, "loss": 0.0253, "step": 17600 }, { "epoch": 0.0, "learning_rate": 2.995521614188306e-05, "loss": 0.0138, "step": 17700 }, { "epoch": 0.0, "learning_rate": 2.9954962351459638e-05, "loss": 0.0125, "step": 17800 }, { "epoch": 0.0, "learning_rate": 2.9954708561036218e-05, "loss": 0.0113, "step": 17900 }, { "epoch": 0.0, "learning_rate": 2.9954454770612798e-05, "loss": 0.0184, "step": 18000 }, { "epoch": 0.0, "learning_rate": 2.9954200980189375e-05, "loss": 0.0169, "step": 18100 }, { "epoch": 0.0, "learning_rate": 2.9953947189765955e-05, "loss": 0.01, "step": 18200 }, { "epoch": 0.0, "learning_rate": 2.995369339934253e-05, "loss": 0.0151, "step": 18300 }, { "epoch": 0.0, "learning_rate": 2.995343960891911e-05, "loss": 0.0171, "step": 18400 }, { "epoch": 0.0, "learning_rate": 2.995318581849569e-05, "loss": 0.0238, "step": 18500 }, { "epoch": 0.0, "learning_rate": 2.9952932028072268e-05, "loss": 0.0266, "step": 18600 }, { "epoch": 0.0, "learning_rate": 2.995268077555308e-05, "loss": 0.0082, "step": 18700 }, { "epoch": 0.0, "learning_rate": 2.995242698512966e-05, "loss": 0.0279, "step": 18800 }, { "epoch": 0.0, "learning_rate": 2.9952175732610474e-05, "loss": 0.0178, "step": 18900 }, { "epoch": 0.0, "learning_rate": 2.995192194218705e-05, "loss": 0.0169, "step": 19000 }, { "epoch": 0.0, "learning_rate": 2.995166815176363e-05, "loss": 0.0181, "step": 19100 }, { "epoch": 0.0, "learning_rate": 2.9951414361340207e-05, "loss": 0.0257, "step": 19200 }, { "epoch": 0.0, "learning_rate": 2.9951160570916787e-05, "loss": 0.0138, "step": 19300 }, { "epoch": 0.0, "learning_rate": 2.9950906780493364e-05, "loss": 0.0203, "step": 19400 }, { "epoch": 0.0, "learning_rate": 2.995065299006994e-05, "loss": 0.0265, "step": 19500 }, { "epoch": 0.0, "learning_rate": 2.995039919964652e-05, "loss": 0.0291, "step": 19600 }, { "epoch": 0.0, "learning_rate": 2.9950145409223097e-05, "loss": 0.0114, "step": 19700 }, { "epoch": 0.0, "learning_rate": 2.9949891618799677e-05, "loss": 0.0126, "step": 19800 }, { "epoch": 0.0, "learning_rate": 2.9949637828376257e-05, "loss": 0.0182, "step": 19900 }, { "epoch": 0.0, "learning_rate": 2.9949384037952834e-05, "loss": 0.0161, "step": 20000 }, { "epoch": 0.0, "learning_rate": 2.9949130247529414e-05, "loss": 0.0145, "step": 20100 }, { "epoch": 0.0, "learning_rate": 2.994887645710599e-05, "loss": 0.0142, "step": 20200 }, { "epoch": 0.0, "learning_rate": 2.994862266668257e-05, "loss": 0.0187, "step": 20300 }, { "epoch": 0.0, "learning_rate": 2.994836887625915e-05, "loss": 0.0172, "step": 20400 }, { "epoch": 0.0, "learning_rate": 2.9948115085835727e-05, "loss": 0.0091, "step": 20500 }, { "epoch": 0.0, "learning_rate": 2.9947861295412307e-05, "loss": 0.021, "step": 20600 }, { "epoch": 0.0, "learning_rate": 2.9947607504988887e-05, "loss": 0.0241, "step": 20700 }, { "epoch": 0.0, "learning_rate": 2.9947353714565463e-05, "loss": 0.0161, "step": 20800 }, { "epoch": 0.0, "learning_rate": 2.9947099924142043e-05, "loss": 0.0134, "step": 20900 }, { "epoch": 0.0, "learning_rate": 2.994684613371862e-05, "loss": 0.0199, "step": 21000 }, { "epoch": 0.0, "learning_rate": 2.99465923432952e-05, "loss": 0.021, "step": 21100 }, { "epoch": 0.0, "learning_rate": 2.994633855287178e-05, "loss": 0.0191, "step": 21200 }, { "epoch": 0.0, "learning_rate": 2.9946084762448356e-05, "loss": 0.0151, "step": 21300 }, { "epoch": 0.0, "learning_rate": 2.9945830972024936e-05, "loss": 0.0323, "step": 21400 }, { "epoch": 0.0, "learning_rate": 2.9945577181601513e-05, "loss": 0.0185, "step": 21500 }, { "epoch": 0.0, "learning_rate": 2.9945323391178093e-05, "loss": 0.036, "step": 21600 }, { "epoch": 0.0, "learning_rate": 2.9945072138658906e-05, "loss": 0.0146, "step": 21700 }, { "epoch": 0.0, "learning_rate": 2.9944818348235483e-05, "loss": 0.0136, "step": 21800 }, { "epoch": 0.0, "learning_rate": 2.9944564557812063e-05, "loss": 0.0111, "step": 21900 }, { "epoch": 0.0, "learning_rate": 2.9944310767388643e-05, "loss": 0.0101, "step": 22000 }, { "epoch": 0.0, "learning_rate": 2.994405697696522e-05, "loss": 0.0117, "step": 22100 }, { "epoch": 0.0, "learning_rate": 2.99438031865418e-05, "loss": 0.0168, "step": 22200 }, { "epoch": 0.0, "learning_rate": 2.994354939611838e-05, "loss": 0.0222, "step": 22300 }, { "epoch": 0.0, "learning_rate": 2.9943295605694956e-05, "loss": 0.0174, "step": 22400 }, { "epoch": 0.0, "learning_rate": 2.9943041815271536e-05, "loss": 0.0112, "step": 22500 }, { "epoch": 0.0, "learning_rate": 2.9942788024848113e-05, "loss": 0.0216, "step": 22600 }, { "epoch": 0.0, "learning_rate": 2.9942534234424693e-05, "loss": 0.0231, "step": 22700 }, { "epoch": 0.0, "learning_rate": 2.9942280444001272e-05, "loss": 0.0205, "step": 22800 }, { "epoch": 0.0, "learning_rate": 2.994202665357785e-05, "loss": 0.0109, "step": 22900 }, { "epoch": 0.0, "learning_rate": 2.994177286315443e-05, "loss": 0.0289, "step": 23000 }, { "epoch": 0.0, "learning_rate": 2.9941519072731006e-05, "loss": 0.0062, "step": 23100 }, { "epoch": 0.0, "learning_rate": 2.9941265282307586e-05, "loss": 0.0114, "step": 23200 }, { "epoch": 0.0, "learning_rate": 2.9941011491884166e-05, "loss": 0.0151, "step": 23300 }, { "epoch": 0.0, "eval_accuracy": 0.9947417043262652, "eval_f1": 0.9856725059531255, "eval_loss": 0.019235746935009956, "eval_matthews_correlation": 0.9713496749738897, "eval_precision": 0.9844822149673419, "eval_recall": 0.9868703958180809, "eval_runtime": 13952.5459, "eval_samples_per_second": 1495.694, "eval_steps_per_second": 1495.694, "step": 23392 } ], "max_steps": 11820827, "num_train_epochs": 1, "total_flos": 1.0387544338753126e+18, "trial_name": null, "trial_params": null }