|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0019788801578772787, |
|
"global_step": 23392, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999987310478829e-05, |
|
"loss": 0.0134, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999961931436487e-05, |
|
"loss": 0.0121, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999936552394145e-05, |
|
"loss": 0.016, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9999111733518026e-05, |
|
"loss": 0.0097, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9998857943094606e-05, |
|
"loss": 0.0058, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9998604152671182e-05, |
|
"loss": 0.0074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9998350362247762e-05, |
|
"loss": 0.0122, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9998096571824342e-05, |
|
"loss": 0.0071, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999784278140092e-05, |
|
"loss": 0.0152, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99975889909775e-05, |
|
"loss": 0.0136, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9997335200554075e-05, |
|
"loss": 0.0093, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9997081410130655e-05, |
|
"loss": 0.0103, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9996827619707235e-05, |
|
"loss": 0.0095, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9996573829283812e-05, |
|
"loss": 0.013, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9996320038860392e-05, |
|
"loss": 0.0222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999606624843697e-05, |
|
"loss": 0.0151, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999581245801355e-05, |
|
"loss": 0.0289, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999555866759013e-05, |
|
"loss": 0.0075, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9995304877166705e-05, |
|
"loss": 0.0073, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9995051086743285e-05, |
|
"loss": 0.0069, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9994797296319865e-05, |
|
"loss": 0.016, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999454350589644e-05, |
|
"loss": 0.0077, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999428971547302e-05, |
|
"loss": 0.0107, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9994035925049598e-05, |
|
"loss": 0.03, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9993782134626178e-05, |
|
"loss": 0.0203, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9993528344202758e-05, |
|
"loss": 0.0128, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999327455377933e-05, |
|
"loss": 0.0152, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999302076335591e-05, |
|
"loss": 0.0149, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9992766972932488e-05, |
|
"loss": 0.0185, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9992513182509068e-05, |
|
"loss": 0.0142, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9992259392085648e-05, |
|
"loss": 0.0158, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9992005601662224e-05, |
|
"loss": 0.022, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9991751811238804e-05, |
|
"loss": 0.0072, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999149802081538e-05, |
|
"loss": 0.0094, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999124423039196e-05, |
|
"loss": 0.0118, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.999099043996854e-05, |
|
"loss": 0.0285, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9990736649545117e-05, |
|
"loss": 0.0151, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9990482859121697e-05, |
|
"loss": 0.0112, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9990229068698277e-05, |
|
"loss": 0.013, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9989975278274854e-05, |
|
"loss": 0.0123, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9989721487851434e-05, |
|
"loss": 0.0242, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998946769742801e-05, |
|
"loss": 0.0162, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998921390700459e-05, |
|
"loss": 0.0138, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998896011658117e-05, |
|
"loss": 0.007, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9988706326157747e-05, |
|
"loss": 0.0076, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9988452535734327e-05, |
|
"loss": 0.0386, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9988198745310904e-05, |
|
"loss": 0.0165, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9987944954887484e-05, |
|
"loss": 0.0156, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9987691164464064e-05, |
|
"loss": 0.0292, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998743737404064e-05, |
|
"loss": 0.0485, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998718358361722e-05, |
|
"loss": 0.0325, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9986929793193797e-05, |
|
"loss": 0.0172, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9986676002770377e-05, |
|
"loss": 0.0139, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9986422212346957e-05, |
|
"loss": 0.0165, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998617095982777e-05, |
|
"loss": 0.0344, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9985917169404347e-05, |
|
"loss": 0.0085, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9985663378980927e-05, |
|
"loss": 0.0337, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9985409588557503e-05, |
|
"loss": 0.0106, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9985155798134083e-05, |
|
"loss": 0.0097, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9984902007710663e-05, |
|
"loss": 0.0128, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998464821728724e-05, |
|
"loss": 0.0137, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998439442686382e-05, |
|
"loss": 0.017, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9984140636440396e-05, |
|
"loss": 0.0324, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998388938392121e-05, |
|
"loss": 0.0165, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998363559349779e-05, |
|
"loss": 0.0132, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9983381803074366e-05, |
|
"loss": 0.0155, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9983128012650946e-05, |
|
"loss": 0.0121, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9982874222227526e-05, |
|
"loss": 0.0115, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9982620431804103e-05, |
|
"loss": 0.0088, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9982366641380683e-05, |
|
"loss": 0.0132, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9982112850957263e-05, |
|
"loss": 0.0078, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998185906053384e-05, |
|
"loss": 0.0068, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998160527011042e-05, |
|
"loss": 0.0092, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9981351479686996e-05, |
|
"loss": 0.0175, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9981097689263576e-05, |
|
"loss": 0.0096, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9980843898840156e-05, |
|
"loss": 0.0097, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9980590108416732e-05, |
|
"loss": 0.012, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9980336317993312e-05, |
|
"loss": 0.013, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.998008252756989e-05, |
|
"loss": 0.0153, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997982873714647e-05, |
|
"loss": 0.0119, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997957494672305e-05, |
|
"loss": 0.0088, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9979321156299626e-05, |
|
"loss": 0.0077, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9979067365876202e-05, |
|
"loss": 0.0107, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997881357545278e-05, |
|
"loss": 0.0231, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997855978502936e-05, |
|
"loss": 0.0119, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997830599460594e-05, |
|
"loss": 0.0094, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9978054742086752e-05, |
|
"loss": 0.0208, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997780095166333e-05, |
|
"loss": 0.0096, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997754716123991e-05, |
|
"loss": 0.0075, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9977293370816485e-05, |
|
"loss": 0.0066, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9977039580393065e-05, |
|
"loss": 0.0106, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9976785789969645e-05, |
|
"loss": 0.0351, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997653199954622e-05, |
|
"loss": 0.0923, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99762782091228e-05, |
|
"loss": 0.0104, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9976024418699378e-05, |
|
"loss": 0.0314, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9975770628275958e-05, |
|
"loss": 0.0099, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9975516837852538e-05, |
|
"loss": 0.008, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9975263047429115e-05, |
|
"loss": 0.0105, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9975009257005695e-05, |
|
"loss": 0.0104, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997475546658227e-05, |
|
"loss": 0.0172, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997450167615885e-05, |
|
"loss": 0.0107, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997424788573543e-05, |
|
"loss": 0.0123, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9973994095312008e-05, |
|
"loss": 0.0085, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9973740304888588e-05, |
|
"loss": 0.0205, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9973486514465168e-05, |
|
"loss": 0.0086, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9973232724041744e-05, |
|
"loss": 0.0142, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9972978933618324e-05, |
|
"loss": 0.0154, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99727251431949e-05, |
|
"loss": 0.0158, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997247135277148e-05, |
|
"loss": 0.0128, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997221756234806e-05, |
|
"loss": 0.0156, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9971963771924638e-05, |
|
"loss": 0.0088, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9971709981501218e-05, |
|
"loss": 0.0256, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9971456191077794e-05, |
|
"loss": 0.0208, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9971202400654374e-05, |
|
"loss": 0.0218, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9970948610230954e-05, |
|
"loss": 0.0116, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997069481980753e-05, |
|
"loss": 0.0195, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.997044102938411e-05, |
|
"loss": 0.0142, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9970187238960687e-05, |
|
"loss": 0.0114, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9969933448537267e-05, |
|
"loss": 0.0246, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9969679658113847e-05, |
|
"loss": 0.0187, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9969425867690424e-05, |
|
"loss": 0.011, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9969172077267004e-05, |
|
"loss": 0.0094, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9968918286843584e-05, |
|
"loss": 0.0129, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996866449642016e-05, |
|
"loss": 0.0397, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996841070599674e-05, |
|
"loss": 0.0174, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9968156915573317e-05, |
|
"loss": 0.0355, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9967903125149897e-05, |
|
"loss": 0.0124, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9967649334726477e-05, |
|
"loss": 0.0164, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9967395544303053e-05, |
|
"loss": 0.0311, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9967141753879633e-05, |
|
"loss": 0.0224, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996688796345621e-05, |
|
"loss": 0.0171, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996663417303279e-05, |
|
"loss": 0.0185, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996638038260937e-05, |
|
"loss": 0.0136, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9966126592185947e-05, |
|
"loss": 0.0101, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9965872801762527e-05, |
|
"loss": 0.0362, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9965619011339103e-05, |
|
"loss": 0.0467, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996536522091568e-05, |
|
"loss": 0.0128, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996511143049226e-05, |
|
"loss": 0.0112, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9964857640068836e-05, |
|
"loss": 0.0109, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9964603849645416e-05, |
|
"loss": 0.0159, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9964350059221996e-05, |
|
"loss": 0.019, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9964098806702806e-05, |
|
"loss": 0.0106, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9963845016279386e-05, |
|
"loss": 0.0207, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9963591225855966e-05, |
|
"loss": 0.0184, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9963337435432543e-05, |
|
"loss": 0.0129, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9963083645009123e-05, |
|
"loss": 0.0105, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99628298545857e-05, |
|
"loss": 0.0169, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996257606416228e-05, |
|
"loss": 0.0108, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996232227373886e-05, |
|
"loss": 0.0163, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9962068483315436e-05, |
|
"loss": 0.0095, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9961814692892016e-05, |
|
"loss": 0.0099, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9961560902468592e-05, |
|
"loss": 0.0201, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9961307112045172e-05, |
|
"loss": 0.0164, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9961053321621752e-05, |
|
"loss": 0.021, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996079953119833e-05, |
|
"loss": 0.0148, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996054574077491e-05, |
|
"loss": 0.0156, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.996029195035149e-05, |
|
"loss": 0.0124, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9960038159928066e-05, |
|
"loss": 0.0163, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9959784369504646e-05, |
|
"loss": 0.0113, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9959530579081222e-05, |
|
"loss": 0.0097, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9959276788657802e-05, |
|
"loss": 0.0133, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9959022998234382e-05, |
|
"loss": 0.0122, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995876920781096e-05, |
|
"loss": 0.0608, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995851541738754e-05, |
|
"loss": 0.0181, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9958261626964115e-05, |
|
"loss": 0.0464, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9958007836540695e-05, |
|
"loss": 0.0163, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9957754046117275e-05, |
|
"loss": 0.0095, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9957500255693852e-05, |
|
"loss": 0.01, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9957246465270432e-05, |
|
"loss": 0.038, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995699267484701e-05, |
|
"loss": 0.0149, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995673888442359e-05, |
|
"loss": 0.0207, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995648509400017e-05, |
|
"loss": 0.0194, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9956231303576745e-05, |
|
"loss": 0.016, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9955977513153325e-05, |
|
"loss": 0.0162, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9955723722729905e-05, |
|
"loss": 0.0135, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995546993230648e-05, |
|
"loss": 0.0253, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995521614188306e-05, |
|
"loss": 0.0138, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9954962351459638e-05, |
|
"loss": 0.0125, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9954708561036218e-05, |
|
"loss": 0.0113, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9954454770612798e-05, |
|
"loss": 0.0184, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9954200980189375e-05, |
|
"loss": 0.0169, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9953947189765955e-05, |
|
"loss": 0.01, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995369339934253e-05, |
|
"loss": 0.0151, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995343960891911e-05, |
|
"loss": 0.0171, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995318581849569e-05, |
|
"loss": 0.0238, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9952932028072268e-05, |
|
"loss": 0.0266, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995268077555308e-05, |
|
"loss": 0.0082, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995242698512966e-05, |
|
"loss": 0.0279, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9952175732610474e-05, |
|
"loss": 0.0178, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995192194218705e-05, |
|
"loss": 0.0169, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995166815176363e-05, |
|
"loss": 0.0181, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9951414361340207e-05, |
|
"loss": 0.0257, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9951160570916787e-05, |
|
"loss": 0.0138, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9950906780493364e-05, |
|
"loss": 0.0203, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995065299006994e-05, |
|
"loss": 0.0265, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.995039919964652e-05, |
|
"loss": 0.0291, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9950145409223097e-05, |
|
"loss": 0.0114, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9949891618799677e-05, |
|
"loss": 0.0126, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9949637828376257e-05, |
|
"loss": 0.0182, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9949384037952834e-05, |
|
"loss": 0.0161, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9949130247529414e-05, |
|
"loss": 0.0145, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994887645710599e-05, |
|
"loss": 0.0142, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994862266668257e-05, |
|
"loss": 0.0187, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994836887625915e-05, |
|
"loss": 0.0172, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9948115085835727e-05, |
|
"loss": 0.0091, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9947861295412307e-05, |
|
"loss": 0.021, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9947607504988887e-05, |
|
"loss": 0.0241, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9947353714565463e-05, |
|
"loss": 0.0161, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9947099924142043e-05, |
|
"loss": 0.0134, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994684613371862e-05, |
|
"loss": 0.0199, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99465923432952e-05, |
|
"loss": 0.021, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994633855287178e-05, |
|
"loss": 0.0191, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9946084762448356e-05, |
|
"loss": 0.0151, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9945830972024936e-05, |
|
"loss": 0.0323, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9945577181601513e-05, |
|
"loss": 0.0185, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9945323391178093e-05, |
|
"loss": 0.036, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9945072138658906e-05, |
|
"loss": 0.0146, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9944818348235483e-05, |
|
"loss": 0.0136, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9944564557812063e-05, |
|
"loss": 0.0111, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9944310767388643e-05, |
|
"loss": 0.0101, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994405697696522e-05, |
|
"loss": 0.0117, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.99438031865418e-05, |
|
"loss": 0.0168, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994354939611838e-05, |
|
"loss": 0.0222, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9943295605694956e-05, |
|
"loss": 0.0174, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9943041815271536e-05, |
|
"loss": 0.0112, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9942788024848113e-05, |
|
"loss": 0.0216, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9942534234424693e-05, |
|
"loss": 0.0231, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9942280444001272e-05, |
|
"loss": 0.0205, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994202665357785e-05, |
|
"loss": 0.0109, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.994177286315443e-05, |
|
"loss": 0.0289, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9941519072731006e-05, |
|
"loss": 0.0062, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9941265282307586e-05, |
|
"loss": 0.0114, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9941011491884166e-05, |
|
"loss": 0.0151, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_accuracy": 0.9947417043262652, |
|
"eval_f1": 0.9856725059531255, |
|
"eval_loss": 0.019235746935009956, |
|
"eval_matthews_correlation": 0.9713496749738897, |
|
"eval_precision": 0.9844822149673419, |
|
"eval_recall": 0.9868703958180809, |
|
"eval_runtime": 13952.5459, |
|
"eval_samples_per_second": 1495.694, |
|
"eval_steps_per_second": 1495.694, |
|
"step": 23392 |
|
} |
|
], |
|
"max_steps": 11820827, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.0387544338753126e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|