{ "best_metric": 0.03530377894639969, "best_model_checkpoint": "my_awesome_ds_model/checkpoint-22473", "epoch": 10.0, "eval_steps": 500, "global_step": 74910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 9.933253237217995e-05, "loss": 0.2746, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.86650647443599e-05, "loss": 0.0897, "step": 1000 }, { "epoch": 0.2, "learning_rate": 9.799759711653985e-05, "loss": 0.0718, "step": 1500 }, { "epoch": 0.27, "learning_rate": 9.73301294887198e-05, "loss": 0.0616, "step": 2000 }, { "epoch": 0.33, "learning_rate": 9.666266186089975e-05, "loss": 0.0568, "step": 2500 }, { "epoch": 0.4, "learning_rate": 9.59951942330797e-05, "loss": 0.0525, "step": 3000 }, { "epoch": 0.47, "learning_rate": 9.532772660525965e-05, "loss": 0.0495, "step": 3500 }, { "epoch": 0.53, "learning_rate": 9.46602589774396e-05, "loss": 0.0488, "step": 4000 }, { "epoch": 0.6, "learning_rate": 9.399279134961955e-05, "loss": 0.0457, "step": 4500 }, { "epoch": 0.67, "learning_rate": 9.33253237217995e-05, "loss": 0.0422, "step": 5000 }, { "epoch": 0.73, "learning_rate": 9.265785609397944e-05, "loss": 0.0431, "step": 5500 }, { "epoch": 0.8, "learning_rate": 9.199038846615939e-05, "loss": 0.0467, "step": 6000 }, { "epoch": 0.87, "learning_rate": 9.132292083833934e-05, "loss": 0.0473, "step": 6500 }, { "epoch": 0.93, "learning_rate": 9.065545321051929e-05, "loss": 0.0458, "step": 7000 }, { "epoch": 1.0, "eval_accuracy": 0.9874301209567, "eval_f1": 0.9027943197434722, "eval_loss": 0.04262871295213699, "eval_precision": 0.9134223210975158, "eval_recall": 0.8924107951458069, "eval_runtime": 108.0836, "eval_samples_per_second": 76.441, "eval_steps_per_second": 9.557, "step": 7491 }, { "epoch": 1.0, "learning_rate": 8.998798558269924e-05, "loss": 0.0423, "step": 7500 }, { "epoch": 1.07, "learning_rate": 8.932051795487919e-05, "loss": 0.0369, "step": 8000 }, { "epoch": 1.13, "learning_rate": 8.865305032705914e-05, "loss": 0.0363, "step": 8500 }, { "epoch": 1.2, "learning_rate": 8.798558269923909e-05, "loss": 0.0333, "step": 9000 }, { "epoch": 1.27, "learning_rate": 8.731811507141903e-05, "loss": 0.0356, "step": 9500 }, { "epoch": 1.33, "learning_rate": 8.665064744359898e-05, "loss": 0.036, "step": 10000 }, { "epoch": 1.4, "learning_rate": 8.598317981577893e-05, "loss": 0.0372, "step": 10500 }, { "epoch": 1.47, "learning_rate": 8.531571218795888e-05, "loss": 0.0354, "step": 11000 }, { "epoch": 1.54, "learning_rate": 8.464824456013883e-05, "loss": 0.0356, "step": 11500 }, { "epoch": 1.6, "learning_rate": 8.398077693231878e-05, "loss": 0.0323, "step": 12000 }, { "epoch": 1.67, "learning_rate": 8.331330930449873e-05, "loss": 0.037, "step": 12500 }, { "epoch": 1.74, "learning_rate": 8.264584167667868e-05, "loss": 0.0361, "step": 13000 }, { "epoch": 1.8, "learning_rate": 8.197837404885863e-05, "loss": 0.0346, "step": 13500 }, { "epoch": 1.87, "learning_rate": 8.131090642103857e-05, "loss": 0.0351, "step": 14000 }, { "epoch": 1.94, "learning_rate": 8.064343879321852e-05, "loss": 0.0326, "step": 14500 }, { "epoch": 2.0, "eval_accuracy": 0.9869225169145689, "eval_f1": 0.8974685334464715, "eval_loss": 0.03918388485908508, "eval_precision": 0.935896175400649, "eval_recall": 0.8620720883897844, "eval_runtime": 107.1166, "eval_samples_per_second": 77.131, "eval_steps_per_second": 9.644, "step": 14982 }, { "epoch": 2.0, "learning_rate": 7.997597116539847e-05, "loss": 0.0342, "step": 15000 }, { "epoch": 2.07, "learning_rate": 7.930850353757842e-05, "loss": 0.0269, "step": 15500 }, { "epoch": 2.14, "learning_rate": 7.864103590975837e-05, "loss": 0.0273, "step": 16000 }, { "epoch": 2.2, "learning_rate": 7.797356828193832e-05, "loss": 0.0247, "step": 16500 }, { "epoch": 2.27, "learning_rate": 7.730610065411827e-05, "loss": 0.0262, "step": 17000 }, { "epoch": 2.34, "learning_rate": 7.663863302629822e-05, "loss": 0.0274, "step": 17500 }, { "epoch": 2.4, "learning_rate": 7.597116539847817e-05, "loss": 0.0262, "step": 18000 }, { "epoch": 2.47, "learning_rate": 7.530369777065813e-05, "loss": 0.0272, "step": 18500 }, { "epoch": 2.54, "learning_rate": 7.463623014283808e-05, "loss": 0.0278, "step": 19000 }, { "epoch": 2.6, "learning_rate": 7.396876251501803e-05, "loss": 0.0291, "step": 19500 }, { "epoch": 2.67, "learning_rate": 7.330129488719797e-05, "loss": 0.0265, "step": 20000 }, { "epoch": 2.74, "learning_rate": 7.263382725937792e-05, "loss": 0.0245, "step": 20500 }, { "epoch": 2.8, "learning_rate": 7.196635963155787e-05, "loss": 0.0271, "step": 21000 }, { "epoch": 2.87, "learning_rate": 7.129889200373782e-05, "loss": 0.0283, "step": 21500 }, { "epoch": 2.94, "learning_rate": 7.063142437591777e-05, "loss": 0.0277, "step": 22000 }, { "epoch": 3.0, "eval_accuracy": 0.9892468091074852, "eval_f1": 0.9156408399710355, "eval_loss": 0.03530377894639969, "eval_precision": 0.9151438393341776, "eval_recall": 0.9161383807281289, "eval_runtime": 107.1556, "eval_samples_per_second": 77.103, "eval_steps_per_second": 9.64, "step": 22473 }, { "epoch": 3.0, "learning_rate": 6.996395674809772e-05, "loss": 0.0232, "step": 22500 }, { "epoch": 3.07, "learning_rate": 6.929648912027767e-05, "loss": 0.0195, "step": 23000 }, { "epoch": 3.14, "learning_rate": 6.862902149245762e-05, "loss": 0.0198, "step": 23500 }, { "epoch": 3.2, "learning_rate": 6.796155386463757e-05, "loss": 0.0202, "step": 24000 }, { "epoch": 3.27, "learning_rate": 6.729408623681752e-05, "loss": 0.0181, "step": 24500 }, { "epoch": 3.34, "learning_rate": 6.662661860899748e-05, "loss": 0.0193, "step": 25000 }, { "epoch": 3.4, "learning_rate": 6.595915098117743e-05, "loss": 0.0198, "step": 25500 }, { "epoch": 3.47, "learning_rate": 6.529168335335738e-05, "loss": 0.0198, "step": 26000 }, { "epoch": 3.54, "learning_rate": 6.462421572553732e-05, "loss": 0.0197, "step": 26500 }, { "epoch": 3.6, "learning_rate": 6.395674809771727e-05, "loss": 0.0194, "step": 27000 }, { "epoch": 3.67, "learning_rate": 6.328928046989722e-05, "loss": 0.0203, "step": 27500 }, { "epoch": 3.74, "learning_rate": 6.262181284207717e-05, "loss": 0.0203, "step": 28000 }, { "epoch": 3.8, "learning_rate": 6.195434521425712e-05, "loss": 0.0211, "step": 28500 }, { "epoch": 3.87, "learning_rate": 6.128687758643707e-05, "loss": 0.0201, "step": 29000 }, { "epoch": 3.94, "learning_rate": 6.061940995861701e-05, "loss": 0.0209, "step": 29500 }, { "epoch": 4.0, "eval_accuracy": 0.9885855880526038, "eval_f1": 0.9106117466202469, "eval_loss": 0.04323778301477432, "eval_precision": 0.909255079006772, "eval_recall": 0.9119724687556602, "eval_runtime": 107.2535, "eval_samples_per_second": 77.032, "eval_steps_per_second": 9.631, "step": 29964 }, { "epoch": 4.0, "learning_rate": 5.995194233079696e-05, "loss": 0.0192, "step": 30000 }, { "epoch": 4.07, "learning_rate": 5.928447470297691e-05, "loss": 0.0115, "step": 30500 }, { "epoch": 4.14, "learning_rate": 5.861700707515686e-05, "loss": 0.0131, "step": 31000 }, { "epoch": 4.21, "learning_rate": 5.794953944733681e-05, "loss": 0.014, "step": 31500 }, { "epoch": 4.27, "learning_rate": 5.7282071819516756e-05, "loss": 0.0134, "step": 32000 }, { "epoch": 4.34, "learning_rate": 5.6614604191696705e-05, "loss": 0.0136, "step": 32500 }, { "epoch": 4.41, "learning_rate": 5.5947136563876653e-05, "loss": 0.0152, "step": 33000 }, { "epoch": 4.47, "learning_rate": 5.52796689360566e-05, "loss": 0.0135, "step": 33500 }, { "epoch": 4.54, "learning_rate": 5.461220130823655e-05, "loss": 0.0152, "step": 34000 }, { "epoch": 4.61, "learning_rate": 5.394473368041651e-05, "loss": 0.0138, "step": 34500 }, { "epoch": 4.67, "learning_rate": 5.3277266052596456e-05, "loss": 0.0154, "step": 35000 }, { "epoch": 4.74, "learning_rate": 5.2609798424776405e-05, "loss": 0.0139, "step": 35500 }, { "epoch": 4.81, "learning_rate": 5.1942330796956354e-05, "loss": 0.015, "step": 36000 }, { "epoch": 4.87, "learning_rate": 5.12748631691363e-05, "loss": 0.0131, "step": 36500 }, { "epoch": 4.94, "learning_rate": 5.060739554131625e-05, "loss": 0.0141, "step": 37000 }, { "epoch": 5.0, "eval_accuracy": 0.9885588720503864, "eval_f1": 0.9082037484609421, "eval_loss": 0.049268897622823715, "eval_precision": 0.9146688711307064, "eval_recall": 0.9018293787357363, "eval_runtime": 107.4361, "eval_samples_per_second": 76.902, "eval_steps_per_second": 9.615, "step": 37455 }, { "epoch": 5.01, "learning_rate": 4.99399279134962e-05, "loss": 0.0126, "step": 37500 }, { "epoch": 5.07, "learning_rate": 4.927246028567615e-05, "loss": 0.0073, "step": 38000 }, { "epoch": 5.14, "learning_rate": 4.86049926578561e-05, "loss": 0.0076, "step": 38500 }, { "epoch": 5.21, "learning_rate": 4.793752503003605e-05, "loss": 0.0076, "step": 39000 }, { "epoch": 5.27, "learning_rate": 4.7270057402215996e-05, "loss": 0.0086, "step": 39500 }, { "epoch": 5.34, "learning_rate": 4.6602589774395945e-05, "loss": 0.0092, "step": 40000 }, { "epoch": 5.41, "learning_rate": 4.5935122146575894e-05, "loss": 0.0087, "step": 40500 }, { "epoch": 5.47, "learning_rate": 4.526765451875584e-05, "loss": 0.0085, "step": 41000 }, { "epoch": 5.54, "learning_rate": 4.460018689093579e-05, "loss": 0.0086, "step": 41500 }, { "epoch": 5.61, "learning_rate": 4.393271926311574e-05, "loss": 0.0084, "step": 42000 }, { "epoch": 5.67, "learning_rate": 4.326525163529569e-05, "loss": 0.0091, "step": 42500 }, { "epoch": 5.74, "learning_rate": 4.259778400747564e-05, "loss": 0.0083, "step": 43000 }, { "epoch": 5.81, "learning_rate": 4.193031637965559e-05, "loss": 0.0088, "step": 43500 }, { "epoch": 5.87, "learning_rate": 4.1262848751835536e-05, "loss": 0.0088, "step": 44000 }, { "epoch": 5.94, "learning_rate": 4.0595381124015485e-05, "loss": 0.0081, "step": 44500 }, { "epoch": 6.0, "eval_accuracy": 0.9886724150598104, "eval_f1": 0.9117419940949353, "eval_loss": 0.058003462851047516, "eval_precision": 0.9146085847079194, "eval_recall": 0.9088933164281833, "eval_runtime": 107.2097, "eval_samples_per_second": 77.064, "eval_steps_per_second": 9.635, "step": 44946 }, { "epoch": 6.01, "learning_rate": 3.9927913496195434e-05, "loss": 0.0091, "step": 45000 }, { "epoch": 6.07, "learning_rate": 3.926044586837538e-05, "loss": 0.004, "step": 45500 }, { "epoch": 6.14, "learning_rate": 3.859297824055533e-05, "loss": 0.0044, "step": 46000 }, { "epoch": 6.21, "learning_rate": 3.792551061273528e-05, "loss": 0.0042, "step": 46500 }, { "epoch": 6.27, "learning_rate": 3.725804298491523e-05, "loss": 0.0042, "step": 47000 }, { "epoch": 6.34, "learning_rate": 3.659057535709518e-05, "loss": 0.005, "step": 47500 }, { "epoch": 6.41, "learning_rate": 3.592310772927513e-05, "loss": 0.0036, "step": 48000 }, { "epoch": 6.47, "learning_rate": 3.525564010145508e-05, "loss": 0.0052, "step": 48500 }, { "epoch": 6.54, "learning_rate": 3.4588172473635026e-05, "loss": 0.0046, "step": 49000 }, { "epoch": 6.61, "learning_rate": 3.392070484581498e-05, "loss": 0.0035, "step": 49500 }, { "epoch": 6.67, "learning_rate": 3.325323721799493e-05, "loss": 0.0051, "step": 50000 }, { "epoch": 6.74, "learning_rate": 3.258576959017488e-05, "loss": 0.0051, "step": 50500 }, { "epoch": 6.81, "learning_rate": 3.191830196235483e-05, "loss": 0.0049, "step": 51000 }, { "epoch": 6.87, "learning_rate": 3.125083433453478e-05, "loss": 0.0051, "step": 51500 }, { "epoch": 6.94, "learning_rate": 3.0583366706714726e-05, "loss": 0.0048, "step": 52000 }, { "epoch": 7.0, "eval_accuracy": 0.9884319710398536, "eval_f1": 0.9093145069460259, "eval_loss": 0.07726183533668518, "eval_precision": 0.9146889031430404, "eval_recall": 0.90400289802572, "eval_runtime": 107.8277, "eval_samples_per_second": 76.622, "eval_steps_per_second": 9.58, "step": 52437 }, { "epoch": 7.01, "learning_rate": 2.9915899078894678e-05, "loss": 0.0045, "step": 52500 }, { "epoch": 7.08, "learning_rate": 2.9248431451074627e-05, "loss": 0.0021, "step": 53000 }, { "epoch": 7.14, "learning_rate": 2.8580963823254576e-05, "loss": 0.0022, "step": 53500 }, { "epoch": 7.21, "learning_rate": 2.7913496195434525e-05, "loss": 0.0024, "step": 54000 }, { "epoch": 7.28, "learning_rate": 2.7246028567614474e-05, "loss": 0.0025, "step": 54500 }, { "epoch": 7.34, "learning_rate": 2.6578560939794423e-05, "loss": 0.0018, "step": 55000 }, { "epoch": 7.41, "learning_rate": 2.591109331197437e-05, "loss": 0.0025, "step": 55500 }, { "epoch": 7.48, "learning_rate": 2.524362568415432e-05, "loss": 0.0027, "step": 56000 }, { "epoch": 7.54, "learning_rate": 2.457615805633427e-05, "loss": 0.0024, "step": 56500 }, { "epoch": 7.61, "learning_rate": 2.390869042851422e-05, "loss": 0.0021, "step": 57000 }, { "epoch": 7.68, "learning_rate": 2.3241222800694167e-05, "loss": 0.0022, "step": 57500 }, { "epoch": 7.74, "learning_rate": 2.2573755172874116e-05, "loss": 0.0025, "step": 58000 }, { "epoch": 7.81, "learning_rate": 2.1906287545054065e-05, "loss": 0.0031, "step": 58500 }, { "epoch": 7.88, "learning_rate": 2.1238819917234014e-05, "loss": 0.0022, "step": 59000 }, { "epoch": 7.94, "learning_rate": 2.0571352289413963e-05, "loss": 0.0023, "step": 59500 }, { "epoch": 8.0, "eval_accuracy": 0.9890330810897457, "eval_f1": 0.9139605257215121, "eval_loss": 0.0873148962855339, "eval_precision": 0.9116136588881881, "eval_recall": 0.9163195073356276, "eval_runtime": 107.6976, "eval_samples_per_second": 76.715, "eval_steps_per_second": 9.592, "step": 59928 }, { "epoch": 8.01, "learning_rate": 1.9903884661593912e-05, "loss": 0.0015, "step": 60000 }, { "epoch": 8.08, "learning_rate": 1.9236417033773864e-05, "loss": 0.001, "step": 60500 }, { "epoch": 8.14, "learning_rate": 1.8568949405953813e-05, "loss": 0.0008, "step": 61000 }, { "epoch": 8.21, "learning_rate": 1.7901481778133762e-05, "loss": 0.0011, "step": 61500 }, { "epoch": 8.28, "learning_rate": 1.723401415031371e-05, "loss": 0.0014, "step": 62000 }, { "epoch": 8.34, "learning_rate": 1.656654652249366e-05, "loss": 0.0012, "step": 62500 }, { "epoch": 8.41, "learning_rate": 1.589907889467361e-05, "loss": 0.0012, "step": 63000 }, { "epoch": 8.48, "learning_rate": 1.5231611266853558e-05, "loss": 0.0008, "step": 63500 }, { "epoch": 8.54, "learning_rate": 1.4564143639033507e-05, "loss": 0.0013, "step": 64000 }, { "epoch": 8.61, "learning_rate": 1.3896676011213459e-05, "loss": 0.0012, "step": 64500 }, { "epoch": 8.68, "learning_rate": 1.3229208383393408e-05, "loss": 0.0008, "step": 65000 }, { "epoch": 8.74, "learning_rate": 1.2561740755573357e-05, "loss": 0.0011, "step": 65500 }, { "epoch": 8.81, "learning_rate": 1.1894273127753304e-05, "loss": 0.0013, "step": 66000 }, { "epoch": 8.88, "learning_rate": 1.1226805499933253e-05, "loss": 0.0012, "step": 66500 }, { "epoch": 8.94, "learning_rate": 1.0559337872113203e-05, "loss": 0.0013, "step": 67000 }, { "epoch": 9.0, "eval_accuracy": 0.9886724150598104, "eval_f1": 0.9117727148951621, "eval_loss": 0.09737657755613327, "eval_precision": 0.9138464337700145, "eval_recall": 0.9097083861619272, "eval_runtime": 107.7722, "eval_samples_per_second": 76.662, "eval_steps_per_second": 9.585, "step": 67419 }, { "epoch": 9.01, "learning_rate": 9.891870244293152e-06, "loss": 0.0011, "step": 67500 }, { "epoch": 9.08, "learning_rate": 9.224402616473101e-06, "loss": 0.0003, "step": 68000 }, { "epoch": 9.14, "learning_rate": 8.55693498865305e-06, "loss": 0.0005, "step": 68500 }, { "epoch": 9.21, "learning_rate": 7.889467360833e-06, "loss": 0.0003, "step": 69000 }, { "epoch": 9.28, "learning_rate": 7.221999733012949e-06, "loss": 0.0006, "step": 69500 }, { "epoch": 9.34, "learning_rate": 6.554532105192898e-06, "loss": 0.0007, "step": 70000 }, { "epoch": 9.41, "learning_rate": 5.887064477372848e-06, "loss": 0.0005, "step": 70500 }, { "epoch": 9.48, "learning_rate": 5.219596849552797e-06, "loss": 0.0005, "step": 71000 }, { "epoch": 9.54, "learning_rate": 4.552129221732746e-06, "loss": 0.0007, "step": 71500 }, { "epoch": 9.61, "learning_rate": 3.884661593912695e-06, "loss": 0.0007, "step": 72000 }, { "epoch": 9.68, "learning_rate": 3.2171939660926445e-06, "loss": 0.0006, "step": 72500 }, { "epoch": 9.75, "learning_rate": 2.549726338272594e-06, "loss": 0.0006, "step": 73000 }, { "epoch": 9.81, "learning_rate": 1.8822587104525433e-06, "loss": 0.0006, "step": 73500 }, { "epoch": 9.88, "learning_rate": 1.2147910826324925e-06, "loss": 0.0002, "step": 74000 }, { "epoch": 9.95, "learning_rate": 5.473234548124416e-07, "loss": 0.0005, "step": 74500 }, { "epoch": 10.0, "eval_accuracy": 0.9888661060758868, "eval_f1": 0.9123397290170047, "eval_loss": 0.10618099570274353, "eval_precision": 0.9160883856829802, "eval_recall": 0.9086216265169353, "eval_runtime": 107.8685, "eval_samples_per_second": 76.593, "eval_steps_per_second": 9.576, "step": 74910 } ], "logging_steps": 500, "max_steps": 74910, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.4917264852008653e+18, "trial_name": null, "trial_params": null }