{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.997867803837953, "eval_steps": 10, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.388888888888889e-05, "loss": 2.303, "step": 10 }, { "epoch": 0.03, "eval_accuracy": 0.2334, "eval_loss": 2.1672284603118896, "eval_runtime": 61.2464, "eval_samples_per_second": 81.637, "eval_steps_per_second": 2.563, "step": 10 }, { "epoch": 0.06, "learning_rate": 2.777777777777778e-05, "loss": 2.0158, "step": 20 }, { "epoch": 0.06, "eval_accuracy": 0.657, "eval_loss": 1.6672015190124512, "eval_runtime": 57.6151, "eval_samples_per_second": 86.783, "eval_steps_per_second": 2.725, "step": 20 }, { "epoch": 0.09, "learning_rate": 4.166666666666667e-05, "loss": 1.4855, "step": 30 }, { "epoch": 0.09, "eval_accuracy": 0.8704, "eval_loss": 0.8292349576950073, "eval_runtime": 58.3323, "eval_samples_per_second": 85.716, "eval_steps_per_second": 2.691, "step": 30 }, { "epoch": 0.11, "learning_rate": 4.936507936507937e-05, "loss": 0.7451, "step": 40 }, { "epoch": 0.11, "eval_accuracy": 0.93, "eval_loss": 0.2578178346157074, "eval_runtime": 58.1914, "eval_samples_per_second": 85.923, "eval_steps_per_second": 2.698, "step": 40 }, { "epoch": 0.14, "learning_rate": 4.7777777777777784e-05, "loss": 0.5618, "step": 50 }, { "epoch": 0.14, "eval_accuracy": 0.962, "eval_loss": 0.14759203791618347, "eval_runtime": 58.5488, "eval_samples_per_second": 85.399, "eval_steps_per_second": 2.682, "step": 50 }, { "epoch": 0.17, "learning_rate": 4.6190476190476194e-05, "loss": 0.4545, "step": 60 }, { "epoch": 0.17, "eval_accuracy": 0.9642, "eval_loss": 0.12480442970991135, "eval_runtime": 61.2292, "eval_samples_per_second": 81.66, "eval_steps_per_second": 2.564, "step": 60 }, { "epoch": 0.2, "learning_rate": 4.460317460317461e-05, "loss": 0.4587, "step": 70 }, { "epoch": 0.2, "eval_accuracy": 0.9748, "eval_loss": 0.09407833218574524, "eval_runtime": 57.8655, "eval_samples_per_second": 86.407, "eval_steps_per_second": 2.713, "step": 70 }, { "epoch": 0.23, "learning_rate": 4.301587301587302e-05, "loss": 0.3911, "step": 80 }, { "epoch": 0.23, "eval_accuracy": 0.9712, "eval_loss": 0.09437316656112671, "eval_runtime": 57.5812, "eval_samples_per_second": 86.834, "eval_steps_per_second": 2.727, "step": 80 }, { "epoch": 0.26, "learning_rate": 4.1428571428571437e-05, "loss": 0.3839, "step": 90 }, { "epoch": 0.26, "eval_accuracy": 0.9756, "eval_loss": 0.0848420187830925, "eval_runtime": 58.1027, "eval_samples_per_second": 86.055, "eval_steps_per_second": 2.702, "step": 90 }, { "epoch": 0.28, "learning_rate": 3.984126984126984e-05, "loss": 0.3864, "step": 100 }, { "epoch": 0.28, "eval_accuracy": 0.978, "eval_loss": 0.0744096040725708, "eval_runtime": 57.7886, "eval_samples_per_second": 86.522, "eval_steps_per_second": 2.717, "step": 100 }, { "epoch": 0.31, "learning_rate": 3.8253968253968256e-05, "loss": 0.3141, "step": 110 }, { "epoch": 0.31, "eval_accuracy": 0.98, "eval_loss": 0.06731583178043365, "eval_runtime": 58.0242, "eval_samples_per_second": 86.171, "eval_steps_per_second": 2.706, "step": 110 }, { "epoch": 0.34, "learning_rate": 3.6666666666666666e-05, "loss": 0.3764, "step": 120 }, { "epoch": 0.34, "eval_accuracy": 0.9764, "eval_loss": 0.07063312083482742, "eval_runtime": 58.0733, "eval_samples_per_second": 86.098, "eval_steps_per_second": 2.703, "step": 120 }, { "epoch": 0.37, "learning_rate": 3.5079365079365075e-05, "loss": 0.3003, "step": 130 }, { "epoch": 0.37, "eval_accuracy": 0.984, "eval_loss": 0.05995839089155197, "eval_runtime": 58.0208, "eval_samples_per_second": 86.176, "eval_steps_per_second": 2.706, "step": 130 }, { "epoch": 0.4, "learning_rate": 3.349206349206349e-05, "loss": 0.3566, "step": 140 }, { "epoch": 0.4, "eval_accuracy": 0.9826, "eval_loss": 0.05617852881550789, "eval_runtime": 57.9727, "eval_samples_per_second": 86.248, "eval_steps_per_second": 2.708, "step": 140 }, { "epoch": 0.43, "learning_rate": 3.19047619047619e-05, "loss": 0.2855, "step": 150 }, { "epoch": 0.43, "eval_accuracy": 0.9816, "eval_loss": 0.056736305356025696, "eval_runtime": 57.7871, "eval_samples_per_second": 86.525, "eval_steps_per_second": 2.717, "step": 150 }, { "epoch": 0.45, "learning_rate": 3.0317460317460318e-05, "loss": 0.3351, "step": 160 }, { "epoch": 0.45, "eval_accuracy": 0.9828, "eval_loss": 0.0543464832007885, "eval_runtime": 58.1354, "eval_samples_per_second": 86.006, "eval_steps_per_second": 2.701, "step": 160 }, { "epoch": 0.48, "learning_rate": 2.8730158730158728e-05, "loss": 0.2977, "step": 170 }, { "epoch": 0.48, "eval_accuracy": 0.9798, "eval_loss": 0.05677906423807144, "eval_runtime": 57.6217, "eval_samples_per_second": 86.773, "eval_steps_per_second": 2.725, "step": 170 }, { "epoch": 0.51, "learning_rate": 2.714285714285714e-05, "loss": 0.2924, "step": 180 }, { "epoch": 0.51, "eval_accuracy": 0.9804, "eval_loss": 0.05774379149079323, "eval_runtime": 58.0352, "eval_samples_per_second": 86.155, "eval_steps_per_second": 2.705, "step": 180 }, { "epoch": 0.54, "learning_rate": 2.5555555555555554e-05, "loss": 0.2884, "step": 190 }, { "epoch": 0.54, "eval_accuracy": 0.983, "eval_loss": 0.05509978160262108, "eval_runtime": 57.5779, "eval_samples_per_second": 86.839, "eval_steps_per_second": 2.727, "step": 190 }, { "epoch": 0.57, "learning_rate": 2.396825396825397e-05, "loss": 0.3067, "step": 200 }, { "epoch": 0.57, "eval_accuracy": 0.983, "eval_loss": 0.04865285009145737, "eval_runtime": 57.833, "eval_samples_per_second": 86.456, "eval_steps_per_second": 2.715, "step": 200 }, { "epoch": 0.6, "learning_rate": 2.2380952380952384e-05, "loss": 0.3159, "step": 210 }, { "epoch": 0.6, "eval_accuracy": 0.984, "eval_loss": 0.05127624422311783, "eval_runtime": 57.9626, "eval_samples_per_second": 86.263, "eval_steps_per_second": 2.709, "step": 210 }, { "epoch": 0.63, "learning_rate": 2.0793650793650797e-05, "loss": 0.2795, "step": 220 }, { "epoch": 0.63, "eval_accuracy": 0.9846, "eval_loss": 0.046046771109104156, "eval_runtime": 57.8964, "eval_samples_per_second": 86.361, "eval_steps_per_second": 2.712, "step": 220 }, { "epoch": 0.65, "learning_rate": 1.920634920634921e-05, "loss": 0.3113, "step": 230 }, { "epoch": 0.65, "eval_accuracy": 0.9832, "eval_loss": 0.04946137219667435, "eval_runtime": 57.9735, "eval_samples_per_second": 86.246, "eval_steps_per_second": 2.708, "step": 230 }, { "epoch": 0.68, "learning_rate": 1.761904761904762e-05, "loss": 0.2882, "step": 240 }, { "epoch": 0.68, "eval_accuracy": 0.9838, "eval_loss": 0.047487206757068634, "eval_runtime": 57.8304, "eval_samples_per_second": 86.46, "eval_steps_per_second": 2.715, "step": 240 }, { "epoch": 0.71, "learning_rate": 1.6031746031746033e-05, "loss": 0.263, "step": 250 }, { "epoch": 0.71, "eval_accuracy": 0.9854, "eval_loss": 0.0448877215385437, "eval_runtime": 57.7851, "eval_samples_per_second": 86.528, "eval_steps_per_second": 2.717, "step": 250 }, { "epoch": 0.74, "learning_rate": 1.4444444444444444e-05, "loss": 0.2686, "step": 260 }, { "epoch": 0.74, "eval_accuracy": 0.9826, "eval_loss": 0.05095283314585686, "eval_runtime": 57.6101, "eval_samples_per_second": 86.79, "eval_steps_per_second": 2.725, "step": 260 }, { "epoch": 0.77, "learning_rate": 1.2857142857142857e-05, "loss": 0.2705, "step": 270 }, { "epoch": 0.77, "eval_accuracy": 0.9846, "eval_loss": 0.04833937808871269, "eval_runtime": 57.637, "eval_samples_per_second": 86.75, "eval_steps_per_second": 2.724, "step": 270 }, { "epoch": 0.8, "learning_rate": 1.126984126984127e-05, "loss": 0.2807, "step": 280 }, { "epoch": 0.8, "eval_accuracy": 0.9854, "eval_loss": 0.04302287474274635, "eval_runtime": 57.6918, "eval_samples_per_second": 86.667, "eval_steps_per_second": 2.721, "step": 280 }, { "epoch": 0.82, "learning_rate": 9.682539682539683e-06, "loss": 0.2583, "step": 290 }, { "epoch": 0.82, "eval_accuracy": 0.9858, "eval_loss": 0.04517200589179993, "eval_runtime": 57.63, "eval_samples_per_second": 86.76, "eval_steps_per_second": 2.724, "step": 290 }, { "epoch": 0.85, "learning_rate": 8.095238095238097e-06, "loss": 0.2346, "step": 300 }, { "epoch": 0.85, "eval_accuracy": 0.9858, "eval_loss": 0.04349066689610481, "eval_runtime": 57.5683, "eval_samples_per_second": 86.853, "eval_steps_per_second": 2.727, "step": 300 }, { "epoch": 0.88, "learning_rate": 6.507936507936509e-06, "loss": 0.2294, "step": 310 }, { "epoch": 0.88, "eval_accuracy": 0.986, "eval_loss": 0.04344068095088005, "eval_runtime": 57.6423, "eval_samples_per_second": 86.742, "eval_steps_per_second": 2.724, "step": 310 }, { "epoch": 0.91, "learning_rate": 4.920634920634921e-06, "loss": 0.2608, "step": 320 }, { "epoch": 0.91, "eval_accuracy": 0.986, "eval_loss": 0.04333638399839401, "eval_runtime": 57.6441, "eval_samples_per_second": 86.739, "eval_steps_per_second": 2.724, "step": 320 }, { "epoch": 0.94, "learning_rate": 3.3333333333333333e-06, "loss": 0.2642, "step": 330 }, { "epoch": 0.94, "eval_accuracy": 0.9866, "eval_loss": 0.04251227155327797, "eval_runtime": 59.7952, "eval_samples_per_second": 83.619, "eval_steps_per_second": 2.626, "step": 330 }, { "epoch": 0.97, "learning_rate": 1.7460317460317462e-06, "loss": 0.2781, "step": 340 }, { "epoch": 0.97, "eval_accuracy": 0.986, "eval_loss": 0.041707251220941544, "eval_runtime": 57.5428, "eval_samples_per_second": 86.892, "eval_steps_per_second": 2.728, "step": 340 }, { "epoch": 1.0, "learning_rate": 1.5873015873015874e-07, "loss": 0.247, "step": 350 }, { "epoch": 1.0, "eval_accuracy": 0.9858, "eval_loss": 0.041396625339984894, "eval_runtime": 57.5657, "eval_samples_per_second": 86.857, "eval_steps_per_second": 2.727, "step": 350 }, { "epoch": 1.0, "step": 351, "total_flos": 3.5202418590178345e+18, "train_loss": 0.46742354324910035, "train_runtime": 3479.7951, "train_samples_per_second": 12.932, "train_steps_per_second": 0.101 } ], "logging_steps": 10, "max_steps": 351, "num_train_epochs": 1, "save_steps": 500, "total_flos": 3.5202418590178345e+18, "trial_name": null, "trial_params": null }