{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.171730940548895, "global_step": 64000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9977639200449365e-05, "loss": 3.6579, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.995527840089873e-05, "loss": 3.4615, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.993291760134809e-05, "loss": 3.3921, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.991055680179745e-05, "loss": 3.3552, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9888196002246816e-05, "loss": 3.3133, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.986583520269618e-05, "loss": 3.2847, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.984347440314554e-05, "loss": 3.261, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9821113603594904e-05, "loss": 3.2291, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.979875280404427e-05, "loss": 3.2023, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.977639200449363e-05, "loss": 3.1846, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.975403120494299e-05, "loss": 3.1743, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.9731670405392355e-05, "loss": 3.1475, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.970930960584171e-05, "loss": 3.1315, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.968694880629108e-05, "loss": 3.1389, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.9664588006740444e-05, "loss": 3.0986, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.9642227207189806e-05, "loss": 3.0938, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.961986640763916e-05, "loss": 3.0844, "step": 8500 }, { "epoch": 0.02, "learning_rate": 4.959750560808853e-05, "loss": 3.0931, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.9575144808537895e-05, "loss": 3.0665, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.955278400898726e-05, "loss": 3.0465, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.953042320943661e-05, "loss": 3.0343, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.950806240988598e-05, "loss": 3.036, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.9485701610335346e-05, "loss": 3.0273, "step": 11500 }, { "epoch": 0.03, "learning_rate": 4.94633408107847e-05, "loss": 3.0231, "step": 12000 }, { "epoch": 0.03, "learning_rate": 4.9440980011234064e-05, "loss": 3.0177, "step": 12500 }, { "epoch": 0.03, "learning_rate": 4.9418619211683434e-05, "loss": 2.9969, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.9396258412132796e-05, "loss": 3.0019, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.937389761258215e-05, "loss": 2.9867, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.9351536813031515e-05, "loss": 2.9825, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.9329176013480885e-05, "loss": 2.9799, "step": 15000 }, { "epoch": 0.04, "learning_rate": 4.930681521393025e-05, "loss": 2.9578, "step": 15500 }, { "epoch": 0.04, "learning_rate": 4.9284454414379603e-05, "loss": 2.949, "step": 16000 }, { "epoch": 0.04, "learning_rate": 4.9262093614828966e-05, "loss": 2.9598, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.9239732815278336e-05, "loss": 2.9568, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.921737201572769e-05, "loss": 2.9395, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.9195011216177054e-05, "loss": 2.9499, "step": 18000 }, { "epoch": 0.05, "learning_rate": 4.917265041662642e-05, "loss": 2.9316, "step": 18500 }, { "epoch": 0.05, "learning_rate": 4.915028961707579e-05, "loss": 2.9356, "step": 19000 }, { "epoch": 0.05, "learning_rate": 4.912792881752514e-05, "loss": 2.9105, "step": 19500 }, { "epoch": 0.05, "learning_rate": 4.9105568017974505e-05, "loss": 2.9277, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.908320721842387e-05, "loss": 2.9224, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.906084641887324e-05, "loss": 2.9136, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.9038485619322594e-05, "loss": 2.9109, "step": 21500 }, { "epoch": 0.06, "learning_rate": 4.9016124819771956e-05, "loss": 2.896, "step": 22000 }, { "epoch": 0.06, "learning_rate": 4.899376402022132e-05, "loss": 2.8996, "step": 22500 }, { "epoch": 0.06, "learning_rate": 4.897140322067068e-05, "loss": 2.9059, "step": 23000 }, { "epoch": 0.06, "learning_rate": 4.8949042421120045e-05, "loss": 2.8849, "step": 23500 }, { "epoch": 0.06, "learning_rate": 4.892668162156941e-05, "loss": 2.8876, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.890432082201877e-05, "loss": 2.887, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.888196002246813e-05, "loss": 2.8687, "step": 25000 }, { "epoch": 0.07, "learning_rate": 4.8859599222917496e-05, "loss": 2.8734, "step": 25500 }, { "epoch": 0.07, "learning_rate": 4.883723842336686e-05, "loss": 2.8745, "step": 26000 }, { "epoch": 0.07, "learning_rate": 4.881487762381622e-05, "loss": 2.8904, "step": 26500 }, { "epoch": 0.07, "learning_rate": 4.8792516824265584e-05, "loss": 2.8652, "step": 27000 }, { "epoch": 0.07, "learning_rate": 4.8770156024714947e-05, "loss": 2.8715, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.874779522516431e-05, "loss": 2.8643, "step": 28000 }, { "epoch": 0.08, "learning_rate": 4.872543442561367e-05, "loss": 2.8483, "step": 28500 }, { "epoch": 0.08, "learning_rate": 4.8703073626063035e-05, "loss": 2.8537, "step": 29000 }, { "epoch": 0.08, "learning_rate": 4.86807128265124e-05, "loss": 2.8456, "step": 29500 }, { "epoch": 0.08, "learning_rate": 4.865835202696176e-05, "loss": 2.8494, "step": 30000 }, { "epoch": 0.08, "learning_rate": 4.863599122741112e-05, "loss": 2.8352, "step": 30500 }, { "epoch": 0.08, "learning_rate": 4.8613630427860486e-05, "loss": 2.8388, "step": 31000 }, { "epoch": 0.08, "learning_rate": 4.859126962830985e-05, "loss": 2.8381, "step": 31500 }, { "epoch": 0.09, "learning_rate": 4.856890882875921e-05, "loss": 2.82, "step": 32000 }, { "epoch": 0.09, "learning_rate": 4.8546548029208574e-05, "loss": 2.8428, "step": 32500 }, { "epoch": 0.09, "learning_rate": 4.852418722965794e-05, "loss": 2.8436, "step": 33000 }, { "epoch": 0.09, "learning_rate": 4.85018264301073e-05, "loss": 2.826, "step": 33500 }, { "epoch": 0.09, "learning_rate": 4.8479465630556655e-05, "loss": 2.8274, "step": 34000 }, { "epoch": 0.09, "learning_rate": 4.8457104831006025e-05, "loss": 2.8338, "step": 34500 }, { "epoch": 0.09, "learning_rate": 4.843474403145539e-05, "loss": 2.8228, "step": 35000 }, { "epoch": 0.1, "learning_rate": 4.841238323190475e-05, "loss": 2.8078, "step": 35500 }, { "epoch": 0.1, "learning_rate": 4.8390022432354106e-05, "loss": 2.8198, "step": 36000 }, { "epoch": 0.1, "learning_rate": 4.8367661632803476e-05, "loss": 2.8088, "step": 36500 }, { "epoch": 0.1, "learning_rate": 4.834530083325284e-05, "loss": 2.8287, "step": 37000 }, { "epoch": 0.1, "learning_rate": 4.83229400337022e-05, "loss": 2.7993, "step": 37500 }, { "epoch": 0.1, "learning_rate": 4.830057923415156e-05, "loss": 2.8223, "step": 38000 }, { "epoch": 0.1, "learning_rate": 4.827821843460093e-05, "loss": 2.8084, "step": 38500 }, { "epoch": 0.1, "learning_rate": 4.825585763505029e-05, "loss": 2.8119, "step": 39000 }, { "epoch": 0.11, "learning_rate": 4.8233496835499646e-05, "loss": 2.8131, "step": 39500 }, { "epoch": 0.11, "learning_rate": 4.821113603594901e-05, "loss": 2.7907, "step": 40000 }, { "epoch": 0.11, "learning_rate": 4.818877523639838e-05, "loss": 2.8071, "step": 40500 }, { "epoch": 0.11, "learning_rate": 4.816641443684774e-05, "loss": 2.7969, "step": 41000 }, { "epoch": 0.11, "learning_rate": 4.81440536372971e-05, "loss": 2.8045, "step": 41500 }, { "epoch": 0.11, "learning_rate": 4.812169283774646e-05, "loss": 2.7991, "step": 42000 }, { "epoch": 0.11, "learning_rate": 4.809933203819583e-05, "loss": 2.7845, "step": 42500 }, { "epoch": 0.12, "learning_rate": 4.807697123864519e-05, "loss": 2.8144, "step": 43000 }, { "epoch": 0.12, "learning_rate": 4.805461043909455e-05, "loss": 2.764, "step": 43500 }, { "epoch": 0.12, "learning_rate": 4.803224963954391e-05, "loss": 2.7744, "step": 44000 }, { "epoch": 0.12, "learning_rate": 4.800988883999328e-05, "loss": 2.7857, "step": 44500 }, { "epoch": 0.12, "learning_rate": 4.7987528040442636e-05, "loss": 2.8017, "step": 45000 }, { "epoch": 0.12, "learning_rate": 4.7965167240892e-05, "loss": 2.7958, "step": 45500 }, { "epoch": 0.12, "learning_rate": 4.794280644134136e-05, "loss": 2.7766, "step": 46000 }, { "epoch": 0.12, "learning_rate": 4.792044564179073e-05, "loss": 2.7614, "step": 46500 }, { "epoch": 0.13, "learning_rate": 4.789808484224009e-05, "loss": 2.7739, "step": 47000 }, { "epoch": 0.13, "learning_rate": 4.787572404268945e-05, "loss": 2.7736, "step": 47500 }, { "epoch": 0.13, "learning_rate": 4.785336324313882e-05, "loss": 2.7683, "step": 48000 }, { "epoch": 0.13, "learning_rate": 4.783100244358818e-05, "loss": 2.7765, "step": 48500 }, { "epoch": 0.13, "learning_rate": 4.780864164403754e-05, "loss": 2.752, "step": 49000 }, { "epoch": 0.13, "learning_rate": 4.77862808444869e-05, "loss": 2.7666, "step": 49500 }, { "epoch": 0.13, "learning_rate": 4.776392004493627e-05, "loss": 2.7481, "step": 50000 }, { "epoch": 0.14, "learning_rate": 4.7741559245385626e-05, "loss": 2.7724, "step": 50500 }, { "epoch": 0.14, "learning_rate": 4.771919844583499e-05, "loss": 2.7773, "step": 51000 }, { "epoch": 0.14, "learning_rate": 4.769683764628435e-05, "loss": 2.7659, "step": 51500 }, { "epoch": 0.14, "learning_rate": 4.767447684673372e-05, "loss": 2.7473, "step": 52000 }, { "epoch": 0.14, "learning_rate": 4.765211604718308e-05, "loss": 2.761, "step": 52500 }, { "epoch": 0.14, "learning_rate": 4.762975524763244e-05, "loss": 2.7289, "step": 53000 }, { "epoch": 0.14, "learning_rate": 4.76073944480818e-05, "loss": 2.7657, "step": 53500 }, { "epoch": 0.14, "learning_rate": 4.758503364853117e-05, "loss": 2.7719, "step": 54000 }, { "epoch": 0.15, "learning_rate": 4.756267284898053e-05, "loss": 2.7581, "step": 54500 }, { "epoch": 0.15, "learning_rate": 4.754031204942989e-05, "loss": 2.7548, "step": 55000 }, { "epoch": 0.15, "learning_rate": 4.7517951249879253e-05, "loss": 2.7509, "step": 55500 }, { "epoch": 0.15, "learning_rate": 4.7495590450328616e-05, "loss": 2.738, "step": 56000 }, { "epoch": 0.15, "learning_rate": 4.747322965077798e-05, "loss": 2.752, "step": 56500 }, { "epoch": 0.15, "learning_rate": 4.745086885122734e-05, "loss": 2.7373, "step": 57000 }, { "epoch": 0.15, "learning_rate": 4.7428508051676704e-05, "loss": 2.7541, "step": 57500 }, { "epoch": 0.16, "learning_rate": 4.740614725212607e-05, "loss": 2.7332, "step": 58000 }, { "epoch": 0.16, "learning_rate": 4.738378645257543e-05, "loss": 2.713, "step": 58500 }, { "epoch": 0.16, "learning_rate": 4.736142565302479e-05, "loss": 2.7534, "step": 59000 }, { "epoch": 0.16, "learning_rate": 4.7339064853474155e-05, "loss": 2.753, "step": 59500 }, { "epoch": 0.16, "learning_rate": 4.731670405392352e-05, "loss": 2.7357, "step": 60000 }, { "epoch": 0.16, "learning_rate": 4.729434325437288e-05, "loss": 2.7286, "step": 60500 }, { "epoch": 0.16, "learning_rate": 4.7271982454822244e-05, "loss": 2.7137, "step": 61000 }, { "epoch": 0.17, "learning_rate": 4.7249621655271606e-05, "loss": 2.73, "step": 61500 }, { "epoch": 0.17, "learning_rate": 4.722726085572097e-05, "loss": 2.7375, "step": 62000 }, { "epoch": 0.17, "learning_rate": 4.720490005617033e-05, "loss": 2.7257, "step": 62500 }, { "epoch": 0.17, "learning_rate": 4.7182539256619695e-05, "loss": 2.7162, "step": 63000 }, { "epoch": 0.17, "learning_rate": 4.716017845706906e-05, "loss": 2.7421, "step": 63500 }, { "epoch": 0.17, "learning_rate": 4.713781765751842e-05, "loss": 2.7255, "step": 64000 } ], "max_steps": 1118028, "num_train_epochs": 3, "total_flos": 1.4482346926915584e+17, "trial_name": null, "trial_params": null }