{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 105, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1e-05, "loss": 3.0768, "step": 1 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 3.076, "step": 2 }, { "epoch": 0.03, "learning_rate": 1.9995348836233517e-05, "loss": 2.7773, "step": 3 }, { "epoch": 0.04, "learning_rate": 1.998139967159894e-05, "loss": 2.4612, "step": 4 }, { "epoch": 0.05, "learning_rate": 1.9958165482066094e-05, "loss": 2.3567, "step": 5 }, { "epoch": 0.06, "learning_rate": 1.992566788083908e-05, "loss": 2.2825, "step": 6 }, { "epoch": 0.07, "learning_rate": 1.9883937098250962e-05, "loss": 2.2337, "step": 7 }, { "epoch": 0.08, "learning_rate": 1.9833011953642525e-05, "loss": 2.174, "step": 8 }, { "epoch": 0.09, "learning_rate": 1.9772939819251247e-05, "loss": 2.1664, "step": 9 }, { "epoch": 0.1, "learning_rate": 1.9703776576144106e-05, "loss": 2.1585, "step": 10 }, { "epoch": 0.1, "learning_rate": 1.962558656223516e-05, "loss": 2.1401, "step": 11 }, { "epoch": 0.11, "learning_rate": 1.953844251243633e-05, "loss": 2.1218, "step": 12 }, { "epoch": 0.12, "learning_rate": 1.9442425490996987e-05, "loss": 2.1043, "step": 13 }, { "epoch": 0.13, "learning_rate": 1.933762481609536e-05, "loss": 2.1185, "step": 14 }, { "epoch": 0.14, "learning_rate": 1.9224137976751797e-05, "loss": 2.1042, "step": 15 }, { "epoch": 0.15, "learning_rate": 1.910207054214133e-05, "loss": 2.0782, "step": 16 }, { "epoch": 0.16, "learning_rate": 1.8971536063389745e-05, "loss": 2.0411, "step": 17 }, { "epoch": 0.17, "learning_rate": 1.8832655967944607e-05, "loss": 2.0752, "step": 18 }, { "epoch": 0.18, "learning_rate": 1.868555944661949e-05, "loss": 2.0272, "step": 19 }, { "epoch": 0.19, "learning_rate": 1.853038333341642e-05, "loss": 2.0785, "step": 20 }, { "epoch": 0.2, "learning_rate": 1.8367271978238422e-05, "loss": 2.0635, "step": 21 }, { "epoch": 0.21, "learning_rate": 1.8196377112610524e-05, "loss": 2.0478, "step": 22 }, { "epoch": 0.22, "learning_rate": 1.8017857708534107e-05, "loss": 2.0435, "step": 23 }, { "epoch": 0.23, "learning_rate": 1.783187983060594e-05, "loss": 2.0279, "step": 24 }, { "epoch": 0.24, "learning_rate": 1.763861648153945e-05, "loss": 2.0068, "step": 25 }, { "epoch": 0.25, "learning_rate": 1.743824744123196e-05, "loss": 2.0045, "step": 26 }, { "epoch": 0.26, "learning_rate": 1.7230959099527512e-05, "loss": 1.9743, "step": 27 }, { "epoch": 0.27, "learning_rate": 1.7016944282830935e-05, "loss": 2.0074, "step": 28 }, { "epoch": 0.28, "learning_rate": 1.6796402074734404e-05, "loss": 2.0146, "step": 29 }, { "epoch": 0.29, "learning_rate": 1.6569537630823385e-05, "loss": 2.0052, "step": 30 }, { "epoch": 0.3, "learning_rate": 1.6336561987834155e-05, "loss": 2.0006, "step": 31 }, { "epoch": 0.3, "learning_rate": 1.6097691867340547e-05, "loss": 1.986, "step": 32 }, { "epoch": 0.31, "learning_rate": 1.585314947415242e-05, "loss": 1.9663, "step": 33 }, { "epoch": 0.32, "learning_rate": 1.5603162289613503e-05, "loss": 1.9941, "step": 34 }, { "epoch": 0.33, "learning_rate": 1.5347962859990744e-05, "loss": 1.9706, "step": 35 }, { "epoch": 0.34, "learning_rate": 1.5087788580152207e-05, "loss": 1.9814, "step": 36 }, { "epoch": 0.35, "learning_rate": 1.4822881472734563e-05, "loss": 1.9713, "step": 37 }, { "epoch": 0.36, "learning_rate": 1.4553487963005712e-05, "loss": 1.9883, "step": 38 }, { "epoch": 0.37, "learning_rate": 1.427985864963193e-05, "loss": 1.9605, "step": 39 }, { "epoch": 0.38, "learning_rate": 1.400224807156278e-05, "loss": 1.9615, "step": 40 }, { "epoch": 0.39, "learning_rate": 1.3720914471250644e-05, "loss": 1.9548, "step": 41 }, { "epoch": 0.4, "learning_rate": 1.3436119554425133e-05, "loss": 1.9549, "step": 42 }, { "epoch": 0.41, "learning_rate": 1.314812824664585e-05, "loss": 1.9659, "step": 43 }, { "epoch": 0.42, "learning_rate": 1.285720844685996e-05, "loss": 1.9628, "step": 44 }, { "epoch": 0.43, "learning_rate": 1.2563630778193805e-05, "loss": 1.9431, "step": 45 }, { "epoch": 0.44, "learning_rate": 1.2267668336210411e-05, "loss": 1.9727, "step": 46 }, { "epoch": 0.45, "learning_rate": 1.1969596434867063e-05, "loss": 1.9604, "step": 47 }, { "epoch": 0.46, "learning_rate": 1.1669692350409223e-05, "loss": 1.939, "step": 48 }, { "epoch": 0.47, "learning_rate": 1.1368235063439103e-05, "loss": 1.9414, "step": 49 }, { "epoch": 0.48, "learning_rate": 1.1065504999398762e-05, "loss": 1.9298, "step": 50 }, { "epoch": 0.49, "learning_rate": 1.0761783767709182e-05, "loss": 1.9248, "step": 51 }, { "epoch": 0.5, "learning_rate": 1.0457353899807947e-05, "loss": 1.9556, "step": 52 }, { "epoch": 0.5, "learning_rate": 1.015249858632926e-05, "loss": 1.9553, "step": 53 }, { "epoch": 0.51, "learning_rate": 9.847501413670742e-06, "loss": 1.9302, "step": 54 }, { "epoch": 0.52, "learning_rate": 9.542646100192056e-06, "loss": 1.9398, "step": 55 }, { "epoch": 0.53, "learning_rate": 9.238216232290821e-06, "loss": 1.9346, "step": 56 }, { "epoch": 0.54, "learning_rate": 8.934495000601241e-06, "loss": 1.9306, "step": 57 }, { "epoch": 0.55, "learning_rate": 8.6317649365609e-06, "loss": 1.9383, "step": 58 }, { "epoch": 0.56, "learning_rate": 8.330307649590782e-06, "loss": 1.9141, "step": 59 }, { "epoch": 0.57, "learning_rate": 8.030403565132942e-06, "loss": 1.9167, "step": 60 }, { "epoch": 0.58, "learning_rate": 7.732331663789592e-06, "loss": 1.933, "step": 61 }, { "epoch": 0.59, "learning_rate": 7.436369221806201e-06, "loss": 1.9143, "step": 62 }, { "epoch": 0.6, "learning_rate": 7.142791553140045e-06, "loss": 1.9258, "step": 63 }, { "epoch": 0.61, "learning_rate": 6.851871753354154e-06, "loss": 1.9299, "step": 64 }, { "epoch": 0.62, "learning_rate": 6.563880445574873e-06, "loss": 1.9293, "step": 65 }, { "epoch": 0.63, "learning_rate": 6.2790855287493605e-06, "loss": 1.9081, "step": 66 }, { "epoch": 0.64, "learning_rate": 5.99775192843722e-06, "loss": 1.915, "step": 67 }, { "epoch": 0.65, "learning_rate": 5.720141350368072e-06, "loss": 1.9079, "step": 68 }, { "epoch": 0.66, "learning_rate": 5.446512036994287e-06, "loss": 1.9374, "step": 69 }, { "epoch": 0.67, "learning_rate": 5.177118527265438e-06, "loss": 1.91, "step": 70 }, { "epoch": 0.68, "learning_rate": 4.912211419847795e-06, "loss": 1.9288, "step": 71 }, { "epoch": 0.69, "learning_rate": 4.652037140009259e-06, "loss": 1.9157, "step": 72 }, { "epoch": 0.7, "learning_rate": 4.396837710386503e-06, "loss": 1.9313, "step": 73 }, { "epoch": 0.7, "learning_rate": 4.1468505258475785e-06, "loss": 1.9036, "step": 74 }, { "epoch": 0.71, "learning_rate": 3.902308132659457e-06, "loss": 1.9472, "step": 75 }, { "epoch": 0.72, "learning_rate": 3.6634380121658484e-06, "loss": 1.9138, "step": 76 }, { "epoch": 0.73, "learning_rate": 3.4304623691766193e-06, "loss": 1.939, "step": 77 }, { "epoch": 0.74, "learning_rate": 3.203597925265598e-06, "loss": 1.9081, "step": 78 }, { "epoch": 0.75, "learning_rate": 2.98305571716907e-06, "loss": 1.8986, "step": 79 }, { "epoch": 0.76, "learning_rate": 2.7690409004724883e-06, "loss": 1.893, "step": 80 }, { "epoch": 0.77, "learning_rate": 2.56175255876804e-06, "loss": 1.8973, "step": 81 }, { "epoch": 0.78, "learning_rate": 2.3613835184605527e-06, "loss": 1.9217, "step": 82 }, { "epoch": 0.79, "learning_rate": 2.1681201693940667e-06, "loss": 1.9373, "step": 83 }, { "epoch": 0.8, "learning_rate": 1.982142291465896e-06, "loss": 1.8995, "step": 84 }, { "epoch": 0.81, "learning_rate": 1.8036228873894745e-06, "loss": 1.9153, "step": 85 }, { "epoch": 0.82, "learning_rate": 1.6327280217615793e-06, "loss": 1.8962, "step": 86 }, { "epoch": 0.83, "learning_rate": 1.4696166665835853e-06, "loss": 1.9066, "step": 87 }, { "epoch": 0.84, "learning_rate": 1.3144405533805138e-06, "loss": 1.9025, "step": 88 }, { "epoch": 0.85, "learning_rate": 1.1673440320553941e-06, "loss": 1.893, "step": 89 }, { "epoch": 0.86, "learning_rate": 1.02846393661026e-06, "loss": 1.9014, "step": 90 }, { "epoch": 0.87, "learning_rate": 8.979294578586739e-07, "loss": 1.8953, "step": 91 }, { "epoch": 0.88, "learning_rate": 7.758620232482083e-07, "loss": 1.9089, "step": 92 }, { "epoch": 0.89, "learning_rate": 6.623751839046455e-07, "loss": 1.8885, "step": 93 }, { "epoch": 0.9, "learning_rate": 5.575745090030138e-07, "loss": 1.909, "step": 94 }, { "epoch": 0.9, "learning_rate": 4.61557487563673e-07, "loss": 1.9084, "step": 95 }, { "epoch": 0.91, "learning_rate": 3.7441343776484116e-07, "loss": 1.9123, "step": 96 }, { "epoch": 0.92, "learning_rate": 2.9622342385589256e-07, "loss": 1.9122, "step": 97 }, { "epoch": 0.93, "learning_rate": 2.2706018074875046e-07, "loss": 1.9041, "step": 98 }, { "epoch": 0.94, "learning_rate": 1.669880463574758e-07, "loss": 1.8988, "step": 99 }, { "epoch": 0.95, "learning_rate": 1.160629017490389e-07, "loss": 1.9071, "step": 100 }, { "epoch": 0.96, "learning_rate": 7.433211916092143e-08, "loss": 1.9057, "step": 101 }, { "epoch": 0.97, "learning_rate": 4.183451793390747e-08, "loss": 1.9146, "step": 102 }, { "epoch": 0.98, "learning_rate": 1.860032840106163e-08, "loss": 1.9051, "step": 103 }, { "epoch": 0.99, "learning_rate": 4.651163766484779e-09, "loss": 1.9185, "step": 104 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.8839, "step": 105 }, { "epoch": 1.0, "step": 105, "total_flos": 6.475878851632169e+17, "train_loss": 2.0050100269771756, "train_runtime": 1593.9957, "train_samples_per_second": 62.742, "train_steps_per_second": 0.066 } ], "max_steps": 105, "num_train_epochs": 1, "total_flos": 6.475878851632169e+17, "trial_name": null, "trial_params": null }