{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.934971098265896, "global_step": 27500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 2.945809248554913e-05, "loss": 1.4801, "step": 500 }, { "epoch": 0.36, "learning_rate": 2.8916184971098265e-05, "loss": 0.9767, "step": 1000 }, { "epoch": 0.54, "learning_rate": 2.83742774566474e-05, "loss": 0.8751, "step": 1500 }, { "epoch": 0.72, "learning_rate": 2.7832369942196533e-05, "loss": 0.8366, "step": 2000 }, { "epoch": 0.9, "learning_rate": 2.7290462427745663e-05, "loss": 0.7828, "step": 2500 }, { "epoch": 1.08, "learning_rate": 2.6748554913294797e-05, "loss": 0.6836, "step": 3000 }, { "epoch": 1.26, "learning_rate": 2.620664739884393e-05, "loss": 0.5499, "step": 3500 }, { "epoch": 1.45, "learning_rate": 2.5664739884393065e-05, "loss": 0.5279, "step": 4000 }, { "epoch": 1.63, "learning_rate": 2.5122832369942196e-05, "loss": 0.5512, "step": 4500 }, { "epoch": 1.81, "learning_rate": 2.458092485549133e-05, "loss": 0.5806, "step": 5000 }, { "epoch": 1.99, "learning_rate": 2.4039017341040463e-05, "loss": 0.5244, "step": 5500 }, { "epoch": 2.17, "learning_rate": 2.3497109826589597e-05, "loss": 0.3951, "step": 6000 }, { "epoch": 2.35, "learning_rate": 2.2955202312138728e-05, "loss": 0.3984, "step": 6500 }, { "epoch": 2.53, "learning_rate": 2.2413294797687862e-05, "loss": 0.4088, "step": 7000 }, { "epoch": 2.71, "learning_rate": 2.1871387283236992e-05, "loss": 0.4008, "step": 7500 }, { "epoch": 2.89, "learning_rate": 2.132947976878613e-05, "loss": 0.4095, "step": 8000 }, { "epoch": 3.07, "learning_rate": 2.078757225433526e-05, "loss": 0.347, "step": 8500 }, { "epoch": 3.25, "learning_rate": 2.0245664739884394e-05, "loss": 0.3115, "step": 9000 }, { "epoch": 3.43, "learning_rate": 1.9703757225433524e-05, "loss": 0.3224, "step": 9500 }, { "epoch": 3.61, "learning_rate": 1.9161849710982662e-05, "loss": 0.3421, "step": 10000 }, { "epoch": 3.79, "learning_rate": 1.8619942196531792e-05, "loss": 0.3212, "step": 10500 }, { "epoch": 3.97, "learning_rate": 1.8078034682080926e-05, "loss": 0.3241, "step": 11000 }, { "epoch": 4.15, "learning_rate": 1.7536127167630057e-05, "loss": 0.2641, "step": 11500 }, { "epoch": 4.34, "learning_rate": 1.6994219653179194e-05, "loss": 0.2823, "step": 12000 }, { "epoch": 4.52, "learning_rate": 1.6452312138728324e-05, "loss": 0.278, "step": 12500 }, { "epoch": 4.7, "learning_rate": 1.591040462427746e-05, "loss": 0.2774, "step": 13000 }, { "epoch": 4.88, "learning_rate": 1.536849710982659e-05, "loss": 0.2984, "step": 13500 }, { "epoch": 5.06, "learning_rate": 1.4826589595375723e-05, "loss": 0.276, "step": 14000 }, { "epoch": 5.24, "learning_rate": 1.4284682080924855e-05, "loss": 0.2485, "step": 14500 }, { "epoch": 5.42, "learning_rate": 1.3742774566473989e-05, "loss": 0.2513, "step": 15000 }, { "epoch": 5.6, "learning_rate": 1.3200867052023121e-05, "loss": 0.2741, "step": 15500 }, { "epoch": 5.78, "learning_rate": 1.2658959537572255e-05, "loss": 0.2581, "step": 16000 }, { "epoch": 5.96, "learning_rate": 1.2117052023121387e-05, "loss": 0.2682, "step": 16500 }, { "epoch": 6.14, "learning_rate": 1.1575144508670521e-05, "loss": 0.2436, "step": 17000 }, { "epoch": 6.32, "learning_rate": 1.1033236994219653e-05, "loss": 0.2364, "step": 17500 }, { "epoch": 6.5, "learning_rate": 1.0491329479768787e-05, "loss": 0.2336, "step": 18000 }, { "epoch": 6.68, "learning_rate": 9.94942196531792e-06, "loss": 0.2354, "step": 18500 }, { "epoch": 6.86, "learning_rate": 9.407514450867053e-06, "loss": 0.2486, "step": 19000 }, { "epoch": 7.04, "learning_rate": 8.865606936416185e-06, "loss": 0.2461, "step": 19500 }, { "epoch": 7.23, "learning_rate": 8.323699421965318e-06, "loss": 0.2161, "step": 20000 }, { "epoch": 7.41, "learning_rate": 7.781791907514452e-06, "loss": 0.2437, "step": 20500 }, { "epoch": 7.59, "learning_rate": 7.239884393063584e-06, "loss": 0.2121, "step": 21000 }, { "epoch": 7.77, "learning_rate": 6.697976878612717e-06, "loss": 0.2227, "step": 21500 }, { "epoch": 7.95, "learning_rate": 6.15606936416185e-06, "loss": 0.2341, "step": 22000 }, { "epoch": 8.13, "learning_rate": 5.614161849710983e-06, "loss": 0.2182, "step": 22500 }, { "epoch": 8.31, "learning_rate": 5.072254335260116e-06, "loss": 0.2148, "step": 23000 }, { "epoch": 8.49, "learning_rate": 4.530346820809248e-06, "loss": 0.2247, "step": 23500 }, { "epoch": 8.67, "learning_rate": 3.988439306358381e-06, "loss": 0.2268, "step": 24000 }, { "epoch": 8.85, "learning_rate": 3.4465317919075147e-06, "loss": 0.2187, "step": 24500 }, { "epoch": 9.03, "learning_rate": 2.9046242774566473e-06, "loss": 0.2188, "step": 25000 }, { "epoch": 9.21, "learning_rate": 2.3627167630057803e-06, "loss": 0.2196, "step": 25500 }, { "epoch": 9.39, "learning_rate": 1.8208092485549132e-06, "loss": 0.2114, "step": 26000 }, { "epoch": 9.57, "learning_rate": 1.2789017341040462e-06, "loss": 0.2159, "step": 26500 }, { "epoch": 9.75, "learning_rate": 7.369942196531793e-07, "loss": 0.2153, "step": 27000 }, { "epoch": 9.93, "learning_rate": 1.9508670520231215e-07, "loss": 0.2102, "step": 27500 } ], "max_steps": 27680, "num_train_epochs": 10, "total_flos": 7.184484974834688e+16, "trial_name": null, "trial_params": null }