{ "best_metric": 10.737244606018066, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.01856665428889714, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003713330857779428, "grad_norm": 0.47366201877593994, "learning_rate": 1.009e-05, "loss": 10.835, "step": 1 }, { "epoch": 0.0003713330857779428, "eval_loss": 10.832501411437988, "eval_runtime": 8.632, "eval_samples_per_second": 131.372, "eval_steps_per_second": 32.901, "step": 1 }, { "epoch": 0.0007426661715558856, "grad_norm": 0.39668479561805725, "learning_rate": 2.018e-05, "loss": 10.8341, "step": 2 }, { "epoch": 0.0011139992573338284, "grad_norm": 0.36069685220718384, "learning_rate": 3.027e-05, "loss": 10.8294, "step": 3 }, { "epoch": 0.0014853323431117712, "grad_norm": 0.3754219710826874, "learning_rate": 4.036e-05, "loss": 10.8319, "step": 4 }, { "epoch": 0.001856665428889714, "grad_norm": 0.4531574547290802, "learning_rate": 5.045e-05, "loss": 10.8283, "step": 5 }, { "epoch": 0.0022279985146676567, "grad_norm": 0.46290841698646545, "learning_rate": 6.054e-05, "loss": 10.8327, "step": 6 }, { "epoch": 0.0025993316004456, "grad_norm": 0.3586803674697876, "learning_rate": 7.062999999999999e-05, "loss": 10.8329, "step": 7 }, { "epoch": 0.0029706646862235424, "grad_norm": 0.4396965205669403, "learning_rate": 8.072e-05, "loss": 10.8277, "step": 8 }, { "epoch": 0.0033419977720014855, "grad_norm": 0.5143634676933289, "learning_rate": 9.081e-05, "loss": 10.8298, "step": 9 }, { "epoch": 0.003713330857779428, "grad_norm": 0.41205930709838867, "learning_rate": 0.0001009, "loss": 10.8375, "step": 10 }, { "epoch": 0.004084663943557371, "grad_norm": 0.4410165548324585, "learning_rate": 0.00010036894736842106, "loss": 10.8263, "step": 11 }, { "epoch": 0.004455997029335313, "grad_norm": 0.47727352380752563, "learning_rate": 9.98378947368421e-05, "loss": 10.8128, "step": 12 }, { "epoch": 0.004827330115113257, "grad_norm": 0.48599427938461304, "learning_rate": 9.930684210526315e-05, "loss": 10.8048, "step": 13 }, { "epoch": 0.0051986632008912, "grad_norm": 0.5060449838638306, "learning_rate": 9.877578947368421e-05, "loss": 10.8201, "step": 14 }, { "epoch": 0.005569996286669142, "grad_norm": 0.4900880455970764, "learning_rate": 9.824473684210527e-05, "loss": 10.8079, "step": 15 }, { "epoch": 0.005941329372447085, "grad_norm": 0.5277690291404724, "learning_rate": 9.771368421052632e-05, "loss": 10.8078, "step": 16 }, { "epoch": 0.0063126624582250275, "grad_norm": 0.5447446703910828, "learning_rate": 9.718263157894736e-05, "loss": 10.8192, "step": 17 }, { "epoch": 0.006683995544002971, "grad_norm": 0.5573148131370544, "learning_rate": 9.665157894736842e-05, "loss": 10.798, "step": 18 }, { "epoch": 0.007055328629780914, "grad_norm": 0.5032119750976562, "learning_rate": 9.612052631578948e-05, "loss": 10.7994, "step": 19 }, { "epoch": 0.007426661715558856, "grad_norm": 0.46621081233024597, "learning_rate": 9.558947368421052e-05, "loss": 10.8053, "step": 20 }, { "epoch": 0.007797994801336799, "grad_norm": 0.4873751103878021, "learning_rate": 9.505842105263159e-05, "loss": 10.8143, "step": 21 }, { "epoch": 0.008169327887114742, "grad_norm": 0.618602454662323, "learning_rate": 9.452736842105263e-05, "loss": 10.7913, "step": 22 }, { "epoch": 0.008540660972892685, "grad_norm": 0.5478153228759766, "learning_rate": 9.399631578947368e-05, "loss": 10.7964, "step": 23 }, { "epoch": 0.008911994058670627, "grad_norm": 0.5051780939102173, "learning_rate": 9.346526315789474e-05, "loss": 10.7684, "step": 24 }, { "epoch": 0.00928332714444857, "grad_norm": 0.5710480213165283, "learning_rate": 9.293421052631578e-05, "loss": 10.7952, "step": 25 }, { "epoch": 0.009654660230226514, "grad_norm": 0.5759515166282654, "learning_rate": 9.240315789473684e-05, "loss": 10.7925, "step": 26 }, { "epoch": 0.010025993316004456, "grad_norm": 0.5633918642997742, "learning_rate": 9.18721052631579e-05, "loss": 10.7648, "step": 27 }, { "epoch": 0.0103973264017824, "grad_norm": 0.5759943723678589, "learning_rate": 9.134105263157895e-05, "loss": 10.756, "step": 28 }, { "epoch": 0.010768659487560341, "grad_norm": 0.516233503818512, "learning_rate": 9.081e-05, "loss": 10.7807, "step": 29 }, { "epoch": 0.011139992573338284, "grad_norm": 0.5473969578742981, "learning_rate": 9.027894736842105e-05, "loss": 10.7649, "step": 30 }, { "epoch": 0.011511325659116228, "grad_norm": 0.46038320660591125, "learning_rate": 8.97478947368421e-05, "loss": 10.7741, "step": 31 }, { "epoch": 0.01188265874489417, "grad_norm": 0.5598097443580627, "learning_rate": 8.921684210526316e-05, "loss": 10.7898, "step": 32 }, { "epoch": 0.012253991830672113, "grad_norm": 0.5376091003417969, "learning_rate": 8.86857894736842e-05, "loss": 10.7795, "step": 33 }, { "epoch": 0.012625324916450055, "grad_norm": 0.4991442859172821, "learning_rate": 8.815473684210527e-05, "loss": 10.7416, "step": 34 }, { "epoch": 0.012996658002227999, "grad_norm": 0.5336849093437195, "learning_rate": 8.762368421052631e-05, "loss": 10.7558, "step": 35 }, { "epoch": 0.013367991088005942, "grad_norm": 0.55255526304245, "learning_rate": 8.709263157894737e-05, "loss": 10.743, "step": 36 }, { "epoch": 0.013739324173783884, "grad_norm": 0.5434712767601013, "learning_rate": 8.656157894736843e-05, "loss": 10.7494, "step": 37 }, { "epoch": 0.014110657259561827, "grad_norm": 0.5447563529014587, "learning_rate": 8.603052631578947e-05, "loss": 10.7471, "step": 38 }, { "epoch": 0.014481990345339769, "grad_norm": 0.5778723955154419, "learning_rate": 8.549947368421052e-05, "loss": 10.732, "step": 39 }, { "epoch": 0.014853323431117713, "grad_norm": 0.5451592206954956, "learning_rate": 8.496842105263158e-05, "loss": 10.7317, "step": 40 }, { "epoch": 0.015224656516895656, "grad_norm": 0.509575605392456, "learning_rate": 8.443736842105264e-05, "loss": 10.76, "step": 41 }, { "epoch": 0.015595989602673598, "grad_norm": 0.5755866765975952, "learning_rate": 8.390631578947369e-05, "loss": 10.7328, "step": 42 }, { "epoch": 0.01596732268845154, "grad_norm": 0.5568187832832336, "learning_rate": 8.337526315789473e-05, "loss": 10.7154, "step": 43 }, { "epoch": 0.016338655774229483, "grad_norm": 0.5118264555931091, "learning_rate": 8.284421052631579e-05, "loss": 10.7452, "step": 44 }, { "epoch": 0.01670998886000743, "grad_norm": 0.559354841709137, "learning_rate": 8.231315789473685e-05, "loss": 10.731, "step": 45 }, { "epoch": 0.01708132194578537, "grad_norm": 0.5593016147613525, "learning_rate": 8.178210526315789e-05, "loss": 10.7448, "step": 46 }, { "epoch": 0.017452655031563312, "grad_norm": 0.6811694502830505, "learning_rate": 8.125105263157894e-05, "loss": 10.7603, "step": 47 }, { "epoch": 0.017823988117341254, "grad_norm": 0.4952441155910492, "learning_rate": 8.072e-05, "loss": 10.7203, "step": 48 }, { "epoch": 0.0181953212031192, "grad_norm": 0.5311501026153564, "learning_rate": 8.018894736842106e-05, "loss": 10.7215, "step": 49 }, { "epoch": 0.01856665428889714, "grad_norm": 0.5852133631706238, "learning_rate": 7.965789473684211e-05, "loss": 10.728, "step": 50 }, { "epoch": 0.01856665428889714, "eval_loss": 10.737244606018066, "eval_runtime": 8.5322, "eval_samples_per_second": 132.909, "eval_steps_per_second": 33.286, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8538764083200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }