{ "best_metric": 11.050453186035156, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.19300361881785283, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0038600723763570566, "grad_norm": 4.272080898284912, "learning_rate": 5e-05, "loss": 11.0898, "step": 1 }, { "epoch": 0.0038600723763570566, "eval_loss": 11.0899019241333, "eval_runtime": 1.8216, "eval_samples_per_second": 957.975, "eval_steps_per_second": 120.227, "step": 1 }, { "epoch": 0.007720144752714113, "grad_norm": 4.217146873474121, "learning_rate": 0.0001, "loss": 11.0862, "step": 2 }, { "epoch": 0.01158021712907117, "grad_norm": 4.589000225067139, "learning_rate": 9.989294616193017e-05, "loss": 11.0849, "step": 3 }, { "epoch": 0.015440289505428226, "grad_norm": 4.838542461395264, "learning_rate": 9.957224306869053e-05, "loss": 11.0859, "step": 4 }, { "epoch": 0.019300361881785282, "grad_norm": 5.116698265075684, "learning_rate": 9.903926402016153e-05, "loss": 11.0849, "step": 5 }, { "epoch": 0.02316043425814234, "grad_norm": 4.157015323638916, "learning_rate": 9.829629131445342e-05, "loss": 11.0858, "step": 6 }, { "epoch": 0.027020506634499397, "grad_norm": 4.388658046722412, "learning_rate": 9.73465064747553e-05, "loss": 11.0843, "step": 7 }, { "epoch": 0.030880579010856453, "grad_norm": 4.618198871612549, "learning_rate": 9.619397662556435e-05, "loss": 11.0835, "step": 8 }, { "epoch": 0.03474065138721351, "grad_norm": 4.6166276931762695, "learning_rate": 9.484363707663442e-05, "loss": 11.0822, "step": 9 }, { "epoch": 0.038600723763570564, "grad_norm": 4.856595993041992, "learning_rate": 9.330127018922194e-05, "loss": 11.074, "step": 10 }, { "epoch": 0.04246079613992763, "grad_norm": 4.96979284286499, "learning_rate": 9.157348061512727e-05, "loss": 11.0747, "step": 11 }, { "epoch": 0.04632086851628468, "grad_norm": 5.501377582550049, "learning_rate": 8.966766701456177e-05, "loss": 11.0702, "step": 12 }, { "epoch": 0.05018094089264174, "grad_norm": 4.807370662689209, "learning_rate": 8.759199037394887e-05, "loss": 11.0664, "step": 13 }, { "epoch": 0.054041013268998794, "grad_norm": 4.258703231811523, "learning_rate": 8.535533905932738e-05, "loss": 11.0712, "step": 14 }, { "epoch": 0.05790108564535585, "grad_norm": 4.211122989654541, "learning_rate": 8.296729075500344e-05, "loss": 11.0717, "step": 15 }, { "epoch": 0.061761158021712906, "grad_norm": 4.176085948944092, "learning_rate": 8.043807145043604e-05, "loss": 11.0681, "step": 16 }, { "epoch": 0.06562123039806997, "grad_norm": 4.601294994354248, "learning_rate": 7.777851165098012e-05, "loss": 11.0634, "step": 17 }, { "epoch": 0.06948130277442702, "grad_norm": 4.531471252441406, "learning_rate": 7.500000000000001e-05, "loss": 11.0651, "step": 18 }, { "epoch": 0.07334137515078408, "grad_norm": 3.695352554321289, "learning_rate": 7.211443451095007e-05, "loss": 11.0707, "step": 19 }, { "epoch": 0.07720144752714113, "grad_norm": 3.846127510070801, "learning_rate": 6.91341716182545e-05, "loss": 11.0624, "step": 20 }, { "epoch": 0.08106151990349819, "grad_norm": 3.707059621810913, "learning_rate": 6.607197326515808e-05, "loss": 11.0589, "step": 21 }, { "epoch": 0.08492159227985525, "grad_norm": 4.043831825256348, "learning_rate": 6.294095225512603e-05, "loss": 11.058, "step": 22 }, { "epoch": 0.0887816646562123, "grad_norm": 4.06102180480957, "learning_rate": 5.9754516100806423e-05, "loss": 11.0572, "step": 23 }, { "epoch": 0.09264173703256937, "grad_norm": 4.315989971160889, "learning_rate": 5.6526309611002594e-05, "loss": 11.053, "step": 24 }, { "epoch": 0.09650180940892641, "grad_norm": 4.479373931884766, "learning_rate": 5.327015646150716e-05, "loss": 11.0506, "step": 25 }, { "epoch": 0.09650180940892641, "eval_loss": 11.057889938354492, "eval_runtime": 1.8106, "eval_samples_per_second": 963.753, "eval_steps_per_second": 120.952, "step": 25 }, { "epoch": 0.10036188178528348, "grad_norm": 3.5759146213531494, "learning_rate": 5e-05, "loss": 11.0631, "step": 26 }, { "epoch": 0.10422195416164053, "grad_norm": 3.4713640213012695, "learning_rate": 4.6729843538492847e-05, "loss": 11.0568, "step": 27 }, { "epoch": 0.10808202653799759, "grad_norm": 3.517700672149658, "learning_rate": 4.347369038899744e-05, "loss": 11.0592, "step": 28 }, { "epoch": 0.11194209891435464, "grad_norm": 3.803478479385376, "learning_rate": 4.0245483899193595e-05, "loss": 11.0491, "step": 29 }, { "epoch": 0.1158021712907117, "grad_norm": 3.692575216293335, "learning_rate": 3.705904774487396e-05, "loss": 11.0552, "step": 30 }, { "epoch": 0.11966224366706876, "grad_norm": 3.297511577606201, "learning_rate": 3.392802673484193e-05, "loss": 11.0584, "step": 31 }, { "epoch": 0.12352231604342581, "grad_norm": 3.4056272506713867, "learning_rate": 3.086582838174551e-05, "loss": 11.055, "step": 32 }, { "epoch": 0.12738238841978286, "grad_norm": 3.3810365200042725, "learning_rate": 2.7885565489049946e-05, "loss": 11.0535, "step": 33 }, { "epoch": 0.13124246079613994, "grad_norm": 3.5185937881469727, "learning_rate": 2.500000000000001e-05, "loss": 11.0526, "step": 34 }, { "epoch": 0.13510253317249699, "grad_norm": 3.627316951751709, "learning_rate": 2.2221488349019903e-05, "loss": 11.0508, "step": 35 }, { "epoch": 0.13896260554885403, "grad_norm": 3.6404926776885986, "learning_rate": 1.9561928549563968e-05, "loss": 11.0464, "step": 36 }, { "epoch": 0.1428226779252111, "grad_norm": 3.660294532775879, "learning_rate": 1.703270924499656e-05, "loss": 11.04, "step": 37 }, { "epoch": 0.14668275030156816, "grad_norm": 3.650830030441284, "learning_rate": 1.4644660940672627e-05, "loss": 11.0468, "step": 38 }, { "epoch": 0.1505428226779252, "grad_norm": 3.3983261585235596, "learning_rate": 1.2408009626051137e-05, "loss": 11.0528, "step": 39 }, { "epoch": 0.15440289505428226, "grad_norm": 3.4742696285247803, "learning_rate": 1.0332332985438248e-05, "loss": 11.0479, "step": 40 }, { "epoch": 0.15826296743063933, "grad_norm": 3.557180166244507, "learning_rate": 8.426519384872733e-06, "loss": 11.0481, "step": 41 }, { "epoch": 0.16212303980699638, "grad_norm": 3.4408743381500244, "learning_rate": 6.698729810778065e-06, "loss": 11.0475, "step": 42 }, { "epoch": 0.16598311218335343, "grad_norm": 3.4046168327331543, "learning_rate": 5.156362923365588e-06, "loss": 11.0487, "step": 43 }, { "epoch": 0.1698431845597105, "grad_norm": 2.9810149669647217, "learning_rate": 3.8060233744356633e-06, "loss": 11.0557, "step": 44 }, { "epoch": 0.17370325693606756, "grad_norm": 3.1998450756073, "learning_rate": 2.653493525244721e-06, "loss": 11.0516, "step": 45 }, { "epoch": 0.1775633293124246, "grad_norm": 3.363704204559326, "learning_rate": 1.70370868554659e-06, "loss": 11.0471, "step": 46 }, { "epoch": 0.18142340168878165, "grad_norm": 3.268141746520996, "learning_rate": 9.607359798384785e-07, "loss": 11.0491, "step": 47 }, { "epoch": 0.18528347406513873, "grad_norm": 3.6082420349121094, "learning_rate": 4.277569313094809e-07, "loss": 11.0462, "step": 48 }, { "epoch": 0.18914354644149578, "grad_norm": 3.4948015213012695, "learning_rate": 1.0705383806982606e-07, "loss": 11.0508, "step": 49 }, { "epoch": 0.19300361881785283, "grad_norm": 3.8561291694641113, "learning_rate": 0.0, "loss": 11.042, "step": 50 }, { "epoch": 0.19300361881785283, "eval_loss": 11.050453186035156, "eval_runtime": 1.8118, "eval_samples_per_second": 963.139, "eval_steps_per_second": 120.875, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4202692608000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }