{ "best_metric": 1.3310532569885254, "best_model_checkpoint": "miner_id_24/checkpoint-250", "epoch": 0.002108094661882697, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.432378647530788e-06, "eval_loss": 2.016960620880127, "eval_runtime": 3409.8944, "eval_samples_per_second": 14.644, "eval_steps_per_second": 3.661, "step": 1 }, { "epoch": 8.432378647530789e-05, "grad_norm": 0.7229357957839966, "learning_rate": 4.2000000000000004e-05, "loss": 1.8129, "step": 10 }, { "epoch": 0.00016864757295061578, "grad_norm": 0.6948916912078857, "learning_rate": 8.400000000000001e-05, "loss": 1.5689, "step": 20 }, { "epoch": 0.00025297135942592365, "grad_norm": 0.9268532991409302, "learning_rate": 0.000126, "loss": 1.2979, "step": 30 }, { "epoch": 0.00033729514590123157, "grad_norm": 1.0942802429199219, "learning_rate": 0.00016800000000000002, "loss": 1.3864, "step": 40 }, { "epoch": 0.00042161893237653943, "grad_norm": 4.664062023162842, "learning_rate": 0.00021, "loss": 1.9757, "step": 50 }, { "epoch": 0.00042161893237653943, "eval_loss": 1.524364948272705, "eval_runtime": 3404.7905, "eval_samples_per_second": 14.666, "eval_steps_per_second": 3.667, "step": 50 }, { "epoch": 0.0005059427188518473, "grad_norm": 0.6090065240859985, "learning_rate": 0.00020974422527728155, "loss": 1.4874, "step": 60 }, { "epoch": 0.0005902665053271552, "grad_norm": 0.6349875926971436, "learning_rate": 0.0002089781472178649, "loss": 1.241, "step": 70 }, { "epoch": 0.0006745902918024631, "grad_norm": 0.7745322585105896, "learning_rate": 0.0002077054980770496, "loss": 1.0923, "step": 80 }, { "epoch": 0.000758914078277771, "grad_norm": 1.1790937185287476, "learning_rate": 0.00020593247807352348, "loss": 1.1274, "step": 90 }, { "epoch": 0.0008432378647530789, "grad_norm": 3.3187856674194336, "learning_rate": 0.00020366772518252038, "loss": 2.0574, "step": 100 }, { "epoch": 0.0008432378647530789, "eval_loss": 1.4387811422348022, "eval_runtime": 3420.3676, "eval_samples_per_second": 14.599, "eval_steps_per_second": 3.65, "step": 100 }, { "epoch": 0.0009275616512283868, "grad_norm": 0.5923383235931396, "learning_rate": 0.0002009222730524731, "loss": 1.5078, "step": 110 }, { "epoch": 0.0010118854377036946, "grad_norm": 0.6275829672813416, "learning_rate": 0.00019770949725018733, "loss": 1.3687, "step": 120 }, { "epoch": 0.0010962092241790025, "grad_norm": 0.9197534918785095, "learning_rate": 0.00019404505009642473, "loss": 1.1862, "step": 130 }, { "epoch": 0.0011805330106543104, "grad_norm": 1.0203487873077393, "learning_rate": 0.0001899467844093695, "loss": 1.1169, "step": 140 }, { "epoch": 0.0012648567971296184, "grad_norm": 2.69136118888855, "learning_rate": 0.00018543466652749268, "loss": 1.9026, "step": 150 }, { "epoch": 0.0012648567971296184, "eval_loss": 1.4064786434173584, "eval_runtime": 3415.4512, "eval_samples_per_second": 14.62, "eval_steps_per_second": 3.655, "step": 150 }, { "epoch": 0.0013491805836049263, "grad_norm": 0.7159033417701721, "learning_rate": 0.00018053067903555837, "loss": 1.4857, "step": 160 }, { "epoch": 0.001433504370080234, "grad_norm": 0.6960952877998352, "learning_rate": 0.00017525871366768012, "loss": 1.0184, "step": 170 }, { "epoch": 0.001517828156555542, "grad_norm": 0.6692061424255371, "learning_rate": 0.00016964445490919413, "loss": 1.163, "step": 180 }, { "epoch": 0.0016021519430308498, "grad_norm": 1.0128861665725708, "learning_rate": 0.00016371525486442843, "loss": 1.2, "step": 190 }, { "epoch": 0.0016864757295061577, "grad_norm": 3.461775302886963, "learning_rate": 0.0001575, "loss": 1.9163, "step": 200 }, { "epoch": 0.0016864757295061577, "eval_loss": 1.361072063446045, "eval_runtime": 3401.7609, "eval_samples_per_second": 14.679, "eval_steps_per_second": 3.67, "step": 200 }, { "epoch": 0.0017707995159814657, "grad_norm": 0.7128476500511169, "learning_rate": 0.00015102897041285315, "loss": 1.3027, "step": 210 }, { "epoch": 0.0018551233024567736, "grad_norm": 0.6080098748207092, "learning_rate": 0.00014433369230867077, "loss": 1.2597, "step": 220 }, { "epoch": 0.0019394470889320815, "grad_norm": 0.7591213583946228, "learning_rate": 0.0001374467844093695, "loss": 1.0402, "step": 230 }, { "epoch": 0.002023770875407389, "grad_norm": 0.8502488136291504, "learning_rate": 0.0001304017990379651, "loss": 1.0837, "step": 240 }, { "epoch": 0.002108094661882697, "grad_norm": 3.2781741619110107, "learning_rate": 0.0001232330586550277, "loss": 1.553, "step": 250 }, { "epoch": 0.002108094661882697, "eval_loss": 1.3310532569885254, "eval_runtime": 3412.7075, "eval_samples_per_second": 14.631, "eval_steps_per_second": 3.658, "step": 250 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.483158242852864e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }