{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17953321364452424, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003590664272890485, "eval_loss": 11.042671203613281, "eval_runtime": 0.2536, "eval_samples_per_second": 465.332, "eval_steps_per_second": 232.666, "step": 1 }, { "epoch": 0.010771992818671455, "grad_norm": 3.922430992126465, "learning_rate": 3e-05, "loss": 44.1523, "step": 3 }, { "epoch": 0.017953321364452424, "eval_loss": 11.039698600769043, "eval_runtime": 0.247, "eval_samples_per_second": 477.641, "eval_steps_per_second": 238.82, "step": 5 }, { "epoch": 0.02154398563734291, "grad_norm": 3.7408604621887207, "learning_rate": 6e-05, "loss": 44.1838, "step": 6 }, { "epoch": 0.03231597845601436, "grad_norm": 3.6904845237731934, "learning_rate": 9e-05, "loss": 44.1623, "step": 9 }, { "epoch": 0.03590664272890485, "eval_loss": 11.029208183288574, "eval_runtime": 0.2744, "eval_samples_per_second": 429.955, "eval_steps_per_second": 214.977, "step": 10 }, { "epoch": 0.04308797127468582, "grad_norm": 3.711442470550537, "learning_rate": 9.938441702975689e-05, "loss": 44.1317, "step": 12 }, { "epoch": 0.05385996409335727, "grad_norm": 3.626725196838379, "learning_rate": 9.619397662556435e-05, "loss": 44.0647, "step": 15 }, { "epoch": 0.05385996409335727, "eval_loss": 11.008035659790039, "eval_runtime": 0.2443, "eval_samples_per_second": 482.954, "eval_steps_per_second": 241.477, "step": 15 }, { "epoch": 0.06463195691202872, "grad_norm": 3.3971025943756104, "learning_rate": 9.045084971874738e-05, "loss": 43.9962, "step": 18 }, { "epoch": 0.0718132854578097, "eval_loss": 10.996824264526367, "eval_runtime": 0.2446, "eval_samples_per_second": 482.454, "eval_steps_per_second": 241.227, "step": 20 }, { "epoch": 0.07540394973070018, "grad_norm": 3.6952595710754395, "learning_rate": 8.247240241650918e-05, "loss": 43.9724, "step": 21 }, { "epoch": 0.08617594254937164, "grad_norm": 3.675994396209717, "learning_rate": 7.269952498697734e-05, "loss": 43.9475, "step": 24 }, { "epoch": 0.08976660682226212, "eval_loss": 10.981802940368652, "eval_runtime": 0.2466, "eval_samples_per_second": 478.457, "eval_steps_per_second": 239.228, "step": 25 }, { "epoch": 0.09694793536804308, "grad_norm": 3.6856377124786377, "learning_rate": 6.167226819279528e-05, "loss": 43.9206, "step": 27 }, { "epoch": 0.10771992818671454, "grad_norm": 3.877152919769287, "learning_rate": 5e-05, "loss": 43.9313, "step": 30 }, { "epoch": 0.10771992818671454, "eval_loss": 10.966498374938965, "eval_runtime": 0.256, "eval_samples_per_second": 460.954, "eval_steps_per_second": 230.477, "step": 30 }, { "epoch": 0.118491921005386, "grad_norm": 3.32474684715271, "learning_rate": 3.832773180720475e-05, "loss": 43.8417, "step": 33 }, { "epoch": 0.12567324955116696, "eval_loss": 10.952359199523926, "eval_runtime": 0.2511, "eval_samples_per_second": 469.903, "eval_steps_per_second": 234.952, "step": 35 }, { "epoch": 0.12926391382405744, "grad_norm": 3.8607993125915527, "learning_rate": 2.7300475013022663e-05, "loss": 43.7851, "step": 36 }, { "epoch": 0.1400359066427289, "grad_norm": 3.5752923488616943, "learning_rate": 1.7527597583490822e-05, "loss": 43.7701, "step": 39 }, { "epoch": 0.1436265709156194, "eval_loss": 10.948221206665039, "eval_runtime": 0.2411, "eval_samples_per_second": 489.506, "eval_steps_per_second": 244.753, "step": 40 }, { "epoch": 0.15080789946140036, "grad_norm": 3.8073651790618896, "learning_rate": 9.549150281252633e-06, "loss": 43.8047, "step": 42 }, { "epoch": 0.1615798922800718, "grad_norm": 3.3860981464385986, "learning_rate": 3.8060233744356633e-06, "loss": 43.8426, "step": 45 }, { "epoch": 0.1615798922800718, "eval_loss": 10.947005271911621, "eval_runtime": 0.2768, "eval_samples_per_second": 426.339, "eval_steps_per_second": 213.17, "step": 45 }, { "epoch": 0.17235188509874327, "grad_norm": 3.8361706733703613, "learning_rate": 6.15582970243117e-07, "loss": 43.7625, "step": 48 }, { "epoch": 0.17953321364452424, "eval_loss": 10.946943283081055, "eval_runtime": 0.2515, "eval_samples_per_second": 469.196, "eval_steps_per_second": 234.598, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 37355520000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }