{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.35714285714285715, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014285714285714285, "grad_norm": 14.128439903259277, "learning_rate": 9.857142857142858e-05, "loss": 5.5604, "step": 20 }, { "epoch": 0.02857142857142857, "grad_norm": 14.23494815826416, "learning_rate": 9.714285714285715e-05, "loss": 4.6719, "step": 40 }, { "epoch": 0.04285714285714286, "grad_norm": 13.843968391418457, "learning_rate": 9.571428571428573e-05, "loss": 4.4492, "step": 60 }, { "epoch": 0.05714285714285714, "grad_norm": 12.36043643951416, "learning_rate": 9.428571428571429e-05, "loss": 4.1471, "step": 80 }, { "epoch": 0.07142857142857142, "grad_norm": 11.155304908752441, "learning_rate": 9.285714285714286e-05, "loss": 3.9922, "step": 100 }, { "epoch": 0.08571428571428572, "grad_norm": 12.575331687927246, "learning_rate": 9.142857142857143e-05, "loss": 4.1232, "step": 120 }, { "epoch": 0.1, "grad_norm": 14.813372611999512, "learning_rate": 9e-05, "loss": 4.0796, "step": 140 }, { "epoch": 0.11428571428571428, "grad_norm": 15.004693984985352, "learning_rate": 8.857142857142857e-05, "loss": 3.7742, "step": 160 }, { "epoch": 0.12857142857142856, "grad_norm": 15.635838508605957, "learning_rate": 8.714285714285715e-05, "loss": 3.8024, "step": 180 }, { "epoch": 0.14285714285714285, "grad_norm": 12.239358901977539, "learning_rate": 8.571428571428571e-05, "loss": 3.7662, "step": 200 }, { "epoch": 0.15714285714285714, "grad_norm": 17.418853759765625, "learning_rate": 8.428571428571429e-05, "loss": 3.8781, "step": 220 }, { "epoch": 0.17142857142857143, "grad_norm": 18.593626022338867, "learning_rate": 8.285714285714287e-05, "loss": 3.9188, "step": 240 }, { "epoch": 0.18571428571428572, "grad_norm": 11.001754760742188, "learning_rate": 8.142857142857143e-05, "loss": 3.6587, "step": 260 }, { "epoch": 0.2, "grad_norm": 14.99055290222168, "learning_rate": 8e-05, "loss": 3.7494, "step": 280 }, { "epoch": 0.21428571428571427, "grad_norm": 13.02241039276123, "learning_rate": 7.857142857142858e-05, "loss": 3.7421, "step": 300 }, { "epoch": 0.22857142857142856, "grad_norm": 19.851003646850586, "learning_rate": 7.714285714285715e-05, "loss": 3.6926, "step": 320 }, { "epoch": 0.24285714285714285, "grad_norm": 14.331436157226562, "learning_rate": 7.571428571428571e-05, "loss": 3.6418, "step": 340 }, { "epoch": 0.2571428571428571, "grad_norm": 15.481107711791992, "learning_rate": 7.428571428571429e-05, "loss": 3.6614, "step": 360 }, { "epoch": 0.2714285714285714, "grad_norm": 13.689815521240234, "learning_rate": 7.285714285714286e-05, "loss": 3.6289, "step": 380 }, { "epoch": 0.2857142857142857, "grad_norm": 16.152088165283203, "learning_rate": 7.142857142857143e-05, "loss": 3.7508, "step": 400 }, { "epoch": 0.3, "grad_norm": 11.060943603515625, "learning_rate": 7e-05, "loss": 3.7885, "step": 420 }, { "epoch": 0.3142857142857143, "grad_norm": 12.639877319335938, "learning_rate": 6.857142857142858e-05, "loss": 3.528, "step": 440 }, { "epoch": 0.32857142857142857, "grad_norm": 14.593839645385742, "learning_rate": 6.714285714285714e-05, "loss": 3.6221, "step": 460 }, { "epoch": 0.34285714285714286, "grad_norm": 13.310423851013184, "learning_rate": 6.571428571428571e-05, "loss": 3.617, "step": 480 }, { "epoch": 0.35714285714285715, "grad_norm": 31.857540130615234, "learning_rate": 6.428571428571429e-05, "loss": 3.5771, "step": 500 } ], "logging_steps": 20, "max_steps": 1400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 184798603714560.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }