{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.682926829268293, "eval_steps": 500, "global_step": 217, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.9999451693655125e-05, "loss": 1.3914, "step": 1 }, { "epoch": 0.24, "learning_rate": 1.9986295347545738e-05, "loss": 1.1489, "step": 5 }, { "epoch": 0.49, "learning_rate": 1.9945218953682736e-05, "loss": 1.0541, "step": 10 }, { "epoch": 0.68, "eval_loss": 1.0341095924377441, "eval_runtime": 379.2893, "eval_samples_per_second": 60.93, "eval_steps_per_second": 0.477, "step": 14 }, { "epoch": 1.02, "learning_rate": 1.9876883405951378e-05, "loss": 1.0441, "step": 15 }, { "epoch": 1.27, "learning_rate": 1.9781476007338058e-05, "loss": 0.9858, "step": 20 }, { "epoch": 1.51, "learning_rate": 1.9659258262890683e-05, "loss": 0.9708, "step": 25 }, { "epoch": 1.71, "eval_loss": 1.0142368078231812, "eval_runtime": 378.349, "eval_samples_per_second": 61.081, "eval_steps_per_second": 0.478, "step": 29 }, { "epoch": 2.05, "learning_rate": 1.9510565162951538e-05, "loss": 0.9679, "step": 30 }, { "epoch": 2.29, "learning_rate": 1.9335804264972018e-05, "loss": 0.9274, "step": 35 }, { "epoch": 2.54, "learning_rate": 1.913545457642601e-05, "loss": 0.9142, "step": 40 }, { "epoch": 2.68, "eval_loss": 1.0111455917358398, "eval_runtime": 378.1977, "eval_samples_per_second": 61.106, "eval_steps_per_second": 0.479, "step": 43 }, { "epoch": 3.07, "learning_rate": 1.891006524188368e-05, "loss": 0.8994, "step": 45 }, { "epoch": 3.32, "learning_rate": 1.866025403784439e-05, "loss": 0.8775, "step": 50 }, { "epoch": 3.56, "learning_rate": 1.8386705679454243e-05, "loss": 0.8637, "step": 55 }, { "epoch": 3.71, "eval_loss": 1.023858666419983, "eval_runtime": 378.3558, "eval_samples_per_second": 61.08, "eval_steps_per_second": 0.478, "step": 58 }, { "epoch": 4.1, "learning_rate": 1.8090169943749477e-05, "loss": 0.8323, "step": 60 }, { "epoch": 4.34, "learning_rate": 1.777145961456971e-05, "loss": 0.8209, "step": 65 }, { "epoch": 4.59, "learning_rate": 1.7431448254773943e-05, "loss": 0.8091, "step": 70 }, { "epoch": 4.68, "eval_loss": 1.0362672805786133, "eval_runtime": 378.1433, "eval_samples_per_second": 61.114, "eval_steps_per_second": 0.479, "step": 72 }, { "epoch": 5.12, "learning_rate": 1.7071067811865477e-05, "loss": 0.7588, "step": 75 }, { "epoch": 5.37, "learning_rate": 1.6691306063588583e-05, "loss": 0.7629, "step": 80 }, { "epoch": 5.61, "learning_rate": 1.6293203910498375e-05, "loss": 0.7516, "step": 85 }, { "epoch": 5.71, "eval_loss": 1.0779954195022583, "eval_runtime": 378.7856, "eval_samples_per_second": 61.011, "eval_steps_per_second": 0.478, "step": 87 }, { "epoch": 6.15, "learning_rate": 1.5877852522924733e-05, "loss": 0.6866, "step": 90 }, { "epoch": 6.39, "learning_rate": 1.5446390350150272e-05, "loss": 0.6993, "step": 95 }, { "epoch": 6.63, "learning_rate": 1.5000000000000002e-05, "loss": 0.6884, "step": 100 }, { "epoch": 6.68, "eval_loss": 1.0987329483032227, "eval_runtime": 378.7519, "eval_samples_per_second": 61.016, "eval_steps_per_second": 0.478, "step": 101 }, { "epoch": 7.17, "learning_rate": 1.4539904997395468e-05, "loss": 0.6135, "step": 105 }, { "epoch": 7.41, "learning_rate": 1.4067366430758004e-05, "loss": 0.6301, "step": 110 }, { "epoch": 7.66, "learning_rate": 1.3583679495453e-05, "loss": 0.6309, "step": 115 }, { "epoch": 7.71, "eval_loss": 1.1394284963607788, "eval_runtime": 378.7271, "eval_samples_per_second": 61.02, "eval_steps_per_second": 0.478, "step": 116 }, { "epoch": 8.2, "learning_rate": 1.3090169943749475e-05, "loss": 0.5431, "step": 120 }, { "epoch": 8.44, "learning_rate": 1.2588190451025209e-05, "loss": 0.5686, "step": 125 }, { "epoch": 8.68, "learning_rate": 1.2079116908177592e-05, "loss": 0.5696, "step": 130 }, { "epoch": 8.68, "eval_loss": 1.1820148229599, "eval_runtime": 378.7544, "eval_samples_per_second": 61.016, "eval_steps_per_second": 0.478, "step": 130 }, { "epoch": 9.22, "learning_rate": 1.156434465040231e-05, "loss": 0.489, "step": 135 }, { "epoch": 9.46, "learning_rate": 1.1045284632676535e-05, "loss": 0.5067, "step": 140 }, { "epoch": 9.71, "learning_rate": 1.0523359562429441e-05, "loss": 0.4752, "step": 145 }, { "epoch": 9.71, "eval_loss": 1.2695468664169312, "eval_runtime": 379.3356, "eval_samples_per_second": 60.922, "eval_steps_per_second": 0.477, "step": 145 }, { "epoch": 10.24, "learning_rate": 1e-05, "loss": 0.4579, "step": 150 }, { "epoch": 10.49, "learning_rate": 9.476640437570562e-06, "loss": 0.448, "step": 155 }, { "epoch": 10.68, "eval_loss": 1.3108839988708496, "eval_runtime": 379.098, "eval_samples_per_second": 60.96, "eval_steps_per_second": 0.477, "step": 159 }, { "epoch": 11.02, "learning_rate": 8.954715367323468e-06, "loss": 0.4175, "step": 160 }, { "epoch": 11.27, "learning_rate": 8.43565534959769e-06, "loss": 0.4065, "step": 165 }, { "epoch": 11.51, "learning_rate": 7.92088309182241e-06, "loss": 0.3955, "step": 170 }, { "epoch": 11.71, "eval_loss": 1.387709379196167, "eval_runtime": 379.1872, "eval_samples_per_second": 60.946, "eval_steps_per_second": 0.477, "step": 174 }, { "epoch": 12.05, "learning_rate": 7.411809548974792e-06, "loss": 0.3736, "step": 175 }, { "epoch": 12.29, "learning_rate": 6.909830056250527e-06, "loss": 0.3623, "step": 180 }, { "epoch": 12.54, "learning_rate": 6.4163205045469975e-06, "loss": 0.3579, "step": 185 }, { "epoch": 12.68, "eval_loss": 1.3922604322433472, "eval_runtime": 378.9155, "eval_samples_per_second": 60.99, "eval_steps_per_second": 0.478, "step": 188 }, { "epoch": 13.07, "learning_rate": 5.932633569242e-06, "loss": 0.3336, "step": 190 }, { "epoch": 13.32, "learning_rate": 5.460095002604533e-06, "loss": 0.3216, "step": 195 }, { "epoch": 13.56, "learning_rate": 5.000000000000003e-06, "loss": 0.3228, "step": 200 }, { "epoch": 13.71, "eval_loss": 1.4064093828201294, "eval_runtime": 378.8946, "eval_samples_per_second": 60.993, "eval_steps_per_second": 0.478, "step": 203 }, { "epoch": 14.1, "learning_rate": 4.5536096498497295e-06, "loss": 0.2974, "step": 205 }, { "epoch": 14.34, "learning_rate": 4.12214747707527e-06, "loss": 0.29, "step": 210 }, { "epoch": 14.59, "learning_rate": 3.7067960895016277e-06, "loss": 0.2914, "step": 215 }, { "epoch": 14.68, "eval_loss": 1.4377079010009766, "eval_runtime": 378.9608, "eval_samples_per_second": 60.983, "eval_steps_per_second": 0.478, "step": 217 }, { "epoch": 14.68, "step": 217, "total_flos": 353275824046080.0, "train_loss": 0.6393404012451523, "train_runtime": 14968.9879, "train_samples_per_second": 10.415, "train_steps_per_second": 0.02 } ], "logging_steps": 5, "max_steps": 300, "num_train_epochs": 15, "save_steps": 500, "total_flos": 353275824046080.0, "trial_name": null, "trial_params": null }