{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.24742881799363145, "eval_steps": 1000000, "global_step": 45301, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 431.6959, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.0000000000000002e-06, "loss": 297.4225, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "loss": 252.127, "step": 1500 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 228.3056, "step": 2000 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 214.1223, "step": 2500 }, { "epoch": 0.02, "learning_rate": 3e-06, "loss": 204.62, "step": 3000 }, { "epoch": 0.02, "learning_rate": 3.5e-06, "loss": 197.7543, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 192.3316, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.5e-06, "loss": 188.5246, "step": 4500 }, { "epoch": 0.03, "learning_rate": 5e-06, "loss": 184.3941, "step": 5000 }, { "epoch": 0.03, "learning_rate": 5.500000000000001e-06, "loss": 181.4647, "step": 5500 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 179.2321, "step": 6000 }, { "epoch": 0.04, "learning_rate": 6.5000000000000004e-06, "loss": 176.7608, "step": 6500 }, { "epoch": 0.04, "learning_rate": 7e-06, "loss": 175.437, "step": 7000 }, { "epoch": 0.04, "learning_rate": 7.500000000000001e-06, "loss": 172.9957, "step": 7500 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 171.3707, "step": 8000 }, { "epoch": 0.05, "learning_rate": 8.5e-06, "loss": 170.2551, "step": 8500 }, { "epoch": 0.05, "learning_rate": 9e-06, "loss": 168.7796, "step": 9000 }, { "epoch": 0.05, "learning_rate": 9.5e-06, "loss": 167.1022, "step": 9500 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 166.1143, "step": 10000 }, { "epoch": 0.06, "learning_rate": 9.999726756545549e-06, "loss": 164.6779, "step": 10500 }, { "epoch": 0.06, "learning_rate": 9.999453513091095e-06, "loss": 163.4033, "step": 11000 }, { "epoch": 0.06, "learning_rate": 9.999180269636641e-06, "loss": 162.5638, "step": 11500 }, { "epoch": 0.07, "learning_rate": 9.998907026182189e-06, "loss": 161.2811, "step": 12000 }, { "epoch": 0.07, "learning_rate": 9.998633782727735e-06, "loss": 160.2764, "step": 12500 }, { "epoch": 0.07, "learning_rate": 9.998360539273283e-06, "loss": 159.5492, "step": 13000 }, { "epoch": 0.07, "learning_rate": 9.99808729581883e-06, "loss": 158.2019, "step": 13500 }, { "epoch": 0.08, "learning_rate": 9.997814052364376e-06, "loss": 158.0253, "step": 14000 }, { "epoch": 0.08, "learning_rate": 9.997540808909924e-06, "loss": 156.8733, "step": 14500 }, { "epoch": 0.08, "learning_rate": 9.99726756545547e-06, "loss": 156.8639, "step": 15000 }, { "epoch": 0.08, "learning_rate": 9.996994322001016e-06, "loss": 156.0519, "step": 15500 }, { "epoch": 0.09, "learning_rate": 9.996721078546564e-06, "loss": 155.0571, "step": 16000 }, { "epoch": 0.09, "learning_rate": 9.996447835092112e-06, "loss": 154.6748, "step": 16500 }, { "epoch": 0.09, "learning_rate": 9.996174591637658e-06, "loss": 153.1673, "step": 17000 }, { "epoch": 0.1, "learning_rate": 9.995901348183204e-06, "loss": 153.309, "step": 17500 }, { "epoch": 0.1, "learning_rate": 9.995628104728752e-06, "loss": 152.58, "step": 18000 }, { "epoch": 0.1, "learning_rate": 9.995354861274299e-06, "loss": 152.1333, "step": 18500 }, { "epoch": 0.1, "learning_rate": 9.995081617819847e-06, "loss": 151.4704, "step": 19000 }, { "epoch": 0.11, "learning_rate": 9.994808374365393e-06, "loss": 151.0387, "step": 19500 }, { "epoch": 0.11, "learning_rate": 9.994535130910939e-06, "loss": 150.4169, "step": 20000 }, { "epoch": 0.11, "learning_rate": 9.994261887456487e-06, "loss": 150.5139, "step": 20500 }, { "epoch": 0.11, "learning_rate": 9.993988644002033e-06, "loss": 149.6021, "step": 21000 }, { "epoch": 0.12, "learning_rate": 9.993715400547581e-06, "loss": 149.0417, "step": 21500 }, { "epoch": 0.12, "learning_rate": 9.993442157093127e-06, "loss": 148.4873, "step": 22000 }, { "epoch": 0.12, "learning_rate": 9.993168913638675e-06, "loss": 148.4089, "step": 22500 }, { "epoch": 0.13, "learning_rate": 9.992895670184221e-06, "loss": 147.7362, "step": 23000 }, { "epoch": 0.13, "learning_rate": 9.99262242672977e-06, "loss": 147.1884, "step": 23500 }, { "epoch": 0.13, "learning_rate": 9.992349183275316e-06, "loss": 147.1985, "step": 24000 }, { "epoch": 0.13, "learning_rate": 9.992075939820862e-06, "loss": 147.2833, "step": 24500 }, { "epoch": 0.14, "learning_rate": 9.99180269636641e-06, "loss": 146.4076, "step": 25000 }, { "epoch": 0.14, "learning_rate": 9.991529452911956e-06, "loss": 146.2133, "step": 25500 }, { "epoch": 0.14, "learning_rate": 9.991256209457502e-06, "loss": 145.7042, "step": 26000 }, { "epoch": 0.14, "learning_rate": 9.99098296600305e-06, "loss": 146.1063, "step": 26500 }, { "epoch": 0.15, "learning_rate": 9.990709722548596e-06, "loss": 145.0671, "step": 27000 }, { "epoch": 0.15, "learning_rate": 9.990436479094144e-06, "loss": 144.2784, "step": 27500 }, { "epoch": 0.15, "learning_rate": 9.990163235639692e-06, "loss": 144.2024, "step": 28000 }, { "epoch": 0.16, "learning_rate": 9.989889992185238e-06, "loss": 143.9278, "step": 28500 }, { "epoch": 0.16, "learning_rate": 9.989616748730785e-06, "loss": 143.8407, "step": 29000 }, { "epoch": 0.16, "learning_rate": 9.989343505276333e-06, "loss": 143.22, "step": 29500 }, { "epoch": 0.16, "learning_rate": 9.989070261821879e-06, "loss": 142.6727, "step": 30000 }, { "epoch": 0.17, "learning_rate": 9.988797018367425e-06, "loss": 142.5397, "step": 30500 }, { "epoch": 0.17, "learning_rate": 9.988523774912973e-06, "loss": 142.2751, "step": 31000 }, { "epoch": 0.17, "learning_rate": 9.98825053145852e-06, "loss": 142.4529, "step": 31500 }, { "epoch": 0.17, "learning_rate": 9.987977288004067e-06, "loss": 141.3284, "step": 32000 }, { "epoch": 0.18, "learning_rate": 9.987704044549613e-06, "loss": 141.185, "step": 32500 }, { "epoch": 0.18, "learning_rate": 9.98743080109516e-06, "loss": 141.4491, "step": 33000 }, { "epoch": 0.18, "learning_rate": 9.987157557640708e-06, "loss": 140.3137, "step": 33500 }, { "epoch": 0.19, "learning_rate": 9.986884314186255e-06, "loss": 140.5562, "step": 34000 }, { "epoch": 0.19, "learning_rate": 9.986611070731802e-06, "loss": 140.5535, "step": 34500 }, { "epoch": 0.19, "learning_rate": 9.986337827277348e-06, "loss": 139.9692, "step": 35000 }, { "epoch": 0.19, "learning_rate": 9.986064583822896e-06, "loss": 139.3751, "step": 35500 }, { "epoch": 0.2, "learning_rate": 9.985791340368442e-06, "loss": 139.6697, "step": 36000 }, { "epoch": 0.2, "learning_rate": 9.98551809691399e-06, "loss": 138.6528, "step": 36500 }, { "epoch": 0.2, "learning_rate": 9.985244853459536e-06, "loss": 138.9591, "step": 37000 }, { "epoch": 0.2, "learning_rate": 9.984971610005082e-06, "loss": 138.7952, "step": 37500 }, { "epoch": 0.21, "learning_rate": 9.98469836655063e-06, "loss": 138.2421, "step": 38000 }, { "epoch": 0.21, "learning_rate": 9.984425123096178e-06, "loss": 138.5417, "step": 38500 }, { "epoch": 0.21, "learning_rate": 9.984151879641723e-06, "loss": 138.0299, "step": 39000 }, { "epoch": 0.22, "learning_rate": 9.98387863618727e-06, "loss": 137.7599, "step": 39500 }, { "epoch": 0.22, "learning_rate": 9.983605392732819e-06, "loss": 137.606, "step": 40000 }, { "epoch": 0.22, "learning_rate": 9.983332149278365e-06, "loss": 136.8119, "step": 40500 }, { "epoch": 0.22, "learning_rate": 9.983058905823911e-06, "loss": 136.5174, "step": 41000 }, { "epoch": 0.23, "learning_rate": 9.982785662369459e-06, "loss": 136.5812, "step": 41500 }, { "epoch": 0.23, "learning_rate": 9.982512418915005e-06, "loss": 136.5835, "step": 42000 }, { "epoch": 0.23, "learning_rate": 9.982239175460553e-06, "loss": 136.2487, "step": 42500 }, { "epoch": 0.23, "learning_rate": 9.9819659320061e-06, "loss": 135.6395, "step": 43000 }, { "epoch": 0.24, "learning_rate": 9.981692688551646e-06, "loss": 136.295, "step": 43500 }, { "epoch": 0.24, "learning_rate": 9.981419445097194e-06, "loss": 135.4576, "step": 44000 }, { "epoch": 0.24, "learning_rate": 9.981146201642742e-06, "loss": 135.6909, "step": 44500 }, { "epoch": 0.25, "learning_rate": 9.980872958188288e-06, "loss": 135.0495, "step": 45000 }, { "epoch": 0.25, "step": 45301, "total_flos": 2.4014284010815488e+17, "train_loss": 159.1573161326461, "train_runtime": 35999.4318, "train_samples_per_second": 16274.585, "train_steps_per_second": 508.583 } ], "logging_steps": 500, "max_steps": 18308700, "num_train_epochs": 100, "save_steps": 1000000, "total_flos": 2.4014284010815488e+17, "trial_name": null, "trial_params": null }