|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.24742881799363145, |
|
"eval_steps": 1000000, |
|
"global_step": 45301, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 431.6959, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 297.4225, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.5e-06, |
|
"loss": 252.127, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 228.3056, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"loss": 214.1223, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3e-06, |
|
"loss": 204.62, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.5e-06, |
|
"loss": 197.7543, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 192.3316, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.5e-06, |
|
"loss": 188.5246, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-06, |
|
"loss": 184.3941, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 181.4647, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6e-06, |
|
"loss": 179.2321, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 176.7608, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7e-06, |
|
"loss": 175.437, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 172.9957, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 171.3707, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.5e-06, |
|
"loss": 170.2551, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9e-06, |
|
"loss": 168.7796, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.5e-06, |
|
"loss": 167.1022, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-05, |
|
"loss": 166.1143, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.999726756545549e-06, |
|
"loss": 164.6779, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.999453513091095e-06, |
|
"loss": 163.4033, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.999180269636641e-06, |
|
"loss": 162.5638, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.998907026182189e-06, |
|
"loss": 161.2811, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.998633782727735e-06, |
|
"loss": 160.2764, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.998360539273283e-06, |
|
"loss": 159.5492, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.99808729581883e-06, |
|
"loss": 158.2019, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.997814052364376e-06, |
|
"loss": 158.0253, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.997540808909924e-06, |
|
"loss": 156.8733, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.99726756545547e-06, |
|
"loss": 156.8639, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.996994322001016e-06, |
|
"loss": 156.0519, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.996721078546564e-06, |
|
"loss": 155.0571, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.996447835092112e-06, |
|
"loss": 154.6748, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.996174591637658e-06, |
|
"loss": 153.1673, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.995901348183204e-06, |
|
"loss": 153.309, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.995628104728752e-06, |
|
"loss": 152.58, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.995354861274299e-06, |
|
"loss": 152.1333, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.995081617819847e-06, |
|
"loss": 151.4704, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.994808374365393e-06, |
|
"loss": 151.0387, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.994535130910939e-06, |
|
"loss": 150.4169, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.994261887456487e-06, |
|
"loss": 150.5139, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.993988644002033e-06, |
|
"loss": 149.6021, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.993715400547581e-06, |
|
"loss": 149.0417, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.993442157093127e-06, |
|
"loss": 148.4873, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.993168913638675e-06, |
|
"loss": 148.4089, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.992895670184221e-06, |
|
"loss": 147.7362, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.99262242672977e-06, |
|
"loss": 147.1884, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.992349183275316e-06, |
|
"loss": 147.1985, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.992075939820862e-06, |
|
"loss": 147.2833, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.99180269636641e-06, |
|
"loss": 146.4076, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.991529452911956e-06, |
|
"loss": 146.2133, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.991256209457502e-06, |
|
"loss": 145.7042, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.99098296600305e-06, |
|
"loss": 146.1063, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.990709722548596e-06, |
|
"loss": 145.0671, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.990436479094144e-06, |
|
"loss": 144.2784, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.990163235639692e-06, |
|
"loss": 144.2024, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.989889992185238e-06, |
|
"loss": 143.9278, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.989616748730785e-06, |
|
"loss": 143.8407, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.989343505276333e-06, |
|
"loss": 143.22, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.989070261821879e-06, |
|
"loss": 142.6727, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.988797018367425e-06, |
|
"loss": 142.5397, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.988523774912973e-06, |
|
"loss": 142.2751, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.98825053145852e-06, |
|
"loss": 142.4529, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.987977288004067e-06, |
|
"loss": 141.3284, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.987704044549613e-06, |
|
"loss": 141.185, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.98743080109516e-06, |
|
"loss": 141.4491, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.987157557640708e-06, |
|
"loss": 140.3137, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.986884314186255e-06, |
|
"loss": 140.5562, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.986611070731802e-06, |
|
"loss": 140.5535, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.986337827277348e-06, |
|
"loss": 139.9692, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.986064583822896e-06, |
|
"loss": 139.3751, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.985791340368442e-06, |
|
"loss": 139.6697, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.98551809691399e-06, |
|
"loss": 138.6528, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.985244853459536e-06, |
|
"loss": 138.9591, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.984971610005082e-06, |
|
"loss": 138.7952, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.98469836655063e-06, |
|
"loss": 138.2421, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.984425123096178e-06, |
|
"loss": 138.5417, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.984151879641723e-06, |
|
"loss": 138.0299, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.98387863618727e-06, |
|
"loss": 137.7599, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.983605392732819e-06, |
|
"loss": 137.606, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.983332149278365e-06, |
|
"loss": 136.8119, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.983058905823911e-06, |
|
"loss": 136.5174, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.982785662369459e-06, |
|
"loss": 136.5812, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.982512418915005e-06, |
|
"loss": 136.5835, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.982239175460553e-06, |
|
"loss": 136.2487, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.9819659320061e-06, |
|
"loss": 135.6395, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.981692688551646e-06, |
|
"loss": 136.295, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.981419445097194e-06, |
|
"loss": 135.4576, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.981146201642742e-06, |
|
"loss": 135.6909, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.980872958188288e-06, |
|
"loss": 135.0495, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"step": 45301, |
|
"total_flos": 2.4014284010815488e+17, |
|
"train_loss": 159.1573161326461, |
|
"train_runtime": 35999.4318, |
|
"train_samples_per_second": 16274.585, |
|
"train_steps_per_second": 508.583 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 18308700, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000000, |
|
"total_flos": 2.4014284010815488e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|