{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.934971098265896,
  "global_step": 27500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 2.945809248554913e-05,
      "loss": 1.4801,
      "step": 500
    },
    {
      "epoch": 0.36,
      "learning_rate": 2.8916184971098265e-05,
      "loss": 0.9767,
      "step": 1000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.83742774566474e-05,
      "loss": 0.8751,
      "step": 1500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.7832369942196533e-05,
      "loss": 0.8366,
      "step": 2000
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.7290462427745663e-05,
      "loss": 0.7828,
      "step": 2500
    },
    {
      "epoch": 1.08,
      "learning_rate": 2.6748554913294797e-05,
      "loss": 0.6836,
      "step": 3000
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.620664739884393e-05,
      "loss": 0.5499,
      "step": 3500
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5664739884393065e-05,
      "loss": 0.5279,
      "step": 4000
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.5122832369942196e-05,
      "loss": 0.5512,
      "step": 4500
    },
    {
      "epoch": 1.81,
      "learning_rate": 2.458092485549133e-05,
      "loss": 0.5806,
      "step": 5000
    },
    {
      "epoch": 1.99,
      "learning_rate": 2.4039017341040463e-05,
      "loss": 0.5244,
      "step": 5500
    },
    {
      "epoch": 2.17,
      "learning_rate": 2.3497109826589597e-05,
      "loss": 0.3951,
      "step": 6000
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.2955202312138728e-05,
      "loss": 0.3984,
      "step": 6500
    },
    {
      "epoch": 2.53,
      "learning_rate": 2.2413294797687862e-05,
      "loss": 0.4088,
      "step": 7000
    },
    {
      "epoch": 2.71,
      "learning_rate": 2.1871387283236992e-05,
      "loss": 0.4008,
      "step": 7500
    },
    {
      "epoch": 2.89,
      "learning_rate": 2.132947976878613e-05,
      "loss": 0.4095,
      "step": 8000
    },
    {
      "epoch": 3.07,
      "learning_rate": 2.078757225433526e-05,
      "loss": 0.347,
      "step": 8500
    },
    {
      "epoch": 3.25,
      "learning_rate": 2.0245664739884394e-05,
      "loss": 0.3115,
      "step": 9000
    },
    {
      "epoch": 3.43,
      "learning_rate": 1.9703757225433524e-05,
      "loss": 0.3224,
      "step": 9500
    },
    {
      "epoch": 3.61,
      "learning_rate": 1.9161849710982662e-05,
      "loss": 0.3421,
      "step": 10000
    },
    {
      "epoch": 3.79,
      "learning_rate": 1.8619942196531792e-05,
      "loss": 0.3212,
      "step": 10500
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.8078034682080926e-05,
      "loss": 0.3241,
      "step": 11000
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.7536127167630057e-05,
      "loss": 0.2641,
      "step": 11500
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.6994219653179194e-05,
      "loss": 0.2823,
      "step": 12000
    },
    {
      "epoch": 4.52,
      "learning_rate": 1.6452312138728324e-05,
      "loss": 0.278,
      "step": 12500
    },
    {
      "epoch": 4.7,
      "learning_rate": 1.591040462427746e-05,
      "loss": 0.2774,
      "step": 13000
    },
    {
      "epoch": 4.88,
      "learning_rate": 1.536849710982659e-05,
      "loss": 0.2984,
      "step": 13500
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.4826589595375723e-05,
      "loss": 0.276,
      "step": 14000
    },
    {
      "epoch": 5.24,
      "learning_rate": 1.4284682080924855e-05,
      "loss": 0.2485,
      "step": 14500
    },
    {
      "epoch": 5.42,
      "learning_rate": 1.3742774566473989e-05,
      "loss": 0.2513,
      "step": 15000
    },
    {
      "epoch": 5.6,
      "learning_rate": 1.3200867052023121e-05,
      "loss": 0.2741,
      "step": 15500
    },
    {
      "epoch": 5.78,
      "learning_rate": 1.2658959537572255e-05,
      "loss": 0.2581,
      "step": 16000
    },
    {
      "epoch": 5.96,
      "learning_rate": 1.2117052023121387e-05,
      "loss": 0.2682,
      "step": 16500
    },
    {
      "epoch": 6.14,
      "learning_rate": 1.1575144508670521e-05,
      "loss": 0.2436,
      "step": 17000
    },
    {
      "epoch": 6.32,
      "learning_rate": 1.1033236994219653e-05,
      "loss": 0.2364,
      "step": 17500
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.0491329479768787e-05,
      "loss": 0.2336,
      "step": 18000
    },
    {
      "epoch": 6.68,
      "learning_rate": 9.94942196531792e-06,
      "loss": 0.2354,
      "step": 18500
    },
    {
      "epoch": 6.86,
      "learning_rate": 9.407514450867053e-06,
      "loss": 0.2486,
      "step": 19000
    },
    {
      "epoch": 7.04,
      "learning_rate": 8.865606936416185e-06,
      "loss": 0.2461,
      "step": 19500
    },
    {
      "epoch": 7.23,
      "learning_rate": 8.323699421965318e-06,
      "loss": 0.2161,
      "step": 20000
    },
    {
      "epoch": 7.41,
      "learning_rate": 7.781791907514452e-06,
      "loss": 0.2437,
      "step": 20500
    },
    {
      "epoch": 7.59,
      "learning_rate": 7.239884393063584e-06,
      "loss": 0.2121,
      "step": 21000
    },
    {
      "epoch": 7.77,
      "learning_rate": 6.697976878612717e-06,
      "loss": 0.2227,
      "step": 21500
    },
    {
      "epoch": 7.95,
      "learning_rate": 6.15606936416185e-06,
      "loss": 0.2341,
      "step": 22000
    },
    {
      "epoch": 8.13,
      "learning_rate": 5.614161849710983e-06,
      "loss": 0.2182,
      "step": 22500
    },
    {
      "epoch": 8.31,
      "learning_rate": 5.072254335260116e-06,
      "loss": 0.2148,
      "step": 23000
    },
    {
      "epoch": 8.49,
      "learning_rate": 4.530346820809248e-06,
      "loss": 0.2247,
      "step": 23500
    },
    {
      "epoch": 8.67,
      "learning_rate": 3.988439306358381e-06,
      "loss": 0.2268,
      "step": 24000
    },
    {
      "epoch": 8.85,
      "learning_rate": 3.4465317919075147e-06,
      "loss": 0.2187,
      "step": 24500
    },
    {
      "epoch": 9.03,
      "learning_rate": 2.9046242774566473e-06,
      "loss": 0.2188,
      "step": 25000
    },
    {
      "epoch": 9.21,
      "learning_rate": 2.3627167630057803e-06,
      "loss": 0.2196,
      "step": 25500
    },
    {
      "epoch": 9.39,
      "learning_rate": 1.8208092485549132e-06,
      "loss": 0.2114,
      "step": 26000
    },
    {
      "epoch": 9.57,
      "learning_rate": 1.2789017341040462e-06,
      "loss": 0.2159,
      "step": 26500
    },
    {
      "epoch": 9.75,
      "learning_rate": 7.369942196531793e-07,
      "loss": 0.2153,
      "step": 27000
    },
    {
      "epoch": 9.93,
      "learning_rate": 1.9508670520231215e-07,
      "loss": 0.2102,
      "step": 27500
    }
  ],
  "max_steps": 27680,
  "num_train_epochs": 10,
  "total_flos": 7.184484974834688e+16,
  "trial_name": null,
  "trial_params": null
}