|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.983050847457627, |
|
"eval_steps": 1000, |
|
"global_step": 132, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11299435028248588, |
|
"grad_norm": 7.197507381439209, |
|
"learning_rate": 2.5e-05, |
|
"loss": 4.1191, |
|
"num_input_tokens_seen": 43904, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.22598870056497175, |
|
"grad_norm": 6.066619873046875, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6537, |
|
"num_input_tokens_seen": 91264, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3389830508474576, |
|
"grad_norm": 2.8696072101593018, |
|
"learning_rate": 4.9793066853402536e-05, |
|
"loss": 2.3123, |
|
"num_input_tokens_seen": 141184, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4519774011299435, |
|
"grad_norm": 1.9488307237625122, |
|
"learning_rate": 4.9175693119783013e-05, |
|
"loss": 1.7347, |
|
"num_input_tokens_seen": 186944, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5649717514124294, |
|
"grad_norm": 2.07496976852417, |
|
"learning_rate": 4.815809920628738e-05, |
|
"loss": 1.6167, |
|
"num_input_tokens_seen": 234496, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 1.4913270473480225, |
|
"learning_rate": 4.6757131025753886e-05, |
|
"loss": 1.6705, |
|
"num_input_tokens_seen": 282432, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7909604519774012, |
|
"grad_norm": 1.5118441581726074, |
|
"learning_rate": 4.499598111849299e-05, |
|
"loss": 1.5204, |
|
"num_input_tokens_seen": 331648, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.903954802259887, |
|
"grad_norm": 1.1554917097091675, |
|
"learning_rate": 4.2903804707859835e-05, |
|
"loss": 1.4231, |
|
"num_input_tokens_seen": 377024, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0169491525423728, |
|
"grad_norm": 1.4432414770126343, |
|
"learning_rate": 4.051523704568557e-05, |
|
"loss": 1.42, |
|
"num_input_tokens_seen": 424576, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"grad_norm": 1.4247018098831177, |
|
"learning_rate": 3.7869820037745776e-05, |
|
"loss": 1.451, |
|
"num_input_tokens_seen": 474816, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.2429378531073447, |
|
"grad_norm": 1.644587755203247, |
|
"learning_rate": 3.501134764128167e-05, |
|
"loss": 1.2046, |
|
"num_input_tokens_seen": 523392, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.3559322033898304, |
|
"grad_norm": 1.6693050861358643, |
|
"learning_rate": 3.1987140871290236e-05, |
|
"loss": 1.3315, |
|
"num_input_tokens_seen": 567680, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.4689265536723164, |
|
"grad_norm": 1.8458465337753296, |
|
"learning_rate": 2.884726441760155e-05, |
|
"loss": 1.2132, |
|
"num_input_tokens_seen": 610816, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.5819209039548023, |
|
"grad_norm": 1.5384082794189453, |
|
"learning_rate": 2.564369784137472e-05, |
|
"loss": 1.1863, |
|
"num_input_tokens_seen": 656640, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 1.7151033878326416, |
|
"learning_rate": 2.2429475071565987e-05, |
|
"loss": 1.2067, |
|
"num_input_tokens_seen": 705792, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.807909604519774, |
|
"grad_norm": 1.9599609375, |
|
"learning_rate": 1.9257806446705116e-05, |
|
"loss": 1.1691, |
|
"num_input_tokens_seen": 751232, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.92090395480226, |
|
"grad_norm": 1.5244255065917969, |
|
"learning_rate": 1.618119783627263e-05, |
|
"loss": 1.2593, |
|
"num_input_tokens_seen": 799552, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.0338983050847457, |
|
"grad_norm": 1.6506624221801758, |
|
"learning_rate": 1.325058142431701e-05, |
|
"loss": 1.2249, |
|
"num_input_tokens_seen": 849408, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.146892655367232, |
|
"grad_norm": 1.7030749320983887, |
|
"learning_rate": 1.051447254488591e-05, |
|
"loss": 1.0921, |
|
"num_input_tokens_seen": 897024, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.2598870056497176, |
|
"grad_norm": 1.9787169694900513, |
|
"learning_rate": 8.018166527567672e-06, |
|
"loss": 1.1254, |
|
"num_input_tokens_seen": 943232, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.3728813559322033, |
|
"grad_norm": 2.088651418685913, |
|
"learning_rate": 5.8029888490850005e-06, |
|
"loss": 1.1332, |
|
"num_input_tokens_seen": 989824, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.4858757062146895, |
|
"grad_norm": 2.087944746017456, |
|
"learning_rate": 3.90561100442036e-06, |
|
"loss": 1.0718, |
|
"num_input_tokens_seen": 1036224, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.598870056497175, |
|
"grad_norm": 1.599670648574829, |
|
"learning_rate": 2.3574434229882145e-06, |
|
"loss": 1.2027, |
|
"num_input_tokens_seen": 1082368, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.711864406779661, |
|
"grad_norm": 1.867066740989685, |
|
"learning_rate": 1.1841154799154374e-06, |
|
"loss": 1.1742, |
|
"num_input_tokens_seen": 1131264, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.824858757062147, |
|
"grad_norm": 1.7015374898910522, |
|
"learning_rate": 4.050512106620913e-07, |
|
"loss": 1.1628, |
|
"num_input_tokens_seen": 1177472, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.937853107344633, |
|
"grad_norm": 1.7900928258895874, |
|
"learning_rate": 3.314775287923677e-08, |
|
"loss": 1.0574, |
|
"num_input_tokens_seen": 1224000, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.983050847457627, |
|
"num_input_tokens_seen": 1245248, |
|
"step": 132, |
|
"total_flos": 7355069552918528.0, |
|
"train_loss": 1.5248766162178733, |
|
"train_runtime": 164.4609, |
|
"train_samples_per_second": 51.641, |
|
"train_steps_per_second": 0.803 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 132, |
|
"num_input_tokens_seen": 1245248, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7355069552918528.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|