|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.008537886872998933, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.268943436499467e-05, |
|
"eval_loss": 11.11116886138916, |
|
"eval_runtime": 26.7787, |
|
"eval_samples_per_second": 368.353, |
|
"eval_steps_per_second": 184.176, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00042689434364994664, |
|
"grad_norm": 0.8201702833175659, |
|
"learning_rate": 0.0002, |
|
"loss": 44.4495, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0008537886872998933, |
|
"grad_norm": 1.0575289726257324, |
|
"learning_rate": 0.0002, |
|
"loss": 44.3848, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0012806830309498398, |
|
"grad_norm": 1.1616137027740479, |
|
"learning_rate": 0.0002, |
|
"loss": 44.2795, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0017075773745997866, |
|
"grad_norm": 1.2281877994537354, |
|
"learning_rate": 0.0002, |
|
"loss": 44.1531, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0021344717182497333, |
|
"grad_norm": 1.488601565361023, |
|
"learning_rate": 0.0002, |
|
"loss": 43.9426, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0021344717182497333, |
|
"eval_loss": 10.959285736083984, |
|
"eval_runtime": 25.947, |
|
"eval_samples_per_second": 380.159, |
|
"eval_steps_per_second": 190.079, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0025613660618996796, |
|
"grad_norm": 1.1621413230895996, |
|
"learning_rate": 0.0002, |
|
"loss": 43.7882, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0029882604055496264, |
|
"grad_norm": 0.8476995229721069, |
|
"learning_rate": 0.0002, |
|
"loss": 43.6368, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.003415154749199573, |
|
"grad_norm": 0.7544131875038147, |
|
"learning_rate": 0.0002, |
|
"loss": 43.5856, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.00384204909284952, |
|
"grad_norm": 0.9888026714324951, |
|
"learning_rate": 0.0002, |
|
"loss": 43.5509, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.004268943436499467, |
|
"grad_norm": 1.0631221532821655, |
|
"learning_rate": 0.0002, |
|
"loss": 43.5477, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004268943436499467, |
|
"eval_loss": 10.883926391601562, |
|
"eval_runtime": 25.5532, |
|
"eval_samples_per_second": 386.018, |
|
"eval_steps_per_second": 193.009, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004695837780149413, |
|
"grad_norm": 0.8417938947677612, |
|
"learning_rate": 0.0002, |
|
"loss": 43.525, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.005122732123799359, |
|
"grad_norm": 0.7629103660583496, |
|
"learning_rate": 0.0002, |
|
"loss": 43.5057, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.005549626467449306, |
|
"grad_norm": 0.763012170791626, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4606, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.005976520811099253, |
|
"grad_norm": 0.8534968495368958, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4568, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0064034151547491995, |
|
"grad_norm": 0.9974360466003418, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4598, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0064034151547491995, |
|
"eval_loss": 10.865395545959473, |
|
"eval_runtime": 25.6883, |
|
"eval_samples_per_second": 383.987, |
|
"eval_steps_per_second": 191.994, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.006830309498399146, |
|
"grad_norm": 0.7604882717132568, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4926, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.007257203842049093, |
|
"grad_norm": 0.9254680275917053, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4299, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.00768409818569904, |
|
"grad_norm": 0.7746944427490234, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4712, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.008110992529348986, |
|
"grad_norm": 1.0943514108657837, |
|
"learning_rate": 0.0002, |
|
"loss": 43.433, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.008537886872998933, |
|
"grad_norm": 0.7889847755432129, |
|
"learning_rate": 0.0002, |
|
"loss": 43.4282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.008537886872998933, |
|
"eval_loss": 10.85505485534668, |
|
"eval_runtime": 25.7217, |
|
"eval_samples_per_second": 383.489, |
|
"eval_steps_per_second": 191.744, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2698133372928.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|