robiual-awal's picture
Training in progress, step 200, checkpoint
f46141d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.008537886872998933,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.268943436499467e-05,
"eval_loss": 11.11116886138916,
"eval_runtime": 26.7787,
"eval_samples_per_second": 368.353,
"eval_steps_per_second": 184.176,
"step": 1
},
{
"epoch": 0.00042689434364994664,
"grad_norm": 0.8201702833175659,
"learning_rate": 0.0002,
"loss": 44.4495,
"step": 10
},
{
"epoch": 0.0008537886872998933,
"grad_norm": 1.0575289726257324,
"learning_rate": 0.0002,
"loss": 44.3848,
"step": 20
},
{
"epoch": 0.0012806830309498398,
"grad_norm": 1.1616137027740479,
"learning_rate": 0.0002,
"loss": 44.2795,
"step": 30
},
{
"epoch": 0.0017075773745997866,
"grad_norm": 1.2281877994537354,
"learning_rate": 0.0002,
"loss": 44.1531,
"step": 40
},
{
"epoch": 0.0021344717182497333,
"grad_norm": 1.488601565361023,
"learning_rate": 0.0002,
"loss": 43.9426,
"step": 50
},
{
"epoch": 0.0021344717182497333,
"eval_loss": 10.959285736083984,
"eval_runtime": 25.947,
"eval_samples_per_second": 380.159,
"eval_steps_per_second": 190.079,
"step": 50
},
{
"epoch": 0.0025613660618996796,
"grad_norm": 1.1621413230895996,
"learning_rate": 0.0002,
"loss": 43.7882,
"step": 60
},
{
"epoch": 0.0029882604055496264,
"grad_norm": 0.8476995229721069,
"learning_rate": 0.0002,
"loss": 43.6368,
"step": 70
},
{
"epoch": 0.003415154749199573,
"grad_norm": 0.7544131875038147,
"learning_rate": 0.0002,
"loss": 43.5856,
"step": 80
},
{
"epoch": 0.00384204909284952,
"grad_norm": 0.9888026714324951,
"learning_rate": 0.0002,
"loss": 43.5509,
"step": 90
},
{
"epoch": 0.004268943436499467,
"grad_norm": 1.0631221532821655,
"learning_rate": 0.0002,
"loss": 43.5477,
"step": 100
},
{
"epoch": 0.004268943436499467,
"eval_loss": 10.883926391601562,
"eval_runtime": 25.5532,
"eval_samples_per_second": 386.018,
"eval_steps_per_second": 193.009,
"step": 100
},
{
"epoch": 0.004695837780149413,
"grad_norm": 0.8417938947677612,
"learning_rate": 0.0002,
"loss": 43.525,
"step": 110
},
{
"epoch": 0.005122732123799359,
"grad_norm": 0.7629103660583496,
"learning_rate": 0.0002,
"loss": 43.5057,
"step": 120
},
{
"epoch": 0.005549626467449306,
"grad_norm": 0.763012170791626,
"learning_rate": 0.0002,
"loss": 43.4606,
"step": 130
},
{
"epoch": 0.005976520811099253,
"grad_norm": 0.8534968495368958,
"learning_rate": 0.0002,
"loss": 43.4568,
"step": 140
},
{
"epoch": 0.0064034151547491995,
"grad_norm": 0.9974360466003418,
"learning_rate": 0.0002,
"loss": 43.4598,
"step": 150
},
{
"epoch": 0.0064034151547491995,
"eval_loss": 10.865395545959473,
"eval_runtime": 25.6883,
"eval_samples_per_second": 383.987,
"eval_steps_per_second": 191.994,
"step": 150
},
{
"epoch": 0.006830309498399146,
"grad_norm": 0.7604882717132568,
"learning_rate": 0.0002,
"loss": 43.4926,
"step": 160
},
{
"epoch": 0.007257203842049093,
"grad_norm": 0.9254680275917053,
"learning_rate": 0.0002,
"loss": 43.4299,
"step": 170
},
{
"epoch": 0.00768409818569904,
"grad_norm": 0.7746944427490234,
"learning_rate": 0.0002,
"loss": 43.4712,
"step": 180
},
{
"epoch": 0.008110992529348986,
"grad_norm": 1.0943514108657837,
"learning_rate": 0.0002,
"loss": 43.433,
"step": 190
},
{
"epoch": 0.008537886872998933,
"grad_norm": 0.7889847755432129,
"learning_rate": 0.0002,
"loss": 43.4282,
"step": 200
},
{
"epoch": 0.008537886872998933,
"eval_loss": 10.85505485534668,
"eval_runtime": 25.7217,
"eval_samples_per_second": 383.489,
"eval_steps_per_second": 191.744,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2698133372928.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}