File size: 1,853 Bytes
75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 44be9ef 75c4c46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9866666666666667,
"eval_steps": 500,
"global_step": 37,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5e-05,
"loss": 2.2275,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 0.00019954719225730847,
"loss": 1.9848,
"step": 5
},
{
"epoch": 0.27,
"learning_rate": 0.00018412535328311814,
"loss": 0.6529,
"step": 10
},
{
"epoch": 0.4,
"learning_rate": 0.00015000000000000001,
"loss": 0.2662,
"step": 15
},
{
"epoch": 0.53,
"learning_rate": 0.00010475819158237425,
"loss": 0.238,
"step": 20
},
{
"epoch": 0.67,
"learning_rate": 5.845849869981137e-05,
"loss": 0.2332,
"step": 25
},
{
"epoch": 0.8,
"learning_rate": 2.139469052572127e-05,
"loss": 0.2287,
"step": 30
},
{
"epoch": 0.93,
"learning_rate": 1.8071302737293295e-06,
"loss": 0.203,
"step": 35
},
{
"epoch": 0.99,
"eval_loss": 0.22467635571956635,
"eval_runtime": 9.0559,
"eval_samples_per_second": 22.085,
"eval_steps_per_second": 2.761,
"step": 37
},
{
"epoch": 0.99,
"step": 37,
"total_flos": 20586009395200.0,
"train_loss": 0.5330296472923176,
"train_runtime": 203.4112,
"train_samples_per_second": 2.95,
"train_steps_per_second": 0.182
}
],
"logging_steps": 5,
"max_steps": 37,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 20586009395200.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|