File size: 2,022 Bytes
75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 64b8cf0 75c4c46 7dca212 64b8cf0 7dca212 64b8cf0 7dca212 64b8cf0 7dca212 64b8cf0 7dca212 64b8cf0 7dca212 64b8cf0 75c4c46 64b8cf0 75c4c46 7dca212 75c4c46 64b8cf0 75c4c46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.92,
"eval_steps": 500,
"global_step": 36,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 5e-05,
"loss": 2.272,
"step": 1
},
{
"epoch": 0.27,
"learning_rate": 0.0001995184726672197,
"loss": 1.954,
"step": 5
},
{
"epoch": 0.53,
"learning_rate": 0.00018314696123025454,
"loss": 0.6371,
"step": 10
},
{
"epoch": 0.8,
"learning_rate": 0.0001471396736825998,
"loss": 0.2788,
"step": 15
},
{
"epoch": 0.96,
"eval_loss": 0.23904697597026825,
"eval_runtime": 11.1391,
"eval_samples_per_second": 17.955,
"eval_steps_per_second": 2.244,
"step": 18
},
{
"epoch": 1.07,
"learning_rate": 0.0001,
"loss": 0.2252,
"step": 20
},
{
"epoch": 1.33,
"learning_rate": 5.286032631740023e-05,
"loss": 0.2128,
"step": 25
},
{
"epoch": 1.6,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.2122,
"step": 30
},
{
"epoch": 1.87,
"learning_rate": 4.815273327803182e-07,
"loss": 0.1993,
"step": 35
},
{
"epoch": 1.92,
"eval_loss": 0.2245262861251831,
"eval_runtime": 9.6856,
"eval_samples_per_second": 20.649,
"eval_steps_per_second": 2.581,
"step": 36
},
{
"epoch": 1.92,
"step": 36,
"total_flos": 40159503319040.0,
"train_loss": 0.5313632095025645,
"train_runtime": 554.6126,
"train_samples_per_second": 2.164,
"train_steps_per_second": 0.065
}
],
"logging_steps": 5,
"max_steps": 36,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 40159503319040.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|