{
  "best_metric": 0.5469852685928345,
  "best_model_checkpoint": "mushrooms_image_detection/checkpoint-10945",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 10945,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 6.199071407318115,
      "learning_rate": 1.9173932996787516e-07,
      "loss": 0.6273,
      "step": 500
    },
    {
      "epoch": 0.09,
      "grad_norm": 6.411447048187256,
      "learning_rate": 1.825608077099587e-07,
      "loss": 0.6283,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "grad_norm": 6.0922088623046875,
      "learning_rate": 1.733822854520422e-07,
      "loss": 0.6121,
      "step": 1500
    },
    {
      "epoch": 0.18,
      "grad_norm": 5.242175579071045,
      "learning_rate": 1.6420376319412576e-07,
      "loss": 0.6357,
      "step": 2000
    },
    {
      "epoch": 0.23,
      "grad_norm": 5.726295471191406,
      "learning_rate": 1.5502524093620926e-07,
      "loss": 0.6094,
      "step": 2500
    },
    {
      "epoch": 0.27,
      "grad_norm": 7.89302921295166,
      "learning_rate": 1.458467186782928e-07,
      "loss": 0.6297,
      "step": 3000
    },
    {
      "epoch": 0.32,
      "grad_norm": 6.494805812835693,
      "learning_rate": 1.366681964203763e-07,
      "loss": 0.6104,
      "step": 3500
    },
    {
      "epoch": 0.37,
      "grad_norm": 6.374981880187988,
      "learning_rate": 1.2748967416245983e-07,
      "loss": 0.6111,
      "step": 4000
    },
    {
      "epoch": 0.41,
      "grad_norm": 8.1451416015625,
      "learning_rate": 1.1831115190454337e-07,
      "loss": 0.6039,
      "step": 4500
    },
    {
      "epoch": 0.46,
      "grad_norm": 7.26981258392334,
      "learning_rate": 1.0913262964662688e-07,
      "loss": 0.6102,
      "step": 5000
    },
    {
      "epoch": 0.5,
      "grad_norm": 7.569005966186523,
      "learning_rate": 9.995410738871042e-08,
      "loss": 0.6167,
      "step": 5500
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.174710750579834,
      "learning_rate": 9.077558513079394e-08,
      "loss": 0.6036,
      "step": 6000
    },
    {
      "epoch": 0.59,
      "grad_norm": 6.182538986206055,
      "learning_rate": 8.159706287287747e-08,
      "loss": 0.6166,
      "step": 6500
    },
    {
      "epoch": 0.64,
      "grad_norm": 8.289917945861816,
      "learning_rate": 7.241854061496099e-08,
      "loss": 0.6227,
      "step": 7000
    },
    {
      "epoch": 0.69,
      "grad_norm": 4.353749752044678,
      "learning_rate": 6.32400183570445e-08,
      "loss": 0.6204,
      "step": 7500
    },
    {
      "epoch": 0.73,
      "grad_norm": 7.799861431121826,
      "learning_rate": 5.406149609912804e-08,
      "loss": 0.6079,
      "step": 8000
    },
    {
      "epoch": 0.78,
      "grad_norm": 9.668302536010742,
      "learning_rate": 4.488297384121156e-08,
      "loss": 0.6149,
      "step": 8500
    },
    {
      "epoch": 0.82,
      "grad_norm": 6.9307451248168945,
      "learning_rate": 3.5704451583295086e-08,
      "loss": 0.6046,
      "step": 9000
    },
    {
      "epoch": 0.87,
      "grad_norm": 14.261872291564941,
      "learning_rate": 2.6525929325378617e-08,
      "loss": 0.6073,
      "step": 9500
    },
    {
      "epoch": 0.91,
      "grad_norm": 7.5108466148376465,
      "learning_rate": 1.7347407067462138e-08,
      "loss": 0.6144,
      "step": 10000
    },
    {
      "epoch": 0.96,
      "grad_norm": 5.3811516761779785,
      "learning_rate": 8.168884809545663e-09,
      "loss": 0.613,
      "step": 10500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.899032037005311,
      "eval_loss": 0.5469852685928345,
      "eval_runtime": 2983.2159,
      "eval_samples_per_second": 78.265,
      "eval_steps_per_second": 9.783,
      "step": 10945
    }
  ],
  "logging_steps": 500,
  "max_steps": 10945,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 2.716308216840831e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}