|
{ |
|
"best_metric": 0.2866075932979584, |
|
"best_model_checkpoint": "./convnext-nano-1e-4-augment/checkpoint-2750", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.396906852722168, |
|
"learning_rate": 9.967408676742751e-05, |
|
"loss": 1.7369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 14.72342300415039, |
|
"learning_rate": 9.870059584711668e-05, |
|
"loss": 0.8282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8536779324055666, |
|
"eval_loss": 0.5148155689239502, |
|
"eval_runtime": 61.4101, |
|
"eval_samples_per_second": 40.954, |
|
"eval_steps_per_second": 0.651, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 16.179250717163086, |
|
"learning_rate": 9.709221818197624e-05, |
|
"loss": 0.6581, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 10.215620994567871, |
|
"learning_rate": 9.486992143456792e-05, |
|
"loss": 0.5141, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 8.687200546264648, |
|
"learning_rate": 9.206267664155907e-05, |
|
"loss": 0.5209, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.88389662027833, |
|
"eval_loss": 0.4150846004486084, |
|
"eval_runtime": 61.5346, |
|
"eval_samples_per_second": 40.871, |
|
"eval_steps_per_second": 0.65, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 10.711565971374512, |
|
"learning_rate": 8.870708053195413e-05, |
|
"loss": 0.4456, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 9.642666816711426, |
|
"learning_rate": 8.484687843276469e-05, |
|
"loss": 0.4067, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 11.507894515991211, |
|
"learning_rate": 8.053239398177191e-05, |
|
"loss": 0.3867, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9009940357852882, |
|
"eval_loss": 0.3642739951610565, |
|
"eval_runtime": 61.3025, |
|
"eval_samples_per_second": 41.026, |
|
"eval_steps_per_second": 0.653, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 13.279052734375, |
|
"learning_rate": 7.58198730819481e-05, |
|
"loss": 0.3452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 9.233952522277832, |
|
"learning_rate": 7.077075065009433e-05, |
|
"loss": 0.3216, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 9.883641242980957, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.3183, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9049701789264414, |
|
"eval_loss": 0.3240828812122345, |
|
"eval_runtime": 61.2041, |
|
"eval_samples_per_second": 41.092, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 10.317498207092285, |
|
"learning_rate": 5.992952333228728e-05, |
|
"loss": 0.2789, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 6.958377838134766, |
|
"learning_rate": 5.427875042394199e-05, |
|
"loss": 0.2679, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.904572564612326, |
|
"eval_loss": 0.32900604605674744, |
|
"eval_runtime": 61.4874, |
|
"eval_samples_per_second": 40.903, |
|
"eval_steps_per_second": 0.651, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 7.9765119552612305, |
|
"learning_rate": 4.85721974603152e-05, |
|
"loss": 0.261, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 6.655781269073486, |
|
"learning_rate": 4.288425808633575e-05, |
|
"loss": 0.2243, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"grad_norm": 9.778780937194824, |
|
"learning_rate": 3.728908329032567e-05, |
|
"loss": 0.2364, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9137176938369781, |
|
"eval_loss": 0.30884459614753723, |
|
"eval_runtime": 61.4411, |
|
"eval_samples_per_second": 40.934, |
|
"eval_steps_per_second": 0.651, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"grad_norm": 6.904848098754883, |
|
"learning_rate": 3.1859614732467954e-05, |
|
"loss": 0.2176, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"grad_norm": 6.360904216766357, |
|
"learning_rate": 2.6666633838716314e-05, |
|
"loss": 0.1943, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 9.572250366210938, |
|
"learning_rate": 2.1777839056661554e-05, |
|
"loss": 0.1981, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9137176938369781, |
|
"eval_loss": 0.29816073179244995, |
|
"eval_runtime": 62.0018, |
|
"eval_samples_per_second": 40.563, |
|
"eval_steps_per_second": 0.645, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"grad_norm": 4.384122371673584, |
|
"learning_rate": 1.725696330273575e-05, |
|
"loss": 0.1778, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 7.344178199768066, |
|
"learning_rate": 1.3162943106179749e-05, |
|
"loss": 0.1692, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.066224575042725, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.1704, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9168986083499006, |
|
"eval_loss": 0.2899409532546997, |
|
"eval_runtime": 61.3116, |
|
"eval_samples_per_second": 41.02, |
|
"eval_steps_per_second": 0.652, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"grad_norm": 7.729918003082275, |
|
"learning_rate": 6.462696144011149e-06, |
|
"loss": 0.1627, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"grad_norm": 9.76986312866211, |
|
"learning_rate": 3.9438173442575e-06, |
|
"loss": 0.1572, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.920079522862823, |
|
"eval_loss": 0.2868165671825409, |
|
"eval_runtime": 61.3101, |
|
"eval_samples_per_second": 41.021, |
|
"eval_steps_per_second": 0.652, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 7.546455383300781, |
|
"learning_rate": 2.0253513192751373e-06, |
|
"loss": 0.1548, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"grad_norm": 8.111244201660156, |
|
"learning_rate": 7.323082076153509e-07, |
|
"loss": 0.1519, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 8.917200088500977, |
|
"learning_rate": 8.15448036932176e-08, |
|
"loss": 0.168, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9204771371769384, |
|
"eval_loss": 0.2866075932979584, |
|
"eval_runtime": 61.2534, |
|
"eval_samples_per_second": 41.059, |
|
"eval_steps_per_second": 0.653, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2750, |
|
"total_flos": 7.000491898906214e+18, |
|
"train_loss": 0.3542752295407382, |
|
"train_runtime": 6594.6916, |
|
"train_samples_per_second": 26.659, |
|
"train_steps_per_second": 0.417 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 7.000491898906214e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|