{ "best_metric": 0.6006270051002502, "best_model_checkpoint": "/mnt/ml_drive/kcardenas/target_hold_hands/checkpoint-624", "epoch": 20.0, "eval_steps": 500, "global_step": 1040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4807692307692308, "grad_norm": 772638.0, "learning_rate": 5e-06, "loss": 2.0721, "step": 25 }, { "epoch": 0.9615384615384616, "grad_norm": 57469.69140625, "learning_rate": 1e-05, "loss": 0.957, "step": 50 }, { "epoch": 1.0, "eval_IoU": 0.0, "eval_loss": 0.8556730151176453, "eval_runtime": 8.489, "eval_samples_per_second": 68.324, "eval_steps_per_second": 1.178, "step": 52 }, { "epoch": 1.4423076923076923, "grad_norm": 77164.875, "learning_rate": 1.5e-05, "loss": 0.8264, "step": 75 }, { "epoch": 1.9230769230769231, "grad_norm": 95215.3828125, "learning_rate": 2e-05, "loss": 0.7261, "step": 100 }, { "epoch": 2.0, "eval_IoU": 7.7851798261353e-09, "eval_loss": 0.6803418397903442, "eval_runtime": 8.5727, "eval_samples_per_second": 67.657, "eval_steps_per_second": 1.166, "step": 104 }, { "epoch": 2.4038461538461537, "grad_norm": 78873.3359375, "learning_rate": 2.5e-05, "loss": 0.6825, "step": 125 }, { "epoch": 2.8846153846153846, "grad_norm": 65023.43359375, "learning_rate": 3e-05, "loss": 0.6689, "step": 150 }, { "epoch": 3.0, "eval_IoU": 3.1203175320106754e-07, "eval_loss": 0.6544784903526306, "eval_runtime": 8.4506, "eval_samples_per_second": 68.634, "eval_steps_per_second": 1.183, "step": 156 }, { "epoch": 3.3653846153846154, "grad_norm": 187234.5, "learning_rate": 3.5e-05, "loss": 0.7322, "step": 175 }, { "epoch": 3.8461538461538463, "grad_norm": 207238.46875, "learning_rate": 4e-05, "loss": 0.7126, "step": 200 }, { "epoch": 4.0, "eval_IoU": 7.16540375536177e-08, "eval_loss": 0.702006459236145, "eval_runtime": 8.1834, "eval_samples_per_second": 70.875, "eval_steps_per_second": 1.222, "step": 208 }, { "epoch": 4.326923076923077, "grad_norm": 70302.0625, "learning_rate": 4.5e-05, "loss": 0.7118, "step": 225 }, { "epoch": 4.8076923076923075, "grad_norm": 106005.375, "learning_rate": 5e-05, "loss": 0.6688, "step": 250 }, { "epoch": 5.0, "eval_IoU": 5.327572640987815e-07, "eval_loss": 0.6712496876716614, "eval_runtime": 8.4417, "eval_samples_per_second": 68.707, "eval_steps_per_second": 1.185, "step": 260 }, { "epoch": 5.288461538461538, "grad_norm": 181192.4375, "learning_rate": 4.8417721518987346e-05, "loss": 0.729, "step": 275 }, { "epoch": 5.769230769230769, "grad_norm": 527063.125, "learning_rate": 4.683544303797468e-05, "loss": 0.7126, "step": 300 }, { "epoch": 6.0, "eval_IoU": 8.267290868345697e-08, "eval_loss": 0.6633245944976807, "eval_runtime": 8.2611, "eval_samples_per_second": 70.208, "eval_steps_per_second": 1.21, "step": 312 }, { "epoch": 6.25, "grad_norm": 36219.71875, "learning_rate": 4.525316455696203e-05, "loss": 0.6747, "step": 325 }, { "epoch": 6.730769230769231, "grad_norm": 89007.9921875, "learning_rate": 4.367088607594937e-05, "loss": 0.6633, "step": 350 }, { "epoch": 7.0, "eval_IoU": 1.3435934524923367e-07, "eval_loss": 0.6083313226699829, "eval_runtime": 8.3505, "eval_samples_per_second": 69.457, "eval_steps_per_second": 1.198, "step": 364 }, { "epoch": 7.211538461538462, "grad_norm": 32232.62109375, "learning_rate": 4.208860759493671e-05, "loss": 0.611, "step": 375 }, { "epoch": 7.6923076923076925, "grad_norm": 54438.1484375, "learning_rate": 4.050632911392405e-05, "loss": 0.6113, "step": 400 }, { "epoch": 8.0, "eval_IoU": 2.002574751254223e-07, "eval_loss": 0.6060946583747864, "eval_runtime": 8.5578, "eval_samples_per_second": 67.774, "eval_steps_per_second": 1.169, "step": 416 }, { "epoch": 8.173076923076923, "grad_norm": 47105.93359375, "learning_rate": 3.89240506329114e-05, "loss": 0.6072, "step": 425 }, { "epoch": 8.653846153846153, "grad_norm": 37276.02734375, "learning_rate": 3.7341772151898736e-05, "loss": 0.6101, "step": 450 }, { "epoch": 9.0, "eval_IoU": 1.3619726913206721e-07, "eval_loss": 0.6026849150657654, "eval_runtime": 8.1149, "eval_samples_per_second": 71.473, "eval_steps_per_second": 1.232, "step": 468 }, { "epoch": 9.134615384615385, "grad_norm": 27913.052734375, "learning_rate": 3.575949367088608e-05, "loss": 0.6014, "step": 475 }, { "epoch": 9.615384615384615, "grad_norm": 44065.1796875, "learning_rate": 3.4177215189873416e-05, "loss": 0.6028, "step": 500 }, { "epoch": 10.0, "eval_IoU": 0.0, "eval_loss": 0.6006531715393066, "eval_runtime": 8.1108, "eval_samples_per_second": 71.509, "eval_steps_per_second": 1.233, "step": 520 }, { "epoch": 10.096153846153847, "grad_norm": 54902.17578125, "learning_rate": 3.2594936708860766e-05, "loss": 0.6069, "step": 525 }, { "epoch": 10.576923076923077, "grad_norm": 47223.47265625, "learning_rate": 3.10126582278481e-05, "loss": 0.5983, "step": 550 }, { "epoch": 11.0, "eval_IoU": 3.227043016392884e-09, "eval_loss": 0.6019152402877808, "eval_runtime": 8.2699, "eval_samples_per_second": 70.134, "eval_steps_per_second": 1.209, "step": 572 }, { "epoch": 11.057692307692308, "grad_norm": 44127.5, "learning_rate": 2.9430379746835446e-05, "loss": 0.5984, "step": 575 }, { "epoch": 11.538461538461538, "grad_norm": 65407.2578125, "learning_rate": 2.7848101265822786e-05, "loss": 0.6014, "step": 600 }, { "epoch": 12.0, "eval_IoU": 1.2153273194260533e-07, "eval_loss": 0.6006270051002502, "eval_runtime": 8.3368, "eval_samples_per_second": 69.571, "eval_steps_per_second": 1.199, "step": 624 }, { "epoch": 12.01923076923077, "grad_norm": 72088.734375, "learning_rate": 2.626582278481013e-05, "loss": 0.5996, "step": 625 }, { "epoch": 12.5, "grad_norm": 54947.15234375, "learning_rate": 2.468354430379747e-05, "loss": 0.5988, "step": 650 }, { "epoch": 12.98076923076923, "grad_norm": 36718.6796875, "learning_rate": 2.3101265822784813e-05, "loss": 0.5968, "step": 675 }, { "epoch": 13.0, "eval_IoU": 1.0654944237144431e-07, "eval_loss": 0.6013975739479065, "eval_runtime": 8.4006, "eval_samples_per_second": 69.043, "eval_steps_per_second": 1.19, "step": 676 }, { "epoch": 13.461538461538462, "grad_norm": 44921.89453125, "learning_rate": 2.1518987341772153e-05, "loss": 0.599, "step": 700 }, { "epoch": 13.942307692307692, "grad_norm": 45908.14453125, "learning_rate": 1.9936708860759496e-05, "loss": 0.5932, "step": 725 }, { "epoch": 14.0, "eval_IoU": 4.530420583131857e-07, "eval_loss": 0.6021161675453186, "eval_runtime": 7.9829, "eval_samples_per_second": 72.656, "eval_steps_per_second": 1.253, "step": 728 }, { "epoch": 14.423076923076923, "grad_norm": 51494.0, "learning_rate": 1.8354430379746836e-05, "loss": 0.5967, "step": 750 }, { "epoch": 14.903846153846153, "grad_norm": 122982.484375, "learning_rate": 1.677215189873418e-05, "loss": 0.592, "step": 775 }, { "epoch": 15.0, "eval_IoU": 0.0, "eval_loss": 0.6046732664108276, "eval_runtime": 8.5652, "eval_samples_per_second": 67.716, "eval_steps_per_second": 1.168, "step": 780 }, { "epoch": 15.384615384615385, "grad_norm": 51938.21484375, "learning_rate": 1.5189873417721521e-05, "loss": 0.5907, "step": 800 }, { "epoch": 15.865384615384615, "grad_norm": 316787.84375, "learning_rate": 1.3607594936708861e-05, "loss": 0.5935, "step": 825 }, { "epoch": 16.0, "eval_IoU": 1.7956697673278814e-07, "eval_loss": 0.6020213961601257, "eval_runtime": 8.4008, "eval_samples_per_second": 69.041, "eval_steps_per_second": 1.19, "step": 832 }, { "epoch": 16.346153846153847, "grad_norm": 31160.560546875, "learning_rate": 1.2025316455696203e-05, "loss": 0.59, "step": 850 }, { "epoch": 16.826923076923077, "grad_norm": 61832.81640625, "learning_rate": 1.0443037974683544e-05, "loss": 0.5873, "step": 875 }, { "epoch": 17.0, "eval_IoU": 2.442868828912477e-08, "eval_loss": 0.6026484966278076, "eval_runtime": 8.4706, "eval_samples_per_second": 68.472, "eval_steps_per_second": 1.181, "step": 884 }, { "epoch": 17.307692307692307, "grad_norm": 37131.21484375, "learning_rate": 8.860759493670886e-06, "loss": 0.591, "step": 900 }, { "epoch": 17.78846153846154, "grad_norm": 42635.22265625, "learning_rate": 7.2784810126582285e-06, "loss": 0.5853, "step": 925 }, { "epoch": 18.0, "eval_IoU": 6.648389312912634e-09, "eval_loss": 0.6114510893821716, "eval_runtime": 8.1171, "eval_samples_per_second": 71.454, "eval_steps_per_second": 1.232, "step": 936 }, { "epoch": 18.26923076923077, "grad_norm": 40143.515625, "learning_rate": 5.69620253164557e-06, "loss": 0.5918, "step": 950 }, { "epoch": 18.75, "grad_norm": 38210.26953125, "learning_rate": 4.113924050632911e-06, "loss": 0.5835, "step": 975 }, { "epoch": 19.0, "eval_IoU": 1.853719873045982e-07, "eval_loss": 0.6087284088134766, "eval_runtime": 8.3102, "eval_samples_per_second": 69.794, "eval_steps_per_second": 1.203, "step": 988 }, { "epoch": 19.23076923076923, "grad_norm": 54590.265625, "learning_rate": 2.531645569620253e-06, "loss": 0.5897, "step": 1000 }, { "epoch": 19.71153846153846, "grad_norm": 47306.3046875, "learning_rate": 9.493670886075951e-07, "loss": 0.5868, "step": 1025 }, { "epoch": 20.0, "eval_IoU": 1.5521619976116134e-07, "eval_loss": 0.6075738072395325, "eval_runtime": 8.5226, "eval_samples_per_second": 68.055, "eval_steps_per_second": 1.173, "step": 1040 }, { "epoch": 20.0, "step": 1040, "total_flos": 0.0, "train_loss": 0.6731257475339449, "train_runtime": 3556.481, "train_samples_per_second": 18.479, "train_steps_per_second": 0.292 } ], "logging_steps": 25, "max_steps": 1040, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }