{ "best_metric": 0.4497845768928528, "best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/DinoVdrone-large-2025_02_03_31850-bs32_freeze_probs/checkpoint-2052", "epoch": 37.0, "eval_steps": 500, "global_step": 2812, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.0698663592338562, "eval_kl_divergence": 1.791170597076416, "eval_loss": 0.5543646216392517, "eval_mae": 0.22781437635421753, "eval_rmse": 0.26052138209342957, "eval_runtime": 36.0517, "eval_samples_per_second": 22.218, "eval_steps_per_second": 0.721, "learning_rate": 0.001, "step": 76 }, { "epoch": 2.0, "eval_explained_variance": 0.07257537543773651, "eval_kl_divergence": 1.0344740152359009, "eval_loss": 0.48171326518058777, "eval_mae": 0.16019925475120544, "eval_rmse": 0.20066045224666595, "eval_runtime": 32.1858, "eval_samples_per_second": 24.887, "eval_steps_per_second": 0.808, "learning_rate": 0.001, "step": 152 }, { "epoch": 3.0, "eval_explained_variance": 0.27069091796875, "eval_kl_divergence": 0.6063743233680725, "eval_loss": 0.4615386724472046, "eval_mae": 0.1370290368795395, "eval_rmse": 0.18013149499893188, "eval_runtime": 22.3828, "eval_samples_per_second": 35.786, "eval_steps_per_second": 1.162, "learning_rate": 0.001, "step": 228 }, { "epoch": 4.0, "eval_explained_variance": 0.08923790603876114, "eval_kl_divergence": 0.6576936841011047, "eval_loss": 0.4632064700126648, "eval_mae": 0.1390942633152008, "eval_rmse": 0.18370357155799866, "eval_runtime": 22.4997, "eval_samples_per_second": 35.601, "eval_steps_per_second": 1.156, "learning_rate": 0.001, "step": 304 }, { "epoch": 5.0, "eval_explained_variance": 0.2999000549316406, "eval_kl_divergence": 0.6677561402320862, "eval_loss": 0.4579373300075531, "eval_mae": 0.13632091879844666, "eval_rmse": 0.17685972154140472, "eval_runtime": 22.4993, "eval_samples_per_second": 35.601, "eval_steps_per_second": 1.156, "learning_rate": 0.001, "step": 380 }, { "epoch": 6.0, "eval_explained_variance": 0.30495360493659973, "eval_kl_divergence": 0.7895806431770325, "eval_loss": 0.4571229815483093, "eval_mae": 0.13304883241653442, "eval_rmse": 0.176561176776886, "eval_runtime": 22.7, "eval_samples_per_second": 35.286, "eval_steps_per_second": 1.145, "learning_rate": 0.001, "step": 456 }, { "epoch": 6.578947368421053, "grad_norm": 0.27282145619392395, "learning_rate": 0.001, "loss": 0.4966, "step": 500 }, { "epoch": 7.0, "eval_explained_variance": 0.3103345036506653, "eval_kl_divergence": 0.6492787599563599, "eval_loss": 0.4586440622806549, "eval_mae": 0.13074660301208496, "eval_rmse": 0.17730861902236938, "eval_runtime": 22.4946, "eval_samples_per_second": 35.608, "eval_steps_per_second": 1.156, "learning_rate": 0.001, "step": 532 }, { "epoch": 8.0, "eval_explained_variance": 0.30848240852355957, "eval_kl_divergence": 0.9475247263908386, "eval_loss": 0.45786425471305847, "eval_mae": 0.13187319040298462, "eval_rmse": 0.17719128727912903, "eval_runtime": 22.513, "eval_samples_per_second": 35.579, "eval_steps_per_second": 1.155, "learning_rate": 0.001, "step": 608 }, { "epoch": 9.0, "eval_explained_variance": 0.3135569095611572, "eval_kl_divergence": 0.7270792126655579, "eval_loss": 0.4551210105419159, "eval_mae": 0.13063107430934906, "eval_rmse": 0.17461702227592468, "eval_runtime": 22.4949, "eval_samples_per_second": 35.608, "eval_steps_per_second": 1.156, "learning_rate": 0.001, "step": 684 }, { "epoch": 10.0, "eval_explained_variance": 0.29183268547058105, "eval_kl_divergence": 0.6882225275039673, "eval_loss": 0.45817646384239197, "eval_mae": 0.13160471618175507, "eval_rmse": 0.17741131782531738, "eval_runtime": 22.508, "eval_samples_per_second": 35.587, "eval_steps_per_second": 1.155, "learning_rate": 0.001, "step": 760 }, { "epoch": 11.0, "eval_explained_variance": 0.25859755277633667, "eval_kl_divergence": 0.371459424495697, "eval_loss": 0.46834859251976013, "eval_mae": 0.13717466592788696, "eval_rmse": 0.18415462970733643, "eval_runtime": 22.4535, "eval_samples_per_second": 35.674, "eval_steps_per_second": 1.158, "learning_rate": 0.001, "step": 836 }, { "epoch": 12.0, "eval_explained_variance": 0.308597594499588, "eval_kl_divergence": 0.5270651578903198, "eval_loss": 0.4578668475151062, "eval_mae": 0.13155478239059448, "eval_rmse": 0.17636829614639282, "eval_runtime": 22.5868, "eval_samples_per_second": 35.463, "eval_steps_per_second": 1.151, "learning_rate": 0.001, "step": 912 }, { "epoch": 13.0, "eval_explained_variance": 0.3100161552429199, "eval_kl_divergence": 0.9167731404304504, "eval_loss": 0.4558842182159424, "eval_mae": 0.1300584226846695, "eval_rmse": 0.1756095588207245, "eval_runtime": 22.7991, "eval_samples_per_second": 35.133, "eval_steps_per_second": 1.14, "learning_rate": 0.001, "step": 988 }, { "epoch": 13.157894736842104, "grad_norm": 0.26468560099601746, "learning_rate": 0.001, "loss": 0.4448, "step": 1000 }, { "epoch": 14.0, "eval_explained_variance": 0.3228832185268402, "eval_kl_divergence": 0.8826888799667358, "eval_loss": 0.4555540680885315, "eval_mae": 0.12918463349342346, "eval_rmse": 0.17491525411605835, "eval_runtime": 22.7093, "eval_samples_per_second": 35.272, "eval_steps_per_second": 1.145, "learning_rate": 0.001, "step": 1064 }, { "epoch": 15.0, "eval_explained_variance": 0.34160685539245605, "eval_kl_divergence": 0.7008672952651978, "eval_loss": 0.45217740535736084, "eval_mae": 0.1262015700340271, "eval_rmse": 0.17165420949459076, "eval_runtime": 22.6104, "eval_samples_per_second": 35.426, "eval_steps_per_second": 1.15, "learning_rate": 0.001, "step": 1140 }, { "epoch": 16.0, "eval_explained_variance": 0.31633102893829346, "eval_kl_divergence": 1.0038130283355713, "eval_loss": 0.45556434988975525, "eval_mae": 0.12863175570964813, "eval_rmse": 0.1752910166978836, "eval_runtime": 22.5805, "eval_samples_per_second": 35.473, "eval_steps_per_second": 1.151, "learning_rate": 0.001, "step": 1216 }, { "epoch": 17.0, "eval_explained_variance": 0.3205307126045227, "eval_kl_divergence": 0.2600082457065582, "eval_loss": 0.458648681640625, "eval_mae": 0.13426683843135834, "eval_rmse": 0.17750133574008942, "eval_runtime": 22.3997, "eval_samples_per_second": 35.759, "eval_steps_per_second": 1.161, "learning_rate": 0.001, "step": 1292 }, { "epoch": 18.0, "eval_explained_variance": -4.778772354125977, "eval_kl_divergence": 2.054769277572632, "eval_loss": 0.567169725894928, "eval_mae": 0.16376179456710815, "eval_rmse": 0.23688165843486786, "eval_runtime": 22.2481, "eval_samples_per_second": 36.003, "eval_steps_per_second": 1.169, "learning_rate": 0.001, "step": 1368 }, { "epoch": 19.0, "eval_explained_variance": 0.32792216539382935, "eval_kl_divergence": 0.7114961743354797, "eval_loss": 0.45287612080574036, "eval_mae": 0.12865176796913147, "eval_rmse": 0.17274516820907593, "eval_runtime": 22.3304, "eval_samples_per_second": 35.87, "eval_steps_per_second": 1.164, "learning_rate": 0.001, "step": 1444 }, { "epoch": 19.736842105263158, "grad_norm": 0.1475011706352234, "learning_rate": 0.001, "loss": 0.4406, "step": 1500 }, { "epoch": 20.0, "eval_explained_variance": 0.3204135596752167, "eval_kl_divergence": 0.9694227576255798, "eval_loss": 0.45518893003463745, "eval_mae": 0.12852200865745544, "eval_rmse": 0.17462262511253357, "eval_runtime": 22.5552, "eval_samples_per_second": 35.513, "eval_steps_per_second": 1.153, "learning_rate": 0.001, "step": 1520 }, { "epoch": 21.0, "eval_explained_variance": 0.32996666431427, "eval_kl_divergence": 0.778915524482727, "eval_loss": 0.45299893617630005, "eval_mae": 0.12820784747600555, "eval_rmse": 0.17243456840515137, "eval_runtime": 22.5861, "eval_samples_per_second": 35.464, "eval_steps_per_second": 1.151, "learning_rate": 0.001, "step": 1596 }, { "epoch": 22.0, "eval_explained_variance": 0.34726351499557495, "eval_kl_divergence": 0.7368760704994202, "eval_loss": 0.4502638280391693, "eval_mae": 0.12613575160503387, "eval_rmse": 0.17001575231552124, "eval_runtime": 22.7271, "eval_samples_per_second": 35.244, "eval_steps_per_second": 1.144, "learning_rate": 0.0001, "step": 1672 }, { "epoch": 23.0, "eval_explained_variance": 0.34029728174209595, "eval_kl_divergence": 0.5027008056640625, "eval_loss": 0.453466534614563, "eval_mae": 0.12802833318710327, "eval_rmse": 0.17160943150520325, "eval_runtime": 22.4563, "eval_samples_per_second": 35.669, "eval_steps_per_second": 1.158, "learning_rate": 0.0001, "step": 1748 }, { "epoch": 24.0, "eval_explained_variance": 0.3511368930339813, "eval_kl_divergence": 0.5968054533004761, "eval_loss": 0.4502425491809845, "eval_mae": 0.12641073763370514, "eval_rmse": 0.16971111297607422, "eval_runtime": 22.592, "eval_samples_per_second": 35.455, "eval_steps_per_second": 1.151, "learning_rate": 0.0001, "step": 1824 }, { "epoch": 25.0, "eval_explained_variance": 0.3504308760166168, "eval_kl_divergence": 0.621475100517273, "eval_loss": 0.45040303468704224, "eval_mae": 0.12673497200012207, "eval_rmse": 0.1699284017086029, "eval_runtime": 22.3202, "eval_samples_per_second": 35.887, "eval_steps_per_second": 1.165, "learning_rate": 0.0001, "step": 1900 }, { "epoch": 26.0, "eval_explained_variance": 0.34598857164382935, "eval_kl_divergence": 0.6567814350128174, "eval_loss": 0.4509589374065399, "eval_mae": 0.12596669793128967, "eval_rmse": 0.17043226957321167, "eval_runtime": 22.4313, "eval_samples_per_second": 35.709, "eval_steps_per_second": 1.159, "learning_rate": 0.0001, "step": 1976 }, { "epoch": 26.31578947368421, "grad_norm": 0.16591614484786987, "learning_rate": 0.0001, "loss": 0.4334, "step": 2000 }, { "epoch": 27.0, "eval_explained_variance": 0.35463404655456543, "eval_kl_divergence": 0.5748001337051392, "eval_loss": 0.4497845768928528, "eval_mae": 0.1262420266866684, "eval_rmse": 0.1693224012851715, "eval_runtime": 22.3409, "eval_samples_per_second": 35.854, "eval_steps_per_second": 1.164, "learning_rate": 0.0001, "step": 2052 }, { "epoch": 28.0, "eval_explained_variance": 0.34665071964263916, "eval_kl_divergence": 0.7001035809516907, "eval_loss": 0.45060041546821594, "eval_mae": 0.12559720873832703, "eval_rmse": 0.17011338472366333, "eval_runtime": 22.4894, "eval_samples_per_second": 35.617, "eval_steps_per_second": 1.156, "learning_rate": 0.0001, "step": 2128 }, { "epoch": 29.0, "eval_explained_variance": 0.3531297743320465, "eval_kl_divergence": 0.5840001702308655, "eval_loss": 0.4504892826080322, "eval_mae": 0.12626922130584717, "eval_rmse": 0.16992022097110748, "eval_runtime": 22.4286, "eval_samples_per_second": 35.713, "eval_steps_per_second": 1.159, "learning_rate": 0.0001, "step": 2204 }, { "epoch": 30.0, "eval_explained_variance": 0.34863966703414917, "eval_kl_divergence": 0.8101097345352173, "eval_loss": 0.45060065388679504, "eval_mae": 0.12516793608665466, "eval_rmse": 0.1702672839164734, "eval_runtime": 22.3007, "eval_samples_per_second": 35.918, "eval_steps_per_second": 1.166, "learning_rate": 0.0001, "step": 2280 }, { "epoch": 31.0, "eval_explained_variance": 0.3488965630531311, "eval_kl_divergence": 0.7415657043457031, "eval_loss": 0.45080825686454773, "eval_mae": 0.12486829608678818, "eval_rmse": 0.1701475977897644, "eval_runtime": 22.3895, "eval_samples_per_second": 35.776, "eval_steps_per_second": 1.161, "learning_rate": 0.0001, "step": 2356 }, { "epoch": 32.0, "eval_explained_variance": 0.3523526191711426, "eval_kl_divergence": 0.6401851177215576, "eval_loss": 0.4501984417438507, "eval_mae": 0.12540514767169952, "eval_rmse": 0.16971096396446228, "eval_runtime": 22.3438, "eval_samples_per_second": 35.849, "eval_steps_per_second": 1.164, "learning_rate": 0.0001, "step": 2432 }, { "epoch": 32.89473684210526, "grad_norm": 0.1912919282913208, "learning_rate": 0.0001, "loss": 0.4289, "step": 2500 }, { "epoch": 33.0, "eval_explained_variance": 0.33917075395584106, "eval_kl_divergence": 0.8411455154418945, "eval_loss": 0.4510658085346222, "eval_mae": 0.12500226497650146, "eval_rmse": 0.1709534227848053, "eval_runtime": 22.56, "eval_samples_per_second": 35.505, "eval_steps_per_second": 1.152, "learning_rate": 0.0001, "step": 2508 }, { "epoch": 34.0, "eval_explained_variance": 0.33831754326820374, "eval_kl_divergence": 0.7203648686408997, "eval_loss": 0.45148056745529175, "eval_mae": 0.12593072652816772, "eval_rmse": 0.17108403146266937, "eval_runtime": 22.505, "eval_samples_per_second": 35.592, "eval_steps_per_second": 1.155, "learning_rate": 1e-05, "step": 2584 }, { "epoch": 35.0, "eval_explained_variance": 0.34977057576179504, "eval_kl_divergence": 0.7354820966720581, "eval_loss": 0.4502483904361725, "eval_mae": 0.12473371624946594, "eval_rmse": 0.16982755064964294, "eval_runtime": 22.3912, "eval_samples_per_second": 35.773, "eval_steps_per_second": 1.161, "learning_rate": 1e-05, "step": 2660 }, { "epoch": 36.0, "eval_explained_variance": 0.3486325144767761, "eval_kl_divergence": 0.49899470806121826, "eval_loss": 0.4508889615535736, "eval_mae": 0.1260843575000763, "eval_rmse": 0.1702878773212433, "eval_runtime": 22.8537, "eval_samples_per_second": 35.049, "eval_steps_per_second": 1.138, "learning_rate": 1e-05, "step": 2736 }, { "epoch": 37.0, "eval_explained_variance": 0.3535779118537903, "eval_kl_divergence": 0.5451197624206543, "eval_loss": 0.44998663663864136, "eval_mae": 0.12602195143699646, "eval_rmse": 0.16962522268295288, "eval_runtime": 22.2216, "eval_samples_per_second": 36.046, "eval_steps_per_second": 1.17, "learning_rate": 1e-05, "step": 2812 }, { "epoch": 37.0, "learning_rate": 1e-05, "step": 2812, "total_flos": 1.318896404308369e+20, "train_loss": 0.4464974437295797, "train_runtime": 4164.1679, "train_samples_per_second": 86.596, "train_steps_per_second": 2.738 } ], "logging_steps": 500, "max_steps": 11400, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 10 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.318896404308369e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }