|
{ |
|
"best_metric": 0.4497845768928528, |
|
"best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/DinoVdrone-large-2025_02_03_31850-bs32_freeze_probs/checkpoint-2052", |
|
"epoch": 37.0, |
|
"eval_steps": 500, |
|
"global_step": 2812, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_explained_variance": 0.0698663592338562, |
|
"eval_kl_divergence": 1.791170597076416, |
|
"eval_loss": 0.5543646216392517, |
|
"eval_mae": 0.22781437635421753, |
|
"eval_rmse": 0.26052138209342957, |
|
"eval_runtime": 36.0517, |
|
"eval_samples_per_second": 22.218, |
|
"eval_steps_per_second": 0.721, |
|
"learning_rate": 0.001, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_explained_variance": 0.07257537543773651, |
|
"eval_kl_divergence": 1.0344740152359009, |
|
"eval_loss": 0.48171326518058777, |
|
"eval_mae": 0.16019925475120544, |
|
"eval_rmse": 0.20066045224666595, |
|
"eval_runtime": 32.1858, |
|
"eval_samples_per_second": 24.887, |
|
"eval_steps_per_second": 0.808, |
|
"learning_rate": 0.001, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_explained_variance": 0.27069091796875, |
|
"eval_kl_divergence": 0.6063743233680725, |
|
"eval_loss": 0.4615386724472046, |
|
"eval_mae": 0.1370290368795395, |
|
"eval_rmse": 0.18013149499893188, |
|
"eval_runtime": 22.3828, |
|
"eval_samples_per_second": 35.786, |
|
"eval_steps_per_second": 1.162, |
|
"learning_rate": 0.001, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_explained_variance": 0.08923790603876114, |
|
"eval_kl_divergence": 0.6576936841011047, |
|
"eval_loss": 0.4632064700126648, |
|
"eval_mae": 0.1390942633152008, |
|
"eval_rmse": 0.18370357155799866, |
|
"eval_runtime": 22.4997, |
|
"eval_samples_per_second": 35.601, |
|
"eval_steps_per_second": 1.156, |
|
"learning_rate": 0.001, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_explained_variance": 0.2999000549316406, |
|
"eval_kl_divergence": 0.6677561402320862, |
|
"eval_loss": 0.4579373300075531, |
|
"eval_mae": 0.13632091879844666, |
|
"eval_rmse": 0.17685972154140472, |
|
"eval_runtime": 22.4993, |
|
"eval_samples_per_second": 35.601, |
|
"eval_steps_per_second": 1.156, |
|
"learning_rate": 0.001, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_explained_variance": 0.30495360493659973, |
|
"eval_kl_divergence": 0.7895806431770325, |
|
"eval_loss": 0.4571229815483093, |
|
"eval_mae": 0.13304883241653442, |
|
"eval_rmse": 0.176561176776886, |
|
"eval_runtime": 22.7, |
|
"eval_samples_per_second": 35.286, |
|
"eval_steps_per_second": 1.145, |
|
"learning_rate": 0.001, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 6.578947368421053, |
|
"grad_norm": 0.27282145619392395, |
|
"learning_rate": 0.001, |
|
"loss": 0.4966, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_explained_variance": 0.3103345036506653, |
|
"eval_kl_divergence": 0.6492787599563599, |
|
"eval_loss": 0.4586440622806549, |
|
"eval_mae": 0.13074660301208496, |
|
"eval_rmse": 0.17730861902236938, |
|
"eval_runtime": 22.4946, |
|
"eval_samples_per_second": 35.608, |
|
"eval_steps_per_second": 1.156, |
|
"learning_rate": 0.001, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_explained_variance": 0.30848240852355957, |
|
"eval_kl_divergence": 0.9475247263908386, |
|
"eval_loss": 0.45786425471305847, |
|
"eval_mae": 0.13187319040298462, |
|
"eval_rmse": 0.17719128727912903, |
|
"eval_runtime": 22.513, |
|
"eval_samples_per_second": 35.579, |
|
"eval_steps_per_second": 1.155, |
|
"learning_rate": 0.001, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_explained_variance": 0.3135569095611572, |
|
"eval_kl_divergence": 0.7270792126655579, |
|
"eval_loss": 0.4551210105419159, |
|
"eval_mae": 0.13063107430934906, |
|
"eval_rmse": 0.17461702227592468, |
|
"eval_runtime": 22.4949, |
|
"eval_samples_per_second": 35.608, |
|
"eval_steps_per_second": 1.156, |
|
"learning_rate": 0.001, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_explained_variance": 0.29183268547058105, |
|
"eval_kl_divergence": 0.6882225275039673, |
|
"eval_loss": 0.45817646384239197, |
|
"eval_mae": 0.13160471618175507, |
|
"eval_rmse": 0.17741131782531738, |
|
"eval_runtime": 22.508, |
|
"eval_samples_per_second": 35.587, |
|
"eval_steps_per_second": 1.155, |
|
"learning_rate": 0.001, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_explained_variance": 0.25859755277633667, |
|
"eval_kl_divergence": 0.371459424495697, |
|
"eval_loss": 0.46834859251976013, |
|
"eval_mae": 0.13717466592788696, |
|
"eval_rmse": 0.18415462970733643, |
|
"eval_runtime": 22.4535, |
|
"eval_samples_per_second": 35.674, |
|
"eval_steps_per_second": 1.158, |
|
"learning_rate": 0.001, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_explained_variance": 0.308597594499588, |
|
"eval_kl_divergence": 0.5270651578903198, |
|
"eval_loss": 0.4578668475151062, |
|
"eval_mae": 0.13155478239059448, |
|
"eval_rmse": 0.17636829614639282, |
|
"eval_runtime": 22.5868, |
|
"eval_samples_per_second": 35.463, |
|
"eval_steps_per_second": 1.151, |
|
"learning_rate": 0.001, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_explained_variance": 0.3100161552429199, |
|
"eval_kl_divergence": 0.9167731404304504, |
|
"eval_loss": 0.4558842182159424, |
|
"eval_mae": 0.1300584226846695, |
|
"eval_rmse": 0.1756095588207245, |
|
"eval_runtime": 22.7991, |
|
"eval_samples_per_second": 35.133, |
|
"eval_steps_per_second": 1.14, |
|
"learning_rate": 0.001, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 13.157894736842104, |
|
"grad_norm": 0.26468560099601746, |
|
"learning_rate": 0.001, |
|
"loss": 0.4448, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_explained_variance": 0.3228832185268402, |
|
"eval_kl_divergence": 0.8826888799667358, |
|
"eval_loss": 0.4555540680885315, |
|
"eval_mae": 0.12918463349342346, |
|
"eval_rmse": 0.17491525411605835, |
|
"eval_runtime": 22.7093, |
|
"eval_samples_per_second": 35.272, |
|
"eval_steps_per_second": 1.145, |
|
"learning_rate": 0.001, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_explained_variance": 0.34160685539245605, |
|
"eval_kl_divergence": 0.7008672952651978, |
|
"eval_loss": 0.45217740535736084, |
|
"eval_mae": 0.1262015700340271, |
|
"eval_rmse": 0.17165420949459076, |
|
"eval_runtime": 22.6104, |
|
"eval_samples_per_second": 35.426, |
|
"eval_steps_per_second": 1.15, |
|
"learning_rate": 0.001, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_explained_variance": 0.31633102893829346, |
|
"eval_kl_divergence": 1.0038130283355713, |
|
"eval_loss": 0.45556434988975525, |
|
"eval_mae": 0.12863175570964813, |
|
"eval_rmse": 0.1752910166978836, |
|
"eval_runtime": 22.5805, |
|
"eval_samples_per_second": 35.473, |
|
"eval_steps_per_second": 1.151, |
|
"learning_rate": 0.001, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_explained_variance": 0.3205307126045227, |
|
"eval_kl_divergence": 0.2600082457065582, |
|
"eval_loss": 0.458648681640625, |
|
"eval_mae": 0.13426683843135834, |
|
"eval_rmse": 0.17750133574008942, |
|
"eval_runtime": 22.3997, |
|
"eval_samples_per_second": 35.759, |
|
"eval_steps_per_second": 1.161, |
|
"learning_rate": 0.001, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_explained_variance": -4.778772354125977, |
|
"eval_kl_divergence": 2.054769277572632, |
|
"eval_loss": 0.567169725894928, |
|
"eval_mae": 0.16376179456710815, |
|
"eval_rmse": 0.23688165843486786, |
|
"eval_runtime": 22.2481, |
|
"eval_samples_per_second": 36.003, |
|
"eval_steps_per_second": 1.169, |
|
"learning_rate": 0.001, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_explained_variance": 0.32792216539382935, |
|
"eval_kl_divergence": 0.7114961743354797, |
|
"eval_loss": 0.45287612080574036, |
|
"eval_mae": 0.12865176796913147, |
|
"eval_rmse": 0.17274516820907593, |
|
"eval_runtime": 22.3304, |
|
"eval_samples_per_second": 35.87, |
|
"eval_steps_per_second": 1.164, |
|
"learning_rate": 0.001, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 19.736842105263158, |
|
"grad_norm": 0.1475011706352234, |
|
"learning_rate": 0.001, |
|
"loss": 0.4406, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_explained_variance": 0.3204135596752167, |
|
"eval_kl_divergence": 0.9694227576255798, |
|
"eval_loss": 0.45518893003463745, |
|
"eval_mae": 0.12852200865745544, |
|
"eval_rmse": 0.17462262511253357, |
|
"eval_runtime": 22.5552, |
|
"eval_samples_per_second": 35.513, |
|
"eval_steps_per_second": 1.153, |
|
"learning_rate": 0.001, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_explained_variance": 0.32996666431427, |
|
"eval_kl_divergence": 0.778915524482727, |
|
"eval_loss": 0.45299893617630005, |
|
"eval_mae": 0.12820784747600555, |
|
"eval_rmse": 0.17243456840515137, |
|
"eval_runtime": 22.5861, |
|
"eval_samples_per_second": 35.464, |
|
"eval_steps_per_second": 1.151, |
|
"learning_rate": 0.001, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_explained_variance": 0.34726351499557495, |
|
"eval_kl_divergence": 0.7368760704994202, |
|
"eval_loss": 0.4502638280391693, |
|
"eval_mae": 0.12613575160503387, |
|
"eval_rmse": 0.17001575231552124, |
|
"eval_runtime": 22.7271, |
|
"eval_samples_per_second": 35.244, |
|
"eval_steps_per_second": 1.144, |
|
"learning_rate": 0.0001, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_explained_variance": 0.34029728174209595, |
|
"eval_kl_divergence": 0.5027008056640625, |
|
"eval_loss": 0.453466534614563, |
|
"eval_mae": 0.12802833318710327, |
|
"eval_rmse": 0.17160943150520325, |
|
"eval_runtime": 22.4563, |
|
"eval_samples_per_second": 35.669, |
|
"eval_steps_per_second": 1.158, |
|
"learning_rate": 0.0001, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_explained_variance": 0.3511368930339813, |
|
"eval_kl_divergence": 0.5968054533004761, |
|
"eval_loss": 0.4502425491809845, |
|
"eval_mae": 0.12641073763370514, |
|
"eval_rmse": 0.16971111297607422, |
|
"eval_runtime": 22.592, |
|
"eval_samples_per_second": 35.455, |
|
"eval_steps_per_second": 1.151, |
|
"learning_rate": 0.0001, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_explained_variance": 0.3504308760166168, |
|
"eval_kl_divergence": 0.621475100517273, |
|
"eval_loss": 0.45040303468704224, |
|
"eval_mae": 0.12673497200012207, |
|
"eval_rmse": 0.1699284017086029, |
|
"eval_runtime": 22.3202, |
|
"eval_samples_per_second": 35.887, |
|
"eval_steps_per_second": 1.165, |
|
"learning_rate": 0.0001, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_explained_variance": 0.34598857164382935, |
|
"eval_kl_divergence": 0.6567814350128174, |
|
"eval_loss": 0.4509589374065399, |
|
"eval_mae": 0.12596669793128967, |
|
"eval_rmse": 0.17043226957321167, |
|
"eval_runtime": 22.4313, |
|
"eval_samples_per_second": 35.709, |
|
"eval_steps_per_second": 1.159, |
|
"learning_rate": 0.0001, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"grad_norm": 0.16591614484786987, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4334, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_explained_variance": 0.35463404655456543, |
|
"eval_kl_divergence": 0.5748001337051392, |
|
"eval_loss": 0.4497845768928528, |
|
"eval_mae": 0.1262420266866684, |
|
"eval_rmse": 0.1693224012851715, |
|
"eval_runtime": 22.3409, |
|
"eval_samples_per_second": 35.854, |
|
"eval_steps_per_second": 1.164, |
|
"learning_rate": 0.0001, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_explained_variance": 0.34665071964263916, |
|
"eval_kl_divergence": 0.7001035809516907, |
|
"eval_loss": 0.45060041546821594, |
|
"eval_mae": 0.12559720873832703, |
|
"eval_rmse": 0.17011338472366333, |
|
"eval_runtime": 22.4894, |
|
"eval_samples_per_second": 35.617, |
|
"eval_steps_per_second": 1.156, |
|
"learning_rate": 0.0001, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_explained_variance": 0.3531297743320465, |
|
"eval_kl_divergence": 0.5840001702308655, |
|
"eval_loss": 0.4504892826080322, |
|
"eval_mae": 0.12626922130584717, |
|
"eval_rmse": 0.16992022097110748, |
|
"eval_runtime": 22.4286, |
|
"eval_samples_per_second": 35.713, |
|
"eval_steps_per_second": 1.159, |
|
"learning_rate": 0.0001, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_explained_variance": 0.34863966703414917, |
|
"eval_kl_divergence": 0.8101097345352173, |
|
"eval_loss": 0.45060065388679504, |
|
"eval_mae": 0.12516793608665466, |
|
"eval_rmse": 0.1702672839164734, |
|
"eval_runtime": 22.3007, |
|
"eval_samples_per_second": 35.918, |
|
"eval_steps_per_second": 1.166, |
|
"learning_rate": 0.0001, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_explained_variance": 0.3488965630531311, |
|
"eval_kl_divergence": 0.7415657043457031, |
|
"eval_loss": 0.45080825686454773, |
|
"eval_mae": 0.12486829608678818, |
|
"eval_rmse": 0.1701475977897644, |
|
"eval_runtime": 22.3895, |
|
"eval_samples_per_second": 35.776, |
|
"eval_steps_per_second": 1.161, |
|
"learning_rate": 0.0001, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_explained_variance": 0.3523526191711426, |
|
"eval_kl_divergence": 0.6401851177215576, |
|
"eval_loss": 0.4501984417438507, |
|
"eval_mae": 0.12540514767169952, |
|
"eval_rmse": 0.16971096396446228, |
|
"eval_runtime": 22.3438, |
|
"eval_samples_per_second": 35.849, |
|
"eval_steps_per_second": 1.164, |
|
"learning_rate": 0.0001, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 32.89473684210526, |
|
"grad_norm": 0.1912919282913208, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4289, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_explained_variance": 0.33917075395584106, |
|
"eval_kl_divergence": 0.8411455154418945, |
|
"eval_loss": 0.4510658085346222, |
|
"eval_mae": 0.12500226497650146, |
|
"eval_rmse": 0.1709534227848053, |
|
"eval_runtime": 22.56, |
|
"eval_samples_per_second": 35.505, |
|
"eval_steps_per_second": 1.152, |
|
"learning_rate": 0.0001, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_explained_variance": 0.33831754326820374, |
|
"eval_kl_divergence": 0.7203648686408997, |
|
"eval_loss": 0.45148056745529175, |
|
"eval_mae": 0.12593072652816772, |
|
"eval_rmse": 0.17108403146266937, |
|
"eval_runtime": 22.505, |
|
"eval_samples_per_second": 35.592, |
|
"eval_steps_per_second": 1.155, |
|
"learning_rate": 1e-05, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_explained_variance": 0.34977057576179504, |
|
"eval_kl_divergence": 0.7354820966720581, |
|
"eval_loss": 0.4502483904361725, |
|
"eval_mae": 0.12473371624946594, |
|
"eval_rmse": 0.16982755064964294, |
|
"eval_runtime": 22.3912, |
|
"eval_samples_per_second": 35.773, |
|
"eval_steps_per_second": 1.161, |
|
"learning_rate": 1e-05, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_explained_variance": 0.3486325144767761, |
|
"eval_kl_divergence": 0.49899470806121826, |
|
"eval_loss": 0.4508889615535736, |
|
"eval_mae": 0.1260843575000763, |
|
"eval_rmse": 0.1702878773212433, |
|
"eval_runtime": 22.8537, |
|
"eval_samples_per_second": 35.049, |
|
"eval_steps_per_second": 1.138, |
|
"learning_rate": 1e-05, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_explained_variance": 0.3535779118537903, |
|
"eval_kl_divergence": 0.5451197624206543, |
|
"eval_loss": 0.44998663663864136, |
|
"eval_mae": 0.12602195143699646, |
|
"eval_rmse": 0.16962522268295288, |
|
"eval_runtime": 22.2216, |
|
"eval_samples_per_second": 36.046, |
|
"eval_steps_per_second": 1.17, |
|
"learning_rate": 1e-05, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 1e-05, |
|
"step": 2812, |
|
"total_flos": 1.318896404308369e+20, |
|
"train_loss": 0.4464974437295797, |
|
"train_runtime": 4164.1679, |
|
"train_samples_per_second": 86.596, |
|
"train_steps_per_second": 2.738 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 10 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.318896404308369e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|