groderg's picture
Evaluation on the test set completed on 2025_02_03.
7f3c29f verified
{
"best_metric": 0.4497845768928528,
"best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/DinoVdrone-large-2025_02_03_31850-bs32_freeze_probs/checkpoint-2052",
"epoch": 37.0,
"eval_steps": 500,
"global_step": 2812,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.0698663592338562,
"eval_kl_divergence": 1.791170597076416,
"eval_loss": 0.5543646216392517,
"eval_mae": 0.22781437635421753,
"eval_rmse": 0.26052138209342957,
"eval_runtime": 36.0517,
"eval_samples_per_second": 22.218,
"eval_steps_per_second": 0.721,
"learning_rate": 0.001,
"step": 76
},
{
"epoch": 2.0,
"eval_explained_variance": 0.07257537543773651,
"eval_kl_divergence": 1.0344740152359009,
"eval_loss": 0.48171326518058777,
"eval_mae": 0.16019925475120544,
"eval_rmse": 0.20066045224666595,
"eval_runtime": 32.1858,
"eval_samples_per_second": 24.887,
"eval_steps_per_second": 0.808,
"learning_rate": 0.001,
"step": 152
},
{
"epoch": 3.0,
"eval_explained_variance": 0.27069091796875,
"eval_kl_divergence": 0.6063743233680725,
"eval_loss": 0.4615386724472046,
"eval_mae": 0.1370290368795395,
"eval_rmse": 0.18013149499893188,
"eval_runtime": 22.3828,
"eval_samples_per_second": 35.786,
"eval_steps_per_second": 1.162,
"learning_rate": 0.001,
"step": 228
},
{
"epoch": 4.0,
"eval_explained_variance": 0.08923790603876114,
"eval_kl_divergence": 0.6576936841011047,
"eval_loss": 0.4632064700126648,
"eval_mae": 0.1390942633152008,
"eval_rmse": 0.18370357155799866,
"eval_runtime": 22.4997,
"eval_samples_per_second": 35.601,
"eval_steps_per_second": 1.156,
"learning_rate": 0.001,
"step": 304
},
{
"epoch": 5.0,
"eval_explained_variance": 0.2999000549316406,
"eval_kl_divergence": 0.6677561402320862,
"eval_loss": 0.4579373300075531,
"eval_mae": 0.13632091879844666,
"eval_rmse": 0.17685972154140472,
"eval_runtime": 22.4993,
"eval_samples_per_second": 35.601,
"eval_steps_per_second": 1.156,
"learning_rate": 0.001,
"step": 380
},
{
"epoch": 6.0,
"eval_explained_variance": 0.30495360493659973,
"eval_kl_divergence": 0.7895806431770325,
"eval_loss": 0.4571229815483093,
"eval_mae": 0.13304883241653442,
"eval_rmse": 0.176561176776886,
"eval_runtime": 22.7,
"eval_samples_per_second": 35.286,
"eval_steps_per_second": 1.145,
"learning_rate": 0.001,
"step": 456
},
{
"epoch": 6.578947368421053,
"grad_norm": 0.27282145619392395,
"learning_rate": 0.001,
"loss": 0.4966,
"step": 500
},
{
"epoch": 7.0,
"eval_explained_variance": 0.3103345036506653,
"eval_kl_divergence": 0.6492787599563599,
"eval_loss": 0.4586440622806549,
"eval_mae": 0.13074660301208496,
"eval_rmse": 0.17730861902236938,
"eval_runtime": 22.4946,
"eval_samples_per_second": 35.608,
"eval_steps_per_second": 1.156,
"learning_rate": 0.001,
"step": 532
},
{
"epoch": 8.0,
"eval_explained_variance": 0.30848240852355957,
"eval_kl_divergence": 0.9475247263908386,
"eval_loss": 0.45786425471305847,
"eval_mae": 0.13187319040298462,
"eval_rmse": 0.17719128727912903,
"eval_runtime": 22.513,
"eval_samples_per_second": 35.579,
"eval_steps_per_second": 1.155,
"learning_rate": 0.001,
"step": 608
},
{
"epoch": 9.0,
"eval_explained_variance": 0.3135569095611572,
"eval_kl_divergence": 0.7270792126655579,
"eval_loss": 0.4551210105419159,
"eval_mae": 0.13063107430934906,
"eval_rmse": 0.17461702227592468,
"eval_runtime": 22.4949,
"eval_samples_per_second": 35.608,
"eval_steps_per_second": 1.156,
"learning_rate": 0.001,
"step": 684
},
{
"epoch": 10.0,
"eval_explained_variance": 0.29183268547058105,
"eval_kl_divergence": 0.6882225275039673,
"eval_loss": 0.45817646384239197,
"eval_mae": 0.13160471618175507,
"eval_rmse": 0.17741131782531738,
"eval_runtime": 22.508,
"eval_samples_per_second": 35.587,
"eval_steps_per_second": 1.155,
"learning_rate": 0.001,
"step": 760
},
{
"epoch": 11.0,
"eval_explained_variance": 0.25859755277633667,
"eval_kl_divergence": 0.371459424495697,
"eval_loss": 0.46834859251976013,
"eval_mae": 0.13717466592788696,
"eval_rmse": 0.18415462970733643,
"eval_runtime": 22.4535,
"eval_samples_per_second": 35.674,
"eval_steps_per_second": 1.158,
"learning_rate": 0.001,
"step": 836
},
{
"epoch": 12.0,
"eval_explained_variance": 0.308597594499588,
"eval_kl_divergence": 0.5270651578903198,
"eval_loss": 0.4578668475151062,
"eval_mae": 0.13155478239059448,
"eval_rmse": 0.17636829614639282,
"eval_runtime": 22.5868,
"eval_samples_per_second": 35.463,
"eval_steps_per_second": 1.151,
"learning_rate": 0.001,
"step": 912
},
{
"epoch": 13.0,
"eval_explained_variance": 0.3100161552429199,
"eval_kl_divergence": 0.9167731404304504,
"eval_loss": 0.4558842182159424,
"eval_mae": 0.1300584226846695,
"eval_rmse": 0.1756095588207245,
"eval_runtime": 22.7991,
"eval_samples_per_second": 35.133,
"eval_steps_per_second": 1.14,
"learning_rate": 0.001,
"step": 988
},
{
"epoch": 13.157894736842104,
"grad_norm": 0.26468560099601746,
"learning_rate": 0.001,
"loss": 0.4448,
"step": 1000
},
{
"epoch": 14.0,
"eval_explained_variance": 0.3228832185268402,
"eval_kl_divergence": 0.8826888799667358,
"eval_loss": 0.4555540680885315,
"eval_mae": 0.12918463349342346,
"eval_rmse": 0.17491525411605835,
"eval_runtime": 22.7093,
"eval_samples_per_second": 35.272,
"eval_steps_per_second": 1.145,
"learning_rate": 0.001,
"step": 1064
},
{
"epoch": 15.0,
"eval_explained_variance": 0.34160685539245605,
"eval_kl_divergence": 0.7008672952651978,
"eval_loss": 0.45217740535736084,
"eval_mae": 0.1262015700340271,
"eval_rmse": 0.17165420949459076,
"eval_runtime": 22.6104,
"eval_samples_per_second": 35.426,
"eval_steps_per_second": 1.15,
"learning_rate": 0.001,
"step": 1140
},
{
"epoch": 16.0,
"eval_explained_variance": 0.31633102893829346,
"eval_kl_divergence": 1.0038130283355713,
"eval_loss": 0.45556434988975525,
"eval_mae": 0.12863175570964813,
"eval_rmse": 0.1752910166978836,
"eval_runtime": 22.5805,
"eval_samples_per_second": 35.473,
"eval_steps_per_second": 1.151,
"learning_rate": 0.001,
"step": 1216
},
{
"epoch": 17.0,
"eval_explained_variance": 0.3205307126045227,
"eval_kl_divergence": 0.2600082457065582,
"eval_loss": 0.458648681640625,
"eval_mae": 0.13426683843135834,
"eval_rmse": 0.17750133574008942,
"eval_runtime": 22.3997,
"eval_samples_per_second": 35.759,
"eval_steps_per_second": 1.161,
"learning_rate": 0.001,
"step": 1292
},
{
"epoch": 18.0,
"eval_explained_variance": -4.778772354125977,
"eval_kl_divergence": 2.054769277572632,
"eval_loss": 0.567169725894928,
"eval_mae": 0.16376179456710815,
"eval_rmse": 0.23688165843486786,
"eval_runtime": 22.2481,
"eval_samples_per_second": 36.003,
"eval_steps_per_second": 1.169,
"learning_rate": 0.001,
"step": 1368
},
{
"epoch": 19.0,
"eval_explained_variance": 0.32792216539382935,
"eval_kl_divergence": 0.7114961743354797,
"eval_loss": 0.45287612080574036,
"eval_mae": 0.12865176796913147,
"eval_rmse": 0.17274516820907593,
"eval_runtime": 22.3304,
"eval_samples_per_second": 35.87,
"eval_steps_per_second": 1.164,
"learning_rate": 0.001,
"step": 1444
},
{
"epoch": 19.736842105263158,
"grad_norm": 0.1475011706352234,
"learning_rate": 0.001,
"loss": 0.4406,
"step": 1500
},
{
"epoch": 20.0,
"eval_explained_variance": 0.3204135596752167,
"eval_kl_divergence": 0.9694227576255798,
"eval_loss": 0.45518893003463745,
"eval_mae": 0.12852200865745544,
"eval_rmse": 0.17462262511253357,
"eval_runtime": 22.5552,
"eval_samples_per_second": 35.513,
"eval_steps_per_second": 1.153,
"learning_rate": 0.001,
"step": 1520
},
{
"epoch": 21.0,
"eval_explained_variance": 0.32996666431427,
"eval_kl_divergence": 0.778915524482727,
"eval_loss": 0.45299893617630005,
"eval_mae": 0.12820784747600555,
"eval_rmse": 0.17243456840515137,
"eval_runtime": 22.5861,
"eval_samples_per_second": 35.464,
"eval_steps_per_second": 1.151,
"learning_rate": 0.001,
"step": 1596
},
{
"epoch": 22.0,
"eval_explained_variance": 0.34726351499557495,
"eval_kl_divergence": 0.7368760704994202,
"eval_loss": 0.4502638280391693,
"eval_mae": 0.12613575160503387,
"eval_rmse": 0.17001575231552124,
"eval_runtime": 22.7271,
"eval_samples_per_second": 35.244,
"eval_steps_per_second": 1.144,
"learning_rate": 0.0001,
"step": 1672
},
{
"epoch": 23.0,
"eval_explained_variance": 0.34029728174209595,
"eval_kl_divergence": 0.5027008056640625,
"eval_loss": 0.453466534614563,
"eval_mae": 0.12802833318710327,
"eval_rmse": 0.17160943150520325,
"eval_runtime": 22.4563,
"eval_samples_per_second": 35.669,
"eval_steps_per_second": 1.158,
"learning_rate": 0.0001,
"step": 1748
},
{
"epoch": 24.0,
"eval_explained_variance": 0.3511368930339813,
"eval_kl_divergence": 0.5968054533004761,
"eval_loss": 0.4502425491809845,
"eval_mae": 0.12641073763370514,
"eval_rmse": 0.16971111297607422,
"eval_runtime": 22.592,
"eval_samples_per_second": 35.455,
"eval_steps_per_second": 1.151,
"learning_rate": 0.0001,
"step": 1824
},
{
"epoch": 25.0,
"eval_explained_variance": 0.3504308760166168,
"eval_kl_divergence": 0.621475100517273,
"eval_loss": 0.45040303468704224,
"eval_mae": 0.12673497200012207,
"eval_rmse": 0.1699284017086029,
"eval_runtime": 22.3202,
"eval_samples_per_second": 35.887,
"eval_steps_per_second": 1.165,
"learning_rate": 0.0001,
"step": 1900
},
{
"epoch": 26.0,
"eval_explained_variance": 0.34598857164382935,
"eval_kl_divergence": 0.6567814350128174,
"eval_loss": 0.4509589374065399,
"eval_mae": 0.12596669793128967,
"eval_rmse": 0.17043226957321167,
"eval_runtime": 22.4313,
"eval_samples_per_second": 35.709,
"eval_steps_per_second": 1.159,
"learning_rate": 0.0001,
"step": 1976
},
{
"epoch": 26.31578947368421,
"grad_norm": 0.16591614484786987,
"learning_rate": 0.0001,
"loss": 0.4334,
"step": 2000
},
{
"epoch": 27.0,
"eval_explained_variance": 0.35463404655456543,
"eval_kl_divergence": 0.5748001337051392,
"eval_loss": 0.4497845768928528,
"eval_mae": 0.1262420266866684,
"eval_rmse": 0.1693224012851715,
"eval_runtime": 22.3409,
"eval_samples_per_second": 35.854,
"eval_steps_per_second": 1.164,
"learning_rate": 0.0001,
"step": 2052
},
{
"epoch": 28.0,
"eval_explained_variance": 0.34665071964263916,
"eval_kl_divergence": 0.7001035809516907,
"eval_loss": 0.45060041546821594,
"eval_mae": 0.12559720873832703,
"eval_rmse": 0.17011338472366333,
"eval_runtime": 22.4894,
"eval_samples_per_second": 35.617,
"eval_steps_per_second": 1.156,
"learning_rate": 0.0001,
"step": 2128
},
{
"epoch": 29.0,
"eval_explained_variance": 0.3531297743320465,
"eval_kl_divergence": 0.5840001702308655,
"eval_loss": 0.4504892826080322,
"eval_mae": 0.12626922130584717,
"eval_rmse": 0.16992022097110748,
"eval_runtime": 22.4286,
"eval_samples_per_second": 35.713,
"eval_steps_per_second": 1.159,
"learning_rate": 0.0001,
"step": 2204
},
{
"epoch": 30.0,
"eval_explained_variance": 0.34863966703414917,
"eval_kl_divergence": 0.8101097345352173,
"eval_loss": 0.45060065388679504,
"eval_mae": 0.12516793608665466,
"eval_rmse": 0.1702672839164734,
"eval_runtime": 22.3007,
"eval_samples_per_second": 35.918,
"eval_steps_per_second": 1.166,
"learning_rate": 0.0001,
"step": 2280
},
{
"epoch": 31.0,
"eval_explained_variance": 0.3488965630531311,
"eval_kl_divergence": 0.7415657043457031,
"eval_loss": 0.45080825686454773,
"eval_mae": 0.12486829608678818,
"eval_rmse": 0.1701475977897644,
"eval_runtime": 22.3895,
"eval_samples_per_second": 35.776,
"eval_steps_per_second": 1.161,
"learning_rate": 0.0001,
"step": 2356
},
{
"epoch": 32.0,
"eval_explained_variance": 0.3523526191711426,
"eval_kl_divergence": 0.6401851177215576,
"eval_loss": 0.4501984417438507,
"eval_mae": 0.12540514767169952,
"eval_rmse": 0.16971096396446228,
"eval_runtime": 22.3438,
"eval_samples_per_second": 35.849,
"eval_steps_per_second": 1.164,
"learning_rate": 0.0001,
"step": 2432
},
{
"epoch": 32.89473684210526,
"grad_norm": 0.1912919282913208,
"learning_rate": 0.0001,
"loss": 0.4289,
"step": 2500
},
{
"epoch": 33.0,
"eval_explained_variance": 0.33917075395584106,
"eval_kl_divergence": 0.8411455154418945,
"eval_loss": 0.4510658085346222,
"eval_mae": 0.12500226497650146,
"eval_rmse": 0.1709534227848053,
"eval_runtime": 22.56,
"eval_samples_per_second": 35.505,
"eval_steps_per_second": 1.152,
"learning_rate": 0.0001,
"step": 2508
},
{
"epoch": 34.0,
"eval_explained_variance": 0.33831754326820374,
"eval_kl_divergence": 0.7203648686408997,
"eval_loss": 0.45148056745529175,
"eval_mae": 0.12593072652816772,
"eval_rmse": 0.17108403146266937,
"eval_runtime": 22.505,
"eval_samples_per_second": 35.592,
"eval_steps_per_second": 1.155,
"learning_rate": 1e-05,
"step": 2584
},
{
"epoch": 35.0,
"eval_explained_variance": 0.34977057576179504,
"eval_kl_divergence": 0.7354820966720581,
"eval_loss": 0.4502483904361725,
"eval_mae": 0.12473371624946594,
"eval_rmse": 0.16982755064964294,
"eval_runtime": 22.3912,
"eval_samples_per_second": 35.773,
"eval_steps_per_second": 1.161,
"learning_rate": 1e-05,
"step": 2660
},
{
"epoch": 36.0,
"eval_explained_variance": 0.3486325144767761,
"eval_kl_divergence": 0.49899470806121826,
"eval_loss": 0.4508889615535736,
"eval_mae": 0.1260843575000763,
"eval_rmse": 0.1702878773212433,
"eval_runtime": 22.8537,
"eval_samples_per_second": 35.049,
"eval_steps_per_second": 1.138,
"learning_rate": 1e-05,
"step": 2736
},
{
"epoch": 37.0,
"eval_explained_variance": 0.3535779118537903,
"eval_kl_divergence": 0.5451197624206543,
"eval_loss": 0.44998663663864136,
"eval_mae": 0.12602195143699646,
"eval_rmse": 0.16962522268295288,
"eval_runtime": 22.2216,
"eval_samples_per_second": 36.046,
"eval_steps_per_second": 1.17,
"learning_rate": 1e-05,
"step": 2812
},
{
"epoch": 37.0,
"learning_rate": 1e-05,
"step": 2812,
"total_flos": 1.318896404308369e+20,
"train_loss": 0.4464974437295797,
"train_runtime": 4164.1679,
"train_samples_per_second": 86.596,
"train_steps_per_second": 2.738
}
],
"logging_steps": 500,
"max_steps": 11400,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 10
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.318896404308369e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}