{ "best_metric": 0.9032258064516129, "best_model_checkpoint": "videomae-base-nicole/checkpoint-459", "epoch": 9.082, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 8.788407325744629, "learning_rate": 1e-05, "loss": 1.4614, "step": 10 }, { "epoch": 0.04, "grad_norm": 7.4712066650390625, "learning_rate": 2e-05, "loss": 1.3969, "step": 20 }, { "epoch": 0.06, "grad_norm": 6.619698524475098, "learning_rate": 3e-05, "loss": 1.4034, "step": 30 }, { "epoch": 0.08, "grad_norm": 6.6369781494140625, "learning_rate": 4e-05, "loss": 1.3931, "step": 40 }, { "epoch": 0.1, "grad_norm": 5.357635974884033, "learning_rate": 5e-05, "loss": 1.368, "step": 50 }, { "epoch": 0.102, "eval_accuracy": 0.2903225806451613, "eval_loss": 1.4286391735076904, "eval_runtime": 10.3135, "eval_samples_per_second": 3.006, "eval_steps_per_second": 0.388, "step": 51 }, { "epoch": 1.018, "grad_norm": 5.454110622406006, "learning_rate": 4.888888888888889e-05, "loss": 1.3208, "step": 60 }, { "epoch": 1.038, "grad_norm": 6.431514739990234, "learning_rate": 4.7777777777777784e-05, "loss": 1.1573, "step": 70 }, { "epoch": 1.058, "grad_norm": 12.289501190185547, "learning_rate": 4.666666666666667e-05, "loss": 0.8843, "step": 80 }, { "epoch": 1.078, "grad_norm": 8.682211875915527, "learning_rate": 4.555555555555556e-05, "loss": 1.0869, "step": 90 }, { "epoch": 1.098, "grad_norm": 9.173279762268066, "learning_rate": 4.4444444444444447e-05, "loss": 1.1282, "step": 100 }, { "epoch": 1.102, "eval_accuracy": 0.45161290322580644, "eval_loss": 1.0812301635742188, "eval_runtime": 3.8464, "eval_samples_per_second": 8.059, "eval_steps_per_second": 1.04, "step": 102 }, { "epoch": 2.016, "grad_norm": 13.553953170776367, "learning_rate": 4.3333333333333334e-05, "loss": 0.7574, "step": 110 }, { "epoch": 2.036, "grad_norm": 16.779861450195312, "learning_rate": 4.222222222222222e-05, "loss": 1.012, "step": 120 }, { "epoch": 2.056, "grad_norm": 16.47136688232422, "learning_rate": 4.111111111111111e-05, "loss": 0.845, "step": 130 }, { "epoch": 2.076, "grad_norm": 5.69786262512207, "learning_rate": 4e-05, "loss": 0.5726, "step": 140 }, { "epoch": 2.096, "grad_norm": 9.021860122680664, "learning_rate": 3.888888888888889e-05, "loss": 0.8288, "step": 150 }, { "epoch": 2.102, "eval_accuracy": 0.41935483870967744, "eval_loss": 1.472697138786316, "eval_runtime": 3.5759, "eval_samples_per_second": 8.669, "eval_steps_per_second": 1.119, "step": 153 }, { "epoch": 3.014, "grad_norm": 7.89491081237793, "learning_rate": 3.777777777777778e-05, "loss": 0.6131, "step": 160 }, { "epoch": 3.034, "grad_norm": 15.765496253967285, "learning_rate": 3.6666666666666666e-05, "loss": 0.656, "step": 170 }, { "epoch": 3.054, "grad_norm": 8.298906326293945, "learning_rate": 3.555555555555556e-05, "loss": 0.5137, "step": 180 }, { "epoch": 3.074, "grad_norm": 13.529772758483887, "learning_rate": 3.444444444444445e-05, "loss": 0.5048, "step": 190 }, { "epoch": 3.094, "grad_norm": 18.521669387817383, "learning_rate": 3.3333333333333335e-05, "loss": 0.3846, "step": 200 }, { "epoch": 3.102, "eval_accuracy": 0.6774193548387096, "eval_loss": 1.1546744108200073, "eval_runtime": 3.5645, "eval_samples_per_second": 8.697, "eval_steps_per_second": 1.122, "step": 204 }, { "epoch": 4.012, "grad_norm": 4.19064998626709, "learning_rate": 3.222222222222223e-05, "loss": 0.268, "step": 210 }, { "epoch": 4.032, "grad_norm": 18.81903076171875, "learning_rate": 3.111111111111111e-05, "loss": 0.4407, "step": 220 }, { "epoch": 4.052, "grad_norm": 18.416461944580078, "learning_rate": 3e-05, "loss": 0.2334, "step": 230 }, { "epoch": 4.072, "grad_norm": 1.9018150568008423, "learning_rate": 2.8888888888888888e-05, "loss": 0.3856, "step": 240 }, { "epoch": 4.092, "grad_norm": 0.378119558095932, "learning_rate": 2.777777777777778e-05, "loss": 0.3053, "step": 250 }, { "epoch": 4.102, "eval_accuracy": 0.6774193548387096, "eval_loss": 1.1199373006820679, "eval_runtime": 3.6445, "eval_samples_per_second": 8.506, "eval_steps_per_second": 1.098, "step": 255 }, { "epoch": 5.01, "grad_norm": 34.85398864746094, "learning_rate": 2.6666666666666667e-05, "loss": 0.2803, "step": 260 }, { "epoch": 5.03, "grad_norm": 8.173317909240723, "learning_rate": 2.5555555555555554e-05, "loss": 0.3287, "step": 270 }, { "epoch": 5.05, "grad_norm": 4.433807849884033, "learning_rate": 2.4444444444444445e-05, "loss": 0.0878, "step": 280 }, { "epoch": 5.07, "grad_norm": 23.77573585510254, "learning_rate": 2.3333333333333336e-05, "loss": 0.269, "step": 290 }, { "epoch": 5.09, "grad_norm": 0.3436167240142822, "learning_rate": 2.2222222222222223e-05, "loss": 0.0898, "step": 300 }, { "epoch": 5.102, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.668865442276001, "eval_runtime": 3.7841, "eval_samples_per_second": 8.192, "eval_steps_per_second": 1.057, "step": 306 }, { "epoch": 6.008, "grad_norm": 26.619293212890625, "learning_rate": 2.111111111111111e-05, "loss": 0.1931, "step": 310 }, { "epoch": 6.028, "grad_norm": 10.946873664855957, "learning_rate": 2e-05, "loss": 0.3406, "step": 320 }, { "epoch": 6.048, "grad_norm": 0.8608723878860474, "learning_rate": 1.888888888888889e-05, "loss": 0.2112, "step": 330 }, { "epoch": 6.068, "grad_norm": 0.9995169043540955, "learning_rate": 1.777777777777778e-05, "loss": 0.1347, "step": 340 }, { "epoch": 6.088, "grad_norm": 1.7768505811691284, "learning_rate": 1.6666666666666667e-05, "loss": 0.304, "step": 350 }, { "epoch": 6.102, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.6001490950584412, "eval_runtime": 3.8728, "eval_samples_per_second": 8.005, "eval_steps_per_second": 1.033, "step": 357 }, { "epoch": 7.006, "grad_norm": 0.2909161150455475, "learning_rate": 1.5555555555555555e-05, "loss": 0.25, "step": 360 }, { "epoch": 7.026, "grad_norm": 53.329654693603516, "learning_rate": 1.4444444444444444e-05, "loss": 0.2183, "step": 370 }, { "epoch": 7.046, "grad_norm": 31.767650604248047, "learning_rate": 1.3333333333333333e-05, "loss": 0.2739, "step": 380 }, { "epoch": 7.066, "grad_norm": 4.31083869934082, "learning_rate": 1.2222222222222222e-05, "loss": 0.0781, "step": 390 }, { "epoch": 7.086, "grad_norm": 4.138507843017578, "learning_rate": 1.1111111111111112e-05, "loss": 0.1134, "step": 400 }, { "epoch": 7.102, "eval_accuracy": 0.8709677419354839, "eval_loss": 0.5982156991958618, "eval_runtime": 3.7659, "eval_samples_per_second": 8.232, "eval_steps_per_second": 1.062, "step": 408 }, { "epoch": 8.004, "grad_norm": 12.455638885498047, "learning_rate": 1e-05, "loss": 0.2211, "step": 410 }, { "epoch": 8.024, "grad_norm": 0.5359604358673096, "learning_rate": 8.88888888888889e-06, "loss": 0.0196, "step": 420 }, { "epoch": 8.044, "grad_norm": 0.06697440892457962, "learning_rate": 7.777777777777777e-06, "loss": 0.0486, "step": 430 }, { "epoch": 8.064, "grad_norm": 21.498056411743164, "learning_rate": 6.666666666666667e-06, "loss": 0.0725, "step": 440 }, { "epoch": 8.084, "grad_norm": 12.305703163146973, "learning_rate": 5.555555555555556e-06, "loss": 0.0641, "step": 450 }, { "epoch": 8.102, "eval_accuracy": 0.9032258064516129, "eval_loss": 0.40339288115501404, "eval_runtime": 3.7107, "eval_samples_per_second": 8.354, "eval_steps_per_second": 1.078, "step": 459 }, { "epoch": 9.002, "grad_norm": 3.38336181640625, "learning_rate": 4.444444444444445e-06, "loss": 0.1039, "step": 460 }, { "epoch": 9.022, "grad_norm": 26.290576934814453, "learning_rate": 3.3333333333333333e-06, "loss": 0.0795, "step": 470 }, { "epoch": 9.042, "grad_norm": 0.5814034938812256, "learning_rate": 2.2222222222222225e-06, "loss": 0.2155, "step": 480 }, { "epoch": 9.062, "grad_norm": 0.044023916125297546, "learning_rate": 1.1111111111111112e-06, "loss": 0.1118, "step": 490 }, { "epoch": 9.082, "grad_norm": 29.71231460571289, "learning_rate": 0.0, "loss": 0.0834, "step": 500 }, { "epoch": 9.082, "eval_accuracy": 0.9032258064516129, "eval_loss": 0.3761322498321533, "eval_runtime": 7.0087, "eval_samples_per_second": 4.423, "eval_steps_per_second": 0.571, "step": 500 }, { "epoch": 9.082, "step": 500, "total_flos": 4.950702914524545e+18, "train_loss": 0.5022828738093377, "train_runtime": 1086.8997, "train_samples_per_second": 3.68, "train_steps_per_second": 0.46 }, { "epoch": 9.082, "eval_accuracy": 0.796875, "eval_loss": 0.805524468421936, "eval_runtime": 20.9731, "eval_samples_per_second": 3.052, "eval_steps_per_second": 0.381, "step": 500 }, { "epoch": 9.082, "eval_accuracy": 0.796875, "eval_loss": 0.8055245280265808, "eval_runtime": 10.3314, "eval_samples_per_second": 6.195, "eval_steps_per_second": 0.774, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.950702914524545e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }