{ "best_metric": 0.09835900366306305, "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-500", "epoch": 1.0, "eval_steps": 100, "global_step": 567, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01763668430335097, "grad_norm": 0.15796910226345062, "learning_rate": 0.00019647266313932982, "loss": 0.1281, "step": 10 }, { "epoch": 0.03527336860670194, "grad_norm": 0.291827917098999, "learning_rate": 0.0001929453262786596, "loss": 0.1139, "step": 20 }, { "epoch": 0.05291005291005291, "grad_norm": 0.489213764667511, "learning_rate": 0.00018941798941798943, "loss": 0.1035, "step": 30 }, { "epoch": 0.07054673721340388, "grad_norm": 0.11527382582426071, "learning_rate": 0.00018589065255731924, "loss": 0.1207, "step": 40 }, { "epoch": 0.08818342151675485, "grad_norm": 0.15595707297325134, "learning_rate": 0.00018236331569664903, "loss": 0.1186, "step": 50 }, { "epoch": 0.10582010582010581, "grad_norm": 0.7361170053482056, "learning_rate": 0.00017883597883597884, "loss": 0.1058, "step": 60 }, { "epoch": 0.12345679012345678, "grad_norm": 0.289522647857666, "learning_rate": 0.00017530864197530866, "loss": 0.0913, "step": 70 }, { "epoch": 0.14109347442680775, "grad_norm": 0.0984591692686081, "learning_rate": 0.00017178130511463847, "loss": 0.0928, "step": 80 }, { "epoch": 0.15873015873015872, "grad_norm": 0.08082891255617142, "learning_rate": 0.00016825396825396826, "loss": 0.0829, "step": 90 }, { "epoch": 0.1763668430335097, "grad_norm": 0.5304596424102783, "learning_rate": 0.00016472663139329807, "loss": 0.1192, "step": 100 }, { "epoch": 0.1763668430335097, "eval_accuracy": 0.9692460317460317, "eval_loss": 0.1157514676451683, "eval_runtime": 12.4102, "eval_samples_per_second": 81.224, "eval_steps_per_second": 10.153, "step": 100 }, { "epoch": 0.19400352733686066, "grad_norm": 0.2734580636024475, "learning_rate": 0.0001611992945326279, "loss": 0.0381, "step": 110 }, { "epoch": 0.21164021164021163, "grad_norm": 1.1482329368591309, "learning_rate": 0.00015767195767195767, "loss": 0.1579, "step": 120 }, { "epoch": 0.2292768959435626, "grad_norm": 0.29367437958717346, "learning_rate": 0.0001541446208112875, "loss": 0.1296, "step": 130 }, { "epoch": 0.24691358024691357, "grad_norm": 0.4296620190143585, "learning_rate": 0.0001506172839506173, "loss": 0.0876, "step": 140 }, { "epoch": 0.26455026455026454, "grad_norm": 0.430569589138031, "learning_rate": 0.0001470899470899471, "loss": 0.1431, "step": 150 }, { "epoch": 0.2821869488536155, "grad_norm": 0.17311625182628632, "learning_rate": 0.0001435626102292769, "loss": 0.1194, "step": 160 }, { "epoch": 0.2998236331569665, "grad_norm": 0.21777546405792236, "learning_rate": 0.00014003527336860672, "loss": 0.1202, "step": 170 }, { "epoch": 0.31746031746031744, "grad_norm": 2.8384833335876465, "learning_rate": 0.0001365079365079365, "loss": 0.1352, "step": 180 }, { "epoch": 0.3350970017636684, "grad_norm": 0.3133362829685211, "learning_rate": 0.00013298059964726632, "loss": 0.0935, "step": 190 }, { "epoch": 0.3527336860670194, "grad_norm": 0.27794110774993896, "learning_rate": 0.00012945326278659614, "loss": 0.0734, "step": 200 }, { "epoch": 0.3527336860670194, "eval_accuracy": 0.9702380952380952, "eval_loss": 0.1267656832933426, "eval_runtime": 12.6893, "eval_samples_per_second": 79.437, "eval_steps_per_second": 9.93, "step": 200 }, { "epoch": 0.37037037037037035, "grad_norm": 0.12122131884098053, "learning_rate": 0.00012592592592592592, "loss": 0.1548, "step": 210 }, { "epoch": 0.3880070546737213, "grad_norm": 0.14606191217899323, "learning_rate": 0.00012239858906525574, "loss": 0.0166, "step": 220 }, { "epoch": 0.4056437389770723, "grad_norm": 0.118919737637043, "learning_rate": 0.00011887125220458555, "loss": 0.1295, "step": 230 }, { "epoch": 0.42328042328042326, "grad_norm": 0.14326384663581848, "learning_rate": 0.00011534391534391535, "loss": 0.1287, "step": 240 }, { "epoch": 0.4409171075837742, "grad_norm": 1.4116628170013428, "learning_rate": 0.00011181657848324515, "loss": 0.148, "step": 250 }, { "epoch": 0.4585537918871252, "grad_norm": 0.18267543613910675, "learning_rate": 0.00010828924162257497, "loss": 0.1161, "step": 260 }, { "epoch": 0.47619047619047616, "grad_norm": 0.6114774346351624, "learning_rate": 0.00010476190476190477, "loss": 0.0778, "step": 270 }, { "epoch": 0.49382716049382713, "grad_norm": 0.10120945423841476, "learning_rate": 0.00010123456790123458, "loss": 0.026, "step": 280 }, { "epoch": 0.5114638447971781, "grad_norm": 0.5115650296211243, "learning_rate": 9.770723104056437e-05, "loss": 0.0964, "step": 290 }, { "epoch": 0.5291005291005291, "grad_norm": 0.2577248513698578, "learning_rate": 9.417989417989419e-05, "loss": 0.0701, "step": 300 }, { "epoch": 0.5291005291005291, "eval_accuracy": 0.9672619047619048, "eval_loss": 0.1057305857539177, "eval_runtime": 12.4383, "eval_samples_per_second": 81.04, "eval_steps_per_second": 10.13, "step": 300 }, { "epoch": 0.54673721340388, "grad_norm": 1.4628558158874512, "learning_rate": 9.065255731922399e-05, "loss": 0.1241, "step": 310 }, { "epoch": 0.564373897707231, "grad_norm": 0.4355175793170929, "learning_rate": 8.712522045855379e-05, "loss": 0.125, "step": 320 }, { "epoch": 0.582010582010582, "grad_norm": 0.09273388981819153, "learning_rate": 8.35978835978836e-05, "loss": 0.0728, "step": 330 }, { "epoch": 0.599647266313933, "grad_norm": 0.777564525604248, "learning_rate": 8.00705467372134e-05, "loss": 0.0974, "step": 340 }, { "epoch": 0.6172839506172839, "grad_norm": 0.8731770515441895, "learning_rate": 7.65432098765432e-05, "loss": 0.1852, "step": 350 }, { "epoch": 0.6349206349206349, "grad_norm": 0.18054471909999847, "learning_rate": 7.301587301587302e-05, "loss": 0.0722, "step": 360 }, { "epoch": 0.6525573192239859, "grad_norm": 0.10277850925922394, "learning_rate": 6.948853615520282e-05, "loss": 0.0705, "step": 370 }, { "epoch": 0.6701940035273368, "grad_norm": 0.2326202243566513, "learning_rate": 6.596119929453263e-05, "loss": 0.0813, "step": 380 }, { "epoch": 0.6878306878306878, "grad_norm": 0.4453868865966797, "learning_rate": 6.243386243386243e-05, "loss": 0.0443, "step": 390 }, { "epoch": 0.7054673721340388, "grad_norm": 0.18326041102409363, "learning_rate": 5.890652557319224e-05, "loss": 0.1107, "step": 400 }, { "epoch": 0.7054673721340388, "eval_accuracy": 0.9722222222222222, "eval_loss": 0.10807543247938156, "eval_runtime": 12.4927, "eval_samples_per_second": 80.687, "eval_steps_per_second": 10.086, "step": 400 }, { "epoch": 0.7231040564373897, "grad_norm": 0.08458270132541656, "learning_rate": 5.537918871252204e-05, "loss": 0.0699, "step": 410 }, { "epoch": 0.7407407407407407, "grad_norm": 0.3042624294757843, "learning_rate": 5.185185185185185e-05, "loss": 0.0649, "step": 420 }, { "epoch": 0.7583774250440917, "grad_norm": 0.1482134908437729, "learning_rate": 4.832451499118166e-05, "loss": 0.0652, "step": 430 }, { "epoch": 0.7760141093474426, "grad_norm": 0.08534322679042816, "learning_rate": 4.4797178130511465e-05, "loss": 0.0572, "step": 440 }, { "epoch": 0.7936507936507936, "grad_norm": 0.09549910575151443, "learning_rate": 4.126984126984127e-05, "loss": 0.0527, "step": 450 }, { "epoch": 0.8112874779541446, "grad_norm": 1.4032962322235107, "learning_rate": 3.7742504409171074e-05, "loss": 0.0643, "step": 460 }, { "epoch": 0.8289241622574955, "grad_norm": 0.5806692242622375, "learning_rate": 3.421516754850088e-05, "loss": 0.0699, "step": 470 }, { "epoch": 0.8465608465608465, "grad_norm": 0.07148485630750656, "learning_rate": 3.068783068783069e-05, "loss": 0.1102, "step": 480 }, { "epoch": 0.8641975308641975, "grad_norm": 0.6352106928825378, "learning_rate": 2.7160493827160493e-05, "loss": 0.0574, "step": 490 }, { "epoch": 0.8818342151675485, "grad_norm": 0.8136057257652283, "learning_rate": 2.36331569664903e-05, "loss": 0.0413, "step": 500 }, { "epoch": 0.8818342151675485, "eval_accuracy": 0.9761904761904762, "eval_loss": 0.09835900366306305, "eval_runtime": 12.4974, "eval_samples_per_second": 80.657, "eval_steps_per_second": 10.082, "step": 500 }, { "epoch": 0.8994708994708994, "grad_norm": 0.11122062802314758, "learning_rate": 2.0105820105820105e-05, "loss": 0.0287, "step": 510 }, { "epoch": 0.9171075837742504, "grad_norm": 0.08222879469394684, "learning_rate": 1.6578483245149913e-05, "loss": 0.0793, "step": 520 }, { "epoch": 0.9347442680776014, "grad_norm": 0.08997409790754318, "learning_rate": 1.3051146384479717e-05, "loss": 0.0523, "step": 530 }, { "epoch": 0.9523809523809523, "grad_norm": 0.14071638882160187, "learning_rate": 9.523809523809523e-06, "loss": 0.0828, "step": 540 }, { "epoch": 0.9700176366843033, "grad_norm": 0.0937449261546135, "learning_rate": 5.99647266313933e-06, "loss": 0.1201, "step": 550 }, { "epoch": 0.9876543209876543, "grad_norm": 2.3273768424987793, "learning_rate": 2.469135802469136e-06, "loss": 0.1994, "step": 560 }, { "epoch": 1.0, "step": 567, "total_flos": 7.023873938666619e+17, "train_loss": 0.09499678115575612, "train_runtime": 238.2789, "train_samples_per_second": 38.039, "train_steps_per_second": 2.38 } ], "logging_steps": 10, "max_steps": 567, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.023873938666619e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }