|
{ |
|
"best_metric": 0.09835900366306305, |
|
"best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-500", |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 567, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01763668430335097, |
|
"grad_norm": 0.15796910226345062, |
|
"learning_rate": 0.00019647266313932982, |
|
"loss": 0.1281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03527336860670194, |
|
"grad_norm": 0.291827917098999, |
|
"learning_rate": 0.0001929453262786596, |
|
"loss": 0.1139, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05291005291005291, |
|
"grad_norm": 0.489213764667511, |
|
"learning_rate": 0.00018941798941798943, |
|
"loss": 0.1035, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07054673721340388, |
|
"grad_norm": 0.11527382582426071, |
|
"learning_rate": 0.00018589065255731924, |
|
"loss": 0.1207, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08818342151675485, |
|
"grad_norm": 0.15595707297325134, |
|
"learning_rate": 0.00018236331569664903, |
|
"loss": 0.1186, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10582010582010581, |
|
"grad_norm": 0.7361170053482056, |
|
"learning_rate": 0.00017883597883597884, |
|
"loss": 0.1058, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12345679012345678, |
|
"grad_norm": 0.289522647857666, |
|
"learning_rate": 0.00017530864197530866, |
|
"loss": 0.0913, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14109347442680775, |
|
"grad_norm": 0.0984591692686081, |
|
"learning_rate": 0.00017178130511463847, |
|
"loss": 0.0928, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15873015873015872, |
|
"grad_norm": 0.08082891255617142, |
|
"learning_rate": 0.00016825396825396826, |
|
"loss": 0.0829, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1763668430335097, |
|
"grad_norm": 0.5304596424102783, |
|
"learning_rate": 0.00016472663139329807, |
|
"loss": 0.1192, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1763668430335097, |
|
"eval_accuracy": 0.9692460317460317, |
|
"eval_loss": 0.1157514676451683, |
|
"eval_runtime": 12.4102, |
|
"eval_samples_per_second": 81.224, |
|
"eval_steps_per_second": 10.153, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19400352733686066, |
|
"grad_norm": 0.2734580636024475, |
|
"learning_rate": 0.0001611992945326279, |
|
"loss": 0.0381, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.21164021164021163, |
|
"grad_norm": 1.1482329368591309, |
|
"learning_rate": 0.00015767195767195767, |
|
"loss": 0.1579, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2292768959435626, |
|
"grad_norm": 0.29367437958717346, |
|
"learning_rate": 0.0001541446208112875, |
|
"loss": 0.1296, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24691358024691357, |
|
"grad_norm": 0.4296620190143585, |
|
"learning_rate": 0.0001506172839506173, |
|
"loss": 0.0876, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26455026455026454, |
|
"grad_norm": 0.430569589138031, |
|
"learning_rate": 0.0001470899470899471, |
|
"loss": 0.1431, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2821869488536155, |
|
"grad_norm": 0.17311625182628632, |
|
"learning_rate": 0.0001435626102292769, |
|
"loss": 0.1194, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2998236331569665, |
|
"grad_norm": 0.21777546405792236, |
|
"learning_rate": 0.00014003527336860672, |
|
"loss": 0.1202, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 2.8384833335876465, |
|
"learning_rate": 0.0001365079365079365, |
|
"loss": 0.1352, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3350970017636684, |
|
"grad_norm": 0.3133362829685211, |
|
"learning_rate": 0.00013298059964726632, |
|
"loss": 0.0935, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3527336860670194, |
|
"grad_norm": 0.27794110774993896, |
|
"learning_rate": 0.00012945326278659614, |
|
"loss": 0.0734, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3527336860670194, |
|
"eval_accuracy": 0.9702380952380952, |
|
"eval_loss": 0.1267656832933426, |
|
"eval_runtime": 12.6893, |
|
"eval_samples_per_second": 79.437, |
|
"eval_steps_per_second": 9.93, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 0.12122131884098053, |
|
"learning_rate": 0.00012592592592592592, |
|
"loss": 0.1548, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3880070546737213, |
|
"grad_norm": 0.14606191217899323, |
|
"learning_rate": 0.00012239858906525574, |
|
"loss": 0.0166, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4056437389770723, |
|
"grad_norm": 0.118919737637043, |
|
"learning_rate": 0.00011887125220458555, |
|
"loss": 0.1295, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.42328042328042326, |
|
"grad_norm": 0.14326384663581848, |
|
"learning_rate": 0.00011534391534391535, |
|
"loss": 0.1287, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4409171075837742, |
|
"grad_norm": 1.4116628170013428, |
|
"learning_rate": 0.00011181657848324515, |
|
"loss": 0.148, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4585537918871252, |
|
"grad_norm": 0.18267543613910675, |
|
"learning_rate": 0.00010828924162257497, |
|
"loss": 0.1161, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 0.6114774346351624, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 0.0778, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"grad_norm": 0.10120945423841476, |
|
"learning_rate": 0.00010123456790123458, |
|
"loss": 0.026, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5114638447971781, |
|
"grad_norm": 0.5115650296211243, |
|
"learning_rate": 9.770723104056437e-05, |
|
"loss": 0.0964, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5291005291005291, |
|
"grad_norm": 0.2577248513698578, |
|
"learning_rate": 9.417989417989419e-05, |
|
"loss": 0.0701, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5291005291005291, |
|
"eval_accuracy": 0.9672619047619048, |
|
"eval_loss": 0.1057305857539177, |
|
"eval_runtime": 12.4383, |
|
"eval_samples_per_second": 81.04, |
|
"eval_steps_per_second": 10.13, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.54673721340388, |
|
"grad_norm": 1.4628558158874512, |
|
"learning_rate": 9.065255731922399e-05, |
|
"loss": 0.1241, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.564373897707231, |
|
"grad_norm": 0.4355175793170929, |
|
"learning_rate": 8.712522045855379e-05, |
|
"loss": 0.125, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.582010582010582, |
|
"grad_norm": 0.09273388981819153, |
|
"learning_rate": 8.35978835978836e-05, |
|
"loss": 0.0728, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.599647266313933, |
|
"grad_norm": 0.777564525604248, |
|
"learning_rate": 8.00705467372134e-05, |
|
"loss": 0.0974, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"grad_norm": 0.8731770515441895, |
|
"learning_rate": 7.65432098765432e-05, |
|
"loss": 0.1852, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 0.18054471909999847, |
|
"learning_rate": 7.301587301587302e-05, |
|
"loss": 0.0722, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6525573192239859, |
|
"grad_norm": 0.10277850925922394, |
|
"learning_rate": 6.948853615520282e-05, |
|
"loss": 0.0705, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6701940035273368, |
|
"grad_norm": 0.2326202243566513, |
|
"learning_rate": 6.596119929453263e-05, |
|
"loss": 0.0813, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6878306878306878, |
|
"grad_norm": 0.4453868865966797, |
|
"learning_rate": 6.243386243386243e-05, |
|
"loss": 0.0443, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7054673721340388, |
|
"grad_norm": 0.18326041102409363, |
|
"learning_rate": 5.890652557319224e-05, |
|
"loss": 0.1107, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7054673721340388, |
|
"eval_accuracy": 0.9722222222222222, |
|
"eval_loss": 0.10807543247938156, |
|
"eval_runtime": 12.4927, |
|
"eval_samples_per_second": 80.687, |
|
"eval_steps_per_second": 10.086, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7231040564373897, |
|
"grad_norm": 0.08458270132541656, |
|
"learning_rate": 5.537918871252204e-05, |
|
"loss": 0.0699, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 0.3042624294757843, |
|
"learning_rate": 5.185185185185185e-05, |
|
"loss": 0.0649, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7583774250440917, |
|
"grad_norm": 0.1482134908437729, |
|
"learning_rate": 4.832451499118166e-05, |
|
"loss": 0.0652, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7760141093474426, |
|
"grad_norm": 0.08534322679042816, |
|
"learning_rate": 4.4797178130511465e-05, |
|
"loss": 0.0572, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 0.09549910575151443, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 0.0527, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8112874779541446, |
|
"grad_norm": 1.4032962322235107, |
|
"learning_rate": 3.7742504409171074e-05, |
|
"loss": 0.0643, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8289241622574955, |
|
"grad_norm": 0.5806692242622375, |
|
"learning_rate": 3.421516754850088e-05, |
|
"loss": 0.0699, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8465608465608465, |
|
"grad_norm": 0.07148485630750656, |
|
"learning_rate": 3.068783068783069e-05, |
|
"loss": 0.1102, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8641975308641975, |
|
"grad_norm": 0.6352106928825378, |
|
"learning_rate": 2.7160493827160493e-05, |
|
"loss": 0.0574, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8818342151675485, |
|
"grad_norm": 0.8136057257652283, |
|
"learning_rate": 2.36331569664903e-05, |
|
"loss": 0.0413, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8818342151675485, |
|
"eval_accuracy": 0.9761904761904762, |
|
"eval_loss": 0.09835900366306305, |
|
"eval_runtime": 12.4974, |
|
"eval_samples_per_second": 80.657, |
|
"eval_steps_per_second": 10.082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8994708994708994, |
|
"grad_norm": 0.11122062802314758, |
|
"learning_rate": 2.0105820105820105e-05, |
|
"loss": 0.0287, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9171075837742504, |
|
"grad_norm": 0.08222879469394684, |
|
"learning_rate": 1.6578483245149913e-05, |
|
"loss": 0.0793, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9347442680776014, |
|
"grad_norm": 0.08997409790754318, |
|
"learning_rate": 1.3051146384479717e-05, |
|
"loss": 0.0523, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.14071638882160187, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.0828, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9700176366843033, |
|
"grad_norm": 0.0937449261546135, |
|
"learning_rate": 5.99647266313933e-06, |
|
"loss": 0.1201, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 2.3273768424987793, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.1994, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 567, |
|
"total_flos": 7.023873938666619e+17, |
|
"train_loss": 0.09499678115575612, |
|
"train_runtime": 238.2789, |
|
"train_samples_per_second": 38.039, |
|
"train_steps_per_second": 2.38 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 567, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.023873938666619e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|