{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 97.44590163934426,
"eval_steps": 500,
"global_step": 3800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.2885245901639344,
"grad_norm": 49.25,
"learning_rate": 0.00019747235387045816,
"loss": 6.9218,
"step": 50
},
{
"epoch": 2.577049180327869,
"grad_norm": 73.5,
"learning_rate": 0.0001948393891521854,
"loss": 3.5446,
"step": 100
},
{
"epoch": 3.865573770491803,
"grad_norm": 58.25,
"learning_rate": 0.0001922064244339126,
"loss": 3.191,
"step": 150
},
{
"epoch": 5.131147540983607,
"grad_norm": 49.0,
"learning_rate": 0.00018957345971563983,
"loss": 2.9104,
"step": 200
},
{
"epoch": 6.419672131147541,
"grad_norm": 66.5,
"learning_rate": 0.00018694049499736707,
"loss": 2.0795,
"step": 250
},
{
"epoch": 7.7081967213114755,
"grad_norm": 45.75,
"learning_rate": 0.00018430753027909427,
"loss": 2.3055,
"step": 300
},
{
"epoch": 8.99672131147541,
"grad_norm": 56.25,
"learning_rate": 0.0001816745655608215,
"loss": 1.8394,
"step": 350
},
{
"epoch": 10.262295081967213,
"grad_norm": 50.75,
"learning_rate": 0.00017904160084254874,
"loss": 1.5723,
"step": 400
},
{
"epoch": 11.550819672131148,
"grad_norm": 48.5,
"learning_rate": 0.00017640863612427594,
"loss": 1.4006,
"step": 450
},
{
"epoch": 12.839344262295082,
"grad_norm": 39.75,
"learning_rate": 0.00017377567140600318,
"loss": 1.363,
"step": 500
},
{
"epoch": 14.104918032786886,
"grad_norm": 38.5,
"learning_rate": 0.0001711427066877304,
"loss": 1.3352,
"step": 550
},
{
"epoch": 15.39344262295082,
"grad_norm": 45.0,
"learning_rate": 0.00016850974196945762,
"loss": 1.1165,
"step": 600
},
{
"epoch": 16.681967213114753,
"grad_norm": 44.0,
"learning_rate": 0.00016587677725118485,
"loss": 0.8736,
"step": 650
},
{
"epoch": 17.970491803278687,
"grad_norm": 43.5,
"learning_rate": 0.00016324381253291208,
"loss": 1.0635,
"step": 700
},
{
"epoch": 19.236065573770492,
"grad_norm": 34.25,
"learning_rate": 0.0001606108478146393,
"loss": 0.7858,
"step": 750
},
{
"epoch": 20.524590163934427,
"grad_norm": 37.25,
"learning_rate": 0.00015797788309636652,
"loss": 0.8236,
"step": 800
},
{
"epoch": 21.81311475409836,
"grad_norm": 35.5,
"learning_rate": 0.00015534491837809376,
"loss": 0.7766,
"step": 850
},
{
"epoch": 23.078688524590163,
"grad_norm": 33.0,
"learning_rate": 0.00015271195365982096,
"loss": 0.6612,
"step": 900
},
{
"epoch": 24.367213114754097,
"grad_norm": 33.75,
"learning_rate": 0.0001500789889415482,
"loss": 0.6364,
"step": 950
},
{
"epoch": 25.65573770491803,
"grad_norm": 38.25,
"learning_rate": 0.00014744602422327543,
"loss": 0.6553,
"step": 1000
},
{
"epoch": 26.944262295081966,
"grad_norm": 29.25,
"learning_rate": 0.00014481305950500263,
"loss": 0.5468,
"step": 1050
},
{
"epoch": 28.20983606557377,
"grad_norm": 35.25,
"learning_rate": 0.00014218009478672987,
"loss": 0.5311,
"step": 1100
},
{
"epoch": 29.498360655737706,
"grad_norm": 27.75,
"learning_rate": 0.0001395471300684571,
"loss": 0.5019,
"step": 1150
},
{
"epoch": 30.78688524590164,
"grad_norm": 28.125,
"learning_rate": 0.0001369141653501843,
"loss": 0.6387,
"step": 1200
},
{
"epoch": 32.05245901639344,
"grad_norm": 38.75,
"learning_rate": 0.00013428120063191154,
"loss": 0.5054,
"step": 1250
},
{
"epoch": 33.34098360655738,
"grad_norm": 21.875,
"learning_rate": 0.00013164823591363877,
"loss": 0.4805,
"step": 1300
},
{
"epoch": 34.62950819672131,
"grad_norm": 29.375,
"learning_rate": 0.00012901527119536598,
"loss": 0.5118,
"step": 1350
},
{
"epoch": 35.91803278688525,
"grad_norm": 36.0,
"learning_rate": 0.0001263823064770932,
"loss": 0.447,
"step": 1400
},
{
"epoch": 37.18360655737705,
"grad_norm": 24.125,
"learning_rate": 0.00012374934175882045,
"loss": 0.3921,
"step": 1450
},
{
"epoch": 38.472131147540985,
"grad_norm": 21.875,
"learning_rate": 0.00012111637704054765,
"loss": 0.4268,
"step": 1500
},
{
"epoch": 39.760655737704916,
"grad_norm": 22.25,
"learning_rate": 0.00011848341232227489,
"loss": 0.3317,
"step": 1550
},
{
"epoch": 41.02622950819672,
"grad_norm": 15.0625,
"learning_rate": 0.00011585044760400212,
"loss": 0.387,
"step": 1600
},
{
"epoch": 42.31475409836066,
"grad_norm": 20.875,
"learning_rate": 0.00011321748288572934,
"loss": 0.3285,
"step": 1650
},
{
"epoch": 43.60327868852459,
"grad_norm": 21.375,
"learning_rate": 0.00011058451816745656,
"loss": 0.3281,
"step": 1700
},
{
"epoch": 44.89180327868853,
"grad_norm": 22.75,
"learning_rate": 0.00010795155344918379,
"loss": 0.3148,
"step": 1750
},
{
"epoch": 46.157377049180326,
"grad_norm": 18.75,
"learning_rate": 0.00010531858873091101,
"loss": 0.2567,
"step": 1800
},
{
"epoch": 47.445901639344264,
"grad_norm": 23.75,
"learning_rate": 0.00010268562401263824,
"loss": 0.2609,
"step": 1850
},
{
"epoch": 48.734426229508195,
"grad_norm": 18.75,
"learning_rate": 0.00010005265929436546,
"loss": 0.2365,
"step": 1900
},
{
"epoch": 50.0,
"grad_norm": 5.09375,
"learning_rate": 9.74196945760927e-05,
"loss": 0.2555,
"step": 1950
},
{
"epoch": 51.28852459016394,
"grad_norm": 11.9375,
"learning_rate": 9.478672985781992e-05,
"loss": 0.2184,
"step": 2000
},
{
"epoch": 52.57704918032787,
"grad_norm": 12.8125,
"learning_rate": 9.215376513954714e-05,
"loss": 0.2279,
"step": 2050
},
{
"epoch": 53.86557377049181,
"grad_norm": 13.8125,
"learning_rate": 8.952080042127437e-05,
"loss": 0.202,
"step": 2100
},
{
"epoch": 55.131147540983605,
"grad_norm": 13.0625,
"learning_rate": 8.688783570300159e-05,
"loss": 0.1651,
"step": 2150
},
{
"epoch": 56.41967213114754,
"grad_norm": 11.4375,
"learning_rate": 8.425487098472881e-05,
"loss": 0.2015,
"step": 2200
},
{
"epoch": 57.708196721311474,
"grad_norm": 16.375,
"learning_rate": 8.162190626645604e-05,
"loss": 0.1504,
"step": 2250
},
{
"epoch": 58.99672131147541,
"grad_norm": 13.0625,
"learning_rate": 7.898894154818326e-05,
"loss": 0.1725,
"step": 2300
},
{
"epoch": 60.26229508196721,
"grad_norm": 13.6875,
"learning_rate": 7.635597682991048e-05,
"loss": 0.1499,
"step": 2350
},
{
"epoch": 61.55081967213115,
"grad_norm": 7.59375,
"learning_rate": 7.372301211163771e-05,
"loss": 0.145,
"step": 2400
},
{
"epoch": 62.83934426229508,
"grad_norm": 7.0625,
"learning_rate": 7.109004739336493e-05,
"loss": 0.1379,
"step": 2450
},
{
"epoch": 64.10491803278688,
"grad_norm": 4.15625,
"learning_rate": 6.845708267509215e-05,
"loss": 0.1244,
"step": 2500
},
{
"epoch": 65.39344262295081,
"grad_norm": 7.0,
"learning_rate": 6.582411795681939e-05,
"loss": 0.1214,
"step": 2550
},
{
"epoch": 66.68196721311476,
"grad_norm": 9.3125,
"learning_rate": 6.31911532385466e-05,
"loss": 0.1341,
"step": 2600
},
{
"epoch": 67.97049180327869,
"grad_norm": 7.09375,
"learning_rate": 6.0558188520273826e-05,
"loss": 0.1201,
"step": 2650
},
{
"epoch": 69.23606557377049,
"grad_norm": 20.5,
"learning_rate": 5.792522380200106e-05,
"loss": 0.1049,
"step": 2700
},
{
"epoch": 70.52459016393442,
"grad_norm": 5.90625,
"learning_rate": 5.529225908372828e-05,
"loss": 0.1033,
"step": 2750
},
{
"epoch": 71.81311475409836,
"grad_norm": 3.25,
"learning_rate": 5.2659294365455505e-05,
"loss": 0.1028,
"step": 2800
},
{
"epoch": 73.07868852459016,
"grad_norm": 8.0625,
"learning_rate": 5.002632964718273e-05,
"loss": 0.1003,
"step": 2850
},
{
"epoch": 74.3672131147541,
"grad_norm": 6.9375,
"learning_rate": 4.739336492890996e-05,
"loss": 0.0993,
"step": 2900
},
{
"epoch": 75.65573770491804,
"grad_norm": 3.734375,
"learning_rate": 4.4760400210637185e-05,
"loss": 0.0988,
"step": 2950
},
{
"epoch": 76.94426229508197,
"grad_norm": 4.84375,
"learning_rate": 4.2127435492364404e-05,
"loss": 0.0885,
"step": 3000
},
{
"epoch": 78.20983606557377,
"grad_norm": 2.71875,
"learning_rate": 3.949447077409163e-05,
"loss": 0.0816,
"step": 3050
},
{
"epoch": 79.4983606557377,
"grad_norm": 1.59375,
"learning_rate": 3.686150605581886e-05,
"loss": 0.0969,
"step": 3100
},
{
"epoch": 80.78688524590164,
"grad_norm": 2.5625,
"learning_rate": 3.422854133754608e-05,
"loss": 0.0886,
"step": 3150
},
{
"epoch": 82.05245901639344,
"grad_norm": 4.6875,
"learning_rate": 3.15955766192733e-05,
"loss": 0.0801,
"step": 3200
},
{
"epoch": 83.34098360655737,
"grad_norm": 2.53125,
"learning_rate": 2.896261190100053e-05,
"loss": 0.0888,
"step": 3250
},
{
"epoch": 84.62950819672132,
"grad_norm": 4.1875,
"learning_rate": 2.6329647182727753e-05,
"loss": 0.0872,
"step": 3300
},
{
"epoch": 85.91803278688525,
"grad_norm": 2.9375,
"learning_rate": 2.369668246445498e-05,
"loss": 0.0807,
"step": 3350
},
{
"epoch": 87.18360655737705,
"grad_norm": 2.84375,
"learning_rate": 2.1063717746182202e-05,
"loss": 0.0779,
"step": 3400
},
{
"epoch": 88.47213114754098,
"grad_norm": 1.8125,
"learning_rate": 1.843075302790943e-05,
"loss": 0.0741,
"step": 3450
},
{
"epoch": 89.76065573770492,
"grad_norm": 2.171875,
"learning_rate": 1.579778830963665e-05,
"loss": 0.0833,
"step": 3500
},
{
"epoch": 91.02622950819672,
"grad_norm": 2.484375,
"learning_rate": 1.3164823591363876e-05,
"loss": 0.0861,
"step": 3550
},
{
"epoch": 92.31475409836065,
"grad_norm": 2.046875,
"learning_rate": 1.0531858873091101e-05,
"loss": 0.08,
"step": 3600
},
{
"epoch": 93.6032786885246,
"grad_norm": 2.84375,
"learning_rate": 7.898894154818326e-06,
"loss": 0.0785,
"step": 3650
},
{
"epoch": 94.89180327868853,
"grad_norm": 2.28125,
"learning_rate": 5.2659294365455505e-06,
"loss": 0.0936,
"step": 3700
},
{
"epoch": 96.15737704918033,
"grad_norm": 1.7734375,
"learning_rate": 2.6329647182727753e-06,
"loss": 0.0741,
"step": 3750
},
{
"epoch": 97.44590163934426,
"grad_norm": 2.21875,
"learning_rate": 0.0,
"loss": 0.0927,
"step": 3800
}
],
"logging_steps": 50,
"max_steps": 3800,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.470967617037125e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}