llava-v1.5-7b-mmbench-3 / trainer_state.json
jpark677's picture
Upload folder using huggingface_hub
1f7027d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 105,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5e-05,
"loss": 1.41,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 0.0001,
"loss": 1.3381,
"step": 2
},
{
"epoch": 0.09,
"learning_rate": 0.00015000000000000001,
"loss": 0.3565,
"step": 3
},
{
"epoch": 0.11,
"learning_rate": 0.0002,
"loss": 0.3117,
"step": 4
},
{
"epoch": 0.14,
"learning_rate": 0.00019995162822919883,
"loss": 0.3793,
"step": 5
},
{
"epoch": 0.17,
"learning_rate": 0.00019980655971335945,
"loss": 0.3194,
"step": 6
},
{
"epoch": 0.2,
"learning_rate": 0.0001995649347969019,
"loss": 0.3543,
"step": 7
},
{
"epoch": 0.23,
"learning_rate": 0.00019922698723632767,
"loss": 0.3057,
"step": 8
},
{
"epoch": 0.26,
"learning_rate": 0.0001987930439740757,
"loss": 0.2841,
"step": 9
},
{
"epoch": 0.29,
"learning_rate": 0.00019826352482222638,
"loss": 0.3207,
"step": 10
},
{
"epoch": 0.31,
"learning_rate": 0.00019763894205636072,
"loss": 0.2432,
"step": 11
},
{
"epoch": 0.34,
"learning_rate": 0.00019691989991996663,
"loss": 0.3355,
"step": 12
},
{
"epoch": 0.37,
"learning_rate": 0.00019610709403987246,
"loss": 0.2753,
"step": 13
},
{
"epoch": 0.4,
"learning_rate": 0.00019520131075327298,
"loss": 0.2159,
"step": 14
},
{
"epoch": 0.43,
"learning_rate": 0.0001942034263469989,
"loss": 0.1977,
"step": 15
},
{
"epoch": 0.46,
"learning_rate": 0.00019311440620976597,
"loss": 0.2275,
"step": 16
},
{
"epoch": 0.49,
"learning_rate": 0.00019193530389822363,
"loss": 0.2853,
"step": 17
},
{
"epoch": 0.51,
"learning_rate": 0.00019066726011770726,
"loss": 0.1527,
"step": 18
},
{
"epoch": 0.54,
"learning_rate": 0.00018931150161867916,
"loss": 0.2492,
"step": 19
},
{
"epoch": 0.57,
"learning_rate": 0.00018786934000992688,
"loss": 0.2089,
"step": 20
},
{
"epoch": 0.6,
"learning_rate": 0.00018634217048966637,
"loss": 0.1417,
"step": 21
},
{
"epoch": 0.63,
"learning_rate": 0.00018473147049577774,
"loss": 0.2058,
"step": 22
},
{
"epoch": 0.66,
"learning_rate": 0.00018303879827647975,
"loss": 0.1367,
"step": 23
},
{
"epoch": 0.69,
"learning_rate": 0.00018126579138282503,
"loss": 0.208,
"step": 24
},
{
"epoch": 0.71,
"learning_rate": 0.00017941416508447536,
"loss": 0.1991,
"step": 25
},
{
"epoch": 0.74,
"learning_rate": 0.000177485710710289,
"loss": 0.2163,
"step": 26
},
{
"epoch": 0.77,
"learning_rate": 0.00017548229391532572,
"loss": 0.1497,
"step": 27
},
{
"epoch": 0.8,
"learning_rate": 0.00017340585287594604,
"loss": 0.1983,
"step": 28
},
{
"epoch": 0.83,
"learning_rate": 0.00017125839641475072,
"loss": 0.1899,
"step": 29
},
{
"epoch": 0.86,
"learning_rate": 0.0001690420020571747,
"loss": 0.166,
"step": 30
},
{
"epoch": 0.89,
"learning_rate": 0.00016675881402161536,
"loss": 0.1466,
"step": 31
},
{
"epoch": 0.91,
"learning_rate": 0.0001644110411450398,
"loss": 0.1478,
"step": 32
},
{
"epoch": 0.94,
"learning_rate": 0.00016200095474607753,
"loss": 0.2104,
"step": 33
},
{
"epoch": 0.97,
"learning_rate": 0.0001595308864276666,
"loss": 0.0953,
"step": 34
},
{
"epoch": 1.0,
"learning_rate": 0.00015700322582137827,
"loss": 0.101,
"step": 35
},
{
"epoch": 1.03,
"learning_rate": 0.00015442041827560274,
"loss": 0.1058,
"step": 36
},
{
"epoch": 1.06,
"learning_rate": 0.00015178496248983254,
"loss": 0.0695,
"step": 37
},
{
"epoch": 1.09,
"learning_rate": 0.00014909940809733222,
"loss": 0.0608,
"step": 38
},
{
"epoch": 1.11,
"learning_rate": 0.00014636635319853275,
"loss": 0.0414,
"step": 39
},
{
"epoch": 1.14,
"learning_rate": 0.00014358844184753712,
"loss": 0.0492,
"step": 40
},
{
"epoch": 1.17,
"learning_rate": 0.00014076836149416887,
"loss": 0.0994,
"step": 41
},
{
"epoch": 1.2,
"learning_rate": 0.00013790884038403795,
"loss": 0.1392,
"step": 42
},
{
"epoch": 1.23,
"learning_rate": 0.00013501264491913906,
"loss": 0.0333,
"step": 43
},
{
"epoch": 1.26,
"learning_rate": 0.00013208257698153677,
"loss": 0.0522,
"step": 44
},
{
"epoch": 1.29,
"learning_rate": 0.00012912147122272523,
"loss": 0.1114,
"step": 45
},
{
"epoch": 1.31,
"learning_rate": 0.00012613219232128608,
"loss": 0.0667,
"step": 46
},
{
"epoch": 1.34,
"learning_rate": 0.000123117632211497,
"loss": 0.0855,
"step": 47
},
{
"epoch": 1.37,
"learning_rate": 0.00012008070728557186,
"loss": 0.056,
"step": 48
},
{
"epoch": 1.4,
"learning_rate": 0.00011702435557223987,
"loss": 0.0742,
"step": 49
},
{
"epoch": 1.43,
"learning_rate": 0.00011395153389439233,
"loss": 0.077,
"step": 50
},
{
"epoch": 1.46,
"learning_rate": 0.00011086521500854745,
"loss": 0.0927,
"step": 51
},
{
"epoch": 1.49,
"learning_rate": 0.00010776838472890065,
"loss": 0.0612,
"step": 52
},
{
"epoch": 1.51,
"learning_rate": 0.00010466403903874176,
"loss": 0.0613,
"step": 53
},
{
"epoch": 1.54,
"learning_rate": 0.0001015551811920351,
"loss": 0.0311,
"step": 54
},
{
"epoch": 1.57,
"learning_rate": 9.844481880796491e-05,
"loss": 0.0606,
"step": 55
},
{
"epoch": 1.6,
"learning_rate": 9.533596096125825e-05,
"loss": 0.042,
"step": 56
},
{
"epoch": 1.63,
"learning_rate": 9.223161527109937e-05,
"loss": 0.047,
"step": 57
},
{
"epoch": 1.66,
"learning_rate": 8.913478499145254e-05,
"loss": 0.0401,
"step": 58
},
{
"epoch": 1.69,
"learning_rate": 8.604846610560771e-05,
"loss": 0.0409,
"step": 59
},
{
"epoch": 1.71,
"learning_rate": 8.297564442776014e-05,
"loss": 0.0585,
"step": 60
},
{
"epoch": 1.74,
"learning_rate": 7.991929271442817e-05,
"loss": 0.0765,
"step": 61
},
{
"epoch": 1.77,
"learning_rate": 7.688236778850306e-05,
"loss": 0.0525,
"step": 62
},
{
"epoch": 1.8,
"learning_rate": 7.386780767871397e-05,
"loss": 0.0291,
"step": 63
},
{
"epoch": 1.83,
"learning_rate": 7.087852877727481e-05,
"loss": 0.0533,
"step": 64
},
{
"epoch": 1.86,
"learning_rate": 6.791742301846326e-05,
"loss": 0.0793,
"step": 65
},
{
"epoch": 1.89,
"learning_rate": 6.498735508086093e-05,
"loss": 0.049,
"step": 66
},
{
"epoch": 1.91,
"learning_rate": 6.209115961596208e-05,
"loss": 0.0329,
"step": 67
},
{
"epoch": 1.94,
"learning_rate": 5.923163850583113e-05,
"loss": 0.0172,
"step": 68
},
{
"epoch": 1.97,
"learning_rate": 5.6411558152462894e-05,
"loss": 0.0504,
"step": 69
},
{
"epoch": 2.0,
"learning_rate": 5.363364680146725e-05,
"loss": 0.0378,
"step": 70
},
{
"epoch": 2.03,
"learning_rate": 5.090059190266779e-05,
"loss": 0.0514,
"step": 71
},
{
"epoch": 2.06,
"learning_rate": 4.821503751016746e-05,
"loss": 0.0286,
"step": 72
},
{
"epoch": 2.09,
"learning_rate": 4.5579581724397255e-05,
"loss": 0.0494,
"step": 73
},
{
"epoch": 2.11,
"learning_rate": 4.2996774178621736e-05,
"loss": 0.0181,
"step": 74
},
{
"epoch": 2.14,
"learning_rate": 4.046911357233343e-05,
"loss": 0.0308,
"step": 75
},
{
"epoch": 2.17,
"learning_rate": 3.79990452539225e-05,
"loss": 0.0375,
"step": 76
},
{
"epoch": 2.2,
"learning_rate": 3.558895885496023e-05,
"loss": 0.0085,
"step": 77
},
{
"epoch": 2.23,
"learning_rate": 3.324118597838464e-05,
"loss": 0.028,
"step": 78
},
{
"epoch": 2.26,
"learning_rate": 3.0957997942825336e-05,
"loss": 0.0153,
"step": 79
},
{
"epoch": 2.29,
"learning_rate": 2.874160358524931e-05,
"loss": 0.0378,
"step": 80
},
{
"epoch": 2.31,
"learning_rate": 2.659414712405398e-05,
"loss": 0.0233,
"step": 81
},
{
"epoch": 2.34,
"learning_rate": 2.451770608467432e-05,
"loss": 0.0047,
"step": 82
},
{
"epoch": 2.37,
"learning_rate": 2.251428928971102e-05,
"loss": 0.0045,
"step": 83
},
{
"epoch": 2.4,
"learning_rate": 2.058583491552465e-05,
"loss": 0.0161,
"step": 84
},
{
"epoch": 2.43,
"learning_rate": 1.8734208617174988e-05,
"loss": 0.0424,
"step": 85
},
{
"epoch": 2.46,
"learning_rate": 1.696120172352025e-05,
"loss": 0.0167,
"step": 86
},
{
"epoch": 2.49,
"learning_rate": 1.526852950422226e-05,
"loss": 0.0125,
"step": 87
},
{
"epoch": 2.51,
"learning_rate": 1.3657829510333654e-05,
"loss": 0.0236,
"step": 88
},
{
"epoch": 2.54,
"learning_rate": 1.2130659990073146e-05,
"loss": 0.0089,
"step": 89
},
{
"epoch": 2.57,
"learning_rate": 1.0688498381320855e-05,
"loss": 0.0202,
"step": 90
},
{
"epoch": 2.6,
"learning_rate": 9.332739882292752e-06,
"loss": 0.0111,
"step": 91
},
{
"epoch": 2.63,
"learning_rate": 8.064696101776358e-06,
"loss": 0.0025,
"step": 92
},
{
"epoch": 2.66,
"learning_rate": 6.8855937902340576e-06,
"loss": 0.0209,
"step": 93
},
{
"epoch": 2.69,
"learning_rate": 5.7965736530010916e-06,
"loss": 0.0318,
"step": 94
},
{
"epoch": 2.71,
"learning_rate": 4.798689246727006e-06,
"loss": 0.0334,
"step": 95
},
{
"epoch": 2.74,
"learning_rate": 3.892905960127546e-06,
"loss": 0.0117,
"step": 96
},
{
"epoch": 2.77,
"learning_rate": 3.0801000800333877e-06,
"loss": 0.0134,
"step": 97
},
{
"epoch": 2.8,
"learning_rate": 2.3610579436393e-06,
"loss": 0.0279,
"step": 98
},
{
"epoch": 2.83,
"learning_rate": 1.7364751777736332e-06,
"loss": 0.0071,
"step": 99
},
{
"epoch": 2.86,
"learning_rate": 1.2069560259243328e-06,
"loss": 0.0162,
"step": 100
},
{
"epoch": 2.89,
"learning_rate": 7.730127636723539e-07,
"loss": 0.0029,
"step": 101
},
{
"epoch": 2.91,
"learning_rate": 4.3506520309813947e-07,
"loss": 0.005,
"step": 102
},
{
"epoch": 2.94,
"learning_rate": 1.9344028664056713e-07,
"loss": 0.0189,
"step": 103
},
{
"epoch": 2.97,
"learning_rate": 4.837177080119215e-08,
"loss": 0.0099,
"step": 104
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.0075,
"step": 105
},
{
"epoch": 3.0,
"step": 105,
"total_flos": 14279656734720.0,
"train_loss": 0.12493001309977401,
"train_runtime": 603.9084,
"train_samples_per_second": 21.743,
"train_steps_per_second": 0.174
}
],
"logging_steps": 1.0,
"max_steps": 105,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50000,
"total_flos": 14279656734720.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}