SLaVA-CXR / trainer_state.json
bluesky333's picture
Upload folder using huggingface_hub
b0088e5 verified
raw
history blame
38.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.0000000000000003e-06,
"loss": 3.1016,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 4.000000000000001e-06,
"loss": 3.1211,
"step": 2
},
{
"epoch": 0.03,
"learning_rate": 6e-06,
"loss": 3.2852,
"step": 3
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 3.0801,
"step": 4
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 2.9102,
"step": 5
},
{
"epoch": 0.06,
"learning_rate": 1.2e-05,
"loss": 2.7852,
"step": 6
},
{
"epoch": 0.07,
"learning_rate": 1.4e-05,
"loss": 2.7852,
"step": 7
},
{
"epoch": 0.08,
"learning_rate": 1.6000000000000003e-05,
"loss": 2.9199,
"step": 8
},
{
"epoch": 0.09,
"learning_rate": 1.8e-05,
"loss": 2.7734,
"step": 9
},
{
"epoch": 0.1,
"learning_rate": 2e-05,
"loss": 2.8418,
"step": 10
},
{
"epoch": 0.1,
"learning_rate": 1.9999469523400122e-05,
"loss": 2.7402,
"step": 11
},
{
"epoch": 0.11,
"learning_rate": 1.9997878149881576e-05,
"loss": 2.834,
"step": 12
},
{
"epoch": 0.12,
"learning_rate": 1.999522604828164e-05,
"loss": 2.7441,
"step": 13
},
{
"epoch": 0.13,
"learning_rate": 1.9991513499975883e-05,
"loss": 2.8438,
"step": 14
},
{
"epoch": 0.14,
"learning_rate": 1.9986740898848306e-05,
"loss": 2.7539,
"step": 15
},
{
"epoch": 0.15,
"learning_rate": 1.9980908751249556e-05,
"loss": 2.582,
"step": 16
},
{
"epoch": 0.16,
"learning_rate": 1.997401767594319e-05,
"loss": 2.6973,
"step": 17
},
{
"epoch": 0.17,
"learning_rate": 1.996606840404006e-05,
"loss": 2.7734,
"step": 18
},
{
"epoch": 0.18,
"learning_rate": 1.9957061778920703e-05,
"loss": 2.7227,
"step": 19
},
{
"epoch": 0.19,
"learning_rate": 1.9946998756145894e-05,
"loss": 2.6445,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 1.9935880403355255e-05,
"loss": 2.4629,
"step": 21
},
{
"epoch": 0.21,
"learning_rate": 1.9923707900153984e-05,
"loss": 2.6074,
"step": 22
},
{
"epoch": 0.22,
"learning_rate": 1.9910482537987704e-05,
"loss": 2.6328,
"step": 23
},
{
"epoch": 0.23,
"learning_rate": 1.989620572000544e-05,
"loss": 2.6055,
"step": 24
},
{
"epoch": 0.24,
"learning_rate": 1.9880878960910772e-05,
"loss": 2.7031,
"step": 25
},
{
"epoch": 0.25,
"learning_rate": 1.9864503886801108e-05,
"loss": 2.6191,
"step": 26
},
{
"epoch": 0.26,
"learning_rate": 1.9847082234995172e-05,
"loss": 2.5859,
"step": 27
},
{
"epoch": 0.27,
"learning_rate": 1.982861585384869e-05,
"loss": 2.5078,
"step": 28
},
{
"epoch": 0.28,
"learning_rate": 1.9809106702558277e-05,
"loss": 2.4648,
"step": 29
},
{
"epoch": 0.29,
"learning_rate": 1.978855685095358e-05,
"loss": 2.5371,
"step": 30
},
{
"epoch": 0.3,
"learning_rate": 1.9766968479277684e-05,
"loss": 2.6172,
"step": 31
},
{
"epoch": 0.3,
"learning_rate": 1.974434387795579e-05,
"loss": 2.5449,
"step": 32
},
{
"epoch": 0.31,
"learning_rate": 1.972068544735221e-05,
"loss": 2.3799,
"step": 33
},
{
"epoch": 0.32,
"learning_rate": 1.969599569751571e-05,
"loss": 2.2725,
"step": 34
},
{
"epoch": 0.33,
"learning_rate": 1.9670277247913205e-05,
"loss": 2.4512,
"step": 35
},
{
"epoch": 0.34,
"learning_rate": 1.964353282715183e-05,
"loss": 2.4785,
"step": 36
},
{
"epoch": 0.35,
"learning_rate": 1.961576527268946e-05,
"loss": 2.3672,
"step": 37
},
{
"epoch": 0.36,
"learning_rate": 1.9586977530533677e-05,
"loss": 2.375,
"step": 38
},
{
"epoch": 0.37,
"learning_rate": 1.95571726549292e-05,
"loss": 2.3008,
"step": 39
},
{
"epoch": 0.38,
"learning_rate": 1.9526353808033827e-05,
"loss": 2.4326,
"step": 40
},
{
"epoch": 0.39,
"learning_rate": 1.9494524259582994e-05,
"loss": 2.4062,
"step": 41
},
{
"epoch": 0.4,
"learning_rate": 1.9461687386542826e-05,
"loss": 2.4414,
"step": 42
},
{
"epoch": 0.41,
"learning_rate": 1.9427846672751873e-05,
"loss": 2.3223,
"step": 43
},
{
"epoch": 0.42,
"learning_rate": 1.93930057085515e-05,
"loss": 2.2812,
"step": 44
},
{
"epoch": 0.43,
"learning_rate": 1.9357168190404937e-05,
"loss": 2.167,
"step": 45
},
{
"epoch": 0.44,
"learning_rate": 1.932033792050515e-05,
"loss": 2.4902,
"step": 46
},
{
"epoch": 0.45,
"learning_rate": 1.928251880637141e-05,
"loss": 2.4883,
"step": 47
},
{
"epoch": 0.46,
"learning_rate": 1.924371486043473e-05,
"loss": 2.3662,
"step": 48
},
{
"epoch": 0.47,
"learning_rate": 1.920393019961217e-05,
"loss": 2.3066,
"step": 49
},
{
"epoch": 0.48,
"learning_rate": 1.916316904487005e-05,
"loss": 2.2275,
"step": 50
},
{
"epoch": 0.49,
"learning_rate": 1.9121435720776122e-05,
"loss": 2.2197,
"step": 51
},
{
"epoch": 0.5,
"learning_rate": 1.9078734655040763e-05,
"loss": 2.3467,
"step": 52
},
{
"epoch": 0.5,
"learning_rate": 1.9035070378047204e-05,
"loss": 2.1914,
"step": 53
},
{
"epoch": 0.51,
"learning_rate": 1.8990447522370886e-05,
"loss": 2.3965,
"step": 54
},
{
"epoch": 0.52,
"learning_rate": 1.8944870822287957e-05,
"loss": 2.4883,
"step": 55
},
{
"epoch": 0.53,
"learning_rate": 1.8898345113273e-05,
"loss": 2.3955,
"step": 56
},
{
"epoch": 0.54,
"learning_rate": 1.8850875331485996e-05,
"loss": 2.5859,
"step": 57
},
{
"epoch": 0.55,
"learning_rate": 1.8802466513248635e-05,
"loss": 2.2754,
"step": 58
},
{
"epoch": 0.56,
"learning_rate": 1.8753123794509974e-05,
"loss": 2.2451,
"step": 59
},
{
"epoch": 0.57,
"learning_rate": 1.8702852410301556e-05,
"loss": 2.3584,
"step": 60
},
{
"epoch": 0.58,
"learning_rate": 1.865165769418196e-05,
"loss": 2.3281,
"step": 61
},
{
"epoch": 0.59,
"learning_rate": 1.8599545077670983e-05,
"loss": 2.2256,
"step": 62
},
{
"epoch": 0.6,
"learning_rate": 1.854652008967335e-05,
"loss": 2.3828,
"step": 63
},
{
"epoch": 0.61,
"learning_rate": 1.8492588355892125e-05,
"loss": 2.126,
"step": 64
},
{
"epoch": 0.62,
"learning_rate": 1.8437755598231857e-05,
"loss": 2.3516,
"step": 65
},
{
"epoch": 0.63,
"learning_rate": 1.8382027634191523e-05,
"loss": 2.1816,
"step": 66
},
{
"epoch": 0.64,
"learning_rate": 1.8325410376247295e-05,
"loss": 2.29,
"step": 67
},
{
"epoch": 0.65,
"learning_rate": 1.826790983122527e-05,
"loss": 2.2529,
"step": 68
},
{
"epoch": 0.66,
"learning_rate": 1.8209532099664177e-05,
"loss": 1.96,
"step": 69
},
{
"epoch": 0.67,
"learning_rate": 1.8150283375168112e-05,
"loss": 2.0742,
"step": 70
},
{
"epoch": 0.68,
"learning_rate": 1.8090169943749477e-05,
"loss": 2.2773,
"step": 71
},
{
"epoch": 0.69,
"learning_rate": 1.8029198183162e-05,
"loss": 2.3135,
"step": 72
},
{
"epoch": 0.7,
"learning_rate": 1.796737456222413e-05,
"loss": 2.4082,
"step": 73
},
{
"epoch": 0.7,
"learning_rate": 1.7904705640132717e-05,
"loss": 2.3262,
"step": 74
},
{
"epoch": 0.71,
"learning_rate": 1.7841198065767107e-05,
"loss": 2.1162,
"step": 75
},
{
"epoch": 0.72,
"learning_rate": 1.7776858576983713e-05,
"loss": 2.2832,
"step": 76
},
{
"epoch": 0.73,
"learning_rate": 1.771169399990119e-05,
"loss": 2.125,
"step": 77
},
{
"epoch": 0.74,
"learning_rate": 1.7645711248176198e-05,
"loss": 2.1611,
"step": 78
},
{
"epoch": 0.75,
"learning_rate": 1.7578917322269885e-05,
"loss": 2.1689,
"step": 79
},
{
"epoch": 0.76,
"learning_rate": 1.7511319308705198e-05,
"loss": 2.1641,
"step": 80
},
{
"epoch": 0.77,
"learning_rate": 1.744292437931502e-05,
"loss": 2.2217,
"step": 81
},
{
"epoch": 0.78,
"learning_rate": 1.7373739790481263e-05,
"loss": 2.2656,
"step": 82
},
{
"epoch": 0.79,
"learning_rate": 1.7303772882365018e-05,
"loss": 2.0977,
"step": 83
},
{
"epoch": 0.8,
"learning_rate": 1.723303107812779e-05,
"loss": 2.1602,
"step": 84
},
{
"epoch": 0.81,
"learning_rate": 1.7161521883143936e-05,
"loss": 2.1045,
"step": 85
},
{
"epoch": 0.82,
"learning_rate": 1.7089252884204376e-05,
"loss": 2.2949,
"step": 86
},
{
"epoch": 0.83,
"learning_rate": 1.701623174871168e-05,
"loss": 2.124,
"step": 87
},
{
"epoch": 0.84,
"learning_rate": 1.6942466223866582e-05,
"loss": 2.2051,
"step": 88
},
{
"epoch": 0.85,
"learning_rate": 1.6867964135846043e-05,
"loss": 2.0146,
"step": 89
},
{
"epoch": 0.86,
"learning_rate": 1.679273338897293e-05,
"loss": 2.1816,
"step": 90
},
{
"epoch": 0.87,
"learning_rate": 1.6716781964877413e-05,
"loss": 2.2002,
"step": 91
},
{
"epoch": 0.88,
"learning_rate": 1.664011792165012e-05,
"loss": 2.0781,
"step": 92
},
{
"epoch": 0.89,
"learning_rate": 1.6562749392987255e-05,
"loss": 2.2031,
"step": 93
},
{
"epoch": 0.9,
"learning_rate": 1.648468458732762e-05,
"loss": 2.1719,
"step": 94
},
{
"epoch": 0.9,
"learning_rate": 1.6405931786981753e-05,
"loss": 2.2246,
"step": 95
},
{
"epoch": 0.91,
"learning_rate": 1.6326499347253206e-05,
"loss": 2.2852,
"step": 96
},
{
"epoch": 0.92,
"learning_rate": 1.6246395695552086e-05,
"loss": 2.1357,
"step": 97
},
{
"epoch": 0.93,
"learning_rate": 1.6165629330500952e-05,
"loss": 2.2148,
"step": 98
},
{
"epoch": 0.94,
"learning_rate": 1.6084208821033152e-05,
"loss": 2.0781,
"step": 99
},
{
"epoch": 0.95,
"learning_rate": 1.6002142805483686e-05,
"loss": 2.2197,
"step": 100
},
{
"epoch": 0.96,
"learning_rate": 1.591943999067273e-05,
"loss": 2.0957,
"step": 101
},
{
"epoch": 0.97,
"learning_rate": 1.5836109150981885e-05,
"loss": 2.0645,
"step": 102
},
{
"epoch": 0.98,
"learning_rate": 1.5752159127423262e-05,
"loss": 2.4609,
"step": 103
},
{
"epoch": 0.99,
"learning_rate": 1.5667598826701463e-05,
"loss": 2.3008,
"step": 104
},
{
"epoch": 1.0,
"learning_rate": 1.5582437220268648e-05,
"loss": 2.0254,
"step": 105
},
{
"epoch": 1.01,
"learning_rate": 1.549668334337271e-05,
"loss": 2.0322,
"step": 106
},
{
"epoch": 1.02,
"learning_rate": 1.541034629409865e-05,
"loss": 1.9941,
"step": 107
},
{
"epoch": 1.03,
"learning_rate": 1.532343523240334e-05,
"loss": 1.9033,
"step": 108
},
{
"epoch": 1.04,
"learning_rate": 1.523595937914368e-05,
"loss": 1.8633,
"step": 109
},
{
"epoch": 1.05,
"learning_rate": 1.5147928015098309e-05,
"loss": 2.043,
"step": 110
},
{
"epoch": 1.06,
"learning_rate": 1.5059350479982966e-05,
"loss": 2.0029,
"step": 111
},
{
"epoch": 1.07,
"learning_rate": 1.497023617145958e-05,
"loss": 1.9092,
"step": 112
},
{
"epoch": 1.08,
"learning_rate": 1.488059454413923e-05,
"loss": 1.8877,
"step": 113
},
{
"epoch": 1.09,
"learning_rate": 1.4790435108579048e-05,
"loss": 2.0322,
"step": 114
},
{
"epoch": 1.1,
"learning_rate": 1.4699767430273202e-05,
"loss": 1.9189,
"step": 115
},
{
"epoch": 1.1,
"learning_rate": 1.4608601128638027e-05,
"loss": 1.8535,
"step": 116
},
{
"epoch": 1.11,
"learning_rate": 1.4516945875991472e-05,
"loss": 1.8447,
"step": 117
},
{
"epoch": 1.12,
"learning_rate": 1.4424811396526892e-05,
"loss": 1.916,
"step": 118
},
{
"epoch": 1.13,
"learning_rate": 1.4332207465281365e-05,
"loss": 1.8838,
"step": 119
},
{
"epoch": 1.14,
"learning_rate": 1.423914390709861e-05,
"loss": 2.0352,
"step": 120
},
{
"epoch": 1.15,
"learning_rate": 1.4145630595586607e-05,
"loss": 1.9609,
"step": 121
},
{
"epoch": 1.16,
"learning_rate": 1.4051677452070064e-05,
"loss": 2.0049,
"step": 122
},
{
"epoch": 1.17,
"learning_rate": 1.3957294444537808e-05,
"loss": 1.8584,
"step": 123
},
{
"epoch": 1.18,
"learning_rate": 1.386249158658522e-05,
"loss": 1.7842,
"step": 124
},
{
"epoch": 1.19,
"learning_rate": 1.3767278936351853e-05,
"loss": 1.9111,
"step": 125
},
{
"epoch": 1.2,
"learning_rate": 1.3671666595454296e-05,
"loss": 1.8887,
"step": 126
},
{
"epoch": 1.21,
"learning_rate": 1.357566470791445e-05,
"loss": 1.917,
"step": 127
},
{
"epoch": 1.22,
"learning_rate": 1.347928345908329e-05,
"loss": 1.959,
"step": 128
},
{
"epoch": 1.23,
"learning_rate": 1.3382533074560256e-05,
"loss": 1.8545,
"step": 129
},
{
"epoch": 1.24,
"learning_rate": 1.3285423819108349e-05,
"loss": 2.0332,
"step": 130
},
{
"epoch": 1.25,
"learning_rate": 1.3187965995565098e-05,
"loss": 2.0068,
"step": 131
},
{
"epoch": 1.26,
"learning_rate": 1.3090169943749475e-05,
"loss": 2.0713,
"step": 132
},
{
"epoch": 1.27,
"learning_rate": 1.2992046039364893e-05,
"loss": 1.8516,
"step": 133
},
{
"epoch": 1.28,
"learning_rate": 1.2893604692898381e-05,
"loss": 1.8965,
"step": 134
},
{
"epoch": 1.29,
"learning_rate": 1.2794856348516095e-05,
"loss": 1.9531,
"step": 135
},
{
"epoch": 1.3,
"learning_rate": 1.2695811482955227e-05,
"loss": 1.8584,
"step": 136
},
{
"epoch": 1.3,
"learning_rate": 1.2596480604412485e-05,
"loss": 1.873,
"step": 137
},
{
"epoch": 1.31,
"learning_rate": 1.24968742514292e-05,
"loss": 2.0059,
"step": 138
},
{
"epoch": 1.32,
"learning_rate": 1.2397002991773277e-05,
"loss": 1.9492,
"step": 139
},
{
"epoch": 1.33,
"learning_rate": 1.2296877421317958e-05,
"loss": 2.0479,
"step": 140
},
{
"epoch": 1.34,
"learning_rate": 1.2196508162917678e-05,
"loss": 1.8369,
"step": 141
},
{
"epoch": 1.35,
"learning_rate": 1.2095905865281026e-05,
"loss": 2.0,
"step": 142
},
{
"epoch": 1.36,
"learning_rate": 1.1995081201840958e-05,
"loss": 1.8086,
"step": 143
},
{
"epoch": 1.37,
"learning_rate": 1.1894044869622403e-05,
"loss": 1.9092,
"step": 144
},
{
"epoch": 1.38,
"learning_rate": 1.1792807588107358e-05,
"loss": 1.8643,
"step": 145
},
{
"epoch": 1.39,
"learning_rate": 1.1691380098097598e-05,
"loss": 1.9043,
"step": 146
},
{
"epoch": 1.4,
"learning_rate": 1.158977316057513e-05,
"loss": 1.8506,
"step": 147
},
{
"epoch": 1.41,
"learning_rate": 1.1487997555560503e-05,
"loss": 1.7197,
"step": 148
},
{
"epoch": 1.42,
"learning_rate": 1.1386064080969095e-05,
"loss": 1.9434,
"step": 149
},
{
"epoch": 1.43,
"learning_rate": 1.1283983551465512e-05,
"loss": 1.9307,
"step": 150
},
{
"epoch": 1.44,
"learning_rate": 1.118176679731619e-05,
"loss": 1.915,
"step": 151
},
{
"epoch": 1.45,
"learning_rate": 1.1079424663240372e-05,
"loss": 1.8701,
"step": 152
},
{
"epoch": 1.46,
"learning_rate": 1.0976968007259519e-05,
"loss": 1.9004,
"step": 153
},
{
"epoch": 1.47,
"learning_rate": 1.0874407699545329e-05,
"loss": 1.9346,
"step": 154
},
{
"epoch": 1.48,
"learning_rate": 1.0771754621266466e-05,
"loss": 1.9727,
"step": 155
},
{
"epoch": 1.49,
"learning_rate": 1.0669019663434117e-05,
"loss": 2.0176,
"step": 156
},
{
"epoch": 1.5,
"learning_rate": 1.0566213725746506e-05,
"loss": 1.9346,
"step": 157
},
{
"epoch": 1.5,
"learning_rate": 1.0463347715432488e-05,
"loss": 1.8779,
"step": 158
},
{
"epoch": 1.51,
"learning_rate": 1.0360432546094341e-05,
"loss": 1.8389,
"step": 159
},
{
"epoch": 1.52,
"learning_rate": 1.0257479136549889e-05,
"loss": 2.0264,
"step": 160
},
{
"epoch": 1.53,
"learning_rate": 1.0154498409674051e-05,
"loss": 1.9092,
"step": 161
},
{
"epoch": 1.54,
"learning_rate": 1.0051501291240008e-05,
"loss": 1.9785,
"step": 162
},
{
"epoch": 1.55,
"learning_rate": 9.948498708759993e-06,
"loss": 1.8594,
"step": 163
},
{
"epoch": 1.56,
"learning_rate": 9.845501590325949e-06,
"loss": 1.9746,
"step": 164
},
{
"epoch": 1.57,
"learning_rate": 9.742520863450116e-06,
"loss": 1.9082,
"step": 165
},
{
"epoch": 1.58,
"learning_rate": 9.639567453905662e-06,
"loss": 1.9717,
"step": 166
},
{
"epoch": 1.59,
"learning_rate": 9.536652284567514e-06,
"loss": 1.8242,
"step": 167
},
{
"epoch": 1.6,
"learning_rate": 9.433786274253496e-06,
"loss": 1.8867,
"step": 168
},
{
"epoch": 1.61,
"learning_rate": 9.330980336565887e-06,
"loss": 1.9629,
"step": 169
},
{
"epoch": 1.62,
"learning_rate": 9.228245378733537e-06,
"loss": 1.8652,
"step": 170
},
{
"epoch": 1.63,
"learning_rate": 9.125592300454675e-06,
"loss": 1.7227,
"step": 171
},
{
"epoch": 1.64,
"learning_rate": 9.023031992740488e-06,
"loss": 1.8809,
"step": 172
},
{
"epoch": 1.65,
"learning_rate": 8.92057533675963e-06,
"loss": 2.0039,
"step": 173
},
{
"epoch": 1.66,
"learning_rate": 8.818233202683815e-06,
"loss": 1.9102,
"step": 174
},
{
"epoch": 1.67,
"learning_rate": 8.71601644853449e-06,
"loss": 2.0049,
"step": 175
},
{
"epoch": 1.68,
"learning_rate": 8.613935919030908e-06,
"loss": 1.9795,
"step": 176
},
{
"epoch": 1.69,
"learning_rate": 8.512002444439502e-06,
"loss": 1.8223,
"step": 177
},
{
"epoch": 1.7,
"learning_rate": 8.410226839424871e-06,
"loss": 1.9092,
"step": 178
},
{
"epoch": 1.7,
"learning_rate": 8.308619901902406e-06,
"loss": 1.8193,
"step": 179
},
{
"epoch": 1.71,
"learning_rate": 8.207192411892645e-06,
"loss": 1.7246,
"step": 180
},
{
"epoch": 1.72,
"learning_rate": 8.1059551303776e-06,
"loss": 1.7344,
"step": 181
},
{
"epoch": 1.73,
"learning_rate": 8.004918798159046e-06,
"loss": 1.957,
"step": 182
},
{
"epoch": 1.74,
"learning_rate": 7.904094134718975e-06,
"loss": 1.8555,
"step": 183
},
{
"epoch": 1.75,
"learning_rate": 7.803491837082324e-06,
"loss": 1.8994,
"step": 184
},
{
"epoch": 1.76,
"learning_rate": 7.703122578682047e-06,
"loss": 1.9053,
"step": 185
},
{
"epoch": 1.77,
"learning_rate": 7.602997008226725e-06,
"loss": 1.8857,
"step": 186
},
{
"epoch": 1.78,
"learning_rate": 7.503125748570801e-06,
"loss": 1.8555,
"step": 187
},
{
"epoch": 1.79,
"learning_rate": 7.403519395587522e-06,
"loss": 1.8174,
"step": 188
},
{
"epoch": 1.8,
"learning_rate": 7.304188517044774e-06,
"loss": 1.8281,
"step": 189
},
{
"epoch": 1.81,
"learning_rate": 7.2051436514839064e-06,
"loss": 1.7969,
"step": 190
},
{
"epoch": 1.82,
"learning_rate": 7.106395307101621e-06,
"loss": 2.0137,
"step": 191
},
{
"epoch": 1.83,
"learning_rate": 7.007953960635109e-06,
"loss": 1.8184,
"step": 192
},
{
"epoch": 1.84,
"learning_rate": 6.909830056250527e-06,
"loss": 2.0029,
"step": 193
},
{
"epoch": 1.85,
"learning_rate": 6.812034004434904e-06,
"loss": 1.8242,
"step": 194
},
{
"epoch": 1.86,
"learning_rate": 6.714576180891653e-06,
"loss": 1.9043,
"step": 195
},
{
"epoch": 1.87,
"learning_rate": 6.617466925439746e-06,
"loss": 1.9297,
"step": 196
},
{
"epoch": 1.88,
"learning_rate": 6.520716540916709e-06,
"loss": 1.8525,
"step": 197
},
{
"epoch": 1.89,
"learning_rate": 6.424335292085553e-06,
"loss": 1.8643,
"step": 198
},
{
"epoch": 1.9,
"learning_rate": 6.32833340454571e-06,
"loss": 1.915,
"step": 199
},
{
"epoch": 1.9,
"learning_rate": 6.232721063648148e-06,
"loss": 1.8682,
"step": 200
},
{
"epoch": 1.91,
"learning_rate": 6.137508413414784e-06,
"loss": 1.7812,
"step": 201
},
{
"epoch": 1.92,
"learning_rate": 6.042705555462192e-06,
"loss": 1.8555,
"step": 202
},
{
"epoch": 1.93,
"learning_rate": 5.948322547929939e-06,
"loss": 1.8301,
"step": 203
},
{
"epoch": 1.94,
"learning_rate": 5.8543694044133984e-06,
"loss": 1.8125,
"step": 204
},
{
"epoch": 1.95,
"learning_rate": 5.760856092901394e-06,
"loss": 1.8857,
"step": 205
},
{
"epoch": 1.96,
"learning_rate": 5.667792534718639e-06,
"loss": 1.7637,
"step": 206
},
{
"epoch": 1.97,
"learning_rate": 5.575188603473112e-06,
"loss": 1.8281,
"step": 207
},
{
"epoch": 1.98,
"learning_rate": 5.483054124008528e-06,
"loss": 1.7715,
"step": 208
},
{
"epoch": 1.99,
"learning_rate": 5.391398871361972e-06,
"loss": 1.8154,
"step": 209
},
{
"epoch": 2.0,
"learning_rate": 5.300232569726805e-06,
"loss": 1.8232,
"step": 210
},
{
"epoch": 2.01,
"learning_rate": 5.209564891420953e-06,
"loss": 1.5811,
"step": 211
},
{
"epoch": 2.02,
"learning_rate": 5.119405455860772e-06,
"loss": 1.7432,
"step": 212
},
{
"epoch": 2.03,
"learning_rate": 5.029763828540419e-06,
"loss": 1.7061,
"step": 213
},
{
"epoch": 2.04,
"learning_rate": 4.9406495200170345e-06,
"loss": 1.6689,
"step": 214
},
{
"epoch": 2.05,
"learning_rate": 4.852071984901696e-06,
"loss": 1.8164,
"step": 215
},
{
"epoch": 2.06,
"learning_rate": 4.764040620856323e-06,
"loss": 1.6289,
"step": 216
},
{
"epoch": 2.07,
"learning_rate": 4.676564767596663e-06,
"loss": 1.7148,
"step": 217
},
{
"epoch": 2.08,
"learning_rate": 4.5896537059013536e-06,
"loss": 1.79,
"step": 218
},
{
"epoch": 2.09,
"learning_rate": 4.503316656627295e-06,
"loss": 1.6221,
"step": 219
},
{
"epoch": 2.1,
"learning_rate": 4.417562779731355e-06,
"loss": 1.71,
"step": 220
},
{
"epoch": 2.1,
"learning_rate": 4.332401173298544e-06,
"loss": 1.5859,
"step": 221
},
{
"epoch": 2.11,
"learning_rate": 4.24784087257674e-06,
"loss": 1.6191,
"step": 222
},
{
"epoch": 2.12,
"learning_rate": 4.163890849018114e-06,
"loss": 1.7393,
"step": 223
},
{
"epoch": 2.13,
"learning_rate": 4.080560009327274e-06,
"loss": 1.7051,
"step": 224
},
{
"epoch": 2.14,
"learning_rate": 3.997857194516319e-06,
"loss": 1.6621,
"step": 225
},
{
"epoch": 2.15,
"learning_rate": 3.915791178966852e-06,
"loss": 1.6973,
"step": 226
},
{
"epoch": 2.16,
"learning_rate": 3.834370669499047e-06,
"loss": 1.7031,
"step": 227
},
{
"epoch": 2.17,
"learning_rate": 3.753604304447915e-06,
"loss": 1.7021,
"step": 228
},
{
"epoch": 2.18,
"learning_rate": 3.6735006527467967e-06,
"loss": 1.7051,
"step": 229
},
{
"epoch": 2.19,
"learning_rate": 3.594068213018249e-06,
"loss": 1.7451,
"step": 230
},
{
"epoch": 2.2,
"learning_rate": 3.515315412672384e-06,
"loss": 1.7207,
"step": 231
},
{
"epoch": 2.21,
"learning_rate": 3.4372506070127476e-06,
"loss": 1.6436,
"step": 232
},
{
"epoch": 2.22,
"learning_rate": 3.3598820783498833e-06,
"loss": 1.667,
"step": 233
},
{
"epoch": 2.23,
"learning_rate": 3.2832180351225916e-06,
"loss": 1.7021,
"step": 234
},
{
"epoch": 2.24,
"learning_rate": 3.207266611027069e-06,
"loss": 1.7061,
"step": 235
},
{
"epoch": 2.25,
"learning_rate": 3.1320358641539583e-06,
"loss": 1.6035,
"step": 236
},
{
"epoch": 2.26,
"learning_rate": 3.0575337761334213e-06,
"loss": 1.6094,
"step": 237
},
{
"epoch": 2.27,
"learning_rate": 2.9837682512883205e-06,
"loss": 1.707,
"step": 238
},
{
"epoch": 2.28,
"learning_rate": 2.910747115795628e-06,
"loss": 1.6982,
"step": 239
},
{
"epoch": 2.29,
"learning_rate": 2.8384781168560693e-06,
"loss": 1.5889,
"step": 240
},
{
"epoch": 2.3,
"learning_rate": 2.766968921872213e-06,
"loss": 1.7129,
"step": 241
},
{
"epoch": 2.3,
"learning_rate": 2.696227117634985e-06,
"loss": 1.6025,
"step": 242
},
{
"epoch": 2.31,
"learning_rate": 2.626260209518737e-06,
"loss": 1.7686,
"step": 243
},
{
"epoch": 2.32,
"learning_rate": 2.5570756206849834e-06,
"loss": 1.6543,
"step": 244
},
{
"epoch": 2.33,
"learning_rate": 2.4886806912948034e-06,
"loss": 1.627,
"step": 245
},
{
"epoch": 2.34,
"learning_rate": 2.4210826777301154e-06,
"loss": 1.7227,
"step": 246
},
{
"epoch": 2.35,
"learning_rate": 2.3542887518238056e-06,
"loss": 1.7588,
"step": 247
},
{
"epoch": 2.36,
"learning_rate": 2.288306000098811e-06,
"loss": 1.7256,
"step": 248
},
{
"epoch": 2.37,
"learning_rate": 2.2231414230162897e-06,
"loss": 1.5625,
"step": 249
},
{
"epoch": 2.38,
"learning_rate": 2.158801934232897e-06,
"loss": 1.7207,
"step": 250
},
{
"epoch": 2.39,
"learning_rate": 2.0952943598672847e-06,
"loss": 1.8096,
"step": 251
},
{
"epoch": 2.4,
"learning_rate": 2.0326254377758704e-06,
"loss": 1.6533,
"step": 252
},
{
"epoch": 2.41,
"learning_rate": 1.970801816838004e-06,
"loss": 1.7139,
"step": 253
},
{
"epoch": 2.42,
"learning_rate": 1.9098300562505266e-06,
"loss": 1.6504,
"step": 254
},
{
"epoch": 2.43,
"learning_rate": 1.8497166248318876e-06,
"loss": 1.585,
"step": 255
},
{
"epoch": 2.44,
"learning_rate": 1.7904679003358283e-06,
"loss": 1.6641,
"step": 256
},
{
"epoch": 2.45,
"learning_rate": 1.7320901687747294e-06,
"loss": 1.7246,
"step": 257
},
{
"epoch": 2.46,
"learning_rate": 1.6745896237527071e-06,
"loss": 1.6484,
"step": 258
},
{
"epoch": 2.47,
"learning_rate": 1.6179723658084811e-06,
"loss": 1.8076,
"step": 259
},
{
"epoch": 2.48,
"learning_rate": 1.5622444017681438e-06,
"loss": 1.748,
"step": 260
},
{
"epoch": 2.49,
"learning_rate": 1.507411644107879e-06,
"loss": 1.5947,
"step": 261
},
{
"epoch": 2.5,
"learning_rate": 1.4534799103266505e-06,
"loss": 1.667,
"step": 262
},
{
"epoch": 2.5,
"learning_rate": 1.4004549223290165e-06,
"loss": 1.6357,
"step": 263
},
{
"epoch": 2.51,
"learning_rate": 1.3483423058180423e-06,
"loss": 1.5732,
"step": 264
},
{
"epoch": 2.52,
"learning_rate": 1.2971475896984475e-06,
"loss": 1.6221,
"step": 265
},
{
"epoch": 2.53,
"learning_rate": 1.2468762054900264e-06,
"loss": 1.7861,
"step": 266
},
{
"epoch": 2.54,
"learning_rate": 1.1975334867513687e-06,
"loss": 1.6611,
"step": 267
},
{
"epoch": 2.55,
"learning_rate": 1.1491246685140078e-06,
"loss": 1.4912,
"step": 268
},
{
"epoch": 2.56,
"learning_rate": 1.1016548867270037e-06,
"loss": 1.6748,
"step": 269
},
{
"epoch": 2.57,
"learning_rate": 1.0551291777120465e-06,
"loss": 1.8672,
"step": 270
},
{
"epoch": 2.58,
"learning_rate": 1.0095524776291165e-06,
"loss": 1.7246,
"step": 271
},
{
"epoch": 2.59,
"learning_rate": 9.649296219527982e-07,
"loss": 1.6426,
"step": 272
},
{
"epoch": 2.6,
"learning_rate": 9.21265344959239e-07,
"loss": 1.6289,
"step": 273
},
{
"epoch": 2.61,
"learning_rate": 8.785642792238814e-07,
"loss": 1.6562,
"step": 274
},
{
"epoch": 2.62,
"learning_rate": 8.368309551299536e-07,
"loss": 1.7344,
"step": 275
},
{
"epoch": 2.63,
"learning_rate": 7.960698003878309e-07,
"loss": 1.7451,
"step": 276
},
{
"epoch": 2.64,
"learning_rate": 7.562851395652693e-07,
"loss": 1.6445,
"step": 277
},
{
"epoch": 2.65,
"learning_rate": 7.174811936285886e-07,
"loss": 1.792,
"step": 278
},
{
"epoch": 2.66,
"learning_rate": 6.796620794948483e-07,
"loss": 1.6738,
"step": 279
},
{
"epoch": 2.67,
"learning_rate": 6.428318095950648e-07,
"loss": 1.5811,
"step": 280
},
{
"epoch": 2.68,
"learning_rate": 6.06994291448505e-07,
"loss": 1.7129,
"step": 281
},
{
"epoch": 2.69,
"learning_rate": 5.721533272481272e-07,
"loss": 1.5957,
"step": 282
},
{
"epoch": 2.7,
"learning_rate": 5.383126134571748e-07,
"loss": 1.7637,
"step": 283
},
{
"epoch": 2.7,
"learning_rate": 5.054757404170074e-07,
"loss": 1.7734,
"step": 284
},
{
"epoch": 2.71,
"learning_rate": 4.73646191966175e-07,
"loss": 1.5537,
"step": 285
},
{
"epoch": 2.72,
"learning_rate": 4.428273450708065e-07,
"loss": 1.666,
"step": 286
},
{
"epoch": 2.73,
"learning_rate": 4.1302246946632206e-07,
"loss": 1.6973,
"step": 287
},
{
"epoch": 2.74,
"learning_rate": 3.842347273105396e-07,
"loss": 1.7041,
"step": 288
},
{
"epoch": 2.75,
"learning_rate": 3.564671728481739e-07,
"loss": 1.7529,
"step": 289
},
{
"epoch": 2.76,
"learning_rate": 3.2972275208679625e-07,
"loss": 1.667,
"step": 290
},
{
"epoch": 2.77,
"learning_rate": 3.0400430248428983e-07,
"loss": 1.7051,
"step": 291
},
{
"epoch": 2.78,
"learning_rate": 2.793145526477914e-07,
"loss": 1.7529,
"step": 292
},
{
"epoch": 2.79,
"learning_rate": 2.556561220442144e-07,
"loss": 1.5752,
"step": 293
},
{
"epoch": 2.8,
"learning_rate": 2.3303152072231883e-07,
"loss": 1.7285,
"step": 294
},
{
"epoch": 2.81,
"learning_rate": 2.1144314904642194e-07,
"loss": 1.7803,
"step": 295
},
{
"epoch": 2.82,
"learning_rate": 1.9089329744172658e-07,
"loss": 1.6621,
"step": 296
},
{
"epoch": 2.83,
"learning_rate": 1.7138414615131327e-07,
"loss": 1.6738,
"step": 297
},
{
"epoch": 2.84,
"learning_rate": 1.529177650048297e-07,
"loss": 1.6641,
"step": 298
},
{
"epoch": 2.85,
"learning_rate": 1.3549611319889522e-07,
"loss": 1.6367,
"step": 299
},
{
"epoch": 2.86,
"learning_rate": 1.1912103908922945e-07,
"loss": 1.6582,
"step": 300
},
{
"epoch": 2.87,
"learning_rate": 1.0379427999456015e-07,
"loss": 1.6406,
"step": 301
},
{
"epoch": 2.88,
"learning_rate": 8.951746201229961e-08,
"loss": 1.709,
"step": 302
},
{
"epoch": 2.89,
"learning_rate": 7.629209984601816e-08,
"loss": 1.6416,
"step": 303
},
{
"epoch": 2.9,
"learning_rate": 6.411959664474832e-08,
"loss": 1.7266,
"step": 304
},
{
"epoch": 2.9,
"learning_rate": 5.300124385410943e-08,
"loss": 1.6006,
"step": 305
},
{
"epoch": 2.91,
"learning_rate": 4.2938221079300566e-08,
"loss": 1.707,
"step": 306
},
{
"epoch": 2.92,
"learning_rate": 3.3931595959942885e-08,
"loss": 1.6123,
"step": 307
},
{
"epoch": 2.93,
"learning_rate": 2.5982324056810227e-08,
"loss": 1.6455,
"step": 308
},
{
"epoch": 2.94,
"learning_rate": 1.9091248750446877e-08,
"loss": 1.7812,
"step": 309
},
{
"epoch": 2.95,
"learning_rate": 1.325910115169471e-08,
"loss": 1.6172,
"step": 310
},
{
"epoch": 2.96,
"learning_rate": 8.48650002411855e-09,
"loss": 1.6328,
"step": 311
},
{
"epoch": 2.97,
"learning_rate": 4.773951718362035e-09,
"loss": 1.6768,
"step": 312
},
{
"epoch": 2.98,
"learning_rate": 2.1218501184261385e-09,
"loss": 1.6006,
"step": 313
},
{
"epoch": 2.99,
"learning_rate": 5.304765998781491e-10,
"loss": 1.5791,
"step": 314
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 1.4365,
"step": 315
},
{
"epoch": 3.0,
"step": 315,
"total_flos": 19095171661824.0,
"train_loss": 1.9948753720238095,
"train_runtime": 5108.9216,
"train_samples_per_second": 1.957,
"train_steps_per_second": 0.062
}
],
"logging_steps": 1.0,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 158,
"total_flos": 19095171661824.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}