Video-Panda-7B / videopanda_prtr1 /trainer_state.json
jh-yi's picture
Upload 20 files
6e2080c verified
raw
history blame
44.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 367,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 3.2173,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 2e-05,
"loss": 3.2031,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 3.0000000000000004e-05,
"loss": 3.1896,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 3.5399,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 3.999925099660787e-05,
"loss": 3.249,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 3.999700404253208e-05,
"loss": 4.346,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 3.9993259306070256e-05,
"loss": 4.1291,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 3.998801706770442e-05,
"loss": 3.7064,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 3.9981277720080015e-05,
"loss": 3.5643,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 3.9973041767976466e-05,
"loss": 3.5031,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 3.996330982826937e-05,
"loss": 3.4195,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 3.995208262988431e-05,
"loss": 3.3598,
"step": 12
},
{
"epoch": 0.04,
"learning_rate": 3.9939361013742275e-05,
"loss": 3.3177,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 3.99251459326966e-05,
"loss": 3.2443,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 3.9909438451461695e-05,
"loss": 3.2215,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 3.989223974653323e-05,
"loss": 3.149,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 3.9873551106100035e-05,
"loss": 3.1167,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 3.985337392994763e-05,
"loss": 3.0538,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 3.983170972935333e-05,
"loss": 3.029,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 3.9808560126973126e-05,
"loss": 3.0055,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.9783926856720085e-05,
"loss": 2.9598,
"step": 21
},
{
"epoch": 0.06,
"learning_rate": 3.975781176363451e-05,
"loss": 2.9572,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 3.973021680374571e-05,
"loss": 2.9004,
"step": 23
},
{
"epoch": 0.07,
"learning_rate": 3.9701144043925576e-05,
"loss": 2.8984,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 3.9670595661733654e-05,
"loss": 2.874,
"step": 25
},
{
"epoch": 0.07,
"learning_rate": 3.963857394525413e-05,
"loss": 2.8689,
"step": 26
},
{
"epoch": 0.07,
"learning_rate": 3.960508129292446e-05,
"loss": 2.8885,
"step": 27
},
{
"epoch": 0.08,
"learning_rate": 3.9570120213355636e-05,
"loss": 2.8331,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 3.953369332514438e-05,
"loss": 2.8093,
"step": 29
},
{
"epoch": 0.08,
"learning_rate": 3.949580335667699e-05,
"loss": 2.7919,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 3.945645314592495e-05,
"loss": 2.7876,
"step": 31
},
{
"epoch": 0.09,
"learning_rate": 3.9415645640232386e-05,
"loss": 2.791,
"step": 32
},
{
"epoch": 0.09,
"learning_rate": 3.937338389609533e-05,
"loss": 2.7751,
"step": 33
},
{
"epoch": 0.09,
"learning_rate": 3.932967107893274e-05,
"loss": 2.7673,
"step": 34
},
{
"epoch": 0.1,
"learning_rate": 3.928451046284946e-05,
"loss": 2.74,
"step": 35
},
{
"epoch": 0.1,
"learning_rate": 3.923790543039095e-05,
"loss": 2.7354,
"step": 36
},
{
"epoch": 0.1,
"learning_rate": 3.9189859472289956e-05,
"loss": 2.699,
"step": 37
},
{
"epoch": 0.1,
"learning_rate": 3.9140376187205025e-05,
"loss": 2.7079,
"step": 38
},
{
"epoch": 0.11,
"learning_rate": 3.9089459281451e-05,
"loss": 2.6673,
"step": 39
},
{
"epoch": 0.11,
"learning_rate": 3.903711256872139e-05,
"loss": 2.6733,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 3.898333996980275e-05,
"loss": 2.659,
"step": 41
},
{
"epoch": 0.11,
"learning_rate": 3.8928145512280973e-05,
"loss": 2.6657,
"step": 42
},
{
"epoch": 0.12,
"learning_rate": 3.8871533330239646e-05,
"loss": 2.6348,
"step": 43
},
{
"epoch": 0.12,
"learning_rate": 3.8813507663950404e-05,
"loss": 2.6497,
"step": 44
},
{
"epoch": 0.12,
"learning_rate": 3.8754072859555346e-05,
"loss": 2.5951,
"step": 45
},
{
"epoch": 0.13,
"learning_rate": 3.869323336874146e-05,
"loss": 2.6479,
"step": 46
},
{
"epoch": 0.13,
"learning_rate": 3.8630993748407274e-05,
"loss": 2.6018,
"step": 47
},
{
"epoch": 0.13,
"learning_rate": 3.856735866032145e-05,
"loss": 2.5817,
"step": 48
},
{
"epoch": 0.13,
"learning_rate": 3.8502332870773675e-05,
"loss": 2.5903,
"step": 49
},
{
"epoch": 0.14,
"learning_rate": 3.843592125021764e-05,
"loss": 2.5757,
"step": 50
},
{
"epoch": 0.14,
"learning_rate": 3.8368128772906254e-05,
"loss": 2.5705,
"step": 51
},
{
"epoch": 0.14,
"learning_rate": 3.829896051651907e-05,
"loss": 2.5914,
"step": 52
},
{
"epoch": 0.14,
"learning_rate": 3.822842166178194e-05,
"loss": 2.5694,
"step": 53
},
{
"epoch": 0.15,
"learning_rate": 3.815651749207902e-05,
"loss": 2.5826,
"step": 54
},
{
"epoch": 0.15,
"learning_rate": 3.8083253393057006e-05,
"loss": 2.5435,
"step": 55
},
{
"epoch": 0.15,
"learning_rate": 3.8008634852221777e-05,
"loss": 2.5286,
"step": 56
},
{
"epoch": 0.16,
"learning_rate": 3.793266745852735e-05,
"loss": 2.5152,
"step": 57
},
{
"epoch": 0.16,
"learning_rate": 3.785535690195728e-05,
"loss": 2.4879,
"step": 58
},
{
"epoch": 0.16,
"learning_rate": 3.7776708973098476e-05,
"loss": 2.5058,
"step": 59
},
{
"epoch": 0.16,
"learning_rate": 3.769672956270749e-05,
"loss": 2.5437,
"step": 60
},
{
"epoch": 0.17,
"learning_rate": 3.761542466126929e-05,
"loss": 2.4668,
"step": 61
},
{
"epoch": 0.17,
"learning_rate": 3.753280035854857e-05,
"loss": 2.501,
"step": 62
},
{
"epoch": 0.17,
"learning_rate": 3.7448862843133644e-05,
"loss": 2.4697,
"step": 63
},
{
"epoch": 0.17,
"learning_rate": 3.736361840197288e-05,
"loss": 2.4651,
"step": 64
},
{
"epoch": 0.18,
"learning_rate": 3.727707341990383e-05,
"loss": 2.4394,
"step": 65
},
{
"epoch": 0.18,
"learning_rate": 3.718923437917503e-05,
"loss": 2.4239,
"step": 66
},
{
"epoch": 0.18,
"learning_rate": 3.7100107858960404e-05,
"loss": 2.4541,
"step": 67
},
{
"epoch": 0.19,
"learning_rate": 3.7009700534866557e-05,
"loss": 2.445,
"step": 68
},
{
"epoch": 0.19,
"learning_rate": 3.691801917843273e-05,
"loss": 2.422,
"step": 69
},
{
"epoch": 0.19,
"learning_rate": 3.6825070656623626e-05,
"loss": 2.4274,
"step": 70
},
{
"epoch": 0.19,
"learning_rate": 3.6730861931315054e-05,
"loss": 2.4062,
"step": 71
},
{
"epoch": 0.2,
"learning_rate": 3.663540005877249e-05,
"loss": 2.4,
"step": 72
},
{
"epoch": 0.2,
"learning_rate": 3.653869218912258e-05,
"loss": 2.4229,
"step": 73
},
{
"epoch": 0.2,
"learning_rate": 3.6440745565817556e-05,
"loss": 2.3909,
"step": 74
},
{
"epoch": 0.2,
"learning_rate": 3.6341567525092727e-05,
"loss": 2.3607,
"step": 75
},
{
"epoch": 0.21,
"learning_rate": 3.6241165495417006e-05,
"loss": 2.399,
"step": 76
},
{
"epoch": 0.21,
"learning_rate": 3.613954699693645e-05,
"loss": 2.3902,
"step": 77
},
{
"epoch": 0.21,
"learning_rate": 3.603671964091107e-05,
"loss": 2.3692,
"step": 78
},
{
"epoch": 0.22,
"learning_rate": 3.593269112914472e-05,
"loss": 2.3436,
"step": 79
},
{
"epoch": 0.22,
"learning_rate": 3.582746925340822e-05,
"loss": 2.3629,
"step": 80
},
{
"epoch": 0.22,
"learning_rate": 3.5721061894855756e-05,
"loss": 2.3287,
"step": 81
},
{
"epoch": 0.22,
"learning_rate": 3.561347702343456e-05,
"loss": 2.3376,
"step": 82
},
{
"epoch": 0.23,
"learning_rate": 3.5504722697288025e-05,
"loss": 2.3418,
"step": 83
},
{
"epoch": 0.23,
"learning_rate": 3.539480706215204e-05,
"loss": 2.3182,
"step": 84
},
{
"epoch": 0.23,
"learning_rate": 3.5283738350744986e-05,
"loss": 2.3287,
"step": 85
},
{
"epoch": 0.23,
"learning_rate": 3.517152488215101e-05,
"loss": 2.3222,
"step": 86
},
{
"epoch": 0.24,
"learning_rate": 3.505817506119698e-05,
"loss": 2.3393,
"step": 87
},
{
"epoch": 0.24,
"learning_rate": 3.494369737782293e-05,
"loss": 2.322,
"step": 88
},
{
"epoch": 0.24,
"learning_rate": 3.4828100406446184e-05,
"loss": 2.2907,
"step": 89
},
{
"epoch": 0.25,
"learning_rate": 3.47113928053191e-05,
"loss": 2.3132,
"step": 90
},
{
"epoch": 0.25,
"learning_rate": 3.45935833158806e-05,
"loss": 2.2972,
"step": 91
},
{
"epoch": 0.25,
"learning_rate": 3.44746807621014e-05,
"loss": 2.2643,
"step": 92
},
{
"epoch": 0.25,
"learning_rate": 3.4354694049823124e-05,
"loss": 2.2527,
"step": 93
},
{
"epoch": 0.26,
"learning_rate": 3.4233632166091205e-05,
"loss": 2.2746,
"step": 94
},
{
"epoch": 0.26,
"learning_rate": 3.4111504178481813e-05,
"loss": 2.2479,
"step": 95
},
{
"epoch": 0.26,
"learning_rate": 3.3988319234422636e-05,
"loss": 2.2954,
"step": 96
},
{
"epoch": 0.26,
"learning_rate": 3.3864086560507785e-05,
"loss": 2.2455,
"step": 97
},
{
"epoch": 0.27,
"learning_rate": 3.373881546180666e-05,
"loss": 2.2442,
"step": 98
},
{
"epoch": 0.27,
"learning_rate": 3.361251532116707e-05,
"loss": 2.2035,
"step": 99
},
{
"epoch": 0.27,
"learning_rate": 3.3485195598512365e-05,
"loss": 2.2234,
"step": 100
},
{
"epoch": 0.28,
"learning_rate": 3.3356865830132976e-05,
"loss": 2.2226,
"step": 101
},
{
"epoch": 0.28,
"learning_rate": 3.322753562797209e-05,
"loss": 2.2167,
"step": 102
},
{
"epoch": 0.28,
"learning_rate": 3.309721467890571e-05,
"loss": 2.2481,
"step": 103
},
{
"epoch": 0.28,
"learning_rate": 3.296591274401712e-05,
"loss": 2.1965,
"step": 104
},
{
"epoch": 0.29,
"learning_rate": 3.28336396578658e-05,
"loss": 2.1935,
"step": 105
},
{
"epoch": 0.29,
"learning_rate": 3.270040532775077e-05,
"loss": 2.196,
"step": 106
},
{
"epoch": 0.29,
"learning_rate": 3.256621973296854e-05,
"loss": 2.1733,
"step": 107
},
{
"epoch": 0.29,
"learning_rate": 3.243109292406568e-05,
"loss": 2.1735,
"step": 108
},
{
"epoch": 0.3,
"learning_rate": 3.229503502208602e-05,
"loss": 2.1915,
"step": 109
},
{
"epoch": 0.3,
"learning_rate": 3.215805621781256e-05,
"loss": 2.1795,
"step": 110
},
{
"epoch": 0.3,
"learning_rate": 3.202016677100422e-05,
"loss": 2.2021,
"step": 111
},
{
"epoch": 0.31,
"learning_rate": 3.188137700962733e-05,
"loss": 2.201,
"step": 112
},
{
"epoch": 0.31,
"learning_rate": 3.174169732908209e-05,
"loss": 2.1857,
"step": 113
},
{
"epoch": 0.31,
"learning_rate": 3.1601138191423966e-05,
"loss": 2.1818,
"step": 114
},
{
"epoch": 0.31,
"learning_rate": 3.145971012458005e-05,
"loss": 2.1438,
"step": 115
},
{
"epoch": 0.32,
"learning_rate": 3.13174237215605e-05,
"loss": 2.1359,
"step": 116
},
{
"epoch": 0.32,
"learning_rate": 3.11742896396652e-05,
"loss": 2.1668,
"step": 117
},
{
"epoch": 0.32,
"learning_rate": 3.103031859968542e-05,
"loss": 2.1769,
"step": 118
},
{
"epoch": 0.32,
"learning_rate": 3.0885521385100885e-05,
"loss": 2.1445,
"step": 119
},
{
"epoch": 0.33,
"learning_rate": 3.0739908841272095e-05,
"loss": 2.1193,
"step": 120
},
{
"epoch": 0.33,
"learning_rate": 3.059349187462798e-05,
"loss": 2.1285,
"step": 121
},
{
"epoch": 0.33,
"learning_rate": 3.044628145184899e-05,
"loss": 2.1411,
"step": 122
},
{
"epoch": 0.34,
"learning_rate": 3.0298288599045747e-05,
"loss": 2.1321,
"step": 123
},
{
"epoch": 0.34,
"learning_rate": 3.0149524400933114e-05,
"loss": 2.1008,
"step": 124
},
{
"epoch": 0.34,
"learning_rate": 3.0000000000000004e-05,
"loss": 2.1437,
"step": 125
},
{
"epoch": 0.34,
"learning_rate": 2.9849726595674756e-05,
"loss": 2.1224,
"step": 126
},
{
"epoch": 0.35,
"learning_rate": 2.9698715443486338e-05,
"loss": 2.083,
"step": 127
},
{
"epoch": 0.35,
"learning_rate": 2.9546977854221266e-05,
"loss": 2.1156,
"step": 128
},
{
"epoch": 0.35,
"learning_rate": 2.9394525193076454e-05,
"loss": 2.1127,
"step": 129
},
{
"epoch": 0.35,
"learning_rate": 2.9241368878807925e-05,
"loss": 2.0949,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 2.908752038287558e-05,
"loss": 2.0821,
"step": 131
},
{
"epoch": 0.36,
"learning_rate": 2.8932991228583954e-05,
"loss": 2.0735,
"step": 132
},
{
"epoch": 0.36,
"learning_rate": 2.877779299021912e-05,
"loss": 2.0996,
"step": 133
},
{
"epoch": 0.37,
"learning_rate": 2.8621937292181768e-05,
"loss": 2.0967,
"step": 134
},
{
"epoch": 0.37,
"learning_rate": 2.846543580811656e-05,
"loss": 2.0313,
"step": 135
},
{
"epoch": 0.37,
"learning_rate": 2.8308300260037734e-05,
"loss": 2.0891,
"step": 136
},
{
"epoch": 0.37,
"learning_rate": 2.8150542417451144e-05,
"loss": 2.0816,
"step": 137
},
{
"epoch": 0.38,
"learning_rate": 2.7992174096472714e-05,
"loss": 2.0765,
"step": 138
},
{
"epoch": 0.38,
"learning_rate": 2.783320715894341e-05,
"loss": 2.026,
"step": 139
},
{
"epoch": 0.38,
"learning_rate": 2.767365351154077e-05,
"loss": 2.0424,
"step": 140
},
{
"epoch": 0.38,
"learning_rate": 2.751352510488711e-05,
"loss": 2.074,
"step": 141
},
{
"epoch": 0.39,
"learning_rate": 2.7352833932654402e-05,
"loss": 2.0189,
"step": 142
},
{
"epoch": 0.39,
"learning_rate": 2.719159203066597e-05,
"loss": 2.0283,
"step": 143
},
{
"epoch": 0.39,
"learning_rate": 2.702981147599495e-05,
"loss": 2.0373,
"step": 144
},
{
"epoch": 0.4,
"learning_rate": 2.6867504386059776e-05,
"loss": 2.0141,
"step": 145
},
{
"epoch": 0.4,
"learning_rate": 2.6704682917716528e-05,
"loss": 2.0197,
"step": 146
},
{
"epoch": 0.4,
"learning_rate": 2.6541359266348437e-05,
"loss": 2.0168,
"step": 147
},
{
"epoch": 0.4,
"learning_rate": 2.637754566495238e-05,
"loss": 2.0032,
"step": 148
},
{
"epoch": 0.41,
"learning_rate": 2.6213254383222665e-05,
"loss": 2.0038,
"step": 149
},
{
"epoch": 0.41,
"learning_rate": 2.6048497726632023e-05,
"loss": 1.9901,
"step": 150
},
{
"epoch": 0.41,
"learning_rate": 2.588328803550993e-05,
"loss": 1.9917,
"step": 151
},
{
"epoch": 0.41,
"learning_rate": 2.571763768411829e-05,
"loss": 1.9718,
"step": 152
},
{
"epoch": 0.42,
"learning_rate": 2.555155907972461e-05,
"loss": 2.0155,
"step": 153
},
{
"epoch": 0.42,
"learning_rate": 2.5385064661672692e-05,
"loss": 2.0072,
"step": 154
},
{
"epoch": 0.42,
"learning_rate": 2.5218166900450937e-05,
"loss": 1.9935,
"step": 155
},
{
"epoch": 0.43,
"learning_rate": 2.5050878296758255e-05,
"loss": 1.9879,
"step": 156
},
{
"epoch": 0.43,
"learning_rate": 2.488321138056783e-05,
"loss": 2.0028,
"step": 157
},
{
"epoch": 0.43,
"learning_rate": 2.471517871018855e-05,
"loss": 1.9669,
"step": 158
},
{
"epoch": 0.43,
"learning_rate": 2.4546792871324424e-05,
"loss": 1.9854,
"step": 159
},
{
"epoch": 0.44,
"learning_rate": 2.43780664761319e-05,
"loss": 1.9613,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 2.4209012162275217e-05,
"loss": 1.9474,
"step": 161
},
{
"epoch": 0.44,
"learning_rate": 2.4039642591979825e-05,
"loss": 1.9424,
"step": 162
},
{
"epoch": 0.44,
"learning_rate": 2.3869970451083996e-05,
"loss": 1.9656,
"step": 163
},
{
"epoch": 0.45,
"learning_rate": 2.370000844808863e-05,
"loss": 1.9686,
"step": 164
},
{
"epoch": 0.45,
"learning_rate": 2.3529769313205423e-05,
"loss": 1.9655,
"step": 165
},
{
"epoch": 0.45,
"learning_rate": 2.3359265797403297e-05,
"loss": 1.97,
"step": 166
},
{
"epoch": 0.46,
"learning_rate": 2.318851067145345e-05,
"loss": 1.9435,
"step": 167
},
{
"epoch": 0.46,
"learning_rate": 2.3017516724972716e-05,
"loss": 1.972,
"step": 168
},
{
"epoch": 0.46,
"learning_rate": 2.2846296765465708e-05,
"loss": 1.9789,
"step": 169
},
{
"epoch": 0.46,
"learning_rate": 2.267486361736546e-05,
"loss": 1.9466,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 2.250323012107292e-05,
"loss": 1.9231,
"step": 171
},
{
"epoch": 0.47,
"learning_rate": 2.2331409131995186e-05,
"loss": 1.9672,
"step": 172
},
{
"epoch": 0.47,
"learning_rate": 2.2159413519582623e-05,
"loss": 1.9265,
"step": 173
},
{
"epoch": 0.47,
"learning_rate": 2.1987256166364937e-05,
"loss": 1.911,
"step": 174
},
{
"epoch": 0.48,
"learning_rate": 2.1814949966986288e-05,
"loss": 1.9236,
"step": 175
},
{
"epoch": 0.48,
"learning_rate": 2.1642507827239455e-05,
"loss": 1.9543,
"step": 176
},
{
"epoch": 0.48,
"learning_rate": 2.1469942663099208e-05,
"loss": 1.9296,
"step": 177
},
{
"epoch": 0.49,
"learning_rate": 2.129726739975486e-05,
"loss": 1.9292,
"step": 178
},
{
"epoch": 0.49,
"learning_rate": 2.112449497064223e-05,
"loss": 1.9132,
"step": 179
},
{
"epoch": 0.49,
"learning_rate": 2.095163831647485e-05,
"loss": 1.9087,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.0778710384274757e-05,
"loss": 1.9137,
"step": 181
},
{
"epoch": 0.5,
"learning_rate": 2.0605724126402734e-05,
"loss": 1.8797,
"step": 182
},
{
"epoch": 0.5,
"learning_rate": 2.0432692499588174e-05,
"loss": 1.8937,
"step": 183
},
{
"epoch": 0.5,
"learning_rate": 2.025962846395862e-05,
"loss": 1.9124,
"step": 184
},
{
"epoch": 0.5,
"learning_rate": 2.0086544982069046e-05,
"loss": 1.8763,
"step": 185
},
{
"epoch": 0.51,
"learning_rate": 1.9913455017930964e-05,
"loss": 1.8699,
"step": 186
},
{
"epoch": 0.51,
"learning_rate": 1.9740371536041388e-05,
"loss": 1.8841,
"step": 187
},
{
"epoch": 0.51,
"learning_rate": 1.9567307500411833e-05,
"loss": 1.9116,
"step": 188
},
{
"epoch": 0.51,
"learning_rate": 1.939427587359727e-05,
"loss": 1.8997,
"step": 189
},
{
"epoch": 0.52,
"learning_rate": 1.922128961572525e-05,
"loss": 1.8716,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 1.9048361683525155e-05,
"loss": 1.8651,
"step": 191
},
{
"epoch": 0.52,
"learning_rate": 1.8875505029357775e-05,
"loss": 1.8827,
"step": 192
},
{
"epoch": 0.53,
"learning_rate": 1.8702732600245138e-05,
"loss": 1.871,
"step": 193
},
{
"epoch": 0.53,
"learning_rate": 1.8530057336900805e-05,
"loss": 1.8788,
"step": 194
},
{
"epoch": 0.53,
"learning_rate": 1.835749217276055e-05,
"loss": 1.8641,
"step": 195
},
{
"epoch": 0.53,
"learning_rate": 1.8185050033013715e-05,
"loss": 1.8856,
"step": 196
},
{
"epoch": 0.54,
"learning_rate": 1.8012743833635067e-05,
"loss": 1.8524,
"step": 197
},
{
"epoch": 0.54,
"learning_rate": 1.7840586480417387e-05,
"loss": 1.8142,
"step": 198
},
{
"epoch": 0.54,
"learning_rate": 1.766859086800482e-05,
"loss": 1.8361,
"step": 199
},
{
"epoch": 0.54,
"learning_rate": 1.7496769878927085e-05,
"loss": 1.8197,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 1.7325136382634547e-05,
"loss": 1.8723,
"step": 201
},
{
"epoch": 0.55,
"learning_rate": 1.7153703234534302e-05,
"loss": 1.8088,
"step": 202
},
{
"epoch": 0.55,
"learning_rate": 1.6982483275027287e-05,
"loss": 1.8264,
"step": 203
},
{
"epoch": 0.56,
"learning_rate": 1.6811489328546557e-05,
"loss": 1.7955,
"step": 204
},
{
"epoch": 0.56,
"learning_rate": 1.6640734202596702e-05,
"loss": 1.8337,
"step": 205
},
{
"epoch": 0.56,
"learning_rate": 1.647023068679459e-05,
"loss": 1.8571,
"step": 206
},
{
"epoch": 0.56,
"learning_rate": 1.6299991551911373e-05,
"loss": 1.8662,
"step": 207
},
{
"epoch": 0.57,
"learning_rate": 1.6130029548916007e-05,
"loss": 1.8593,
"step": 208
},
{
"epoch": 0.57,
"learning_rate": 1.5960357408020178e-05,
"loss": 1.8387,
"step": 209
},
{
"epoch": 0.57,
"learning_rate": 1.579098783772479e-05,
"loss": 1.8395,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 1.5621933523868106e-05,
"loss": 1.8271,
"step": 211
},
{
"epoch": 0.58,
"learning_rate": 1.5453207128675583e-05,
"loss": 1.8738,
"step": 212
},
{
"epoch": 0.58,
"learning_rate": 1.5284821289811453e-05,
"loss": 1.8598,
"step": 213
},
{
"epoch": 0.58,
"learning_rate": 1.5116788619432177e-05,
"loss": 1.832,
"step": 214
},
{
"epoch": 0.59,
"learning_rate": 1.4949121703241747e-05,
"loss": 1.8176,
"step": 215
},
{
"epoch": 0.59,
"learning_rate": 1.4781833099549072e-05,
"loss": 1.8302,
"step": 216
},
{
"epoch": 0.59,
"learning_rate": 1.461493533832731e-05,
"loss": 1.848,
"step": 217
},
{
"epoch": 0.59,
"learning_rate": 1.4448440920275402e-05,
"loss": 1.7919,
"step": 218
},
{
"epoch": 0.6,
"learning_rate": 1.4282362315881719e-05,
"loss": 1.8084,
"step": 219
},
{
"epoch": 0.6,
"learning_rate": 1.4116711964490076e-05,
"loss": 1.7979,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 1.395150227336798e-05,
"loss": 1.8167,
"step": 221
},
{
"epoch": 0.6,
"learning_rate": 1.3786745616777348e-05,
"loss": 1.8015,
"step": 222
},
{
"epoch": 0.61,
"learning_rate": 1.3622454335047631e-05,
"loss": 1.8359,
"step": 223
},
{
"epoch": 0.61,
"learning_rate": 1.345864073365157e-05,
"loss": 1.8111,
"step": 224
},
{
"epoch": 0.61,
"learning_rate": 1.329531708228347e-05,
"loss": 1.814,
"step": 225
},
{
"epoch": 0.62,
"learning_rate": 1.3132495613940237e-05,
"loss": 1.7876,
"step": 226
},
{
"epoch": 0.62,
"learning_rate": 1.2970188524005058e-05,
"loss": 1.7877,
"step": 227
},
{
"epoch": 0.62,
"learning_rate": 1.2808407969334037e-05,
"loss": 1.7683,
"step": 228
},
{
"epoch": 0.62,
"learning_rate": 1.2647166067345598e-05,
"loss": 1.8017,
"step": 229
},
{
"epoch": 0.63,
"learning_rate": 1.24864748951129e-05,
"loss": 1.7989,
"step": 230
},
{
"epoch": 0.63,
"learning_rate": 1.2326346488459237e-05,
"loss": 1.752,
"step": 231
},
{
"epoch": 0.63,
"learning_rate": 1.2166792841056596e-05,
"loss": 1.7771,
"step": 232
},
{
"epoch": 0.63,
"learning_rate": 1.2007825903527287e-05,
"loss": 1.7739,
"step": 233
},
{
"epoch": 0.64,
"learning_rate": 1.1849457582548864e-05,
"loss": 1.7821,
"step": 234
},
{
"epoch": 0.64,
"learning_rate": 1.1691699739962275e-05,
"loss": 1.7745,
"step": 235
},
{
"epoch": 0.64,
"learning_rate": 1.153456419188345e-05,
"loss": 1.7759,
"step": 236
},
{
"epoch": 0.65,
"learning_rate": 1.137806270781824e-05,
"loss": 1.795,
"step": 237
},
{
"epoch": 0.65,
"learning_rate": 1.1222207009780888e-05,
"loss": 1.7348,
"step": 238
},
{
"epoch": 0.65,
"learning_rate": 1.1067008771416047e-05,
"loss": 1.7385,
"step": 239
},
{
"epoch": 0.65,
"learning_rate": 1.091247961712442e-05,
"loss": 1.798,
"step": 240
},
{
"epoch": 0.66,
"learning_rate": 1.0758631121192075e-05,
"loss": 1.7615,
"step": 241
},
{
"epoch": 0.66,
"learning_rate": 1.0605474806923556e-05,
"loss": 1.7768,
"step": 242
},
{
"epoch": 0.66,
"learning_rate": 1.0453022145778742e-05,
"loss": 1.735,
"step": 243
},
{
"epoch": 0.66,
"learning_rate": 1.0301284556513669e-05,
"loss": 1.7635,
"step": 244
},
{
"epoch": 0.67,
"learning_rate": 1.0150273404325244e-05,
"loss": 1.7395,
"step": 245
},
{
"epoch": 0.67,
"learning_rate": 1.0000000000000006e-05,
"loss": 1.7238,
"step": 246
},
{
"epoch": 0.67,
"learning_rate": 9.85047559906689e-06,
"loss": 1.7633,
"step": 247
},
{
"epoch": 0.68,
"learning_rate": 9.70171140095426e-06,
"loss": 1.7585,
"step": 248
},
{
"epoch": 0.68,
"learning_rate": 9.553718548151011e-06,
"loss": 1.7379,
"step": 249
},
{
"epoch": 0.68,
"learning_rate": 9.406508125372034e-06,
"loss": 1.7427,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 9.260091158727913e-06,
"loss": 1.7205,
"step": 251
},
{
"epoch": 0.69,
"learning_rate": 9.114478614899123e-06,
"loss": 1.7576,
"step": 252
},
{
"epoch": 0.69,
"learning_rate": 8.969681400314589e-06,
"loss": 1.7559,
"step": 253
},
{
"epoch": 0.69,
"learning_rate": 8.825710360334812e-06,
"loss": 1.7465,
"step": 254
},
{
"epoch": 0.69,
"learning_rate": 8.682576278439504e-06,
"loss": 1.7571,
"step": 255
},
{
"epoch": 0.7,
"learning_rate": 8.540289875419962e-06,
"loss": 1.7239,
"step": 256
},
{
"epoch": 0.7,
"learning_rate": 8.39886180857604e-06,
"loss": 1.737,
"step": 257
},
{
"epoch": 0.7,
"learning_rate": 8.258302670917915e-06,
"loss": 1.7449,
"step": 258
},
{
"epoch": 0.71,
"learning_rate": 8.118622990372676e-06,
"loss": 1.7399,
"step": 259
},
{
"epoch": 0.71,
"learning_rate": 7.979833228995782e-06,
"loss": 1.7411,
"step": 260
},
{
"epoch": 0.71,
"learning_rate": 7.841943782187435e-06,
"loss": 1.7333,
"step": 261
},
{
"epoch": 0.71,
"learning_rate": 7.704964977913984e-06,
"loss": 1.7641,
"step": 262
},
{
"epoch": 0.72,
"learning_rate": 7.568907075934322e-06,
"loss": 1.7311,
"step": 263
},
{
"epoch": 0.72,
"learning_rate": 7.433780267031463e-06,
"loss": 1.7022,
"step": 264
},
{
"epoch": 0.72,
"learning_rate": 7.299594672249231e-06,
"loss": 1.7178,
"step": 265
},
{
"epoch": 0.72,
"learning_rate": 7.166360342134202e-06,
"loss": 1.7497,
"step": 266
},
{
"epoch": 0.73,
"learning_rate": 7.034087255982882e-06,
"loss": 1.7196,
"step": 267
},
{
"epoch": 0.73,
"learning_rate": 6.902785321094301e-06,
"loss": 1.707,
"step": 268
},
{
"epoch": 0.73,
"learning_rate": 6.7724643720279156e-06,
"loss": 1.7525,
"step": 269
},
{
"epoch": 0.74,
"learning_rate": 6.643134169867031e-06,
"loss": 1.6995,
"step": 270
},
{
"epoch": 0.74,
"learning_rate": 6.514804401487642e-06,
"loss": 1.7229,
"step": 271
},
{
"epoch": 0.74,
"learning_rate": 6.38748467883294e-06,
"loss": 1.7021,
"step": 272
},
{
"epoch": 0.74,
"learning_rate": 6.261184538193341e-06,
"loss": 1.6753,
"step": 273
},
{
"epoch": 0.75,
"learning_rate": 6.135913439492227e-06,
"loss": 1.7111,
"step": 274
},
{
"epoch": 0.75,
"learning_rate": 6.01168076557737e-06,
"loss": 1.7046,
"step": 275
},
{
"epoch": 0.75,
"learning_rate": 5.888495821518194e-06,
"loss": 1.7362,
"step": 276
},
{
"epoch": 0.75,
"learning_rate": 5.7663678339087995e-06,
"loss": 1.7811,
"step": 277
},
{
"epoch": 0.76,
"learning_rate": 5.6453059501768806e-06,
"loss": 1.6819,
"step": 278
},
{
"epoch": 0.76,
"learning_rate": 5.5253192378985966e-06,
"loss": 1.7154,
"step": 279
},
{
"epoch": 0.76,
"learning_rate": 5.4064166841194e-06,
"loss": 1.7069,
"step": 280
},
{
"epoch": 0.77,
"learning_rate": 5.288607194680899e-06,
"loss": 1.715,
"step": 281
},
{
"epoch": 0.77,
"learning_rate": 5.171899593553824e-06,
"loss": 1.7173,
"step": 282
},
{
"epoch": 0.77,
"learning_rate": 5.056302622177074e-06,
"loss": 1.6873,
"step": 283
},
{
"epoch": 0.77,
"learning_rate": 4.941824938803024e-06,
"loss": 1.7291,
"step": 284
},
{
"epoch": 0.78,
"learning_rate": 4.828475117848992e-06,
"loss": 1.6928,
"step": 285
},
{
"epoch": 0.78,
"learning_rate": 4.716261649255021e-06,
"loss": 1.6815,
"step": 286
},
{
"epoch": 0.78,
"learning_rate": 4.605192937847962e-06,
"loss": 1.6701,
"step": 287
},
{
"epoch": 0.78,
"learning_rate": 4.495277302711982e-06,
"loss": 1.7119,
"step": 288
},
{
"epoch": 0.79,
"learning_rate": 4.386522976565439e-06,
"loss": 1.6813,
"step": 289
},
{
"epoch": 0.79,
"learning_rate": 4.278938105144255e-06,
"loss": 1.6945,
"step": 290
},
{
"epoch": 0.79,
"learning_rate": 4.172530746591783e-06,
"loss": 1.7221,
"step": 291
},
{
"epoch": 0.8,
"learning_rate": 4.06730887085528e-06,
"loss": 1.7254,
"step": 292
},
{
"epoch": 0.8,
"learning_rate": 3.963280359088933e-06,
"loss": 1.6873,
"step": 293
},
{
"epoch": 0.8,
"learning_rate": 3.86045300306356e-06,
"loss": 1.6773,
"step": 294
},
{
"epoch": 0.8,
"learning_rate": 3.7588345045830044e-06,
"loss": 1.7121,
"step": 295
},
{
"epoch": 0.81,
"learning_rate": 3.658432474907274e-06,
"loss": 1.6798,
"step": 296
},
{
"epoch": 0.81,
"learning_rate": 3.559254434182451e-06,
"loss": 1.6762,
"step": 297
},
{
"epoch": 0.81,
"learning_rate": 3.461307810877428e-06,
"loss": 1.6934,
"step": 298
},
{
"epoch": 0.81,
"learning_rate": 3.364599941227513e-06,
"loss": 1.6862,
"step": 299
},
{
"epoch": 0.82,
"learning_rate": 3.2691380686849517e-06,
"loss": 1.6708,
"step": 300
},
{
"epoch": 0.82,
"learning_rate": 3.174929343376374e-06,
"loss": 1.6764,
"step": 301
},
{
"epoch": 0.82,
"learning_rate": 3.081980821567272e-06,
"loss": 1.6886,
"step": 302
},
{
"epoch": 0.83,
"learning_rate": 2.990299465133446e-06,
"loss": 1.6679,
"step": 303
},
{
"epoch": 0.83,
"learning_rate": 2.8998921410396e-06,
"loss": 1.6854,
"step": 304
},
{
"epoch": 0.83,
"learning_rate": 2.8107656208249733e-06,
"loss": 1.681,
"step": 305
},
{
"epoch": 0.83,
"learning_rate": 2.72292658009617e-06,
"loss": 1.6987,
"step": 306
},
{
"epoch": 0.84,
"learning_rate": 2.6363815980271248e-06,
"loss": 1.6574,
"step": 307
},
{
"epoch": 0.84,
"learning_rate": 2.551137156866357e-06,
"loss": 1.7037,
"step": 308
},
{
"epoch": 0.84,
"learning_rate": 2.4671996414514276e-06,
"loss": 1.702,
"step": 309
},
{
"epoch": 0.84,
"learning_rate": 2.384575338730717e-06,
"loss": 1.6788,
"step": 310
},
{
"epoch": 0.85,
"learning_rate": 2.3032704372925176e-06,
"loss": 1.6624,
"step": 311
},
{
"epoch": 0.85,
"learning_rate": 2.223291026901533e-06,
"loss": 1.693,
"step": 312
},
{
"epoch": 0.85,
"learning_rate": 2.144643098042727e-06,
"loss": 1.6687,
"step": 313
},
{
"epoch": 0.86,
"learning_rate": 2.0673325414726574e-06,
"loss": 1.6914,
"step": 314
},
{
"epoch": 0.86,
"learning_rate": 1.991365147778228e-06,
"loss": 1.6708,
"step": 315
},
{
"epoch": 0.86,
"learning_rate": 1.9167466069429964e-06,
"loss": 1.6752,
"step": 316
},
{
"epoch": 0.86,
"learning_rate": 1.8434825079209884e-06,
"loss": 1.6893,
"step": 317
},
{
"epoch": 0.87,
"learning_rate": 1.7715783382180672e-06,
"loss": 1.6681,
"step": 318
},
{
"epoch": 0.87,
"learning_rate": 1.7010394834809373e-06,
"loss": 1.6982,
"step": 319
},
{
"epoch": 0.87,
"learning_rate": 1.6318712270937442e-06,
"loss": 1.6421,
"step": 320
},
{
"epoch": 0.87,
"learning_rate": 1.5640787497823585e-06,
"loss": 1.7007,
"step": 321
},
{
"epoch": 0.88,
"learning_rate": 1.4976671292263257e-06,
"loss": 1.6832,
"step": 322
},
{
"epoch": 0.88,
"learning_rate": 1.4326413396785488e-06,
"loss": 1.6584,
"step": 323
},
{
"epoch": 0.88,
"learning_rate": 1.3690062515927239e-06,
"loss": 1.7009,
"step": 324
},
{
"epoch": 0.89,
"learning_rate": 1.306766631258536e-06,
"loss": 1.6755,
"step": 325
},
{
"epoch": 0.89,
"learning_rate": 1.245927140444665e-06,
"loss": 1.686,
"step": 326
},
{
"epoch": 0.89,
"learning_rate": 1.1864923360496028e-06,
"loss": 1.6916,
"step": 327
},
{
"epoch": 0.89,
"learning_rate": 1.128466669760362e-06,
"loss": 1.6923,
"step": 328
},
{
"epoch": 0.9,
"learning_rate": 1.0718544877190306e-06,
"loss": 1.7079,
"step": 329
},
{
"epoch": 0.9,
"learning_rate": 1.0166600301972517e-06,
"loss": 1.6865,
"step": 330
},
{
"epoch": 0.9,
"learning_rate": 9.628874312786096e-07,
"loss": 1.6624,
"step": 331
},
{
"epoch": 0.9,
"learning_rate": 9.105407185490067e-07,
"loss": 1.6489,
"step": 332
},
{
"epoch": 0.91,
"learning_rate": 8.59623812794983e-07,
"loss": 1.6923,
"step": 333
},
{
"epoch": 0.91,
"learning_rate": 8.101405277100549e-07,
"loss": 1.678,
"step": 334
},
{
"epoch": 0.91,
"learning_rate": 7.620945696090532e-07,
"loss": 1.6302,
"step": 335
},
{
"epoch": 0.92,
"learning_rate": 7.154895371505421e-07,
"loss": 1.6555,
"step": 336
},
{
"epoch": 0.92,
"learning_rate": 6.703289210672603e-07,
"loss": 1.6805,
"step": 337
},
{
"epoch": 0.92,
"learning_rate": 6.266161039046737e-07,
"loss": 1.6744,
"step": 338
},
{
"epoch": 0.92,
"learning_rate": 5.843543597676138e-07,
"loss": 1.6585,
"step": 339
},
{
"epoch": 0.93,
"learning_rate": 5.435468540750544e-07,
"loss": 1.6951,
"step": 340
},
{
"epoch": 0.93,
"learning_rate": 5.041966433230094e-07,
"loss": 1.6666,
"step": 341
},
{
"epoch": 0.93,
"learning_rate": 4.6630667485561885e-07,
"loss": 1.6757,
"step": 342
},
{
"epoch": 0.93,
"learning_rate": 4.2987978664436936e-07,
"loss": 1.6719,
"step": 343
},
{
"epoch": 0.94,
"learning_rate": 3.9491870707554445e-07,
"loss": 1.6575,
"step": 344
},
{
"epoch": 0.94,
"learning_rate": 3.614260547458659e-07,
"loss": 1.6398,
"step": 345
},
{
"epoch": 0.94,
"learning_rate": 3.2940433826635257e-07,
"loss": 1.6523,
"step": 346
},
{
"epoch": 0.95,
"learning_rate": 2.9885595607443086e-07,
"loss": 1.6521,
"step": 347
},
{
"epoch": 0.95,
"learning_rate": 2.697831962542874e-07,
"loss": 1.6598,
"step": 348
},
{
"epoch": 0.95,
"learning_rate": 2.4218823636549703e-07,
"loss": 1.6721,
"step": 349
},
{
"epoch": 0.95,
"learning_rate": 2.1607314327991791e-07,
"loss": 1.6975,
"step": 350
},
{
"epoch": 0.96,
"learning_rate": 1.9143987302687738e-07,
"loss": 1.6652,
"step": 351
},
{
"epoch": 0.96,
"learning_rate": 1.682902706466738e-07,
"loss": 1.6554,
"step": 352
},
{
"epoch": 0.96,
"learning_rate": 1.4662607005237805e-07,
"loss": 1.6603,
"step": 353
},
{
"epoch": 0.96,
"learning_rate": 1.264488938999664e-07,
"loss": 1.6843,
"step": 354
},
{
"epoch": 0.97,
"learning_rate": 1.0776025346677321e-07,
"loss": 1.69,
"step": 355
},
{
"epoch": 0.97,
"learning_rate": 9.056154853830823e-08,
"loss": 1.6755,
"step": 356
},
{
"epoch": 0.97,
"learning_rate": 7.485406730340483e-08,
"loss": 1.6577,
"step": 357
},
{
"epoch": 0.98,
"learning_rate": 6.06389862577328e-08,
"loss": 1.7059,
"step": 358
},
{
"epoch": 0.98,
"learning_rate": 4.7917370115688756e-08,
"loss": 1.7009,
"step": 359
},
{
"epoch": 0.98,
"learning_rate": 3.66901717306356e-08,
"loss": 1.6863,
"step": 360
},
{
"epoch": 0.98,
"learning_rate": 2.6958232023539532e-08,
"loss": 1.6557,
"step": 361
},
{
"epoch": 0.99,
"learning_rate": 1.8722279919987098e-08,
"loss": 1.6717,
"step": 362
},
{
"epoch": 0.99,
"learning_rate": 1.1982932295582227e-08,
"loss": 1.6401,
"step": 363
},
{
"epoch": 0.99,
"learning_rate": 6.7406939297520734e-09,
"loss": 1.6846,
"step": 364
},
{
"epoch": 0.99,
"learning_rate": 2.995957467923916e-09,
"loss": 1.6775,
"step": 365
},
{
"epoch": 1.0,
"learning_rate": 7.490033921331296e-10,
"loss": 1.675,
"step": 366
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 1.6419,
"step": 367
},
{
"epoch": 1.0,
"step": 367,
"total_flos": 3.839475069608788e+18,
"train_loss": 2.069092570598509,
"train_runtime": 4018.2223,
"train_samples_per_second": 174.946,
"train_steps_per_second": 0.091
}
],
"max_steps": 367,
"num_train_epochs": 1,
"total_flos": 3.839475069608788e+18,
"trial_name": null,
"trial_params": null
}