llava-v1.6-vicuna-13b-AVS_train / trainer_state.json
SW-Yoon's picture
Upload model checkpoint
1ff35d4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 8.333333333333333e-07,
"loss": 11.1556,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.6666666666666667e-06,
"loss": 11.1531,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 2.5e-06,
"loss": 11.1614,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 3.3333333333333333e-06,
"loss": 11.1446,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 4.166666666666667e-06,
"loss": 11.0054,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 5e-06,
"loss": 10.6955,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 5.833333333333334e-06,
"loss": 10.4968,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 6.666666666666667e-06,
"loss": 9.706,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 7.500000000000001e-06,
"loss": 9.5405,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 8.333333333333334e-06,
"loss": 9.2296,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 9.166666666666666e-06,
"loss": 9.7789,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 1e-05,
"loss": 8.9721,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 1.0833333333333334e-05,
"loss": 8.9369,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 1.1666666666666668e-05,
"loss": 9.1961,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 1.25e-05,
"loss": 9.0081,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 1.3333333333333333e-05,
"loss": 9.1296,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 1.416666666666667e-05,
"loss": 8.9271,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 1.5000000000000002e-05,
"loss": 8.6372,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 1.5833333333333333e-05,
"loss": 8.2846,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 1.6666666666666667e-05,
"loss": 8.1478,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.7500000000000002e-05,
"loss": 8.2667,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 1.8333333333333333e-05,
"loss": 8.1677,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 1.916666666666667e-05,
"loss": 8.1679,
"step": 23
},
{
"epoch": 0.03,
"learning_rate": 2e-05,
"loss": 8.2426,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 1.9999915896977905e-05,
"loss": 8.207,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 1.999966358932628e-05,
"loss": 8.1476,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 1.999924308128909e-05,
"loss": 7.929,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 1.9998654379939535e-05,
"loss": 7.7032,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 1.9997897495179932e-05,
"loss": 7.4376,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 1.9996972439741537e-05,
"loss": 7.3027,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.9995879229184343e-05,
"loss": 7.2694,
"step": 31
},
{
"epoch": 0.04,
"learning_rate": 1.999461788189681e-05,
"loss": 7.2901,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 1.9993188419095562e-05,
"loss": 7.2255,
"step": 33
},
{
"epoch": 0.04,
"learning_rate": 1.9991590864825026e-05,
"loss": 7.0736,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 1.9989825245957038e-05,
"loss": 7.0355,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 1.9987891592190367e-05,
"loss": 6.9484,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.998578993605024e-05,
"loss": 7.0153,
"step": 37
},
{
"epoch": 0.05,
"learning_rate": 1.9983520312887785e-05,
"loss": 6.9302,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 1.9981082760879432e-05,
"loss": 6.9273,
"step": 39
},
{
"epoch": 0.05,
"learning_rate": 1.9978477321026282e-05,
"loss": 6.8868,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.997570403715341e-05,
"loss": 6.8568,
"step": 41
},
{
"epoch": 0.05,
"learning_rate": 1.997276295590912e-05,
"loss": 6.8307,
"step": 42
},
{
"epoch": 0.05,
"learning_rate": 1.9969654126764183e-05,
"loss": 6.7708,
"step": 43
},
{
"epoch": 0.06,
"learning_rate": 1.9966377602010984e-05,
"loss": 6.6811,
"step": 44
},
{
"epoch": 0.06,
"learning_rate": 1.9962933436762644e-05,
"loss": 6.6613,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 1.995932168895211e-05,
"loss": 6.6736,
"step": 46
},
{
"epoch": 0.06,
"learning_rate": 1.9955542419331162e-05,
"loss": 6.5919,
"step": 47
},
{
"epoch": 0.06,
"learning_rate": 1.9951595691469397e-05,
"loss": 6.5106,
"step": 48
},
{
"epoch": 0.06,
"learning_rate": 1.9947481571753165e-05,
"loss": 6.4874,
"step": 49
},
{
"epoch": 0.06,
"learning_rate": 1.9943200129384444e-05,
"loss": 6.4935,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.9938751436379684e-05,
"loss": 6.4783,
"step": 51
},
{
"epoch": 0.07,
"learning_rate": 1.9934135567568594e-05,
"loss": 6.5052,
"step": 52
},
{
"epoch": 0.07,
"learning_rate": 1.992935260059287e-05,
"loss": 6.5075,
"step": 53
},
{
"epoch": 0.07,
"learning_rate": 1.992440261590491e-05,
"loss": 6.4666,
"step": 54
},
{
"epoch": 0.07,
"learning_rate": 1.9919285696766453e-05,
"loss": 6.406,
"step": 55
},
{
"epoch": 0.07,
"learning_rate": 1.991400192924717e-05,
"loss": 6.4111,
"step": 56
},
{
"epoch": 0.07,
"learning_rate": 1.9908551402223218e-05,
"loss": 6.4167,
"step": 57
},
{
"epoch": 0.07,
"learning_rate": 1.9902934207375758e-05,
"loss": 6.4207,
"step": 58
},
{
"epoch": 0.07,
"learning_rate": 1.989715043918941e-05,
"loss": 6.422,
"step": 59
},
{
"epoch": 0.08,
"learning_rate": 1.9891200194950644e-05,
"loss": 6.3422,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 1.9885083574746167e-05,
"loss": 6.2456,
"step": 61
},
{
"epoch": 0.08,
"learning_rate": 1.9878800681461222e-05,
"loss": 6.2807,
"step": 62
},
{
"epoch": 0.08,
"learning_rate": 1.9872351620777883e-05,
"loss": 6.3047,
"step": 63
},
{
"epoch": 0.08,
"learning_rate": 1.9865736501173237e-05,
"loss": 6.2419,
"step": 64
},
{
"epoch": 0.08,
"learning_rate": 1.9858955433917602e-05,
"loss": 6.2308,
"step": 65
},
{
"epoch": 0.08,
"learning_rate": 1.9852008533072627e-05,
"loss": 6.2322,
"step": 66
},
{
"epoch": 0.08,
"learning_rate": 1.9844895915489378e-05,
"loss": 6.2124,
"step": 67
},
{
"epoch": 0.09,
"learning_rate": 1.9837617700806385e-05,
"loss": 6.1977,
"step": 68
},
{
"epoch": 0.09,
"learning_rate": 1.9830174011447617e-05,
"loss": 6.1633,
"step": 69
},
{
"epoch": 0.09,
"learning_rate": 1.982256497262043e-05,
"loss": 6.1553,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.9814790712313456e-05,
"loss": 6.1803,
"step": 71
},
{
"epoch": 0.09,
"learning_rate": 1.980685136129445e-05,
"loss": 6.1184,
"step": 72
},
{
"epoch": 0.09,
"learning_rate": 1.9798747053108098e-05,
"loss": 6.155,
"step": 73
},
{
"epoch": 0.09,
"learning_rate": 1.9790477924073755e-05,
"loss": 6.101,
"step": 74
},
{
"epoch": 0.09,
"learning_rate": 1.978204411328318e-05,
"loss": 6.0193,
"step": 75
},
{
"epoch": 0.1,
"learning_rate": 1.977344576259816e-05,
"loss": 6.03,
"step": 76
},
{
"epoch": 0.1,
"learning_rate": 1.9764683016648156e-05,
"loss": 5.9886,
"step": 77
},
{
"epoch": 0.1,
"learning_rate": 1.9755756022827847e-05,
"loss": 6.0071,
"step": 78
},
{
"epoch": 0.1,
"learning_rate": 1.9746664931294667e-05,
"loss": 5.9421,
"step": 79
},
{
"epoch": 0.1,
"learning_rate": 1.9737409894966267e-05,
"loss": 5.9954,
"step": 80
},
{
"epoch": 0.1,
"learning_rate": 1.972799106951796e-05,
"loss": 5.9408,
"step": 81
},
{
"epoch": 0.1,
"learning_rate": 1.9718408613380077e-05,
"loss": 5.9167,
"step": 82
},
{
"epoch": 0.11,
"learning_rate": 1.9708662687735316e-05,
"loss": 5.9676,
"step": 83
},
{
"epoch": 0.11,
"learning_rate": 1.9698753456516047e-05,
"loss": 5.8603,
"step": 84
},
{
"epoch": 0.11,
"learning_rate": 1.9688681086401526e-05,
"loss": 5.827,
"step": 85
},
{
"epoch": 0.11,
"learning_rate": 1.9678445746815107e-05,
"loss": 5.8725,
"step": 86
},
{
"epoch": 0.11,
"learning_rate": 1.9668047609921385e-05,
"loss": 5.8081,
"step": 87
},
{
"epoch": 0.11,
"learning_rate": 1.9657486850623308e-05,
"loss": 5.728,
"step": 88
},
{
"epoch": 0.11,
"learning_rate": 1.9646763646559234e-05,
"loss": 5.8512,
"step": 89
},
{
"epoch": 0.11,
"learning_rate": 1.963587817809993e-05,
"loss": 5.7728,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 1.9624830628345562e-05,
"loss": 5.7605,
"step": 91
},
{
"epoch": 0.12,
"learning_rate": 1.961362118312259e-05,
"loss": 5.7939,
"step": 92
},
{
"epoch": 0.12,
"learning_rate": 1.9602250030980657e-05,
"loss": 5.7981,
"step": 93
},
{
"epoch": 0.12,
"learning_rate": 1.9590717363189424e-05,
"loss": 5.6852,
"step": 94
},
{
"epoch": 0.12,
"learning_rate": 1.957902337373532e-05,
"loss": 5.8011,
"step": 95
},
{
"epoch": 0.12,
"learning_rate": 1.9567168259318324e-05,
"loss": 5.649,
"step": 96
},
{
"epoch": 0.12,
"learning_rate": 1.955515221934863e-05,
"loss": 5.6449,
"step": 97
},
{
"epoch": 0.12,
"learning_rate": 1.9542975455943284e-05,
"loss": 5.5907,
"step": 98
},
{
"epoch": 0.13,
"learning_rate": 1.953063817392281e-05,
"loss": 5.6107,
"step": 99
},
{
"epoch": 0.13,
"learning_rate": 1.9518140580807746e-05,
"loss": 5.5694,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 1.9505482886815167e-05,
"loss": 5.6611,
"step": 101
},
{
"epoch": 0.13,
"learning_rate": 1.949266530485513e-05,
"loss": 5.5533,
"step": 102
},
{
"epoch": 0.13,
"learning_rate": 1.947968805052712e-05,
"loss": 5.4755,
"step": 103
},
{
"epoch": 0.13,
"learning_rate": 1.946655134211639e-05,
"loss": 5.6145,
"step": 104
},
{
"epoch": 0.13,
"learning_rate": 1.945325540059032e-05,
"loss": 5.6456,
"step": 105
},
{
"epoch": 0.13,
"learning_rate": 1.943980044959468e-05,
"loss": 5.4775,
"step": 106
},
{
"epoch": 0.14,
"learning_rate": 1.942618671544988e-05,
"loss": 5.7434,
"step": 107
},
{
"epoch": 0.14,
"learning_rate": 1.941241442714716e-05,
"loss": 5.6461,
"step": 108
},
{
"epoch": 0.14,
"learning_rate": 1.9398483816344728e-05,
"loss": 5.8886,
"step": 109
},
{
"epoch": 0.14,
"learning_rate": 1.938439511736388e-05,
"loss": 6.0387,
"step": 110
},
{
"epoch": 0.14,
"learning_rate": 1.9370148567185043e-05,
"loss": 5.8731,
"step": 111
},
{
"epoch": 0.14,
"learning_rate": 1.935574440544381e-05,
"loss": 5.7988,
"step": 112
},
{
"epoch": 0.14,
"learning_rate": 1.934118287442689e-05,
"loss": 5.5043,
"step": 113
},
{
"epoch": 0.14,
"learning_rate": 1.9326464219068023e-05,
"loss": 5.5951,
"step": 114
},
{
"epoch": 0.15,
"learning_rate": 1.9311588686943897e-05,
"loss": 5.7133,
"step": 115
},
{
"epoch": 0.15,
"learning_rate": 1.9296556528269954e-05,
"loss": 5.4865,
"step": 116
},
{
"epoch": 0.15,
"learning_rate": 1.9281367995896187e-05,
"loss": 5.5316,
"step": 117
},
{
"epoch": 0.15,
"learning_rate": 1.9266023345302886e-05,
"loss": 5.5545,
"step": 118
},
{
"epoch": 0.15,
"learning_rate": 1.9250522834596357e-05,
"loss": 5.5339,
"step": 119
},
{
"epoch": 0.15,
"learning_rate": 1.9234866724504554e-05,
"loss": 5.4314,
"step": 120
},
{
"epoch": 0.15,
"learning_rate": 1.9219055278372713e-05,
"loss": 5.4064,
"step": 121
},
{
"epoch": 0.15,
"learning_rate": 1.9203088762158914e-05,
"loss": 5.3135,
"step": 122
},
{
"epoch": 0.16,
"learning_rate": 1.9186967444429613e-05,
"loss": 5.3226,
"step": 123
},
{
"epoch": 0.16,
"learning_rate": 1.9170691596355113e-05,
"loss": 5.3193,
"step": 124
},
{
"epoch": 0.16,
"learning_rate": 1.915426149170502e-05,
"loss": 5.1942,
"step": 125
},
{
"epoch": 0.16,
"learning_rate": 1.913767740684362e-05,
"loss": 5.3009,
"step": 126
},
{
"epoch": 0.16,
"learning_rate": 1.912093962072525e-05,
"loss": 5.1603,
"step": 127
},
{
"epoch": 0.16,
"learning_rate": 1.910404841488959e-05,
"loss": 5.2039,
"step": 128
},
{
"epoch": 0.16,
"learning_rate": 1.9087004073456926e-05,
"loss": 5.1635,
"step": 129
},
{
"epoch": 0.16,
"learning_rate": 1.9069806883123386e-05,
"loss": 5.2517,
"step": 130
},
{
"epoch": 0.17,
"learning_rate": 1.9052457133156103e-05,
"loss": 5.2288,
"step": 131
},
{
"epoch": 0.17,
"learning_rate": 1.9034955115388364e-05,
"loss": 5.1153,
"step": 132
},
{
"epoch": 0.17,
"learning_rate": 1.901730112421468e-05,
"loss": 5.0183,
"step": 133
},
{
"epoch": 0.17,
"learning_rate": 1.8999495456585856e-05,
"loss": 5.1861,
"step": 134
},
{
"epoch": 0.17,
"learning_rate": 1.898153841200398e-05,
"loss": 5.0127,
"step": 135
},
{
"epoch": 0.17,
"learning_rate": 1.8963430292517398e-05,
"loss": 5.0361,
"step": 136
},
{
"epoch": 0.17,
"learning_rate": 1.8945171402715628e-05,
"loss": 4.976,
"step": 137
},
{
"epoch": 0.17,
"learning_rate": 1.892676204972423e-05,
"loss": 4.9992,
"step": 138
},
{
"epoch": 0.18,
"learning_rate": 1.8908202543199646e-05,
"loss": 4.9535,
"step": 139
},
{
"epoch": 0.18,
"learning_rate": 1.8889493195324e-05,
"loss": 5.0195,
"step": 140
},
{
"epoch": 0.18,
"learning_rate": 1.8870634320799822e-05,
"loss": 4.9628,
"step": 141
},
{
"epoch": 0.18,
"learning_rate": 1.8851626236844787e-05,
"loss": 4.9592,
"step": 142
},
{
"epoch": 0.18,
"learning_rate": 1.8832469263186352e-05,
"loss": 4.8603,
"step": 143
},
{
"epoch": 0.18,
"learning_rate": 1.8813163722056397e-05,
"loss": 4.9138,
"step": 144
},
{
"epoch": 0.18,
"learning_rate": 1.879370993818579e-05,
"loss": 5.0571,
"step": 145
},
{
"epoch": 0.18,
"learning_rate": 1.8774108238798932e-05,
"loss": 4.9104,
"step": 146
},
{
"epoch": 0.19,
"learning_rate": 1.875435895360826e-05,
"loss": 4.9345,
"step": 147
},
{
"epoch": 0.19,
"learning_rate": 1.873446241480868e-05,
"loss": 4.8746,
"step": 148
},
{
"epoch": 0.19,
"learning_rate": 1.8714418957072008e-05,
"loss": 4.8118,
"step": 149
},
{
"epoch": 0.19,
"learning_rate": 1.8694228917541313e-05,
"loss": 4.9833,
"step": 150
},
{
"epoch": 0.19,
"learning_rate": 1.8673892635825264e-05,
"loss": 4.777,
"step": 151
},
{
"epoch": 0.19,
"learning_rate": 1.8653410453992415e-05,
"loss": 4.8645,
"step": 152
},
{
"epoch": 0.19,
"learning_rate": 1.8632782716565438e-05,
"loss": 4.7069,
"step": 153
},
{
"epoch": 0.19,
"learning_rate": 1.861200977051535e-05,
"loss": 4.7141,
"step": 154
},
{
"epoch": 0.2,
"learning_rate": 1.8591091965255654e-05,
"loss": 4.7068,
"step": 155
},
{
"epoch": 0.2,
"learning_rate": 1.857002965263648e-05,
"loss": 4.8115,
"step": 156
},
{
"epoch": 0.2,
"learning_rate": 1.854882318693866e-05,
"loss": 4.7032,
"step": 157
},
{
"epoch": 0.2,
"learning_rate": 1.852747292486776e-05,
"loss": 4.7396,
"step": 158
},
{
"epoch": 0.2,
"learning_rate": 1.850597922554809e-05,
"loss": 4.577,
"step": 159
},
{
"epoch": 0.2,
"learning_rate": 1.8484342450516672e-05,
"loss": 4.6603,
"step": 160
},
{
"epoch": 0.2,
"learning_rate": 1.8462562963717134e-05,
"loss": 4.6679,
"step": 161
},
{
"epoch": 0.21,
"learning_rate": 1.844064113149361e-05,
"loss": 4.734,
"step": 162
},
{
"epoch": 0.21,
"learning_rate": 1.841857732258457e-05,
"loss": 4.6387,
"step": 163
},
{
"epoch": 0.21,
"learning_rate": 1.839637190811661e-05,
"loss": 4.7254,
"step": 164
},
{
"epoch": 0.21,
"learning_rate": 1.8374025261598224e-05,
"loss": 4.5621,
"step": 165
},
{
"epoch": 0.21,
"learning_rate": 1.8351537758913518e-05,
"loss": 4.5779,
"step": 166
},
{
"epoch": 0.21,
"learning_rate": 1.8328909778315876e-05,
"loss": 4.5663,
"step": 167
},
{
"epoch": 0.21,
"learning_rate": 1.8306141700421606e-05,
"loss": 4.444,
"step": 168
},
{
"epoch": 0.21,
"learning_rate": 1.8283233908203547e-05,
"loss": 4.4928,
"step": 169
},
{
"epoch": 0.22,
"learning_rate": 1.8260186786984603e-05,
"loss": 4.6391,
"step": 170
},
{
"epoch": 0.22,
"learning_rate": 1.8237000724431283e-05,
"loss": 4.5151,
"step": 171
},
{
"epoch": 0.22,
"learning_rate": 1.8213676110547177e-05,
"loss": 4.5845,
"step": 172
},
{
"epoch": 0.22,
"learning_rate": 1.8190213337666384e-05,
"loss": 4.6617,
"step": 173
},
{
"epoch": 0.22,
"learning_rate": 1.816661280044693e-05,
"loss": 4.4839,
"step": 174
},
{
"epoch": 0.22,
"learning_rate": 1.8142874895864113e-05,
"loss": 4.674,
"step": 175
},
{
"epoch": 0.22,
"learning_rate": 1.8119000023203838e-05,
"loss": 4.3447,
"step": 176
},
{
"epoch": 0.22,
"learning_rate": 1.809498858405589e-05,
"loss": 4.5803,
"step": 177
},
{
"epoch": 0.23,
"learning_rate": 1.807084098230719e-05,
"loss": 4.5345,
"step": 178
},
{
"epoch": 0.23,
"learning_rate": 1.8046557624134997e-05,
"loss": 4.4603,
"step": 179
},
{
"epoch": 0.23,
"learning_rate": 1.802213891800007e-05,
"loss": 4.4549,
"step": 180
},
{
"epoch": 0.23,
"learning_rate": 1.799758527463981e-05,
"loss": 4.5114,
"step": 181
},
{
"epoch": 0.23,
"learning_rate": 1.797289710706133e-05,
"loss": 4.3743,
"step": 182
},
{
"epoch": 0.23,
"learning_rate": 1.7948074830534535e-05,
"loss": 4.441,
"step": 183
},
{
"epoch": 0.23,
"learning_rate": 1.7923118862585123e-05,
"loss": 4.5071,
"step": 184
},
{
"epoch": 0.23,
"learning_rate": 1.7898029622987555e-05,
"loss": 4.3857,
"step": 185
},
{
"epoch": 0.24,
"learning_rate": 1.7872807533758007e-05,
"loss": 4.4135,
"step": 186
},
{
"epoch": 0.24,
"learning_rate": 1.7847453019147264e-05,
"loss": 4.4598,
"step": 187
},
{
"epoch": 0.24,
"learning_rate": 1.7821966505633587e-05,
"loss": 4.4086,
"step": 188
},
{
"epoch": 0.24,
"learning_rate": 1.7796348421915536e-05,
"loss": 4.3538,
"step": 189
},
{
"epoch": 0.24,
"learning_rate": 1.7770599198904762e-05,
"loss": 4.4076,
"step": 190
},
{
"epoch": 0.24,
"learning_rate": 1.774471926971877e-05,
"loss": 4.3045,
"step": 191
},
{
"epoch": 0.24,
"learning_rate": 1.7718709069673595e-05,
"loss": 4.5118,
"step": 192
},
{
"epoch": 0.24,
"learning_rate": 1.7692569036276533e-05,
"loss": 4.4237,
"step": 193
},
{
"epoch": 0.25,
"learning_rate": 1.7666299609218745e-05,
"loss": 4.3038,
"step": 194
},
{
"epoch": 0.25,
"learning_rate": 1.763990123036787e-05,
"loss": 4.3774,
"step": 195
},
{
"epoch": 0.25,
"learning_rate": 1.7613374343760595e-05,
"loss": 4.282,
"step": 196
},
{
"epoch": 0.25,
"learning_rate": 1.7586719395595185e-05,
"loss": 4.2489,
"step": 197
},
{
"epoch": 0.25,
"learning_rate": 1.7559936834223982e-05,
"loss": 4.2206,
"step": 198
},
{
"epoch": 0.25,
"learning_rate": 1.7533027110145857e-05,
"loss": 4.1831,
"step": 199
},
{
"epoch": 0.25,
"learning_rate": 1.7505990675998632e-05,
"loss": 4.4274,
"step": 200
},
{
"epoch": 0.25,
"learning_rate": 1.747882798655147e-05,
"loss": 4.2656,
"step": 201
},
{
"epoch": 0.26,
"learning_rate": 1.7451539498697225e-05,
"loss": 4.355,
"step": 202
},
{
"epoch": 0.26,
"learning_rate": 1.742412567144476e-05,
"loss": 4.3059,
"step": 203
},
{
"epoch": 0.26,
"learning_rate": 1.739658696591121e-05,
"loss": 4.2162,
"step": 204
},
{
"epoch": 0.26,
"learning_rate": 1.7368923845314262e-05,
"loss": 4.2575,
"step": 205
},
{
"epoch": 0.26,
"learning_rate": 1.7341136774964305e-05,
"loss": 4.1695,
"step": 206
},
{
"epoch": 0.26,
"learning_rate": 1.7313226222256675e-05,
"loss": 4.1604,
"step": 207
},
{
"epoch": 0.26,
"learning_rate": 1.728519265666373e-05,
"loss": 4.2444,
"step": 208
},
{
"epoch": 0.26,
"learning_rate": 1.7257036549726988e-05,
"loss": 4.1928,
"step": 209
},
{
"epoch": 0.27,
"learning_rate": 1.7228758375049186e-05,
"loss": 4.2985,
"step": 210
},
{
"epoch": 0.27,
"learning_rate": 1.7200358608286314e-05,
"loss": 4.3212,
"step": 211
},
{
"epoch": 0.27,
"learning_rate": 1.7171837727139613e-05,
"loss": 4.2987,
"step": 212
},
{
"epoch": 0.27,
"learning_rate": 1.714319621134755e-05,
"loss": 4.2621,
"step": 213
},
{
"epoch": 0.27,
"learning_rate": 1.711443454267772e-05,
"loss": 4.2537,
"step": 214
},
{
"epoch": 0.27,
"learning_rate": 1.708555320491878e-05,
"loss": 4.1606,
"step": 215
},
{
"epoch": 0.27,
"learning_rate": 1.705655268387229e-05,
"loss": 4.0981,
"step": 216
},
{
"epoch": 0.27,
"learning_rate": 1.702743346734454e-05,
"loss": 4.1428,
"step": 217
},
{
"epoch": 0.28,
"learning_rate": 1.6998196045138354e-05,
"loss": 4.0398,
"step": 218
},
{
"epoch": 0.28,
"learning_rate": 1.696884090904484e-05,
"loss": 4.1158,
"step": 219
},
{
"epoch": 0.28,
"learning_rate": 1.6939368552835137e-05,
"loss": 4.2338,
"step": 220
},
{
"epoch": 0.28,
"learning_rate": 1.6909779472252084e-05,
"loss": 4.2012,
"step": 221
},
{
"epoch": 0.28,
"learning_rate": 1.6880074165001906e-05,
"loss": 4.1079,
"step": 222
},
{
"epoch": 0.28,
"learning_rate": 1.685025313074582e-05,
"loss": 4.2466,
"step": 223
},
{
"epoch": 0.28,
"learning_rate": 1.682031687109165e-05,
"loss": 4.0848,
"step": 224
},
{
"epoch": 0.28,
"learning_rate": 1.679026588958538e-05,
"loss": 4.2162,
"step": 225
},
{
"epoch": 0.29,
"learning_rate": 1.6760100691702676e-05,
"loss": 4.1318,
"step": 226
},
{
"epoch": 0.29,
"learning_rate": 1.6729821784840398e-05,
"loss": 4.2007,
"step": 227
},
{
"epoch": 0.29,
"learning_rate": 1.669942967830807e-05,
"loss": 4.1688,
"step": 228
},
{
"epoch": 0.29,
"learning_rate": 1.6668924883319288e-05,
"loss": 4.1256,
"step": 229
},
{
"epoch": 0.29,
"learning_rate": 1.6638307912983135e-05,
"loss": 4.0463,
"step": 230
},
{
"epoch": 0.29,
"learning_rate": 1.6607579282295572e-05,
"loss": 4.1453,
"step": 231
},
{
"epoch": 0.29,
"learning_rate": 1.6576739508130725e-05,
"loss": 4.0776,
"step": 232
},
{
"epoch": 0.29,
"learning_rate": 1.6545789109232247e-05,
"loss": 4.2646,
"step": 233
},
{
"epoch": 0.3,
"learning_rate": 1.651472860620455e-05,
"loss": 4.0992,
"step": 234
},
{
"epoch": 0.3,
"learning_rate": 1.6483558521504068e-05,
"loss": 4.0812,
"step": 235
},
{
"epoch": 0.3,
"learning_rate": 1.6452279379430466e-05,
"loss": 4.0722,
"step": 236
},
{
"epoch": 0.3,
"learning_rate": 1.6420891706117818e-05,
"loss": 4.0237,
"step": 237
},
{
"epoch": 0.3,
"learning_rate": 1.638939602952576e-05,
"loss": 4.1031,
"step": 238
},
{
"epoch": 0.3,
"learning_rate": 1.6357792879430615e-05,
"loss": 4.0923,
"step": 239
},
{
"epoch": 0.3,
"learning_rate": 1.632608278741646e-05,
"loss": 4.0142,
"step": 240
},
{
"epoch": 0.31,
"learning_rate": 1.629426628686622e-05,
"loss": 4.0109,
"step": 241
},
{
"epoch": 0.31,
"learning_rate": 1.6262343912952656e-05,
"loss": 3.9415,
"step": 242
},
{
"epoch": 0.31,
"learning_rate": 1.6230316202629393e-05,
"loss": 4.0262,
"step": 243
},
{
"epoch": 0.31,
"learning_rate": 1.619818369462188e-05,
"loss": 3.951,
"step": 244
},
{
"epoch": 0.31,
"learning_rate": 1.6165946929418322e-05,
"loss": 4.0882,
"step": 245
},
{
"epoch": 0.31,
"learning_rate": 1.613360644926059e-05,
"loss": 4.054,
"step": 246
},
{
"epoch": 0.31,
"learning_rate": 1.610116279813511e-05,
"loss": 4.0486,
"step": 247
},
{
"epoch": 0.31,
"learning_rate": 1.6068616521763708e-05,
"loss": 4.0278,
"step": 248
},
{
"epoch": 0.32,
"learning_rate": 1.603596816759442e-05,
"loss": 4.0248,
"step": 249
},
{
"epoch": 0.32,
"learning_rate": 1.60032182847923e-05,
"loss": 4.0702,
"step": 250
},
{
"epoch": 0.32,
"learning_rate": 1.5970367424230162e-05,
"loss": 3.8974,
"step": 251
},
{
"epoch": 0.32,
"learning_rate": 1.5937416138479344e-05,
"loss": 4.0375,
"step": 252
},
{
"epoch": 0.32,
"learning_rate": 1.590436498180039e-05,
"loss": 4.0119,
"step": 253
},
{
"epoch": 0.32,
"learning_rate": 1.5871214510133734e-05,
"loss": 4.0036,
"step": 254
},
{
"epoch": 0.32,
"learning_rate": 1.5837965281090334e-05,
"loss": 3.9336,
"step": 255
},
{
"epoch": 0.32,
"learning_rate": 1.580461785394233e-05,
"loss": 3.806,
"step": 256
},
{
"epoch": 0.33,
"learning_rate": 1.57711727896136e-05,
"loss": 3.9693,
"step": 257
},
{
"epoch": 0.33,
"learning_rate": 1.5737630650670336e-05,
"loss": 3.9221,
"step": 258
},
{
"epoch": 0.33,
"learning_rate": 1.57039920013116e-05,
"loss": 3.8656,
"step": 259
},
{
"epoch": 0.33,
"learning_rate": 1.567025740735979e-05,
"loss": 4.0068,
"step": 260
},
{
"epoch": 0.33,
"learning_rate": 1.5636427436251182e-05,
"loss": 3.9293,
"step": 261
},
{
"epoch": 0.33,
"learning_rate": 1.5602502657026327e-05,
"loss": 3.9401,
"step": 262
},
{
"epoch": 0.33,
"learning_rate": 1.556848364032052e-05,
"loss": 3.9274,
"step": 263
},
{
"epoch": 0.33,
"learning_rate": 1.5534370958354184e-05,
"loss": 4.1849,
"step": 264
},
{
"epoch": 0.34,
"learning_rate": 1.550016518492325e-05,
"loss": 3.9264,
"step": 265
},
{
"epoch": 0.34,
"learning_rate": 1.5465866895389497e-05,
"loss": 3.8007,
"step": 266
},
{
"epoch": 0.34,
"learning_rate": 1.5431476666670885e-05,
"loss": 3.8248,
"step": 267
},
{
"epoch": 0.34,
"learning_rate": 1.5396995077231856e-05,
"loss": 3.891,
"step": 268
},
{
"epoch": 0.34,
"learning_rate": 1.5362422707073574e-05,
"loss": 3.801,
"step": 269
},
{
"epoch": 0.34,
"learning_rate": 1.5327760137724213e-05,
"loss": 3.9024,
"step": 270
},
{
"epoch": 0.34,
"learning_rate": 1.5293007952229127e-05,
"loss": 3.9204,
"step": 271
},
{
"epoch": 0.34,
"learning_rate": 1.5258166735141094e-05,
"loss": 3.9807,
"step": 272
},
{
"epoch": 0.35,
"learning_rate": 1.5223237072510433e-05,
"loss": 3.906,
"step": 273
},
{
"epoch": 0.35,
"learning_rate": 1.518821955187519e-05,
"loss": 3.8716,
"step": 274
},
{
"epoch": 0.35,
"learning_rate": 1.5153114762251221e-05,
"loss": 3.8384,
"step": 275
},
{
"epoch": 0.35,
"learning_rate": 1.5117923294122312e-05,
"loss": 3.7808,
"step": 276
},
{
"epoch": 0.35,
"learning_rate": 1.5082645739430224e-05,
"loss": 3.9122,
"step": 277
},
{
"epoch": 0.35,
"learning_rate": 1.5047282691564749e-05,
"loss": 3.7987,
"step": 278
},
{
"epoch": 0.35,
"learning_rate": 1.5011834745353725e-05,
"loss": 3.967,
"step": 279
},
{
"epoch": 0.35,
"learning_rate": 1.4976302497053036e-05,
"loss": 3.9335,
"step": 280
},
{
"epoch": 0.36,
"learning_rate": 1.4940686544336571e-05,
"loss": 3.8081,
"step": 281
},
{
"epoch": 0.36,
"learning_rate": 1.4904987486286184e-05,
"loss": 3.9681,
"step": 282
},
{
"epoch": 0.36,
"learning_rate": 1.4869205923381609e-05,
"loss": 3.8072,
"step": 283
},
{
"epoch": 0.36,
"learning_rate": 1.4833342457490363e-05,
"loss": 3.809,
"step": 284
},
{
"epoch": 0.36,
"learning_rate": 1.4797397691857614e-05,
"loss": 3.8076,
"step": 285
},
{
"epoch": 0.36,
"learning_rate": 1.4761372231096047e-05,
"loss": 3.8011,
"step": 286
},
{
"epoch": 0.36,
"learning_rate": 1.472526668117569e-05,
"loss": 3.9507,
"step": 287
},
{
"epoch": 0.36,
"learning_rate": 1.468908164941371e-05,
"loss": 3.8249,
"step": 288
},
{
"epoch": 0.37,
"learning_rate": 1.4652817744464214e-05,
"loss": 3.8342,
"step": 289
},
{
"epoch": 0.37,
"learning_rate": 1.4616475576308005e-05,
"loss": 3.7757,
"step": 290
},
{
"epoch": 0.37,
"learning_rate": 1.4580055756242315e-05,
"loss": 3.9205,
"step": 291
},
{
"epoch": 0.37,
"learning_rate": 1.454355889687053e-05,
"loss": 3.8701,
"step": 292
},
{
"epoch": 0.37,
"learning_rate": 1.4506985612091889e-05,
"loss": 3.8211,
"step": 293
},
{
"epoch": 0.37,
"learning_rate": 1.4470336517091139e-05,
"loss": 3.9225,
"step": 294
},
{
"epoch": 0.37,
"learning_rate": 1.4433612228328215e-05,
"loss": 3.7957,
"step": 295
},
{
"epoch": 0.37,
"learning_rate": 1.439681336352785e-05,
"loss": 3.745,
"step": 296
},
{
"epoch": 0.38,
"learning_rate": 1.435994054166919e-05,
"loss": 3.8174,
"step": 297
},
{
"epoch": 0.38,
"learning_rate": 1.4322994382975386e-05,
"loss": 3.7996,
"step": 298
},
{
"epoch": 0.38,
"learning_rate": 1.428597550890316e-05,
"loss": 3.789,
"step": 299
},
{
"epoch": 0.38,
"learning_rate": 1.4248884542132348e-05,
"loss": 3.9083,
"step": 300
},
{
"epoch": 0.38,
"learning_rate": 1.421172210655543e-05,
"loss": 3.6434,
"step": 301
},
{
"epoch": 0.38,
"learning_rate": 1.4174488827267032e-05,
"loss": 3.6945,
"step": 302
},
{
"epoch": 0.38,
"learning_rate": 1.4137185330553416e-05,
"loss": 3.8261,
"step": 303
},
{
"epoch": 0.38,
"learning_rate": 1.4099812243881947e-05,
"loss": 3.8346,
"step": 304
},
{
"epoch": 0.39,
"learning_rate": 1.406237019589053e-05,
"loss": 3.7792,
"step": 305
},
{
"epoch": 0.39,
"learning_rate": 1.4024859816377046e-05,
"loss": 3.7122,
"step": 306
},
{
"epoch": 0.39,
"learning_rate": 1.3987281736288743e-05,
"loss": 3.7728,
"step": 307
},
{
"epoch": 0.39,
"learning_rate": 1.3949636587711645e-05,
"loss": 3.6981,
"step": 308
},
{
"epoch": 0.39,
"learning_rate": 1.3911925003859907e-05,
"loss": 3.8742,
"step": 309
},
{
"epoch": 0.39,
"learning_rate": 1.3874147619065161e-05,
"loss": 3.7379,
"step": 310
},
{
"epoch": 0.39,
"learning_rate": 1.3836305068765852e-05,
"loss": 3.7293,
"step": 311
},
{
"epoch": 0.39,
"learning_rate": 1.3798397989496549e-05,
"loss": 3.7363,
"step": 312
},
{
"epoch": 0.4,
"learning_rate": 1.3760427018877236e-05,
"loss": 3.6561,
"step": 313
},
{
"epoch": 0.4,
"learning_rate": 1.3722392795602595e-05,
"loss": 3.8963,
"step": 314
},
{
"epoch": 0.4,
"learning_rate": 1.3684295959431241e-05,
"loss": 3.6974,
"step": 315
},
{
"epoch": 0.4,
"learning_rate": 1.3646137151174992e-05,
"loss": 3.9303,
"step": 316
},
{
"epoch": 0.4,
"learning_rate": 1.3607917012688063e-05,
"loss": 3.5819,
"step": 317
},
{
"epoch": 0.4,
"learning_rate": 1.3569636186856288e-05,
"loss": 3.7086,
"step": 318
},
{
"epoch": 0.4,
"learning_rate": 1.3531295317586291e-05,
"loss": 3.6635,
"step": 319
},
{
"epoch": 0.41,
"learning_rate": 1.349289504979467e-05,
"loss": 3.7356,
"step": 320
},
{
"epoch": 0.41,
"learning_rate": 1.3454436029397135e-05,
"loss": 3.8162,
"step": 321
},
{
"epoch": 0.41,
"learning_rate": 1.341591890329766e-05,
"loss": 3.7543,
"step": 322
},
{
"epoch": 0.41,
"learning_rate": 1.3377344319377585e-05,
"loss": 3.627,
"step": 323
},
{
"epoch": 0.41,
"learning_rate": 1.3338712926484725e-05,
"loss": 3.7216,
"step": 324
},
{
"epoch": 0.41,
"learning_rate": 1.3300025374422459e-05,
"loss": 3.6936,
"step": 325
},
{
"epoch": 0.41,
"learning_rate": 1.3261282313938795e-05,
"loss": 3.8305,
"step": 326
},
{
"epoch": 0.41,
"learning_rate": 1.322248439671543e-05,
"loss": 3.6515,
"step": 327
},
{
"epoch": 0.42,
"learning_rate": 1.3183632275356777e-05,
"loss": 3.7233,
"step": 328
},
{
"epoch": 0.42,
"learning_rate": 1.3144726603379008e-05,
"loss": 3.6564,
"step": 329
},
{
"epoch": 0.42,
"learning_rate": 1.3105768035199033e-05,
"loss": 3.8174,
"step": 330
},
{
"epoch": 0.42,
"learning_rate": 1.3066757226123522e-05,
"loss": 3.6796,
"step": 331
},
{
"epoch": 0.42,
"learning_rate": 1.3027694832337858e-05,
"loss": 3.6757,
"step": 332
},
{
"epoch": 0.42,
"learning_rate": 1.2988581510895118e-05,
"loss": 3.7172,
"step": 333
},
{
"epoch": 0.42,
"learning_rate": 1.2949417919705008e-05,
"loss": 3.6418,
"step": 334
},
{
"epoch": 0.42,
"learning_rate": 1.2910204717522805e-05,
"loss": 3.6337,
"step": 335
},
{
"epoch": 0.43,
"learning_rate": 1.2870942563938265e-05,
"loss": 3.7748,
"step": 336
},
{
"epoch": 0.43,
"learning_rate": 1.283163211936455e-05,
"loss": 3.6219,
"step": 337
},
{
"epoch": 0.43,
"learning_rate": 1.279227404502709e-05,
"loss": 3.6606,
"step": 338
},
{
"epoch": 0.43,
"learning_rate": 1.2752869002952492e-05,
"loss": 3.5741,
"step": 339
},
{
"epoch": 0.43,
"learning_rate": 1.2713417655957377e-05,
"loss": 3.6364,
"step": 340
},
{
"epoch": 0.43,
"learning_rate": 1.2673920667637244e-05,
"loss": 3.7044,
"step": 341
},
{
"epoch": 0.43,
"learning_rate": 1.2634378702355314e-05,
"loss": 3.7507,
"step": 342
},
{
"epoch": 0.43,
"learning_rate": 1.2594792425231339e-05,
"loss": 3.695,
"step": 343
},
{
"epoch": 0.44,
"learning_rate": 1.2555162502130434e-05,
"loss": 3.6151,
"step": 344
},
{
"epoch": 0.44,
"learning_rate": 1.251548959965185e-05,
"loss": 3.6708,
"step": 345
},
{
"epoch": 0.44,
"learning_rate": 1.2475774385117787e-05,
"loss": 3.5673,
"step": 346
},
{
"epoch": 0.44,
"learning_rate": 1.243601752656216e-05,
"loss": 3.5632,
"step": 347
},
{
"epoch": 0.44,
"learning_rate": 1.2396219692719364e-05,
"loss": 3.6272,
"step": 348
},
{
"epoch": 0.44,
"learning_rate": 1.2356381553013014e-05,
"loss": 3.6401,
"step": 349
},
{
"epoch": 0.44,
"learning_rate": 1.23165037775447e-05,
"loss": 3.6209,
"step": 350
},
{
"epoch": 0.44,
"learning_rate": 1.2276587037082706e-05,
"loss": 3.6672,
"step": 351
},
{
"epoch": 0.45,
"learning_rate": 1.2236632003050736e-05,
"loss": 3.7134,
"step": 352
},
{
"epoch": 0.45,
"learning_rate": 1.2196639347516613e-05,
"loss": 3.7017,
"step": 353
},
{
"epoch": 0.45,
"learning_rate": 1.215660974318097e-05,
"loss": 3.7002,
"step": 354
},
{
"epoch": 0.45,
"learning_rate": 1.211654386336595e-05,
"loss": 3.7195,
"step": 355
},
{
"epoch": 0.45,
"learning_rate": 1.207644238200387e-05,
"loss": 3.6528,
"step": 356
},
{
"epoch": 0.45,
"learning_rate": 1.2036305973625881e-05,
"loss": 3.5246,
"step": 357
},
{
"epoch": 0.45,
"learning_rate": 1.1996135313350636e-05,
"loss": 3.5674,
"step": 358
},
{
"epoch": 0.45,
"learning_rate": 1.1955931076872916e-05,
"loss": 3.6463,
"step": 359
},
{
"epoch": 0.46,
"learning_rate": 1.191569394045228e-05,
"loss": 3.543,
"step": 360
},
{
"epoch": 0.46,
"learning_rate": 1.1875424580901684e-05,
"loss": 3.6849,
"step": 361
},
{
"epoch": 0.46,
"learning_rate": 1.1835123675576092e-05,
"loss": 3.6798,
"step": 362
},
{
"epoch": 0.46,
"learning_rate": 1.1794791902361093e-05,
"loss": 3.639,
"step": 363
},
{
"epoch": 0.46,
"learning_rate": 1.1754429939661492e-05,
"loss": 3.6447,
"step": 364
},
{
"epoch": 0.46,
"learning_rate": 1.1714038466389892e-05,
"loss": 3.6637,
"step": 365
},
{
"epoch": 0.46,
"learning_rate": 1.1673618161955288e-05,
"loss": 3.6719,
"step": 366
},
{
"epoch": 0.46,
"learning_rate": 1.1633169706251637e-05,
"loss": 3.7565,
"step": 367
},
{
"epoch": 0.47,
"learning_rate": 1.1592693779646405e-05,
"loss": 3.645,
"step": 368
},
{
"epoch": 0.47,
"learning_rate": 1.1552191062969147e-05,
"loss": 3.5035,
"step": 369
},
{
"epoch": 0.47,
"learning_rate": 1.1511662237500032e-05,
"loss": 3.6058,
"step": 370
},
{
"epoch": 0.47,
"learning_rate": 1.1471107984958405e-05,
"loss": 3.5748,
"step": 371
},
{
"epoch": 0.47,
"learning_rate": 1.1430528987491305e-05,
"loss": 3.4794,
"step": 372
},
{
"epoch": 0.47,
"learning_rate": 1.1389925927661996e-05,
"loss": 3.6624,
"step": 373
},
{
"epoch": 0.47,
"learning_rate": 1.1349299488438485e-05,
"loss": 3.5667,
"step": 374
},
{
"epoch": 0.47,
"learning_rate": 1.1308650353182036e-05,
"loss": 3.5315,
"step": 375
},
{
"epoch": 0.48,
"learning_rate": 1.1267979205635675e-05,
"loss": 3.5055,
"step": 376
},
{
"epoch": 0.48,
"learning_rate": 1.1227286729912684e-05,
"loss": 3.5781,
"step": 377
},
{
"epoch": 0.48,
"learning_rate": 1.1186573610485099e-05,
"loss": 3.4713,
"step": 378
},
{
"epoch": 0.48,
"learning_rate": 1.1145840532172197e-05,
"loss": 3.5313,
"step": 379
},
{
"epoch": 0.48,
"learning_rate": 1.1105088180128975e-05,
"loss": 3.5475,
"step": 380
},
{
"epoch": 0.48,
"learning_rate": 1.1064317239834628e-05,
"loss": 3.5527,
"step": 381
},
{
"epoch": 0.48,
"learning_rate": 1.1023528397081011e-05,
"loss": 3.5274,
"step": 382
},
{
"epoch": 0.48,
"learning_rate": 1.0982722337961116e-05,
"loss": 3.5427,
"step": 383
},
{
"epoch": 0.49,
"learning_rate": 1.094189974885752e-05,
"loss": 3.4859,
"step": 384
},
{
"epoch": 0.49,
"learning_rate": 1.0901061316430848e-05,
"loss": 3.5526,
"step": 385
},
{
"epoch": 0.49,
"learning_rate": 1.0860207727608214e-05,
"loss": 3.544,
"step": 386
},
{
"epoch": 0.49,
"learning_rate": 1.0819339669571674e-05,
"loss": 3.525,
"step": 387
},
{
"epoch": 0.49,
"learning_rate": 1.0778457829746668e-05,
"loss": 3.6301,
"step": 388
},
{
"epoch": 0.49,
"learning_rate": 1.0737562895790447e-05,
"loss": 3.607,
"step": 389
},
{
"epoch": 0.49,
"learning_rate": 1.0696655555580524e-05,
"loss": 3.5375,
"step": 390
},
{
"epoch": 0.49,
"learning_rate": 1.0655736497203084e-05,
"loss": 3.4769,
"step": 391
},
{
"epoch": 0.5,
"learning_rate": 1.0614806408941422e-05,
"loss": 3.6209,
"step": 392
},
{
"epoch": 0.5,
"learning_rate": 1.0573865979264362e-05,
"loss": 3.5285,
"step": 393
},
{
"epoch": 0.5,
"learning_rate": 1.0532915896814673e-05,
"loss": 3.4459,
"step": 394
},
{
"epoch": 0.5,
"learning_rate": 1.0491956850397496e-05,
"loss": 3.5381,
"step": 395
},
{
"epoch": 0.5,
"learning_rate": 1.0450989528968747e-05,
"loss": 3.6046,
"step": 396
},
{
"epoch": 0.5,
"learning_rate": 1.0410014621623531e-05,
"loss": 3.5079,
"step": 397
},
{
"epoch": 0.5,
"learning_rate": 1.036903281758456e-05,
"loss": 3.4709,
"step": 398
},
{
"epoch": 0.51,
"learning_rate": 1.0328044806190549e-05,
"loss": 3.5259,
"step": 399
},
{
"epoch": 0.51,
"learning_rate": 1.028705127688462e-05,
"loss": 3.615,
"step": 400
},
{
"epoch": 0.51,
"learning_rate": 1.0246052919202713e-05,
"loss": 3.6054,
"step": 401
},
{
"epoch": 0.51,
"learning_rate": 1.0205050422761989e-05,
"loss": 3.5232,
"step": 402
},
{
"epoch": 0.51,
"learning_rate": 1.0164044477249215e-05,
"loss": 3.5266,
"step": 403
},
{
"epoch": 0.51,
"learning_rate": 1.0123035772409184e-05,
"loss": 3.4681,
"step": 404
},
{
"epoch": 0.51,
"learning_rate": 1.0082024998033092e-05,
"loss": 3.4198,
"step": 405
},
{
"epoch": 0.51,
"learning_rate": 1.004101284394696e-05,
"loss": 3.4836,
"step": 406
},
{
"epoch": 0.52,
"learning_rate": 1e-05,
"loss": 3.3732,
"step": 407
},
{
"epoch": 0.52,
"learning_rate": 9.958987156053046e-06,
"loss": 3.631,
"step": 408
},
{
"epoch": 0.52,
"learning_rate": 9.91797500196691e-06,
"loss": 3.3575,
"step": 409
},
{
"epoch": 0.52,
"learning_rate": 9.876964227590821e-06,
"loss": 3.5228,
"step": 410
},
{
"epoch": 0.52,
"learning_rate": 9.835955522750789e-06,
"loss": 3.4126,
"step": 411
},
{
"epoch": 0.52,
"learning_rate": 9.794949577238014e-06,
"loss": 3.5752,
"step": 412
},
{
"epoch": 0.52,
"learning_rate": 9.753947080797289e-06,
"loss": 3.6233,
"step": 413
},
{
"epoch": 0.52,
"learning_rate": 9.712948723115384e-06,
"loss": 3.5602,
"step": 414
},
{
"epoch": 0.53,
"learning_rate": 9.671955193809453e-06,
"loss": 3.3987,
"step": 415
},
{
"epoch": 0.53,
"learning_rate": 9.630967182415441e-06,
"loss": 3.5806,
"step": 416
},
{
"epoch": 0.53,
"learning_rate": 9.589985378376474e-06,
"loss": 3.5712,
"step": 417
},
{
"epoch": 0.53,
"learning_rate": 9.549010471031256e-06,
"loss": 3.5768,
"step": 418
},
{
"epoch": 0.53,
"learning_rate": 9.508043149602509e-06,
"loss": 3.5503,
"step": 419
},
{
"epoch": 0.53,
"learning_rate": 9.46708410318533e-06,
"loss": 3.4928,
"step": 420
},
{
"epoch": 0.53,
"learning_rate": 9.426134020735642e-06,
"loss": 3.5639,
"step": 421
},
{
"epoch": 0.53,
"learning_rate": 9.38519359105858e-06,
"loss": 3.4968,
"step": 422
},
{
"epoch": 0.54,
"learning_rate": 9.344263502796918e-06,
"loss": 3.5206,
"step": 423
},
{
"epoch": 0.54,
"learning_rate": 9.303344444419476e-06,
"loss": 3.5223,
"step": 424
},
{
"epoch": 0.54,
"learning_rate": 9.262437104209555e-06,
"loss": 3.5115,
"step": 425
},
{
"epoch": 0.54,
"learning_rate": 9.221542170253339e-06,
"loss": 3.5129,
"step": 426
},
{
"epoch": 0.54,
"learning_rate": 9.18066033042833e-06,
"loss": 3.4762,
"step": 427
},
{
"epoch": 0.54,
"learning_rate": 9.139792272391791e-06,
"loss": 3.4295,
"step": 428
},
{
"epoch": 0.54,
"learning_rate": 9.098938683569155e-06,
"loss": 3.402,
"step": 429
},
{
"epoch": 0.54,
"learning_rate": 9.058100251142483e-06,
"loss": 3.4283,
"step": 430
},
{
"epoch": 0.55,
"learning_rate": 9.017277662038884e-06,
"loss": 3.5585,
"step": 431
},
{
"epoch": 0.55,
"learning_rate": 8.97647160291899e-06,
"loss": 3.4686,
"step": 432
},
{
"epoch": 0.55,
"learning_rate": 8.935682760165377e-06,
"loss": 3.3924,
"step": 433
},
{
"epoch": 0.55,
"learning_rate": 8.894911819871027e-06,
"loss": 3.5207,
"step": 434
},
{
"epoch": 0.55,
"learning_rate": 8.854159467827808e-06,
"loss": 3.4214,
"step": 435
},
{
"epoch": 0.55,
"learning_rate": 8.813426389514903e-06,
"loss": 3.5747,
"step": 436
},
{
"epoch": 0.55,
"learning_rate": 8.77271327008732e-06,
"loss": 3.4337,
"step": 437
},
{
"epoch": 0.55,
"learning_rate": 8.732020794364327e-06,
"loss": 3.4204,
"step": 438
},
{
"epoch": 0.56,
"learning_rate": 8.691349646817965e-06,
"loss": 3.4279,
"step": 439
},
{
"epoch": 0.56,
"learning_rate": 8.650700511561515e-06,
"loss": 3.4796,
"step": 440
},
{
"epoch": 0.56,
"learning_rate": 8.610074072338006e-06,
"loss": 3.3757,
"step": 441
},
{
"epoch": 0.56,
"learning_rate": 8.5694710125087e-06,
"loss": 3.4833,
"step": 442
},
{
"epoch": 0.56,
"learning_rate": 8.528892015041598e-06,
"loss": 3.6173,
"step": 443
},
{
"epoch": 0.56,
"learning_rate": 8.488337762499971e-06,
"loss": 3.3047,
"step": 444
},
{
"epoch": 0.56,
"learning_rate": 8.447808937030856e-06,
"loss": 3.5363,
"step": 445
},
{
"epoch": 0.56,
"learning_rate": 8.407306220353597e-06,
"loss": 3.4376,
"step": 446
},
{
"epoch": 0.57,
"learning_rate": 8.366830293748364e-06,
"loss": 3.4486,
"step": 447
},
{
"epoch": 0.57,
"learning_rate": 8.326381838044713e-06,
"loss": 3.399,
"step": 448
},
{
"epoch": 0.57,
"learning_rate": 8.28596153361011e-06,
"loss": 3.4425,
"step": 449
},
{
"epoch": 0.57,
"learning_rate": 8.245570060338511e-06,
"loss": 3.4965,
"step": 450
},
{
"epoch": 0.57,
"learning_rate": 8.20520809763891e-06,
"loss": 3.4606,
"step": 451
},
{
"epoch": 0.57,
"learning_rate": 8.16487632442391e-06,
"loss": 3.3802,
"step": 452
},
{
"epoch": 0.57,
"learning_rate": 8.124575419098321e-06,
"loss": 3.4718,
"step": 453
},
{
"epoch": 0.57,
"learning_rate": 8.084306059547722e-06,
"loss": 3.4686,
"step": 454
},
{
"epoch": 0.58,
"learning_rate": 8.044068923127088e-06,
"loss": 3.4968,
"step": 455
},
{
"epoch": 0.58,
"learning_rate": 8.003864686649366e-06,
"loss": 3.3585,
"step": 456
},
{
"epoch": 0.58,
"learning_rate": 7.96369402637412e-06,
"loss": 3.4358,
"step": 457
},
{
"epoch": 0.58,
"learning_rate": 7.923557617996132e-06,
"loss": 3.3583,
"step": 458
},
{
"epoch": 0.58,
"learning_rate": 7.883456136634053e-06,
"loss": 3.4183,
"step": 459
},
{
"epoch": 0.58,
"learning_rate": 7.843390256819034e-06,
"loss": 3.4337,
"step": 460
},
{
"epoch": 0.58,
"learning_rate": 7.803360652483392e-06,
"loss": 3.4407,
"step": 461
},
{
"epoch": 0.58,
"learning_rate": 7.763367996949267e-06,
"loss": 3.2601,
"step": 462
},
{
"epoch": 0.59,
"learning_rate": 7.723412962917294e-06,
"loss": 3.2422,
"step": 463
},
{
"epoch": 0.59,
"learning_rate": 7.683496222455303e-06,
"loss": 3.4645,
"step": 464
},
{
"epoch": 0.59,
"learning_rate": 7.643618446986988e-06,
"loss": 3.5709,
"step": 465
},
{
"epoch": 0.59,
"learning_rate": 7.6037803072806396e-06,
"loss": 3.2944,
"step": 466
},
{
"epoch": 0.59,
"learning_rate": 7.563982473437843e-06,
"loss": 3.4792,
"step": 467
},
{
"epoch": 0.59,
"learning_rate": 7.524225614882216e-06,
"loss": 3.3571,
"step": 468
},
{
"epoch": 0.59,
"learning_rate": 7.4845104003481565e-06,
"loss": 3.3765,
"step": 469
},
{
"epoch": 0.59,
"learning_rate": 7.444837497869569e-06,
"loss": 3.4416,
"step": 470
},
{
"epoch": 0.6,
"learning_rate": 7.4052075747686625e-06,
"loss": 3.357,
"step": 471
},
{
"epoch": 0.6,
"learning_rate": 7.365621297644686e-06,
"loss": 3.5674,
"step": 472
},
{
"epoch": 0.6,
"learning_rate": 7.326079332362756e-06,
"loss": 3.2106,
"step": 473
},
{
"epoch": 0.6,
"learning_rate": 7.286582344042625e-06,
"loss": 3.3728,
"step": 474
},
{
"epoch": 0.6,
"learning_rate": 7.247130997047511e-06,
"loss": 3.4861,
"step": 475
},
{
"epoch": 0.6,
"learning_rate": 7.207725954972913e-06,
"loss": 3.4695,
"step": 476
},
{
"epoch": 0.6,
"learning_rate": 7.168367880635454e-06,
"loss": 3.3745,
"step": 477
},
{
"epoch": 0.61,
"learning_rate": 7.12905743606174e-06,
"loss": 3.366,
"step": 478
},
{
"epoch": 0.61,
"learning_rate": 7.089795282477199e-06,
"loss": 3.4958,
"step": 479
},
{
"epoch": 0.61,
"learning_rate": 7.050582080294996e-06,
"loss": 3.2569,
"step": 480
},
{
"epoch": 0.61,
"learning_rate": 7.011418489104883e-06,
"loss": 3.4334,
"step": 481
},
{
"epoch": 0.61,
"learning_rate": 6.972305167662144e-06,
"loss": 3.491,
"step": 482
},
{
"epoch": 0.61,
"learning_rate": 6.933242773876481e-06,
"loss": 3.3694,
"step": 483
},
{
"epoch": 0.61,
"learning_rate": 6.89423196480097e-06,
"loss": 3.5006,
"step": 484
},
{
"epoch": 0.61,
"learning_rate": 6.855273396620999e-06,
"loss": 3.3757,
"step": 485
},
{
"epoch": 0.62,
"learning_rate": 6.816367724643225e-06,
"loss": 3.4249,
"step": 486
},
{
"epoch": 0.62,
"learning_rate": 6.777515603284575e-06,
"loss": 3.3526,
"step": 487
},
{
"epoch": 0.62,
"learning_rate": 6.738717686061207e-06,
"loss": 3.4817,
"step": 488
},
{
"epoch": 0.62,
"learning_rate": 6.699974625577545e-06,
"loss": 3.455,
"step": 489
},
{
"epoch": 0.62,
"learning_rate": 6.661287073515276e-06,
"loss": 3.3259,
"step": 490
},
{
"epoch": 0.62,
"learning_rate": 6.622655680622416e-06,
"loss": 3.2455,
"step": 491
},
{
"epoch": 0.62,
"learning_rate": 6.5840810967023405e-06,
"loss": 3.2447,
"step": 492
},
{
"epoch": 0.62,
"learning_rate": 6.545563970602867e-06,
"loss": 3.2857,
"step": 493
},
{
"epoch": 0.63,
"learning_rate": 6.507104950205336e-06,
"loss": 3.3836,
"step": 494
},
{
"epoch": 0.63,
"learning_rate": 6.4687046824137115e-06,
"loss": 3.4351,
"step": 495
},
{
"epoch": 0.63,
"learning_rate": 6.430363813143716e-06,
"loss": 3.3191,
"step": 496
},
{
"epoch": 0.63,
"learning_rate": 6.392082987311938e-06,
"loss": 3.496,
"step": 497
},
{
"epoch": 0.63,
"learning_rate": 6.353862848825011e-06,
"loss": 3.2581,
"step": 498
},
{
"epoch": 0.63,
"learning_rate": 6.315704040568761e-06,
"loss": 3.3718,
"step": 499
},
{
"epoch": 0.63,
"learning_rate": 6.277607204397409e-06,
"loss": 3.445,
"step": 500
},
{
"epoch": 0.63,
"learning_rate": 6.239572981122766e-06,
"loss": 3.2989,
"step": 501
},
{
"epoch": 0.64,
"learning_rate": 6.201602010503454e-06,
"loss": 3.2606,
"step": 502
},
{
"epoch": 0.64,
"learning_rate": 6.163694931234153e-06,
"loss": 3.3261,
"step": 503
},
{
"epoch": 0.64,
"learning_rate": 6.125852380934841e-06,
"loss": 3.4656,
"step": 504
},
{
"epoch": 0.64,
"learning_rate": 6.088074996140096e-06,
"loss": 3.3822,
"step": 505
},
{
"epoch": 0.64,
"learning_rate": 6.050363412288356e-06,
"loss": 3.3442,
"step": 506
},
{
"epoch": 0.64,
"learning_rate": 6.012718263711261e-06,
"loss": 3.3625,
"step": 507
},
{
"epoch": 0.64,
"learning_rate": 5.975140183622958e-06,
"loss": 3.3852,
"step": 508
},
{
"epoch": 0.64,
"learning_rate": 5.93762980410947e-06,
"loss": 3.4399,
"step": 509
},
{
"epoch": 0.65,
"learning_rate": 5.900187756118055e-06,
"loss": 3.1638,
"step": 510
},
{
"epoch": 0.65,
"learning_rate": 5.862814669446586e-06,
"loss": 3.2364,
"step": 511
},
{
"epoch": 0.65,
"learning_rate": 5.825511172732971e-06,
"loss": 3.2059,
"step": 512
},
{
"epoch": 0.65,
"learning_rate": 5.788277893444574e-06,
"loss": 3.239,
"step": 513
},
{
"epoch": 0.65,
"learning_rate": 5.7511154578676535e-06,
"loss": 3.3294,
"step": 514
},
{
"epoch": 0.65,
"learning_rate": 5.714024491096841e-06,
"loss": 3.2771,
"step": 515
},
{
"epoch": 0.65,
"learning_rate": 5.677005617024618e-06,
"loss": 3.4053,
"step": 516
},
{
"epoch": 0.65,
"learning_rate": 5.640059458330811e-06,
"loss": 3.25,
"step": 517
},
{
"epoch": 0.66,
"learning_rate": 5.603186636472156e-06,
"loss": 3.2526,
"step": 518
},
{
"epoch": 0.66,
"learning_rate": 5.566387771671788e-06,
"loss": 3.4599,
"step": 519
},
{
"epoch": 0.66,
"learning_rate": 5.529663482908864e-06,
"loss": 3.3457,
"step": 520
},
{
"epoch": 0.66,
"learning_rate": 5.4930143879081146e-06,
"loss": 3.2894,
"step": 521
},
{
"epoch": 0.66,
"learning_rate": 5.4564411031294695e-06,
"loss": 3.3749,
"step": 522
},
{
"epoch": 0.66,
"learning_rate": 5.419944243757685e-06,
"loss": 3.3198,
"step": 523
},
{
"epoch": 0.66,
"learning_rate": 5.383524423691995e-06,
"loss": 3.3991,
"step": 524
},
{
"epoch": 0.66,
"learning_rate": 5.34718225553579e-06,
"loss": 3.429,
"step": 525
},
{
"epoch": 0.67,
"learning_rate": 5.310918350586292e-06,
"loss": 3.3088,
"step": 526
},
{
"epoch": 0.67,
"learning_rate": 5.274733318824317e-06,
"loss": 3.2755,
"step": 527
},
{
"epoch": 0.67,
"learning_rate": 5.238627768903957e-06,
"loss": 3.1154,
"step": 528
},
{
"epoch": 0.67,
"learning_rate": 5.202602308142391e-06,
"loss": 3.1803,
"step": 529
},
{
"epoch": 0.67,
"learning_rate": 5.16665754250964e-06,
"loss": 3.2609,
"step": 530
},
{
"epoch": 0.67,
"learning_rate": 5.130794076618391e-06,
"loss": 3.1746,
"step": 531
},
{
"epoch": 0.67,
"learning_rate": 5.095012513713815e-06,
"loss": 3.2586,
"step": 532
},
{
"epoch": 0.67,
"learning_rate": 5.059313455663429e-06,
"loss": 3.3339,
"step": 533
},
{
"epoch": 0.68,
"learning_rate": 5.02369750294697e-06,
"loss": 3.178,
"step": 534
},
{
"epoch": 0.68,
"learning_rate": 4.988165254646278e-06,
"loss": 3.3395,
"step": 535
},
{
"epoch": 0.68,
"learning_rate": 4.952717308435254e-06,
"loss": 3.3983,
"step": 536
},
{
"epoch": 0.68,
"learning_rate": 4.917354260569779e-06,
"loss": 3.1509,
"step": 537
},
{
"epoch": 0.68,
"learning_rate": 4.8820767058776895e-06,
"loss": 3.3076,
"step": 538
},
{
"epoch": 0.68,
"learning_rate": 4.846885237748779e-06,
"loss": 3.2966,
"step": 539
},
{
"epoch": 0.68,
"learning_rate": 4.811780448124812e-06,
"loss": 3.4145,
"step": 540
},
{
"epoch": 0.68,
"learning_rate": 4.776762927489568e-06,
"loss": 3.2694,
"step": 541
},
{
"epoch": 0.69,
"learning_rate": 4.74183326485891e-06,
"loss": 3.2737,
"step": 542
},
{
"epoch": 0.69,
"learning_rate": 4.706992047770877e-06,
"loss": 3.2447,
"step": 543
},
{
"epoch": 0.69,
"learning_rate": 4.672239862275794e-06,
"loss": 3.416,
"step": 544
},
{
"epoch": 0.69,
"learning_rate": 4.637577292926428e-06,
"loss": 3.3194,
"step": 545
},
{
"epoch": 0.69,
"learning_rate": 4.6030049227681484e-06,
"loss": 3.3098,
"step": 546
},
{
"epoch": 0.69,
"learning_rate": 4.568523333329116e-06,
"loss": 3.2507,
"step": 547
},
{
"epoch": 0.69,
"learning_rate": 4.534133104610507e-06,
"loss": 3.2184,
"step": 548
},
{
"epoch": 0.69,
"learning_rate": 4.4998348150767525e-06,
"loss": 3.3042,
"step": 549
},
{
"epoch": 0.7,
"learning_rate": 4.465629041645819e-06,
"loss": 3.2976,
"step": 550
},
{
"epoch": 0.7,
"learning_rate": 4.43151635967948e-06,
"loss": 3.2555,
"step": 551
},
{
"epoch": 0.7,
"learning_rate": 4.397497342973677e-06,
"loss": 3.2512,
"step": 552
},
{
"epoch": 0.7,
"learning_rate": 4.363572563748823e-06,
"loss": 3.2136,
"step": 553
},
{
"epoch": 0.7,
"learning_rate": 4.329742592640212e-06,
"loss": 3.4037,
"step": 554
},
{
"epoch": 0.7,
"learning_rate": 4.296007998688405e-06,
"loss": 3.2952,
"step": 555
},
{
"epoch": 0.7,
"learning_rate": 4.262369349329665e-06,
"loss": 3.3537,
"step": 556
},
{
"epoch": 0.71,
"learning_rate": 4.228827210386404e-06,
"loss": 3.1924,
"step": 557
},
{
"epoch": 0.71,
"learning_rate": 4.195382146057672e-06,
"loss": 3.2783,
"step": 558
},
{
"epoch": 0.71,
"learning_rate": 4.162034718909671e-06,
"loss": 3.263,
"step": 559
},
{
"epoch": 0.71,
"learning_rate": 4.12878548986627e-06,
"loss": 3.3612,
"step": 560
},
{
"epoch": 0.71,
"learning_rate": 4.095635018199612e-06,
"loss": 3.2958,
"step": 561
},
{
"epoch": 0.71,
"learning_rate": 4.062583861520657e-06,
"loss": 3.0953,
"step": 562
},
{
"epoch": 0.71,
"learning_rate": 4.029632575769841e-06,
"loss": 3.1859,
"step": 563
},
{
"epoch": 0.71,
"learning_rate": 3.996781715207706e-06,
"loss": 3.1945,
"step": 564
},
{
"epoch": 0.72,
"learning_rate": 3.964031832405581e-06,
"loss": 3.1413,
"step": 565
},
{
"epoch": 0.72,
"learning_rate": 3.931383478236292e-06,
"loss": 3.3722,
"step": 566
},
{
"epoch": 0.72,
"learning_rate": 3.8988372018648905e-06,
"loss": 3.2463,
"step": 567
},
{
"epoch": 0.72,
"learning_rate": 3.866393550739415e-06,
"loss": 3.221,
"step": 568
},
{
"epoch": 0.72,
"learning_rate": 3.834053070581685e-06,
"loss": 3.3415,
"step": 569
},
{
"epoch": 0.72,
"learning_rate": 3.8018163053781243e-06,
"loss": 3.2295,
"step": 570
},
{
"epoch": 0.72,
"learning_rate": 3.769683797370609e-06,
"loss": 3.2582,
"step": 571
},
{
"epoch": 0.72,
"learning_rate": 3.7376560870473465e-06,
"loss": 3.2662,
"step": 572
},
{
"epoch": 0.73,
"learning_rate": 3.7057337131337822e-06,
"loss": 3.2425,
"step": 573
},
{
"epoch": 0.73,
"learning_rate": 3.6739172125835386e-06,
"loss": 3.3492,
"step": 574
},
{
"epoch": 0.73,
"learning_rate": 3.6422071205693866e-06,
"loss": 3.2789,
"step": 575
},
{
"epoch": 0.73,
"learning_rate": 3.610603970474239e-06,
"loss": 3.1523,
"step": 576
},
{
"epoch": 0.73,
"learning_rate": 3.5791082938821874e-06,
"loss": 3.3764,
"step": 577
},
{
"epoch": 0.73,
"learning_rate": 3.5477206205695392e-06,
"loss": 3.2249,
"step": 578
},
{
"epoch": 0.73,
"learning_rate": 3.5164414784959368e-06,
"loss": 3.1767,
"step": 579
},
{
"epoch": 0.73,
"learning_rate": 3.485271393795453e-06,
"loss": 3.1767,
"step": 580
},
{
"epoch": 0.74,
"learning_rate": 3.454210890767755e-06,
"loss": 3.296,
"step": 581
},
{
"epoch": 0.74,
"learning_rate": 3.423260491869276e-06,
"loss": 3.3156,
"step": 582
},
{
"epoch": 0.74,
"learning_rate": 3.392420717704431e-06,
"loss": 3.3446,
"step": 583
},
{
"epoch": 0.74,
"learning_rate": 3.3616920870168633e-06,
"loss": 3.3076,
"step": 584
},
{
"epoch": 0.74,
"learning_rate": 3.331075116680715e-06,
"loss": 3.3178,
"step": 585
},
{
"epoch": 0.74,
"learning_rate": 3.3005703216919336e-06,
"loss": 3.158,
"step": 586
},
{
"epoch": 0.74,
"learning_rate": 3.2701782151596038e-06,
"loss": 3.2794,
"step": 587
},
{
"epoch": 0.74,
"learning_rate": 3.2398993082973294e-06,
"loss": 3.2356,
"step": 588
},
{
"epoch": 0.75,
"learning_rate": 3.209734110414625e-06,
"loss": 3.2024,
"step": 589
},
{
"epoch": 0.75,
"learning_rate": 3.179683128908352e-06,
"loss": 3.3495,
"step": 590
},
{
"epoch": 0.75,
"learning_rate": 3.1497468692541812e-06,
"loss": 3.2733,
"step": 591
},
{
"epoch": 0.75,
"learning_rate": 3.1199258349980965e-06,
"loss": 3.2758,
"step": 592
},
{
"epoch": 0.75,
"learning_rate": 3.090220527747916e-06,
"loss": 3.1778,
"step": 593
},
{
"epoch": 0.75,
"learning_rate": 3.0606314471648646e-06,
"loss": 3.2262,
"step": 594
},
{
"epoch": 0.75,
"learning_rate": 3.0311590909551626e-06,
"loss": 3.3652,
"step": 595
},
{
"epoch": 0.75,
"learning_rate": 3.0018039548616497e-06,
"loss": 3.341,
"step": 596
},
{
"epoch": 0.76,
"learning_rate": 2.972566532655462e-06,
"loss": 3.1817,
"step": 597
},
{
"epoch": 0.76,
"learning_rate": 2.943447316127712e-06,
"loss": 3.386,
"step": 598
},
{
"epoch": 0.76,
"learning_rate": 2.9144467950812203e-06,
"loss": 3.138,
"step": 599
},
{
"epoch": 0.76,
"learning_rate": 2.8855654573222824e-06,
"loss": 3.2193,
"step": 600
},
{
"epoch": 0.76,
"learning_rate": 2.8568037886524548e-06,
"loss": 3.3578,
"step": 601
},
{
"epoch": 0.76,
"learning_rate": 2.8281622728603862e-06,
"loss": 3.2952,
"step": 602
},
{
"epoch": 0.76,
"learning_rate": 2.79964139171369e-06,
"loss": 3.2715,
"step": 603
},
{
"epoch": 0.76,
"learning_rate": 2.771241624950818e-06,
"loss": 3.2274,
"step": 604
},
{
"epoch": 0.77,
"learning_rate": 2.742963450273016e-06,
"loss": 3.1811,
"step": 605
},
{
"epoch": 0.77,
"learning_rate": 2.7148073433362732e-06,
"loss": 3.3272,
"step": 606
},
{
"epoch": 0.77,
"learning_rate": 2.6867737777433276e-06,
"loss": 3.2594,
"step": 607
},
{
"epoch": 0.77,
"learning_rate": 2.658863225035695e-06,
"loss": 3.3312,
"step": 608
},
{
"epoch": 0.77,
"learning_rate": 2.6310761546857433e-06,
"loss": 3.3856,
"step": 609
},
{
"epoch": 0.77,
"learning_rate": 2.60341303408879e-06,
"loss": 3.4417,
"step": 610
},
{
"epoch": 0.77,
"learning_rate": 2.5758743285552435e-06,
"loss": 3.4408,
"step": 611
},
{
"epoch": 0.77,
"learning_rate": 2.5484605013027787e-06,
"loss": 3.147,
"step": 612
},
{
"epoch": 0.78,
"learning_rate": 2.521172013448534e-06,
"loss": 3.1479,
"step": 613
},
{
"epoch": 0.78,
"learning_rate": 2.494009324001372e-06,
"loss": 3.1448,
"step": 614
},
{
"epoch": 0.78,
"learning_rate": 2.4669728898541456e-06,
"loss": 3.0974,
"step": 615
},
{
"epoch": 0.78,
"learning_rate": 2.440063165776019e-06,
"loss": 3.1905,
"step": 616
},
{
"epoch": 0.78,
"learning_rate": 2.4132806044048163e-06,
"loss": 3.4658,
"step": 617
},
{
"epoch": 0.78,
"learning_rate": 2.3866256562394084e-06,
"loss": 3.2968,
"step": 618
},
{
"epoch": 0.78,
"learning_rate": 2.3600987696321332e-06,
"loss": 3.3054,
"step": 619
},
{
"epoch": 0.78,
"learning_rate": 2.333700390781256e-06,
"loss": 3.0319,
"step": 620
},
{
"epoch": 0.79,
"learning_rate": 2.3074309637234702e-06,
"loss": 3.2635,
"step": 621
},
{
"epoch": 0.79,
"learning_rate": 2.2812909303264084e-06,
"loss": 3.2026,
"step": 622
},
{
"epoch": 0.79,
"learning_rate": 2.2552807302812353e-06,
"loss": 3.2623,
"step": 623
},
{
"epoch": 0.79,
"learning_rate": 2.2294008010952383e-06,
"loss": 3.2525,
"step": 624
},
{
"epoch": 0.79,
"learning_rate": 2.203651578084467e-06,
"loss": 3.2043,
"step": 625
},
{
"epoch": 0.79,
"learning_rate": 2.178033494366416e-06,
"loss": 3.1327,
"step": 626
},
{
"epoch": 0.79,
"learning_rate": 2.1525469808527376e-06,
"loss": 3.1885,
"step": 627
},
{
"epoch": 0.79,
"learning_rate": 2.127192466241994e-06,
"loss": 3.2633,
"step": 628
},
{
"epoch": 0.8,
"learning_rate": 2.1019703770124454e-06,
"loss": 3.124,
"step": 629
},
{
"epoch": 0.8,
"learning_rate": 2.07688113741488e-06,
"loss": 3.3028,
"step": 630
},
{
"epoch": 0.8,
"learning_rate": 2.0519251694654674e-06,
"loss": 3.1655,
"step": 631
},
{
"epoch": 0.8,
"learning_rate": 2.027102892938674e-06,
"loss": 3.4117,
"step": 632
},
{
"epoch": 0.8,
"learning_rate": 2.0024147253601957e-06,
"loss": 3.1954,
"step": 633
},
{
"epoch": 0.8,
"learning_rate": 1.977861081999931e-06,
"loss": 3.1659,
"step": 634
},
{
"epoch": 0.8,
"learning_rate": 1.9534423758650043e-06,
"loss": 3.1071,
"step": 635
},
{
"epoch": 0.81,
"learning_rate": 1.92915901769281e-06,
"loss": 3.2877,
"step": 636
},
{
"epoch": 0.81,
"learning_rate": 1.9050114159441135e-06,
"loss": 3.3129,
"step": 637
},
{
"epoch": 0.81,
"learning_rate": 1.880999976796164e-06,
"loss": 3.3743,
"step": 638
},
{
"epoch": 0.81,
"learning_rate": 1.8571251041358895e-06,
"loss": 3.129,
"step": 639
},
{
"epoch": 0.81,
"learning_rate": 1.8333871995530728e-06,
"loss": 3.2712,
"step": 640
},
{
"epoch": 0.81,
"learning_rate": 1.809786662333619e-06,
"loss": 3.1361,
"step": 641
},
{
"epoch": 0.81,
"learning_rate": 1.786323889452828e-06,
"loss": 3.2079,
"step": 642
},
{
"epoch": 0.81,
"learning_rate": 1.762999275568721e-06,
"loss": 3.2124,
"step": 643
},
{
"epoch": 0.82,
"learning_rate": 1.739813213015401e-06,
"loss": 3.328,
"step": 644
},
{
"epoch": 0.82,
"learning_rate": 1.7167660917964557e-06,
"loss": 3.1001,
"step": 645
},
{
"epoch": 0.82,
"learning_rate": 1.6938582995783958e-06,
"loss": 3.3397,
"step": 646
},
{
"epoch": 0.82,
"learning_rate": 1.6710902216841241e-06,
"loss": 3.2774,
"step": 647
},
{
"epoch": 0.82,
"learning_rate": 1.6484622410864837e-06,
"loss": 3.3643,
"step": 648
},
{
"epoch": 0.82,
"learning_rate": 1.6259747384017766e-06,
"loss": 3.2594,
"step": 649
},
{
"epoch": 0.82,
"learning_rate": 1.6036280918833924e-06,
"loss": 3.1878,
"step": 650
},
{
"epoch": 0.82,
"learning_rate": 1.5814226774154328e-06,
"loss": 3.2801,
"step": 651
},
{
"epoch": 0.83,
"learning_rate": 1.5593588685063899e-06,
"loss": 3.1553,
"step": 652
},
{
"epoch": 0.83,
"learning_rate": 1.5374370362828662e-06,
"loss": 3.1934,
"step": 653
},
{
"epoch": 0.83,
"learning_rate": 1.515657549483328e-06,
"loss": 3.1606,
"step": 654
},
{
"epoch": 0.83,
"learning_rate": 1.494020774451912e-06,
"loss": 3.2433,
"step": 655
},
{
"epoch": 0.83,
"learning_rate": 1.472527075132245e-06,
"loss": 3.2234,
"step": 656
},
{
"epoch": 0.83,
"learning_rate": 1.4511768130613434e-06,
"loss": 3.1697,
"step": 657
},
{
"epoch": 0.83,
"learning_rate": 1.4299703473635217e-06,
"loss": 3.3123,
"step": 658
},
{
"epoch": 0.83,
"learning_rate": 1.4089080347443485e-06,
"loss": 3.2725,
"step": 659
},
{
"epoch": 0.84,
"learning_rate": 1.3879902294846537e-06,
"loss": 3.2027,
"step": 660
},
{
"epoch": 0.84,
"learning_rate": 1.3672172834345632e-06,
"loss": 3.2765,
"step": 661
},
{
"epoch": 0.84,
"learning_rate": 1.3465895460075872e-06,
"loss": 3.1554,
"step": 662
},
{
"epoch": 0.84,
"learning_rate": 1.3261073641747358e-06,
"loss": 3.2031,
"step": 663
},
{
"epoch": 0.84,
"learning_rate": 1.30577108245869e-06,
"loss": 3.2662,
"step": 664
},
{
"epoch": 0.84,
"learning_rate": 1.2855810429279958e-06,
"loss": 3.2323,
"step": 665
},
{
"epoch": 0.84,
"learning_rate": 1.2655375851913232e-06,
"loss": 3.2345,
"step": 666
},
{
"epoch": 0.84,
"learning_rate": 1.2456410463917445e-06,
"loss": 3.1215,
"step": 667
},
{
"epoch": 0.85,
"learning_rate": 1.22589176120107e-06,
"loss": 3.3559,
"step": 668
},
{
"epoch": 0.85,
"learning_rate": 1.2062900618142136e-06,
"loss": 3.3381,
"step": 669
},
{
"epoch": 0.85,
"learning_rate": 1.186836277943606e-06,
"loss": 3.1843,
"step": 670
},
{
"epoch": 0.85,
"learning_rate": 1.1675307368136513e-06,
"loss": 3.1898,
"step": 671
},
{
"epoch": 0.85,
"learning_rate": 1.1483737631552161e-06,
"loss": 3.1972,
"step": 672
},
{
"epoch": 0.85,
"learning_rate": 1.1293656792001817e-06,
"loss": 3.2011,
"step": 673
},
{
"epoch": 0.85,
"learning_rate": 1.1105068046760048e-06,
"loss": 3.2577,
"step": 674
},
{
"epoch": 0.85,
"learning_rate": 1.0917974568003552e-06,
"loss": 3.2048,
"step": 675
},
{
"epoch": 0.86,
"learning_rate": 1.0732379502757717e-06,
"loss": 3.1821,
"step": 676
},
{
"epoch": 0.86,
"learning_rate": 1.054828597284372e-06,
"loss": 3.2322,
"step": 677
},
{
"epoch": 0.86,
"learning_rate": 1.036569707482602e-06,
"loss": 3.3726,
"step": 678
},
{
"epoch": 0.86,
"learning_rate": 1.0184615879960224e-06,
"loss": 3.1546,
"step": 679
},
{
"epoch": 0.86,
"learning_rate": 1.0005045434141503e-06,
"loss": 3.2868,
"step": 680
},
{
"epoch": 0.86,
"learning_rate": 9.826988757853228e-07,
"loss": 3.2473,
"step": 681
},
{
"epoch": 0.86,
"learning_rate": 9.6504488461164e-07,
"loss": 3.2338,
"step": 682
},
{
"epoch": 0.86,
"learning_rate": 9.47542866843899e-07,
"loss": 3.3132,
"step": 683
},
{
"epoch": 0.87,
"learning_rate": 9.301931168766165e-07,
"loss": 3.266,
"step": 684
},
{
"epoch": 0.87,
"learning_rate": 9.129959265430766e-07,
"loss": 3.2878,
"step": 685
},
{
"epoch": 0.87,
"learning_rate": 8.959515851104117e-07,
"loss": 3.1873,
"step": 686
},
{
"epoch": 0.87,
"learning_rate": 8.790603792747499e-07,
"loss": 3.0949,
"step": 687
},
{
"epoch": 0.87,
"learning_rate": 8.623225931563806e-07,
"loss": 3.1953,
"step": 688
},
{
"epoch": 0.87,
"learning_rate": 8.457385082949842e-07,
"loss": 3.1766,
"step": 689
},
{
"epoch": 0.87,
"learning_rate": 8.293084036448895e-07,
"loss": 3.3156,
"step": 690
},
{
"epoch": 0.87,
"learning_rate": 8.130325555703911e-07,
"loss": 3.1429,
"step": 691
},
{
"epoch": 0.88,
"learning_rate": 7.969112378410882e-07,
"loss": 3.2375,
"step": 692
},
{
"epoch": 0.88,
"learning_rate": 7.809447216272892e-07,
"loss": 3.2285,
"step": 693
},
{
"epoch": 0.88,
"learning_rate": 7.651332754954477e-07,
"loss": 3.2257,
"step": 694
},
{
"epoch": 0.88,
"learning_rate": 7.494771654036448e-07,
"loss": 3.1945,
"step": 695
},
{
"epoch": 0.88,
"learning_rate": 7.33976654697115e-07,
"loss": 3.1229,
"step": 696
},
{
"epoch": 0.88,
"learning_rate": 7.186320041038175e-07,
"loss": 3.2002,
"step": 697
},
{
"epoch": 0.88,
"learning_rate": 7.034434717300509e-07,
"loss": 3.1972,
"step": 698
},
{
"epoch": 0.88,
"learning_rate": 6.884113130561043e-07,
"loss": 3.0296,
"step": 699
},
{
"epoch": 0.89,
"learning_rate": 6.735357809319809e-07,
"loss": 3.3183,
"step": 700
},
{
"epoch": 0.89,
"learning_rate": 6.588171255731157e-07,
"loss": 3.2142,
"step": 701
},
{
"epoch": 0.89,
"learning_rate": 6.442555945561901e-07,
"loss": 3.1247,
"step": 702
},
{
"epoch": 0.89,
"learning_rate": 6.298514328149574e-07,
"loss": 3.2135,
"step": 703
},
{
"epoch": 0.89,
"learning_rate": 6.156048826361239e-07,
"loss": 3.2563,
"step": 704
},
{
"epoch": 0.89,
"learning_rate": 6.015161836552764e-07,
"loss": 3.1705,
"step": 705
},
{
"epoch": 0.89,
"learning_rate": 5.875855728528412e-07,
"loss": 3.3331,
"step": 706
},
{
"epoch": 0.89,
"learning_rate": 5.738132845501199e-07,
"loss": 3.2448,
"step": 707
},
{
"epoch": 0.9,
"learning_rate": 5.601995504053193e-07,
"loss": 3.2878,
"step": 708
},
{
"epoch": 0.9,
"learning_rate": 5.467445994096821e-07,
"loss": 3.2399,
"step": 709
},
{
"epoch": 0.9,
"learning_rate": 5.334486578836118e-07,
"loss": 3.3755,
"step": 710
},
{
"epoch": 0.9,
"learning_rate": 5.203119494728826e-07,
"loss": 3.2725,
"step": 711
},
{
"epoch": 0.9,
"learning_rate": 5.073346951448699e-07,
"loss": 3.3156,
"step": 712
},
{
"epoch": 0.9,
"learning_rate": 4.945171131848358e-07,
"loss": 3.2566,
"step": 713
},
{
"epoch": 0.9,
"learning_rate": 4.818594191922577e-07,
"loss": 3.2779,
"step": 714
},
{
"epoch": 0.91,
"learning_rate": 4.6936182607719373e-07,
"loss": 3.1843,
"step": 715
},
{
"epoch": 0.91,
"learning_rate": 4.5702454405672004e-07,
"loss": 3.2624,
"step": 716
},
{
"epoch": 0.91,
"learning_rate": 4.448477806513729e-07,
"loss": 3.2075,
"step": 717
},
{
"epoch": 0.91,
"learning_rate": 4.328317406816751e-07,
"loss": 3.2446,
"step": 718
},
{
"epoch": 0.91,
"learning_rate": 4.2097662626468085e-07,
"loss": 3.1551,
"step": 719
},
{
"epoch": 0.91,
"learning_rate": 4.0928263681057956e-07,
"loss": 3.2224,
"step": 720
},
{
"epoch": 0.91,
"learning_rate": 3.97749969019342e-07,
"loss": 3.2041,
"step": 721
},
{
"epoch": 0.91,
"learning_rate": 3.863788168774119e-07,
"loss": 3.2021,
"step": 722
},
{
"epoch": 0.92,
"learning_rate": 3.7516937165444136e-07,
"loss": 3.2621,
"step": 723
},
{
"epoch": 0.92,
"learning_rate": 3.6412182190007086e-07,
"loss": 3.3452,
"step": 724
},
{
"epoch": 0.92,
"learning_rate": 3.5323635344077123e-07,
"loss": 3.1605,
"step": 725
},
{
"epoch": 0.92,
"learning_rate": 3.4251314937669313e-07,
"loss": 3.2814,
"step": 726
},
{
"epoch": 0.92,
"learning_rate": 3.3195239007861815e-07,
"loss": 3.2987,
"step": 727
},
{
"epoch": 0.92,
"learning_rate": 3.2155425318489584e-07,
"loss": 3.1901,
"step": 728
},
{
"epoch": 0.92,
"learning_rate": 3.1131891359847397e-07,
"loss": 3.3101,
"step": 729
},
{
"epoch": 0.92,
"learning_rate": 3.012465434839529e-07,
"loss": 3.0684,
"step": 730
},
{
"epoch": 0.93,
"learning_rate": 2.913373122646845e-07,
"loss": 3.1588,
"step": 731
},
{
"epoch": 0.93,
"learning_rate": 2.8159138661992824e-07,
"loss": 3.2064,
"step": 732
},
{
"epoch": 0.93,
"learning_rate": 2.720089304820417e-07,
"loss": 3.1514,
"step": 733
},
{
"epoch": 0.93,
"learning_rate": 2.6259010503373206e-07,
"loss": 3.3542,
"step": 734
},
{
"epoch": 0.93,
"learning_rate": 2.533350687053338e-07,
"loss": 3.1581,
"step": 735
},
{
"epoch": 0.93,
"learning_rate": 2.442439771721539e-07,
"loss": 3.217,
"step": 736
},
{
"epoch": 0.93,
"learning_rate": 2.353169833518454e-07,
"loss": 3.1335,
"step": 737
},
{
"epoch": 0.93,
"learning_rate": 2.2655423740183925e-07,
"loss": 3.122,
"step": 738
},
{
"epoch": 0.94,
"learning_rate": 2.179558867168219e-07,
"loss": 3.0938,
"step": 739
},
{
"epoch": 0.94,
"learning_rate": 2.0952207592624508e-07,
"loss": 3.1366,
"step": 740
},
{
"epoch": 0.94,
"learning_rate": 2.0125294689190666e-07,
"loss": 3.1353,
"step": 741
},
{
"epoch": 0.94,
"learning_rate": 1.9314863870555257e-07,
"loss": 3.208,
"step": 742
},
{
"epoch": 0.94,
"learning_rate": 1.8520928768654745e-07,
"loss": 3.1476,
"step": 743
},
{
"epoch": 0.94,
"learning_rate": 1.7743502737957107e-07,
"loss": 3.0601,
"step": 744
},
{
"epoch": 0.94,
"learning_rate": 1.6982598855238564e-07,
"loss": 3.238,
"step": 745
},
{
"epoch": 0.94,
"learning_rate": 1.6238229919361858e-07,
"loss": 3.1862,
"step": 746
},
{
"epoch": 0.95,
"learning_rate": 1.5510408451062552e-07,
"loss": 3.1664,
"step": 747
},
{
"epoch": 0.95,
"learning_rate": 1.4799146692737742e-07,
"loss": 3.2697,
"step": 748
},
{
"epoch": 0.95,
"learning_rate": 1.410445660823989e-07,
"loss": 3.149,
"step": 749
},
{
"epoch": 0.95,
"learning_rate": 1.3426349882676326e-07,
"loss": 3.2801,
"step": 750
},
{
"epoch": 0.95,
"learning_rate": 1.2764837922211947e-07,
"loss": 3.2374,
"step": 751
},
{
"epoch": 0.95,
"learning_rate": 1.211993185387772e-07,
"loss": 3.2966,
"step": 752
},
{
"epoch": 0.95,
"learning_rate": 1.1491642525383595e-07,
"loss": 3.2834,
"step": 753
},
{
"epoch": 0.95,
"learning_rate": 1.0879980504935772e-07,
"loss": 3.1645,
"step": 754
},
{
"epoch": 0.96,
"learning_rate": 1.0284956081059171e-07,
"loss": 3.1979,
"step": 755
},
{
"epoch": 0.96,
"learning_rate": 9.706579262424132e-08,
"loss": 3.2138,
"step": 756
},
{
"epoch": 0.96,
"learning_rate": 9.144859777678539e-08,
"loss": 3.284,
"step": 757
},
{
"epoch": 0.96,
"learning_rate": 8.599807075283406e-08,
"loss": 3.1706,
"step": 758
},
{
"epoch": 0.96,
"learning_rate": 8.071430323354778e-08,
"loss": 3.354,
"step": 759
},
{
"epoch": 0.96,
"learning_rate": 7.559738409508854e-08,
"loss": 3.2734,
"step": 760
},
{
"epoch": 0.96,
"learning_rate": 7.064739940713217e-08,
"loss": 3.1273,
"step": 761
},
{
"epoch": 0.96,
"learning_rate": 6.586443243140839e-08,
"loss": 3.2327,
"step": 762
},
{
"epoch": 0.97,
"learning_rate": 6.12485636203164e-08,
"loss": 3.2064,
"step": 763
},
{
"epoch": 0.97,
"learning_rate": 5.679987061555703e-08,
"loss": 3.2127,
"step": 764
},
{
"epoch": 0.97,
"learning_rate": 5.251842824683717e-08,
"loss": 3.1752,
"step": 765
},
{
"epoch": 0.97,
"learning_rate": 4.840430853060518e-08,
"loss": 3.198,
"step": 766
},
{
"epoch": 0.97,
"learning_rate": 4.4457580668839653e-08,
"loss": 3.1497,
"step": 767
},
{
"epoch": 0.97,
"learning_rate": 4.067831104789033e-08,
"loss": 3.2571,
"step": 768
},
{
"epoch": 0.97,
"learning_rate": 3.706656323735569e-08,
"loss": 3.1735,
"step": 769
},
{
"epoch": 0.97,
"learning_rate": 3.362239798901712e-08,
"loss": 3.1884,
"step": 770
},
{
"epoch": 0.98,
"learning_rate": 3.034587323581639e-08,
"loss": 3.1669,
"step": 771
},
{
"epoch": 0.98,
"learning_rate": 2.7237044090879795e-08,
"loss": 3.2778,
"step": 772
},
{
"epoch": 0.98,
"learning_rate": 2.429596284659219e-08,
"loss": 3.2229,
"step": 773
},
{
"epoch": 0.98,
"learning_rate": 2.1522678973718848e-08,
"loss": 3.1956,
"step": 774
},
{
"epoch": 0.98,
"learning_rate": 1.891723912056942e-08,
"loss": 3.074,
"step": 775
},
{
"epoch": 0.98,
"learning_rate": 1.6479687112217478e-08,
"loss": 3.1022,
"step": 776
},
{
"epoch": 0.98,
"learning_rate": 1.421006394976221e-08,
"loss": 3.268,
"step": 777
},
{
"epoch": 0.98,
"learning_rate": 1.2108407809635624e-08,
"loss": 3.2414,
"step": 778
},
{
"epoch": 0.99,
"learning_rate": 1.0174754042964197e-08,
"loss": 3.1635,
"step": 779
},
{
"epoch": 0.99,
"learning_rate": 8.40913517497377e-09,
"loss": 3.3233,
"step": 780
},
{
"epoch": 0.99,
"learning_rate": 6.8115809044411174e-09,
"loss": 3.2068,
"step": 781
},
{
"epoch": 0.99,
"learning_rate": 5.3821181031932235e-09,
"loss": 3.1507,
"step": 782
},
{
"epoch": 0.99,
"learning_rate": 4.120770815659869e-09,
"loss": 3.2037,
"step": 783
},
{
"epoch": 0.99,
"learning_rate": 3.0275602584650677e-09,
"loss": 3.1867,
"step": 784
},
{
"epoch": 0.99,
"learning_rate": 2.102504820069573e-09,
"loss": 3.2221,
"step": 785
},
{
"epoch": 0.99,
"learning_rate": 1.345620060465569e-09,
"loss": 3.1024,
"step": 786
},
{
"epoch": 1.0,
"learning_rate": 7.569187109124354e-10,
"loss": 3.1997,
"step": 787
},
{
"epoch": 1.0,
"learning_rate": 3.3641067372358617e-10,
"loss": 3.1689,
"step": 788
},
{
"epoch": 1.0,
"learning_rate": 8.410302209660437e-11,
"loss": 3.1614,
"step": 789
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 3.105,
"step": 790
},
{
"epoch": 1.0,
"step": 790,
"total_flos": 261024117719040.0,
"train_loss": 4.121155046209505,
"train_runtime": 3165.0402,
"train_samples_per_second": 31.949,
"train_steps_per_second": 0.25
}
],
"logging_steps": 1.0,
"max_steps": 790,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50000,
"total_flos": 261024117719040.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}