|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998492272898606, |
|
"eval_steps": 500, |
|
"global_step": 4421, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011307953260459858, |
|
"grad_norm": 0.205332413315773, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.9778, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0022615906520919715, |
|
"grad_norm": 0.2380959391593933, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9816, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003392385978137957, |
|
"grad_norm": 0.22828762233257294, |
|
"learning_rate": 7.5e-06, |
|
"loss": 1.0123, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.004523181304183943, |
|
"grad_norm": 0.1957542896270752, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9404, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005653976630229929, |
|
"grad_norm": 0.2502771019935608, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.9604, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.006784771956275914, |
|
"grad_norm": 0.24806493520736694, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.0407, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0079155672823219, |
|
"grad_norm": 0.28463977575302124, |
|
"learning_rate": 1.75e-05, |
|
"loss": 1.0461, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.009046362608367886, |
|
"grad_norm": 0.2142462134361267, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9104, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.010177157934413872, |
|
"grad_norm": 0.21732334792613983, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.8991, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.011307953260459858, |
|
"grad_norm": 0.2227325588464737, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8901, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012438748586505842, |
|
"grad_norm": 0.19881105422973633, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.8378, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.013569543912551827, |
|
"grad_norm": 0.21935518085956573, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.014700339238597813, |
|
"grad_norm": 0.21730449795722961, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.8588, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0158311345646438, |
|
"grad_norm": 0.23200418055057526, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.7527, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.016961929890689786, |
|
"grad_norm": 0.20900775492191315, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.8365, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.018092725216735772, |
|
"grad_norm": 0.31192561984062195, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7791, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.019223520542781758, |
|
"grad_norm": 0.25915804505348206, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.8506, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.020354315868827744, |
|
"grad_norm": 0.20527321100234985, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.8062, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02148511119487373, |
|
"grad_norm": 0.2385016530752182, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.7525, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.022615906520919715, |
|
"grad_norm": 0.2394818663597107, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7416, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.023746701846965697, |
|
"grad_norm": 0.269607275724411, |
|
"learning_rate": 4.999983481113995e-05, |
|
"loss": 0.7653, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.024877497173011683, |
|
"grad_norm": 0.21368731558322906, |
|
"learning_rate": 4.9999339246742786e-05, |
|
"loss": 0.75, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02600829249905767, |
|
"grad_norm": 0.25945496559143066, |
|
"learning_rate": 4.9998513313357435e-05, |
|
"loss": 0.7693, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.027139087825103655, |
|
"grad_norm": 0.2617523968219757, |
|
"learning_rate": 4.999735702189871e-05, |
|
"loss": 0.7995, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02826988315114964, |
|
"grad_norm": 0.26992905139923096, |
|
"learning_rate": 4.999587038764713e-05, |
|
"loss": 0.7784, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.029400678477195626, |
|
"grad_norm": 0.23823940753936768, |
|
"learning_rate": 4.999405343024871e-05, |
|
"loss": 0.7316, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.030531473803241612, |
|
"grad_norm": 0.2858569920063019, |
|
"learning_rate": 4.9991906173714756e-05, |
|
"loss": 0.7796, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0316622691292876, |
|
"grad_norm": 0.25298023223876953, |
|
"learning_rate": 4.99894286464215e-05, |
|
"loss": 0.7169, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03279306445533359, |
|
"grad_norm": 0.35693949460983276, |
|
"learning_rate": 4.998662088110972e-05, |
|
"loss": 0.8062, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.03392385978137957, |
|
"grad_norm": 0.42634308338165283, |
|
"learning_rate": 4.998348291488435e-05, |
|
"loss": 0.7035, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03505465510742556, |
|
"grad_norm": 0.34167715907096863, |
|
"learning_rate": 4.998001478921395e-05, |
|
"loss": 0.7683, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.036185450433471544, |
|
"grad_norm": 0.2687824070453644, |
|
"learning_rate": 4.997621654993018e-05, |
|
"loss": 0.7816, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03731624575951753, |
|
"grad_norm": 0.2919199764728546, |
|
"learning_rate": 4.997208824722719e-05, |
|
"loss": 0.7392, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.038447041085563516, |
|
"grad_norm": 0.24317045509815216, |
|
"learning_rate": 4.9967629935660944e-05, |
|
"loss": 0.6972, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0395778364116095, |
|
"grad_norm": 0.2556512951850891, |
|
"learning_rate": 4.9962841674148516e-05, |
|
"loss": 0.7431, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04070863173765549, |
|
"grad_norm": 0.35918310284614563, |
|
"learning_rate": 4.99577235259673e-05, |
|
"loss": 0.78, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04183942706370147, |
|
"grad_norm": 0.28553536534309387, |
|
"learning_rate": 4.9952275558754185e-05, |
|
"loss": 0.7467, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.04297022238974746, |
|
"grad_norm": 0.25147977471351624, |
|
"learning_rate": 4.994649784450465e-05, |
|
"loss": 0.7579, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.044101017715793445, |
|
"grad_norm": 0.3088456690311432, |
|
"learning_rate": 4.994039045957182e-05, |
|
"loss": 0.752, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.04523181304183943, |
|
"grad_norm": 0.32329487800598145, |
|
"learning_rate": 4.993395348466544e-05, |
|
"loss": 0.7012, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.046362608367885416, |
|
"grad_norm": 0.28732138872146606, |
|
"learning_rate": 4.992718700485085e-05, |
|
"loss": 0.7247, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.047493403693931395, |
|
"grad_norm": 0.2657299339771271, |
|
"learning_rate": 4.99200911095478e-05, |
|
"loss": 0.7247, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04862419901997738, |
|
"grad_norm": 0.30124104022979736, |
|
"learning_rate": 4.991266589252933e-05, |
|
"loss": 0.7001, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.049754994346023367, |
|
"grad_norm": 0.3533799946308136, |
|
"learning_rate": 4.990491145192049e-05, |
|
"loss": 0.7714, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05088578967206935, |
|
"grad_norm": 0.29441332817077637, |
|
"learning_rate": 4.989682789019706e-05, |
|
"loss": 0.7338, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.05201658499811534, |
|
"grad_norm": 0.2670339345932007, |
|
"learning_rate": 4.988841531418418e-05, |
|
"loss": 0.719, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.053147380324161324, |
|
"grad_norm": 0.44572877883911133, |
|
"learning_rate": 4.9879673835054955e-05, |
|
"loss": 0.7315, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.05427817565020731, |
|
"grad_norm": 0.29553067684173584, |
|
"learning_rate": 4.9870603568328985e-05, |
|
"loss": 0.7495, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.055408970976253295, |
|
"grad_norm": 0.26393231749534607, |
|
"learning_rate": 4.986120463387084e-05, |
|
"loss": 0.6637, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.05653976630229928, |
|
"grad_norm": 0.35982418060302734, |
|
"learning_rate": 4.985147715588845e-05, |
|
"loss": 0.7571, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05767056162834527, |
|
"grad_norm": 0.38977113366127014, |
|
"learning_rate": 4.9841421262931506e-05, |
|
"loss": 0.7551, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.05880135695439125, |
|
"grad_norm": 0.28935956954956055, |
|
"learning_rate": 4.983103708788972e-05, |
|
"loss": 0.7863, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05993215228043724, |
|
"grad_norm": 0.34443530440330505, |
|
"learning_rate": 4.98203247679911e-05, |
|
"loss": 0.8106, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.061062947606483224, |
|
"grad_norm": 0.4763427674770355, |
|
"learning_rate": 4.980928444480011e-05, |
|
"loss": 0.7729, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06219374293252921, |
|
"grad_norm": 0.2860422730445862, |
|
"learning_rate": 4.9797916264215824e-05, |
|
"loss": 0.7593, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.0633245382585752, |
|
"grad_norm": 0.28870680928230286, |
|
"learning_rate": 4.978622037647e-05, |
|
"loss": 0.7574, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06445533358462119, |
|
"grad_norm": 0.40277180075645447, |
|
"learning_rate": 4.9774196936125056e-05, |
|
"loss": 0.799, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.06558612891066717, |
|
"grad_norm": 0.3290288746356964, |
|
"learning_rate": 4.9761846102072065e-05, |
|
"loss": 0.7519, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06671692423671316, |
|
"grad_norm": 0.3139791190624237, |
|
"learning_rate": 4.9749168037528635e-05, |
|
"loss": 0.6837, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.06784771956275915, |
|
"grad_norm": 0.30802035331726074, |
|
"learning_rate": 4.9736162910036785e-05, |
|
"loss": 0.7662, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06897851488880513, |
|
"grad_norm": 0.34561124444007874, |
|
"learning_rate": 4.972283089146067e-05, |
|
"loss": 0.6897, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.07010931021485112, |
|
"grad_norm": 0.3372039198875427, |
|
"learning_rate": 4.970917215798438e-05, |
|
"loss": 0.7344, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0712401055408971, |
|
"grad_norm": 0.41160914301872253, |
|
"learning_rate": 4.9695186890109567e-05, |
|
"loss": 0.832, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.07237090086694309, |
|
"grad_norm": 0.2914057672023773, |
|
"learning_rate": 4.968087527265306e-05, |
|
"loss": 0.7113, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07350169619298907, |
|
"grad_norm": 0.3247675597667694, |
|
"learning_rate": 4.966623749474445e-05, |
|
"loss": 0.6996, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07463249151903506, |
|
"grad_norm": 0.435735285282135, |
|
"learning_rate": 4.9651273749823546e-05, |
|
"loss": 0.8236, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07576328684508105, |
|
"grad_norm": 0.3213053047657013, |
|
"learning_rate": 4.963598423563788e-05, |
|
"loss": 0.7012, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.07689408217112703, |
|
"grad_norm": 0.3745056390762329, |
|
"learning_rate": 4.962036915424004e-05, |
|
"loss": 0.7018, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07802487749717302, |
|
"grad_norm": 0.28368842601776123, |
|
"learning_rate": 4.960442871198503e-05, |
|
"loss": 0.7084, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.079155672823219, |
|
"grad_norm": 0.2621799409389496, |
|
"learning_rate": 4.958816311952752e-05, |
|
"loss": 0.7217, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08028646814926499, |
|
"grad_norm": 0.25561287999153137, |
|
"learning_rate": 4.95715725918191e-05, |
|
"loss": 0.7616, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.08141726347531097, |
|
"grad_norm": 0.3495071828365326, |
|
"learning_rate": 4.9554657348105385e-05, |
|
"loss": 0.7061, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08254805880135696, |
|
"grad_norm": 0.3490068018436432, |
|
"learning_rate": 4.953741761192317e-05, |
|
"loss": 0.7809, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.08367885412740295, |
|
"grad_norm": 0.39416739344596863, |
|
"learning_rate": 4.9519853611097434e-05, |
|
"loss": 0.7282, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08480964945344893, |
|
"grad_norm": 0.2763444185256958, |
|
"learning_rate": 4.950196557773837e-05, |
|
"loss": 0.7262, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08594044477949492, |
|
"grad_norm": 0.29107871651649475, |
|
"learning_rate": 4.948375374823828e-05, |
|
"loss": 0.7346, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0870712401055409, |
|
"grad_norm": 0.28965339064598083, |
|
"learning_rate": 4.946521836326847e-05, |
|
"loss": 0.6768, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.08820203543158689, |
|
"grad_norm": 0.31072792410850525, |
|
"learning_rate": 4.9446359667776065e-05, |
|
"loss": 0.7277, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08933283075763288, |
|
"grad_norm": 0.2789427936077118, |
|
"learning_rate": 4.9427177910980794e-05, |
|
"loss": 0.7481, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.09046362608367886, |
|
"grad_norm": 0.2573710083961487, |
|
"learning_rate": 4.9407673346371644e-05, |
|
"loss": 0.7077, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09159442140972485, |
|
"grad_norm": 0.4152914881706238, |
|
"learning_rate": 4.938784623170357e-05, |
|
"loss": 0.7233, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.09272521673577083, |
|
"grad_norm": 0.30680012702941895, |
|
"learning_rate": 4.936769682899404e-05, |
|
"loss": 0.7353, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.0938560120618168, |
|
"grad_norm": 0.30145958065986633, |
|
"learning_rate": 4.934722540451961e-05, |
|
"loss": 0.7001, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.09498680738786279, |
|
"grad_norm": 0.31772518157958984, |
|
"learning_rate": 4.932643222881238e-05, |
|
"loss": 0.7183, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09611760271390878, |
|
"grad_norm": 0.3001084327697754, |
|
"learning_rate": 4.930531757665643e-05, |
|
"loss": 0.6898, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.09724839803995476, |
|
"grad_norm": 0.2780250012874603, |
|
"learning_rate": 4.928388172708418e-05, |
|
"loss": 0.7782, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09837919336600075, |
|
"grad_norm": 0.28147390484809875, |
|
"learning_rate": 4.926212496337272e-05, |
|
"loss": 0.7311, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.09950998869204673, |
|
"grad_norm": 0.4945797324180603, |
|
"learning_rate": 4.924004757304005e-05, |
|
"loss": 0.8001, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10064078401809272, |
|
"grad_norm": 0.3075043857097626, |
|
"learning_rate": 4.921764984784128e-05, |
|
"loss": 0.7233, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.1017715793441387, |
|
"grad_norm": 0.3451552093029022, |
|
"learning_rate": 4.919493208376479e-05, |
|
"loss": 0.6629, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10290237467018469, |
|
"grad_norm": 0.28970155119895935, |
|
"learning_rate": 4.917189458102831e-05, |
|
"loss": 0.7793, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.10403316999623068, |
|
"grad_norm": 0.2446502447128296, |
|
"learning_rate": 4.9148537644074936e-05, |
|
"loss": 0.6899, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.10516396532227666, |
|
"grad_norm": 0.2791134715080261, |
|
"learning_rate": 4.912486158156912e-05, |
|
"loss": 0.69, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.10629476064832265, |
|
"grad_norm": 0.35021790862083435, |
|
"learning_rate": 4.910086670639264e-05, |
|
"loss": 0.7497, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.10742555597436863, |
|
"grad_norm": 0.27730756998062134, |
|
"learning_rate": 4.907655333564035e-05, |
|
"loss": 0.6799, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.10855635130041462, |
|
"grad_norm": 0.3183215856552124, |
|
"learning_rate": 4.9051921790616095e-05, |
|
"loss": 0.723, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1096871466264606, |
|
"grad_norm": 0.31501445174217224, |
|
"learning_rate": 4.902697239682844e-05, |
|
"loss": 0.7611, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.11081794195250659, |
|
"grad_norm": 0.30429741740226746, |
|
"learning_rate": 4.9001705483986314e-05, |
|
"loss": 0.7909, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.11194873727855258, |
|
"grad_norm": 0.27980148792266846, |
|
"learning_rate": 4.8976121385994735e-05, |
|
"loss": 0.7085, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.11307953260459856, |
|
"grad_norm": 0.2850303649902344, |
|
"learning_rate": 4.895022044095034e-05, |
|
"loss": 0.751, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11421032793064455, |
|
"grad_norm": 0.30970653891563416, |
|
"learning_rate": 4.892400299113693e-05, |
|
"loss": 0.6766, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.11534112325669053, |
|
"grad_norm": 0.4121417999267578, |
|
"learning_rate": 4.8897469383020966e-05, |
|
"loss": 0.6824, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.11647191858273652, |
|
"grad_norm": 0.3178861737251282, |
|
"learning_rate": 4.887061996724696e-05, |
|
"loss": 0.6798, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.1176027139087825, |
|
"grad_norm": 0.3267967700958252, |
|
"learning_rate": 4.884345509863286e-05, |
|
"loss": 0.7661, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.11873350923482849, |
|
"grad_norm": 0.3270506262779236, |
|
"learning_rate": 4.881597513616536e-05, |
|
"loss": 0.7321, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.11986430456087448, |
|
"grad_norm": 0.3873696029186249, |
|
"learning_rate": 4.878818044299517e-05, |
|
"loss": 0.7278, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.12099509988692046, |
|
"grad_norm": 0.3305418789386749, |
|
"learning_rate": 4.876007138643216e-05, |
|
"loss": 0.7304, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.12212589521296645, |
|
"grad_norm": 0.26419228315353394, |
|
"learning_rate": 4.873164833794059e-05, |
|
"loss": 0.7248, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.12325669053901243, |
|
"grad_norm": 0.3038617968559265, |
|
"learning_rate": 4.870291167313413e-05, |
|
"loss": 0.6681, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.12438748586505842, |
|
"grad_norm": 0.2820129692554474, |
|
"learning_rate": 4.8673861771770934e-05, |
|
"loss": 0.7434, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12551828119110442, |
|
"grad_norm": 0.3421660363674164, |
|
"learning_rate": 4.8644499017748615e-05, |
|
"loss": 0.7266, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.1266490765171504, |
|
"grad_norm": 0.3642486035823822, |
|
"learning_rate": 4.861482379909914e-05, |
|
"loss": 0.7421, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1277798718431964, |
|
"grad_norm": 0.35517194867134094, |
|
"learning_rate": 4.8584836507983786e-05, |
|
"loss": 0.7432, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.12891066716924238, |
|
"grad_norm": 0.3161648213863373, |
|
"learning_rate": 4.855453754068784e-05, |
|
"loss": 0.7098, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13004146249528836, |
|
"grad_norm": 0.296561598777771, |
|
"learning_rate": 4.852392729761547e-05, |
|
"loss": 0.6641, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.13117225782133435, |
|
"grad_norm": 0.323515921831131, |
|
"learning_rate": 4.849300618328435e-05, |
|
"loss": 0.7522, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.13230305314738033, |
|
"grad_norm": 0.34789595007896423, |
|
"learning_rate": 4.8461774606320386e-05, |
|
"loss": 0.7712, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.13343384847342632, |
|
"grad_norm": 0.3661488890647888, |
|
"learning_rate": 4.843023297945226e-05, |
|
"loss": 0.6862, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1345646437994723, |
|
"grad_norm": 0.43650659918785095, |
|
"learning_rate": 4.8398381719506e-05, |
|
"loss": 0.7003, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.1356954391255183, |
|
"grad_norm": 0.38563141226768494, |
|
"learning_rate": 4.836622124739948e-05, |
|
"loss": 0.7094, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13682623445156428, |
|
"grad_norm": 0.30190715193748474, |
|
"learning_rate": 4.833375198813683e-05, |
|
"loss": 0.6664, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.13795702977761026, |
|
"grad_norm": 0.35016635060310364, |
|
"learning_rate": 4.8300974370802855e-05, |
|
"loss": 0.6657, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.13908782510365625, |
|
"grad_norm": 0.3495071530342102, |
|
"learning_rate": 4.8267888828557315e-05, |
|
"loss": 0.7689, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.14021862042970223, |
|
"grad_norm": 0.2628171145915985, |
|
"learning_rate": 4.823449579862927e-05, |
|
"loss": 0.7278, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14134941575574822, |
|
"grad_norm": 0.3362691104412079, |
|
"learning_rate": 4.820079572231123e-05, |
|
"loss": 0.6934, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1424802110817942, |
|
"grad_norm": 0.32949429750442505, |
|
"learning_rate": 4.8166789044953385e-05, |
|
"loss": 0.6363, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1436110064078402, |
|
"grad_norm": 0.3482156991958618, |
|
"learning_rate": 4.813247621595766e-05, |
|
"loss": 0.6735, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.14474180173388618, |
|
"grad_norm": 0.27361541986465454, |
|
"learning_rate": 4.809785768877183e-05, |
|
"loss": 0.6783, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.14587259705993216, |
|
"grad_norm": 0.29385972023010254, |
|
"learning_rate": 4.80629339208835e-05, |
|
"loss": 0.6947, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.14700339238597815, |
|
"grad_norm": 0.2907145023345947, |
|
"learning_rate": 4.802770537381407e-05, |
|
"loss": 0.6583, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14813418771202413, |
|
"grad_norm": 0.3557474613189697, |
|
"learning_rate": 4.799217251311261e-05, |
|
"loss": 0.6196, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.14926498303807012, |
|
"grad_norm": 0.3381137251853943, |
|
"learning_rate": 4.795633580834974e-05, |
|
"loss": 0.6959, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1503957783641161, |
|
"grad_norm": 0.3507809042930603, |
|
"learning_rate": 4.792019573311142e-05, |
|
"loss": 0.7787, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.1515265736901621, |
|
"grad_norm": 0.3603408634662628, |
|
"learning_rate": 4.7883752764992676e-05, |
|
"loss": 0.6956, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.15265736901620808, |
|
"grad_norm": 0.3778272867202759, |
|
"learning_rate": 4.7847007385591295e-05, |
|
"loss": 0.6352, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.15378816434225406, |
|
"grad_norm": 0.3363897502422333, |
|
"learning_rate": 4.7809960080501464e-05, |
|
"loss": 0.6615, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.15491895966830005, |
|
"grad_norm": 0.32491081953048706, |
|
"learning_rate": 4.777261133930735e-05, |
|
"loss": 0.7499, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.15604975499434603, |
|
"grad_norm": 0.318862646818161, |
|
"learning_rate": 4.773496165557663e-05, |
|
"loss": 0.725, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.15718055032039202, |
|
"grad_norm": 0.45129063725471497, |
|
"learning_rate": 4.7697011526853976e-05, |
|
"loss": 0.7582, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.158311345646438, |
|
"grad_norm": 0.3082630932331085, |
|
"learning_rate": 4.7658761454654454e-05, |
|
"loss": 0.834, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.159442140972484, |
|
"grad_norm": 0.29232099652290344, |
|
"learning_rate": 4.762021194445695e-05, |
|
"loss": 0.688, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.16057293629852998, |
|
"grad_norm": 0.304189532995224, |
|
"learning_rate": 4.758136350569743e-05, |
|
"loss": 0.6758, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.16170373162457596, |
|
"grad_norm": 0.3389667570590973, |
|
"learning_rate": 4.754221665176223e-05, |
|
"loss": 0.6746, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.16283452695062195, |
|
"grad_norm": 0.5311838388442993, |
|
"learning_rate": 4.7502771899981284e-05, |
|
"loss": 0.8003, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.16396532227666794, |
|
"grad_norm": 0.26352110505104065, |
|
"learning_rate": 4.7463029771621294e-05, |
|
"loss": 0.6647, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.16509611760271392, |
|
"grad_norm": 0.3928554058074951, |
|
"learning_rate": 4.74229907918788e-05, |
|
"loss": 0.7258, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.1662269129287599, |
|
"grad_norm": 0.4840872883796692, |
|
"learning_rate": 4.738265548987327e-05, |
|
"loss": 0.7886, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1673577082548059, |
|
"grad_norm": 0.324370414018631, |
|
"learning_rate": 4.734202439864012e-05, |
|
"loss": 0.7031, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.16848850358085188, |
|
"grad_norm": 0.30743566155433655, |
|
"learning_rate": 4.730109805512363e-05, |
|
"loss": 0.7228, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.16961929890689786, |
|
"grad_norm": 0.3641277551651001, |
|
"learning_rate": 4.7259877000169896e-05, |
|
"loss": 0.7265, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17075009423294385, |
|
"grad_norm": 0.40837985277175903, |
|
"learning_rate": 4.721836177851963e-05, |
|
"loss": 0.7128, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.17188088955898984, |
|
"grad_norm": 0.28167346119880676, |
|
"learning_rate": 4.717655293880102e-05, |
|
"loss": 0.6837, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.17301168488503582, |
|
"grad_norm": 0.37647080421447754, |
|
"learning_rate": 4.713445103352241e-05, |
|
"loss": 0.7493, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.1741424802110818, |
|
"grad_norm": 0.3222416043281555, |
|
"learning_rate": 4.7092056619065084e-05, |
|
"loss": 0.6314, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1752732755371278, |
|
"grad_norm": 0.29139477014541626, |
|
"learning_rate": 4.704937025567582e-05, |
|
"loss": 0.7274, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.17640407086317378, |
|
"grad_norm": 0.3189648687839508, |
|
"learning_rate": 4.700639250745957e-05, |
|
"loss": 0.7202, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.17753486618921976, |
|
"grad_norm": 0.26070472598075867, |
|
"learning_rate": 4.696312394237195e-05, |
|
"loss": 0.7426, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.17866566151526575, |
|
"grad_norm": 0.384833961725235, |
|
"learning_rate": 4.691956513221174e-05, |
|
"loss": 0.7669, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.17979645684131174, |
|
"grad_norm": 0.3161134421825409, |
|
"learning_rate": 4.6875716652613366e-05, |
|
"loss": 0.7224, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.18092725216735772, |
|
"grad_norm": 0.40663212537765503, |
|
"learning_rate": 4.6831579083039265e-05, |
|
"loss": 0.7176, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1820580474934037, |
|
"grad_norm": 0.4073905646800995, |
|
"learning_rate": 4.6787153006772214e-05, |
|
"loss": 0.7454, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.1831888428194497, |
|
"grad_norm": 0.36114805936813354, |
|
"learning_rate": 4.6742439010907645e-05, |
|
"loss": 0.7271, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.18431963814549568, |
|
"grad_norm": 0.35414162278175354, |
|
"learning_rate": 4.6697437686345883e-05, |
|
"loss": 0.8134, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.18545043347154166, |
|
"grad_norm": 0.3441600799560547, |
|
"learning_rate": 4.6652149627784324e-05, |
|
"loss": 0.7259, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.18658122879758765, |
|
"grad_norm": 0.34488874673843384, |
|
"learning_rate": 4.660657543370958e-05, |
|
"loss": 0.7541, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.1877120241236336, |
|
"grad_norm": 0.3300029933452606, |
|
"learning_rate": 4.65607157063896e-05, |
|
"loss": 0.7123, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.1888428194496796, |
|
"grad_norm": 0.39021798968315125, |
|
"learning_rate": 4.651457105186566e-05, |
|
"loss": 0.7049, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.18997361477572558, |
|
"grad_norm": 0.3784525394439697, |
|
"learning_rate": 4.646814207994441e-05, |
|
"loss": 0.7892, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.19110441010177157, |
|
"grad_norm": 0.3650527000427246, |
|
"learning_rate": 4.642142940418973e-05, |
|
"loss": 0.7315, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.19223520542781755, |
|
"grad_norm": 0.36192572116851807, |
|
"learning_rate": 4.637443364191474e-05, |
|
"loss": 0.6201, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.19336600075386354, |
|
"grad_norm": 0.3428821265697479, |
|
"learning_rate": 4.6327155414173554e-05, |
|
"loss": 0.7248, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.19449679607990952, |
|
"grad_norm": 0.2692446708679199, |
|
"learning_rate": 4.627959534575307e-05, |
|
"loss": 0.6986, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.1956275914059555, |
|
"grad_norm": 0.33562323451042175, |
|
"learning_rate": 4.623175406516479e-05, |
|
"loss": 0.7553, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1967583867320015, |
|
"grad_norm": 0.332381010055542, |
|
"learning_rate": 4.618363220463644e-05, |
|
"loss": 0.7021, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.19788918205804748, |
|
"grad_norm": 0.3331127166748047, |
|
"learning_rate": 4.6135230400103636e-05, |
|
"loss": 0.7278, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.19901997738409347, |
|
"grad_norm": 0.32819780707359314, |
|
"learning_rate": 4.6086549291201485e-05, |
|
"loss": 0.7189, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.20015077271013945, |
|
"grad_norm": 0.31646525859832764, |
|
"learning_rate": 4.603758952125615e-05, |
|
"loss": 0.6949, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.20128156803618544, |
|
"grad_norm": 0.3622991740703583, |
|
"learning_rate": 4.5988351737276316e-05, |
|
"loss": 0.7193, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.20241236336223142, |
|
"grad_norm": 0.3097212016582489, |
|
"learning_rate": 4.593883658994466e-05, |
|
"loss": 0.6913, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.2035431586882774, |
|
"grad_norm": 0.3757197856903076, |
|
"learning_rate": 4.588904473360923e-05, |
|
"loss": 0.6859, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2046739540143234, |
|
"grad_norm": 0.3894336223602295, |
|
"learning_rate": 4.5838976826274826e-05, |
|
"loss": 0.7495, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.20580474934036938, |
|
"grad_norm": 0.2777577042579651, |
|
"learning_rate": 4.578863352959429e-05, |
|
"loss": 0.7305, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.20693554466641537, |
|
"grad_norm": 0.30092760920524597, |
|
"learning_rate": 4.573801550885979e-05, |
|
"loss": 0.6952, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.20806633999246135, |
|
"grad_norm": 0.31918197870254517, |
|
"learning_rate": 4.568712343299394e-05, |
|
"loss": 0.6309, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.20919713531850734, |
|
"grad_norm": 0.3190583884716034, |
|
"learning_rate": 4.563595797454109e-05, |
|
"loss": 0.6932, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.21032793064455332, |
|
"grad_norm": 0.4575042128562927, |
|
"learning_rate": 4.558451980965832e-05, |
|
"loss": 0.7446, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2114587259705993, |
|
"grad_norm": 0.3298736810684204, |
|
"learning_rate": 4.553280961810658e-05, |
|
"loss": 0.7434, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2125895212966453, |
|
"grad_norm": 0.2681873142719269, |
|
"learning_rate": 4.548082808324169e-05, |
|
"loss": 0.7609, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.21372031662269128, |
|
"grad_norm": 0.32544100284576416, |
|
"learning_rate": 4.542857589200527e-05, |
|
"loss": 0.7076, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.21485111194873727, |
|
"grad_norm": 0.3351302444934845, |
|
"learning_rate": 4.537605373491573e-05, |
|
"loss": 0.7442, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.21598190727478325, |
|
"grad_norm": 0.3408782482147217, |
|
"learning_rate": 4.532326230605908e-05, |
|
"loss": 0.6697, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.21711270260082924, |
|
"grad_norm": 0.31308743357658386, |
|
"learning_rate": 4.52702023030798e-05, |
|
"loss": 0.6795, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.21824349792687522, |
|
"grad_norm": 0.31887832283973694, |
|
"learning_rate": 4.521687442717161e-05, |
|
"loss": 0.6907, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.2193742932529212, |
|
"grad_norm": 0.28720954060554504, |
|
"learning_rate": 4.516327938306818e-05, |
|
"loss": 0.6951, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2205050885789672, |
|
"grad_norm": 0.35572728514671326, |
|
"learning_rate": 4.510941787903385e-05, |
|
"loss": 0.6731, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.22163588390501318, |
|
"grad_norm": 0.32665789127349854, |
|
"learning_rate": 4.505529062685426e-05, |
|
"loss": 0.6859, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.22276667923105917, |
|
"grad_norm": 0.425155907869339, |
|
"learning_rate": 4.5000898341826935e-05, |
|
"loss": 0.7611, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.22389747455710515, |
|
"grad_norm": 0.3223753273487091, |
|
"learning_rate": 4.494624174275185e-05, |
|
"loss": 0.6784, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.22502826988315114, |
|
"grad_norm": 0.29629823565483093, |
|
"learning_rate": 4.48913215519219e-05, |
|
"loss": 0.7528, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.22615906520919712, |
|
"grad_norm": 0.45501330494880676, |
|
"learning_rate": 4.483613849511337e-05, |
|
"loss": 0.7412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2272898605352431, |
|
"grad_norm": 0.47708141803741455, |
|
"learning_rate": 4.478069330157638e-05, |
|
"loss": 0.7186, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.2284206558612891, |
|
"grad_norm": 0.46172332763671875, |
|
"learning_rate": 4.472498670402519e-05, |
|
"loss": 0.7429, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.22955145118733508, |
|
"grad_norm": 0.2885262966156006, |
|
"learning_rate": 4.4669019438628545e-05, |
|
"loss": 0.6749, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.23068224651338107, |
|
"grad_norm": 0.3848798871040344, |
|
"learning_rate": 4.461279224499995e-05, |
|
"loss": 0.6889, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.23181304183942705, |
|
"grad_norm": 0.3475760519504547, |
|
"learning_rate": 4.455630586618788e-05, |
|
"loss": 0.7423, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.23294383716547304, |
|
"grad_norm": 0.3690018653869629, |
|
"learning_rate": 4.449956104866597e-05, |
|
"loss": 0.6995, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.23407463249151902, |
|
"grad_norm": 0.4979022741317749, |
|
"learning_rate": 4.444255854232318e-05, |
|
"loss": 0.7137, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.235205427817565, |
|
"grad_norm": 0.3002910017967224, |
|
"learning_rate": 4.438529910045381e-05, |
|
"loss": 0.6342, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.236336223143611, |
|
"grad_norm": 0.2860986292362213, |
|
"learning_rate": 4.432778347974764e-05, |
|
"loss": 0.6486, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.23746701846965698, |
|
"grad_norm": 0.3187776207923889, |
|
"learning_rate": 4.427001244027984e-05, |
|
"loss": 0.6935, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.23859781379570297, |
|
"grad_norm": 0.436594694852829, |
|
"learning_rate": 4.4211986745500976e-05, |
|
"loss": 0.7125, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.23972860912174895, |
|
"grad_norm": 0.25989067554473877, |
|
"learning_rate": 4.415370716222693e-05, |
|
"loss": 0.6699, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.24085940444779494, |
|
"grad_norm": 0.30455416440963745, |
|
"learning_rate": 4.4095174460628734e-05, |
|
"loss": 0.7244, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.24199019977384092, |
|
"grad_norm": 0.2574412226676941, |
|
"learning_rate": 4.40363894142224e-05, |
|
"loss": 0.6719, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2431209950998869, |
|
"grad_norm": 0.2614154815673828, |
|
"learning_rate": 4.397735279985873e-05, |
|
"loss": 0.7, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.2442517904259329, |
|
"grad_norm": 0.32729870080947876, |
|
"learning_rate": 4.3918065397712983e-05, |
|
"loss": 0.6669, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.24538258575197888, |
|
"grad_norm": 0.5149984359741211, |
|
"learning_rate": 4.385852799127464e-05, |
|
"loss": 0.7371, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.24651338107802487, |
|
"grad_norm": 0.322007417678833, |
|
"learning_rate": 4.379874136733702e-05, |
|
"loss": 0.7595, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.24764417640407085, |
|
"grad_norm": 0.38709428906440735, |
|
"learning_rate": 4.373870631598683e-05, |
|
"loss": 0.7662, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.24877497173011684, |
|
"grad_norm": 0.3887243866920471, |
|
"learning_rate": 4.367842363059383e-05, |
|
"loss": 0.6608, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.24990576705616283, |
|
"grad_norm": 0.343573659658432, |
|
"learning_rate": 4.3617894107800275e-05, |
|
"loss": 0.7364, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.25103656238220884, |
|
"grad_norm": 0.3381284773349762, |
|
"learning_rate": 4.355711854751037e-05, |
|
"loss": 0.6939, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2521673577082548, |
|
"grad_norm": 0.428345650434494, |
|
"learning_rate": 4.3496097752879764e-05, |
|
"loss": 0.7322, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.2532981530343008, |
|
"grad_norm": 0.3029363453388214, |
|
"learning_rate": 4.3434832530304906e-05, |
|
"loss": 0.6434, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2544289483603468, |
|
"grad_norm": 0.32285043597221375, |
|
"learning_rate": 4.337332368941237e-05, |
|
"loss": 0.686, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.2555597436863928, |
|
"grad_norm": 0.2844852805137634, |
|
"learning_rate": 4.331157204304819e-05, |
|
"loss": 0.6786, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.25669053901243877, |
|
"grad_norm": 0.38639211654663086, |
|
"learning_rate": 4.324957840726708e-05, |
|
"loss": 0.669, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.25782133433848475, |
|
"grad_norm": 0.29250484704971313, |
|
"learning_rate": 4.3187343601321696e-05, |
|
"loss": 0.684, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.25895212966453074, |
|
"grad_norm": 0.3040000796318054, |
|
"learning_rate": 4.312486844765175e-05, |
|
"loss": 0.6721, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.2600829249905767, |
|
"grad_norm": 0.3095468580722809, |
|
"learning_rate": 4.3062153771873214e-05, |
|
"loss": 0.8026, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2612137203166227, |
|
"grad_norm": 0.3532247543334961, |
|
"learning_rate": 4.299920040276735e-05, |
|
"loss": 0.7338, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.2623445156426687, |
|
"grad_norm": 0.3691394627094269, |
|
"learning_rate": 4.2936009172269766e-05, |
|
"loss": 0.6489, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2634753109687147, |
|
"grad_norm": 0.3503078520298004, |
|
"learning_rate": 4.287258091545946e-05, |
|
"loss": 0.6705, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.26460610629476067, |
|
"grad_norm": 0.31756189465522766, |
|
"learning_rate": 4.280891647054775e-05, |
|
"loss": 0.6642, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.26573690162080665, |
|
"grad_norm": 0.27942630648612976, |
|
"learning_rate": 4.274501667886718e-05, |
|
"loss": 0.7139, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.26686769694685264, |
|
"grad_norm": 0.35604235529899597, |
|
"learning_rate": 4.268088238486048e-05, |
|
"loss": 0.8335, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2679984922728986, |
|
"grad_norm": 0.3140622675418854, |
|
"learning_rate": 4.261651443606931e-05, |
|
"loss": 0.8127, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.2691292875989446, |
|
"grad_norm": 0.327470988035202, |
|
"learning_rate": 4.255191368312311e-05, |
|
"loss": 0.7311, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.2702600829249906, |
|
"grad_norm": 0.3089313805103302, |
|
"learning_rate": 4.2487080979727876e-05, |
|
"loss": 0.733, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.2713908782510366, |
|
"grad_norm": 0.3237866163253784, |
|
"learning_rate": 4.242201718265483e-05, |
|
"loss": 0.6754, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.27252167357708257, |
|
"grad_norm": 0.3597028851509094, |
|
"learning_rate": 4.235672315172912e-05, |
|
"loss": 0.741, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.27365246890312855, |
|
"grad_norm": 0.30509960651397705, |
|
"learning_rate": 4.229119974981848e-05, |
|
"loss": 0.7098, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.27478326422917454, |
|
"grad_norm": 0.37183189392089844, |
|
"learning_rate": 4.222544784282178e-05, |
|
"loss": 0.7037, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.2759140595552205, |
|
"grad_norm": 0.35368862748146057, |
|
"learning_rate": 4.2159468299657645e-05, |
|
"loss": 0.654, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2770448548812665, |
|
"grad_norm": 0.3120376765727997, |
|
"learning_rate": 4.209326199225291e-05, |
|
"loss": 0.6845, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.2781756502073125, |
|
"grad_norm": 0.3322497308254242, |
|
"learning_rate": 4.202682979553112e-05, |
|
"loss": 0.738, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2793064455333585, |
|
"grad_norm": 0.39859551191329956, |
|
"learning_rate": 4.1960172587401007e-05, |
|
"loss": 0.7208, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.28043724085940447, |
|
"grad_norm": 0.304196298122406, |
|
"learning_rate": 4.1893291248744794e-05, |
|
"loss": 0.6701, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.28156803618545045, |
|
"grad_norm": 0.30052655935287476, |
|
"learning_rate": 4.1826186663406685e-05, |
|
"loss": 0.7255, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.28269883151149644, |
|
"grad_norm": 0.3247777223587036, |
|
"learning_rate": 4.1758859718181054e-05, |
|
"loss": 0.7067, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2838296268375424, |
|
"grad_norm": 0.39652687311172485, |
|
"learning_rate": 4.169131130280081e-05, |
|
"loss": 0.8056, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.2849604221635884, |
|
"grad_norm": 0.299211710691452, |
|
"learning_rate": 4.162354230992562e-05, |
|
"loss": 0.7158, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2860912174896344, |
|
"grad_norm": 0.34312811493873596, |
|
"learning_rate": 4.155555363513009e-05, |
|
"loss": 0.6555, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.2872220128156804, |
|
"grad_norm": 0.34061411023139954, |
|
"learning_rate": 4.148734617689196e-05, |
|
"loss": 0.6973, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.28835280814172637, |
|
"grad_norm": 0.32622766494750977, |
|
"learning_rate": 4.1418920836580214e-05, |
|
"loss": 0.7034, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.28948360346777235, |
|
"grad_norm": 0.31413719058036804, |
|
"learning_rate": 4.135027851844316e-05, |
|
"loss": 0.6874, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.29061439879381834, |
|
"grad_norm": 0.3852449357509613, |
|
"learning_rate": 4.1281420129596504e-05, |
|
"loss": 0.6937, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.2917451941198643, |
|
"grad_norm": 0.25905337929725647, |
|
"learning_rate": 4.121234658001135e-05, |
|
"loss": 0.7273, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2928759894459103, |
|
"grad_norm": 0.33746325969696045, |
|
"learning_rate": 4.114305878250218e-05, |
|
"loss": 0.6815, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.2940067847719563, |
|
"grad_norm": 0.36523139476776123, |
|
"learning_rate": 4.1073557652714755e-05, |
|
"loss": 0.6763, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2951375800980023, |
|
"grad_norm": 0.4286907911300659, |
|
"learning_rate": 4.100384410911409e-05, |
|
"loss": 0.7807, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.29626837542404827, |
|
"grad_norm": 0.27938035130500793, |
|
"learning_rate": 4.0933919072972224e-05, |
|
"loss": 0.6515, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.29739917075009425, |
|
"grad_norm": 0.28958678245544434, |
|
"learning_rate": 4.086378346835614e-05, |
|
"loss": 0.6303, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.29852996607614024, |
|
"grad_norm": 0.31973332166671753, |
|
"learning_rate": 4.0793438222115477e-05, |
|
"loss": 0.733, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2996607614021862, |
|
"grad_norm": 0.302673876285553, |
|
"learning_rate": 4.072288426387032e-05, |
|
"loss": 0.6551, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.3007915567282322, |
|
"grad_norm": 0.3454115092754364, |
|
"learning_rate": 4.065212252599889e-05, |
|
"loss": 0.6847, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3019223520542782, |
|
"grad_norm": 0.32197806239128113, |
|
"learning_rate": 4.0581153943625266e-05, |
|
"loss": 0.7283, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.3030531473803242, |
|
"grad_norm": 0.2939291000366211, |
|
"learning_rate": 4.050997945460699e-05, |
|
"loss": 0.6519, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.30418394270637017, |
|
"grad_norm": 0.34127116203308105, |
|
"learning_rate": 4.043859999952266e-05, |
|
"loss": 0.7041, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.30531473803241616, |
|
"grad_norm": 0.3606717586517334, |
|
"learning_rate": 4.0367016521659564e-05, |
|
"loss": 0.6745, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.30644553335846214, |
|
"grad_norm": 0.3977923095226288, |
|
"learning_rate": 4.029522996700112e-05, |
|
"loss": 0.6635, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.3075763286845081, |
|
"grad_norm": 0.27561894059181213, |
|
"learning_rate": 4.0223241284214496e-05, |
|
"loss": 0.6661, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3087071240105541, |
|
"grad_norm": 0.31549111008644104, |
|
"learning_rate": 4.015105142463794e-05, |
|
"loss": 0.6659, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.3098379193366001, |
|
"grad_norm": 0.32156458497047424, |
|
"learning_rate": 4.0078661342268314e-05, |
|
"loss": 0.6656, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3109687146626461, |
|
"grad_norm": 0.33597517013549805, |
|
"learning_rate": 4.000607199374843e-05, |
|
"loss": 0.6291, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.31209950998869207, |
|
"grad_norm": 0.2836547791957855, |
|
"learning_rate": 3.9933284338354415e-05, |
|
"loss": 0.6936, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.31323030531473806, |
|
"grad_norm": 0.3355998396873474, |
|
"learning_rate": 3.986029933798308e-05, |
|
"loss": 0.6578, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.31436110064078404, |
|
"grad_norm": 0.3303869962692261, |
|
"learning_rate": 3.9787117957139116e-05, |
|
"loss": 0.6859, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.31549189596683, |
|
"grad_norm": 0.3788108825683594, |
|
"learning_rate": 3.9713741162922455e-05, |
|
"loss": 0.6997, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.316622691292876, |
|
"grad_norm": 0.33582428097724915, |
|
"learning_rate": 3.964016992501541e-05, |
|
"loss": 0.689, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.317753486618922, |
|
"grad_norm": 0.35693231225013733, |
|
"learning_rate": 3.956640521566989e-05, |
|
"loss": 0.676, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.318884281944968, |
|
"grad_norm": 0.3589436709880829, |
|
"learning_rate": 3.949244800969456e-05, |
|
"loss": 0.7545, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.32001507727101397, |
|
"grad_norm": 0.3047327399253845, |
|
"learning_rate": 3.941829928444194e-05, |
|
"loss": 0.6391, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.32114587259705996, |
|
"grad_norm": 0.292953759431839, |
|
"learning_rate": 3.9343960019795525e-05, |
|
"loss": 0.6886, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.32227666792310594, |
|
"grad_norm": 0.3644665777683258, |
|
"learning_rate": 3.926943119815675e-05, |
|
"loss": 0.7283, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.3234074632491519, |
|
"grad_norm": 0.3624630570411682, |
|
"learning_rate": 3.919471380443212e-05, |
|
"loss": 0.6566, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3245382585751979, |
|
"grad_norm": 0.48623165488243103, |
|
"learning_rate": 3.911980882602011e-05, |
|
"loss": 0.8311, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.3256690539012439, |
|
"grad_norm": 0.3244991600513458, |
|
"learning_rate": 3.904471725279818e-05, |
|
"loss": 0.7087, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3267998492272899, |
|
"grad_norm": 0.3399847149848938, |
|
"learning_rate": 3.8969440077109634e-05, |
|
"loss": 0.6146, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.32793064455333587, |
|
"grad_norm": 0.3181338310241699, |
|
"learning_rate": 3.889397829375052e-05, |
|
"loss": 0.7608, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.32906143987938186, |
|
"grad_norm": 0.5128947496414185, |
|
"learning_rate": 3.881833289995654e-05, |
|
"loss": 0.7225, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.33019223520542784, |
|
"grad_norm": 0.3176124095916748, |
|
"learning_rate": 3.874250489538981e-05, |
|
"loss": 0.7225, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.33132303053147383, |
|
"grad_norm": 0.3748844563961029, |
|
"learning_rate": 3.866649528212563e-05, |
|
"loss": 0.7188, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.3324538258575198, |
|
"grad_norm": 0.974604606628418, |
|
"learning_rate": 3.859030506463932e-05, |
|
"loss": 0.7509, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3335846211835658, |
|
"grad_norm": 0.3221200704574585, |
|
"learning_rate": 3.851393524979291e-05, |
|
"loss": 0.6781, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.3347154165096118, |
|
"grad_norm": 0.33971571922302246, |
|
"learning_rate": 3.84373868468218e-05, |
|
"loss": 0.6711, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.33584621183565777, |
|
"grad_norm": 0.3183509409427643, |
|
"learning_rate": 3.836066086732145e-05, |
|
"loss": 0.6808, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.33697700716170376, |
|
"grad_norm": 0.2814907729625702, |
|
"learning_rate": 3.828375832523407e-05, |
|
"loss": 0.7171, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.33810780248774974, |
|
"grad_norm": 0.2738807797431946, |
|
"learning_rate": 3.820668023683507e-05, |
|
"loss": 0.7934, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.33923859781379573, |
|
"grad_norm": 0.3376060128211975, |
|
"learning_rate": 3.812942762071981e-05, |
|
"loss": 0.6045, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3403693931398417, |
|
"grad_norm": 0.3851218819618225, |
|
"learning_rate": 3.8052001497790005e-05, |
|
"loss": 0.7214, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.3415001884658877, |
|
"grad_norm": 0.2853710949420929, |
|
"learning_rate": 3.7974402891240294e-05, |
|
"loss": 0.7312, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3426309837919337, |
|
"grad_norm": 0.34209561347961426, |
|
"learning_rate": 3.78966328265447e-05, |
|
"loss": 0.66, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.34376177911797967, |
|
"grad_norm": 0.2967279851436615, |
|
"learning_rate": 3.7818692331443093e-05, |
|
"loss": 0.7354, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.34489257444402566, |
|
"grad_norm": 0.31301623582839966, |
|
"learning_rate": 3.7740582435927614e-05, |
|
"loss": 0.6634, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.34602336977007164, |
|
"grad_norm": 0.287758469581604, |
|
"learning_rate": 3.766230417222901e-05, |
|
"loss": 0.7688, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.34715416509611763, |
|
"grad_norm": 0.34585824608802795, |
|
"learning_rate": 3.7583858574803046e-05, |
|
"loss": 0.6542, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.3482849604221636, |
|
"grad_norm": 0.32640525698661804, |
|
"learning_rate": 3.7505246680316853e-05, |
|
"loss": 0.71, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3494157557482096, |
|
"grad_norm": 0.2845459580421448, |
|
"learning_rate": 3.742646952763515e-05, |
|
"loss": 0.6233, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.3505465510742556, |
|
"grad_norm": 0.30241382122039795, |
|
"learning_rate": 3.7347528157806586e-05, |
|
"loss": 0.6739, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.35167734640030157, |
|
"grad_norm": 0.35119229555130005, |
|
"learning_rate": 3.726842361404996e-05, |
|
"loss": 0.72, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.35280814172634756, |
|
"grad_norm": 0.3631749153137207, |
|
"learning_rate": 3.718915694174042e-05, |
|
"loss": 0.6596, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.35393893705239354, |
|
"grad_norm": 0.258357971906662, |
|
"learning_rate": 3.7109729188395666e-05, |
|
"loss": 0.7037, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.35506973237843953, |
|
"grad_norm": 0.2907659113407135, |
|
"learning_rate": 3.703014140366209e-05, |
|
"loss": 0.6494, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3562005277044855, |
|
"grad_norm": 0.309076189994812, |
|
"learning_rate": 3.695039463930093e-05, |
|
"loss": 0.6668, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.3573313230305315, |
|
"grad_norm": 0.33287695050239563, |
|
"learning_rate": 3.687048994917437e-05, |
|
"loss": 0.7215, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3584621183565775, |
|
"grad_norm": 0.2877466082572937, |
|
"learning_rate": 3.679042838923157e-05, |
|
"loss": 0.6261, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.35959291368262347, |
|
"grad_norm": 0.26237618923187256, |
|
"learning_rate": 3.671021101749476e-05, |
|
"loss": 0.6966, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.36072370900866946, |
|
"grad_norm": 0.34308937191963196, |
|
"learning_rate": 3.6629838894045224e-05, |
|
"loss": 0.662, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.36185450433471544, |
|
"grad_norm": 0.337215393781662, |
|
"learning_rate": 3.654931308100934e-05, |
|
"loss": 0.7402, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.36298529966076143, |
|
"grad_norm": 0.4486747980117798, |
|
"learning_rate": 3.646863464254447e-05, |
|
"loss": 0.7111, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.3641160949868074, |
|
"grad_norm": 0.37535396218299866, |
|
"learning_rate": 3.638780464482497e-05, |
|
"loss": 0.7322, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3652468903128534, |
|
"grad_norm": 0.4385060966014862, |
|
"learning_rate": 3.630682415602804e-05, |
|
"loss": 0.6517, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.3663776856388994, |
|
"grad_norm": 0.29366278648376465, |
|
"learning_rate": 3.6225694246319666e-05, |
|
"loss": 0.636, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.36750848096494537, |
|
"grad_norm": 0.3330417573451996, |
|
"learning_rate": 3.614441598784042e-05, |
|
"loss": 0.727, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.36863927629099136, |
|
"grad_norm": 0.3851955831050873, |
|
"learning_rate": 3.6062990454691334e-05, |
|
"loss": 0.7019, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.36977007161703734, |
|
"grad_norm": 0.4180035889148712, |
|
"learning_rate": 3.598141872291969e-05, |
|
"loss": 0.7318, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.37090086694308333, |
|
"grad_norm": 0.28281131386756897, |
|
"learning_rate": 3.589970187050481e-05, |
|
"loss": 0.7143, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.3720316622691293, |
|
"grad_norm": 0.35991495847702026, |
|
"learning_rate": 3.581784097734376e-05, |
|
"loss": 0.7144, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.3731624575951753, |
|
"grad_norm": 0.3908022940158844, |
|
"learning_rate": 3.5735837125237174e-05, |
|
"loss": 0.6779, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3742932529212213, |
|
"grad_norm": 0.3579081594944, |
|
"learning_rate": 3.565369139787488e-05, |
|
"loss": 0.6774, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.3754240482472672, |
|
"grad_norm": 0.37918293476104736, |
|
"learning_rate": 3.5571404880821594e-05, |
|
"loss": 0.7551, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3765548435733132, |
|
"grad_norm": 0.372585654258728, |
|
"learning_rate": 3.548897866150259e-05, |
|
"loss": 0.7081, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.3776856388993592, |
|
"grad_norm": 0.38565728068351746, |
|
"learning_rate": 3.540641382918934e-05, |
|
"loss": 0.6547, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.3788164342254052, |
|
"grad_norm": 0.3910474479198456, |
|
"learning_rate": 3.532371147498507e-05, |
|
"loss": 0.6847, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.37994722955145116, |
|
"grad_norm": 0.3123336732387543, |
|
"learning_rate": 3.524087269181039e-05, |
|
"loss": 0.6692, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.38107802487749715, |
|
"grad_norm": 0.3222855031490326, |
|
"learning_rate": 3.515789857438885e-05, |
|
"loss": 0.7101, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.38220882020354313, |
|
"grad_norm": 0.3308558762073517, |
|
"learning_rate": 3.507479021923241e-05, |
|
"loss": 0.7193, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3833396155295891, |
|
"grad_norm": 0.36425960063934326, |
|
"learning_rate": 3.4991548724627054e-05, |
|
"loss": 0.6698, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.3844704108556351, |
|
"grad_norm": 0.3454649746417999, |
|
"learning_rate": 3.490817519061819e-05, |
|
"loss": 0.6996, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3856012061816811, |
|
"grad_norm": 0.39363983273506165, |
|
"learning_rate": 3.4824670718996114e-05, |
|
"loss": 0.7256, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.3867320015077271, |
|
"grad_norm": 0.29884523153305054, |
|
"learning_rate": 3.4741036413281534e-05, |
|
"loss": 0.706, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.38786279683377306, |
|
"grad_norm": 0.6705525517463684, |
|
"learning_rate": 3.4657273378710874e-05, |
|
"loss": 0.7508, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.38899359215981905, |
|
"grad_norm": 0.31176072359085083, |
|
"learning_rate": 3.4573382722221776e-05, |
|
"loss": 0.6792, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.39012438748586503, |
|
"grad_norm": 0.37332355976104736, |
|
"learning_rate": 3.448936555243837e-05, |
|
"loss": 0.6805, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.391255182811911, |
|
"grad_norm": 0.4867086112499237, |
|
"learning_rate": 3.440522297965671e-05, |
|
"loss": 0.6306, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.392385978137957, |
|
"grad_norm": 0.32693204283714294, |
|
"learning_rate": 3.4320956115830046e-05, |
|
"loss": 0.719, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.393516773464003, |
|
"grad_norm": 0.2943226993083954, |
|
"learning_rate": 3.4236566074554157e-05, |
|
"loss": 0.7405, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.394647568790049, |
|
"grad_norm": 0.3139977753162384, |
|
"learning_rate": 3.415205397105261e-05, |
|
"loss": 0.7152, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.39577836411609496, |
|
"grad_norm": 0.33439525961875916, |
|
"learning_rate": 3.406742092216206e-05, |
|
"loss": 0.7017, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.39690915944214095, |
|
"grad_norm": 0.3081996440887451, |
|
"learning_rate": 3.398266804631744e-05, |
|
"loss": 0.6647, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.39803995476818693, |
|
"grad_norm": 0.3134262263774872, |
|
"learning_rate": 3.389779646353724e-05, |
|
"loss": 0.7313, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.3991707500942329, |
|
"grad_norm": 0.3375689685344696, |
|
"learning_rate": 3.381280729540866e-05, |
|
"loss": 0.6829, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.4003015454202789, |
|
"grad_norm": 0.38416242599487305, |
|
"learning_rate": 3.37277016650728e-05, |
|
"loss": 0.7534, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4014323407463249, |
|
"grad_norm": 0.3711940050125122, |
|
"learning_rate": 3.364248069720982e-05, |
|
"loss": 0.6618, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.4025631360723709, |
|
"grad_norm": 0.338777631521225, |
|
"learning_rate": 3.3557145518024094e-05, |
|
"loss": 0.6692, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.40369393139841686, |
|
"grad_norm": 0.2786078155040741, |
|
"learning_rate": 3.3471697255229294e-05, |
|
"loss": 0.7504, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.40482472672446285, |
|
"grad_norm": 0.33004823327064514, |
|
"learning_rate": 3.338613703803351e-05, |
|
"loss": 0.7056, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.40595552205050883, |
|
"grad_norm": 0.3257131278514862, |
|
"learning_rate": 3.330046599712432e-05, |
|
"loss": 0.7102, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.4070863173765548, |
|
"grad_norm": 0.3138837516307831, |
|
"learning_rate": 3.321468526465386e-05, |
|
"loss": 0.6638, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4082171127026008, |
|
"grad_norm": 0.3327350914478302, |
|
"learning_rate": 3.312879597422383e-05, |
|
"loss": 0.7355, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.4093479080286468, |
|
"grad_norm": 0.2875402569770813, |
|
"learning_rate": 3.304279926087055e-05, |
|
"loss": 0.7113, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4104787033546928, |
|
"grad_norm": 0.5153040289878845, |
|
"learning_rate": 3.295669626104995e-05, |
|
"loss": 0.7401, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.41160949868073876, |
|
"grad_norm": 0.3518928587436676, |
|
"learning_rate": 3.287048811262254e-05, |
|
"loss": 0.6864, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.41274029400678475, |
|
"grad_norm": 0.3488028049468994, |
|
"learning_rate": 3.2784175954838376e-05, |
|
"loss": 0.6401, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.41387108933283073, |
|
"grad_norm": 0.37360239028930664, |
|
"learning_rate": 3.2697760928322016e-05, |
|
"loss": 0.7004, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4150018846588767, |
|
"grad_norm": 0.3383936285972595, |
|
"learning_rate": 3.261124417505745e-05, |
|
"loss": 0.6563, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.4161326799849227, |
|
"grad_norm": 0.36131277680397034, |
|
"learning_rate": 3.252462683837297e-05, |
|
"loss": 0.6737, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4172634753109687, |
|
"grad_norm": 0.3024144768714905, |
|
"learning_rate": 3.2437910062926116e-05, |
|
"loss": 0.6466, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.4183942706370147, |
|
"grad_norm": 0.6971142888069153, |
|
"learning_rate": 3.235109499468849e-05, |
|
"loss": 0.6927, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.41952506596306066, |
|
"grad_norm": 0.3525508642196655, |
|
"learning_rate": 3.226418278093069e-05, |
|
"loss": 0.7009, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.42065586128910665, |
|
"grad_norm": 0.3152811527252197, |
|
"learning_rate": 3.2177174570207066e-05, |
|
"loss": 0.7065, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.42178665661515263, |
|
"grad_norm": 0.2631702721118927, |
|
"learning_rate": 3.2090071512340584e-05, |
|
"loss": 0.6723, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.4229174519411986, |
|
"grad_norm": 0.35791584849357605, |
|
"learning_rate": 3.200287475840764e-05, |
|
"loss": 0.6927, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4240482472672446, |
|
"grad_norm": 0.30266880989074707, |
|
"learning_rate": 3.191558546072283e-05, |
|
"loss": 0.6395, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.4251790425932906, |
|
"grad_norm": 0.27712151408195496, |
|
"learning_rate": 3.1828204772823705e-05, |
|
"loss": 0.6246, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4263098379193366, |
|
"grad_norm": 0.4084063172340393, |
|
"learning_rate": 3.174073384945556e-05, |
|
"loss": 0.6993, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.42744063324538256, |
|
"grad_norm": 0.3760344088077545, |
|
"learning_rate": 3.1653173846556186e-05, |
|
"loss": 0.6413, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.41881611943244934, |
|
"learning_rate": 3.156552592124054e-05, |
|
"loss": 0.7295, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.42970222389747453, |
|
"grad_norm": 0.3386279046535492, |
|
"learning_rate": 3.147779123178548e-05, |
|
"loss": 0.7482, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4308330192235205, |
|
"grad_norm": 0.4601892828941345, |
|
"learning_rate": 3.138997093761449e-05, |
|
"loss": 0.7499, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.4319638145495665, |
|
"grad_norm": 0.4254579246044159, |
|
"learning_rate": 3.1302066199282295e-05, |
|
"loss": 0.7148, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4330946098756125, |
|
"grad_norm": 0.3381584584712982, |
|
"learning_rate": 3.121407817845959e-05, |
|
"loss": 0.6117, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.4342254052016585, |
|
"grad_norm": 0.3117331266403198, |
|
"learning_rate": 3.112600803791764e-05, |
|
"loss": 0.6246, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.43535620052770446, |
|
"grad_norm": 0.4453639090061188, |
|
"learning_rate": 3.103785694151293e-05, |
|
"loss": 0.754, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.43648699585375045, |
|
"grad_norm": 0.4143831729888916, |
|
"learning_rate": 3.094962605417179e-05, |
|
"loss": 0.7966, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.43761779117979643, |
|
"grad_norm": 0.2990778684616089, |
|
"learning_rate": 3.086131654187501e-05, |
|
"loss": 0.6519, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.4387485865058424, |
|
"grad_norm": 0.3955526649951935, |
|
"learning_rate": 3.077292957164238e-05, |
|
"loss": 0.7048, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4398793818318884, |
|
"grad_norm": 0.3522753119468689, |
|
"learning_rate": 3.068446631151736e-05, |
|
"loss": 0.7202, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.4410101771579344, |
|
"grad_norm": 0.3563268482685089, |
|
"learning_rate": 3.0595927930551524e-05, |
|
"loss": 0.7145, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4421409724839804, |
|
"grad_norm": 0.38255730271339417, |
|
"learning_rate": 3.0507315598789237e-05, |
|
"loss": 0.7158, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.44327176781002636, |
|
"grad_norm": 0.3502512276172638, |
|
"learning_rate": 3.0418630487252087e-05, |
|
"loss": 0.644, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.44440256313607235, |
|
"grad_norm": 0.36824584007263184, |
|
"learning_rate": 3.0329873767923477e-05, |
|
"loss": 0.7561, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.44553335846211833, |
|
"grad_norm": 0.32158178091049194, |
|
"learning_rate": 3.0241046613733114e-05, |
|
"loss": 0.6694, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4466641537881643, |
|
"grad_norm": 0.28382861614227295, |
|
"learning_rate": 3.01521501985415e-05, |
|
"loss": 0.6803, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4477949491142103, |
|
"grad_norm": 0.3525499999523163, |
|
"learning_rate": 3.0063185697124446e-05, |
|
"loss": 0.7263, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.4489257444402563, |
|
"grad_norm": 0.2863157093524933, |
|
"learning_rate": 2.9974154285157497e-05, |
|
"loss": 0.7232, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.4500565397663023, |
|
"grad_norm": 0.3138844668865204, |
|
"learning_rate": 2.9885057139200468e-05, |
|
"loss": 0.6912, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.45118733509234826, |
|
"grad_norm": 0.33406513929367065, |
|
"learning_rate": 2.979589543668182e-05, |
|
"loss": 0.684, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.45231813041839425, |
|
"grad_norm": 0.3506259620189667, |
|
"learning_rate": 2.970667035588317e-05, |
|
"loss": 0.7522, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.45344892574444023, |
|
"grad_norm": 0.37139952182769775, |
|
"learning_rate": 2.9617383075923665e-05, |
|
"loss": 0.6471, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.4545797210704862, |
|
"grad_norm": 0.295625239610672, |
|
"learning_rate": 2.952803477674441e-05, |
|
"loss": 0.7209, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4557105163965322, |
|
"grad_norm": 0.3062797486782074, |
|
"learning_rate": 2.9438626639092932e-05, |
|
"loss": 0.7059, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.4568413117225782, |
|
"grad_norm": 0.3885577917098999, |
|
"learning_rate": 2.9349159844507455e-05, |
|
"loss": 0.7319, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4579721070486242, |
|
"grad_norm": 0.365987628698349, |
|
"learning_rate": 2.9259635575301436e-05, |
|
"loss": 0.6858, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.45910290237467016, |
|
"grad_norm": 0.32557693123817444, |
|
"learning_rate": 2.9170055014547825e-05, |
|
"loss": 0.622, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.46023369770071615, |
|
"grad_norm": 0.31643807888031006, |
|
"learning_rate": 2.908041934606347e-05, |
|
"loss": 0.6794, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.46136449302676213, |
|
"grad_norm": 0.3457587957382202, |
|
"learning_rate": 2.89907297543935e-05, |
|
"loss": 0.7015, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4624952883528081, |
|
"grad_norm": 0.3037043809890747, |
|
"learning_rate": 2.8900987424795606e-05, |
|
"loss": 0.6773, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.4636260836788541, |
|
"grad_norm": 0.3223413825035095, |
|
"learning_rate": 2.8811193543224462e-05, |
|
"loss": 0.643, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4647568790049001, |
|
"grad_norm": 0.5646958947181702, |
|
"learning_rate": 2.8721349296315963e-05, |
|
"loss": 0.6942, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.4658876743309461, |
|
"grad_norm": 0.3289279043674469, |
|
"learning_rate": 2.8631455871371614e-05, |
|
"loss": 0.6679, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.46701846965699206, |
|
"grad_norm": 0.4061075448989868, |
|
"learning_rate": 2.8541514456342815e-05, |
|
"loss": 0.7992, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.46814926498303805, |
|
"grad_norm": 0.37772536277770996, |
|
"learning_rate": 2.8451526239815134e-05, |
|
"loss": 0.6817, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.46928006030908404, |
|
"grad_norm": 0.31532320380210876, |
|
"learning_rate": 2.8361492410992662e-05, |
|
"loss": 0.6771, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.47041085563513, |
|
"grad_norm": 0.352198988199234, |
|
"learning_rate": 2.8271414159682224e-05, |
|
"loss": 0.6515, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.471541650961176, |
|
"grad_norm": 0.39696019887924194, |
|
"learning_rate": 2.8181292676277738e-05, |
|
"loss": 0.7276, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.472672446287222, |
|
"grad_norm": 0.4117799997329712, |
|
"learning_rate": 2.809112915174439e-05, |
|
"loss": 0.6333, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.473803241613268, |
|
"grad_norm": 0.36984243988990784, |
|
"learning_rate": 2.8000924777602965e-05, |
|
"loss": 0.7202, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.47493403693931396, |
|
"grad_norm": 0.3305279612541199, |
|
"learning_rate": 2.79106807459141e-05, |
|
"loss": 0.6418, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.47606483226535995, |
|
"grad_norm": 0.40777119994163513, |
|
"learning_rate": 2.7820398249262474e-05, |
|
"loss": 0.7948, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.47719562759140594, |
|
"grad_norm": 0.3309784233570099, |
|
"learning_rate": 2.7730078480741122e-05, |
|
"loss": 0.6319, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.4783264229174519, |
|
"grad_norm": 0.3214864134788513, |
|
"learning_rate": 2.7639722633935605e-05, |
|
"loss": 0.7008, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.4794572182434979, |
|
"grad_norm": 0.3192216157913208, |
|
"learning_rate": 2.754933190290826e-05, |
|
"loss": 0.6489, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4805880135695439, |
|
"grad_norm": 0.31766754388809204, |
|
"learning_rate": 2.745890748218245e-05, |
|
"loss": 0.6728, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.4817188088955899, |
|
"grad_norm": 0.32393330335617065, |
|
"learning_rate": 2.736845056672671e-05, |
|
"loss": 0.6808, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.48284960422163586, |
|
"grad_norm": 0.3087853193283081, |
|
"learning_rate": 2.727796235193904e-05, |
|
"loss": 0.7033, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.48398039954768185, |
|
"grad_norm": 0.3951945900917053, |
|
"learning_rate": 2.7187444033631044e-05, |
|
"loss": 0.6537, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.48511119487372784, |
|
"grad_norm": 0.31923210620880127, |
|
"learning_rate": 2.709689680801213e-05, |
|
"loss": 0.6795, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.4862419901997738, |
|
"grad_norm": 0.4405725300312042, |
|
"learning_rate": 2.7006321871673752e-05, |
|
"loss": 0.7204, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4873727855258198, |
|
"grad_norm": 0.36784470081329346, |
|
"learning_rate": 2.6915720421573538e-05, |
|
"loss": 0.698, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.4885035808518658, |
|
"grad_norm": 0.38032978773117065, |
|
"learning_rate": 2.682509365501953e-05, |
|
"loss": 0.74, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4896343761779118, |
|
"grad_norm": 0.36600053310394287, |
|
"learning_rate": 2.6734442769654273e-05, |
|
"loss": 0.6317, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.49076517150395776, |
|
"grad_norm": 0.39383023977279663, |
|
"learning_rate": 2.6643768963439113e-05, |
|
"loss": 0.6807, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.49189596683000375, |
|
"grad_norm": 0.37128937244415283, |
|
"learning_rate": 2.6553073434638248e-05, |
|
"loss": 0.7359, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.49302676215604974, |
|
"grad_norm": 0.32236599922180176, |
|
"learning_rate": 2.6462357381802966e-05, |
|
"loss": 0.6154, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4941575574820957, |
|
"grad_norm": 0.3519161343574524, |
|
"learning_rate": 2.6371622003755768e-05, |
|
"loss": 0.7197, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.4952883528081417, |
|
"grad_norm": 0.38883543014526367, |
|
"learning_rate": 2.628086849957455e-05, |
|
"loss": 0.7554, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4964191481341877, |
|
"grad_norm": 0.34098756313323975, |
|
"learning_rate": 2.6190098068576763e-05, |
|
"loss": 0.7133, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.4975499434602337, |
|
"grad_norm": 0.36088091135025024, |
|
"learning_rate": 2.6099311910303502e-05, |
|
"loss": 0.6746, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.49868073878627966, |
|
"grad_norm": 0.38198113441467285, |
|
"learning_rate": 2.6008511224503728e-05, |
|
"loss": 0.6848, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.49981153411232565, |
|
"grad_norm": 0.3310260474681854, |
|
"learning_rate": 2.59176972111184e-05, |
|
"loss": 0.6657, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5009423294383717, |
|
"grad_norm": 0.3948574364185333, |
|
"learning_rate": 2.582687107026458e-05, |
|
"loss": 0.6704, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.5020731247644177, |
|
"grad_norm": 0.31727057695388794, |
|
"learning_rate": 2.5736034002219594e-05, |
|
"loss": 0.6454, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5032039200904637, |
|
"grad_norm": 0.33022522926330566, |
|
"learning_rate": 2.564518720740519e-05, |
|
"loss": 0.6928, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.5043347154165096, |
|
"grad_norm": 0.5317490696907043, |
|
"learning_rate": 2.555433188637164e-05, |
|
"loss": 0.823, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5054655107425556, |
|
"grad_norm": 0.39583778381347656, |
|
"learning_rate": 2.54634692397819e-05, |
|
"loss": 0.7081, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.5065963060686016, |
|
"grad_norm": 0.36913448572158813, |
|
"learning_rate": 2.5372600468395723e-05, |
|
"loss": 0.6707, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5077271013946476, |
|
"grad_norm": 0.33116042613983154, |
|
"learning_rate": 2.528172677305382e-05, |
|
"loss": 0.7008, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.5088578967206936, |
|
"grad_norm": 0.3586164116859436, |
|
"learning_rate": 2.5190849354661955e-05, |
|
"loss": 0.6895, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5099886920467396, |
|
"grad_norm": 0.44672051072120667, |
|
"learning_rate": 2.50999694141751e-05, |
|
"loss": 0.7304, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.5111194873727856, |
|
"grad_norm": 0.4558676779270172, |
|
"learning_rate": 2.5009088152581565e-05, |
|
"loss": 0.7073, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5122502826988315, |
|
"grad_norm": 0.31825345754623413, |
|
"learning_rate": 2.4918206770887102e-05, |
|
"loss": 0.7007, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.5133810780248775, |
|
"grad_norm": 0.41337841749191284, |
|
"learning_rate": 2.482732647009907e-05, |
|
"loss": 0.7995, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5145118733509235, |
|
"grad_norm": 0.3080434799194336, |
|
"learning_rate": 2.473644845121051e-05, |
|
"loss": 0.7367, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5156426686769695, |
|
"grad_norm": 0.35662513971328735, |
|
"learning_rate": 2.4645573915184354e-05, |
|
"loss": 0.6669, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5167734640030155, |
|
"grad_norm": 0.41301533579826355, |
|
"learning_rate": 2.4554704062937467e-05, |
|
"loss": 0.6953, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.5179042593290615, |
|
"grad_norm": 0.42937204241752625, |
|
"learning_rate": 2.4463840095324834e-05, |
|
"loss": 0.6625, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5190350546551075, |
|
"grad_norm": 0.32970476150512695, |
|
"learning_rate": 2.437298321312369e-05, |
|
"loss": 0.6823, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.5201658499811534, |
|
"grad_norm": 0.36597487330436707, |
|
"learning_rate": 2.428213461701759e-05, |
|
"loss": 0.6233, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5212966453071994, |
|
"grad_norm": 0.31977376341819763, |
|
"learning_rate": 2.4191295507580648e-05, |
|
"loss": 0.6732, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.5224274406332454, |
|
"grad_norm": 0.3720978796482086, |
|
"learning_rate": 2.410046708526155e-05, |
|
"loss": 0.7449, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5235582359592914, |
|
"grad_norm": 0.4317164421081543, |
|
"learning_rate": 2.4009650550367804e-05, |
|
"loss": 0.6818, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.5246890312853374, |
|
"grad_norm": 0.358803391456604, |
|
"learning_rate": 2.3918847103049792e-05, |
|
"loss": 0.7051, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5258198266113834, |
|
"grad_norm": 0.37477102875709534, |
|
"learning_rate": 2.3828057943284932e-05, |
|
"loss": 0.6474, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5269506219374294, |
|
"grad_norm": 0.3854588568210602, |
|
"learning_rate": 2.373728427086188e-05, |
|
"loss": 0.6464, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5280814172634754, |
|
"grad_norm": 0.29804185032844543, |
|
"learning_rate": 2.3646527285364565e-05, |
|
"loss": 0.6824, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.5292122125895213, |
|
"grad_norm": 0.3477884829044342, |
|
"learning_rate": 2.3555788186156442e-05, |
|
"loss": 0.7401, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5303430079155673, |
|
"grad_norm": 0.3655013144016266, |
|
"learning_rate": 2.346506817236457e-05, |
|
"loss": 0.6915, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.5314738032416133, |
|
"grad_norm": 0.31074225902557373, |
|
"learning_rate": 2.3374368442863814e-05, |
|
"loss": 0.7442, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5326045985676593, |
|
"grad_norm": 0.38817688822746277, |
|
"learning_rate": 2.3283690196260967e-05, |
|
"loss": 0.7317, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.5337353938937053, |
|
"grad_norm": 0.2897610366344452, |
|
"learning_rate": 2.3193034630878907e-05, |
|
"loss": 0.6206, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5348661892197513, |
|
"grad_norm": 0.38513097167015076, |
|
"learning_rate": 2.310240294474081e-05, |
|
"loss": 0.7794, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.5359969845457973, |
|
"grad_norm": 0.3019099533557892, |
|
"learning_rate": 2.3011796335554258e-05, |
|
"loss": 0.6191, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5371277798718432, |
|
"grad_norm": 0.29924795031547546, |
|
"learning_rate": 2.2921216000695465e-05, |
|
"loss": 0.6881, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5382585751978892, |
|
"grad_norm": 0.37753212451934814, |
|
"learning_rate": 2.2830663137193398e-05, |
|
"loss": 0.6226, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5393893705239352, |
|
"grad_norm": 0.3259458839893341, |
|
"learning_rate": 2.274013894171401e-05, |
|
"loss": 0.7258, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.5405201658499812, |
|
"grad_norm": 0.3186294436454773, |
|
"learning_rate": 2.2649644610544392e-05, |
|
"loss": 0.7074, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5416509611760272, |
|
"grad_norm": 0.328595370054245, |
|
"learning_rate": 2.255918133957697e-05, |
|
"loss": 0.6656, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.5427817565020732, |
|
"grad_norm": 0.34288713335990906, |
|
"learning_rate": 2.2468750324293717e-05, |
|
"loss": 0.6913, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5439125518281192, |
|
"grad_norm": 0.34917885065078735, |
|
"learning_rate": 2.2378352759750333e-05, |
|
"loss": 0.6997, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.5450433471541651, |
|
"grad_norm": 0.38892245292663574, |
|
"learning_rate": 2.2287989840560485e-05, |
|
"loss": 0.6667, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5461741424802111, |
|
"grad_norm": 0.41548117995262146, |
|
"learning_rate": 2.219766276087996e-05, |
|
"loss": 0.648, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.5473049378062571, |
|
"grad_norm": 0.37720760703086853, |
|
"learning_rate": 2.2107372714390974e-05, |
|
"loss": 0.7646, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5484357331323031, |
|
"grad_norm": 0.32246890664100647, |
|
"learning_rate": 2.2017120894286287e-05, |
|
"loss": 0.6772, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5495665284583491, |
|
"grad_norm": 0.35085204243659973, |
|
"learning_rate": 2.1926908493253527e-05, |
|
"loss": 0.658, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5506973237843951, |
|
"grad_norm": 0.32103869318962097, |
|
"learning_rate": 2.1836736703459398e-05, |
|
"loss": 0.6576, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.551828119110441, |
|
"grad_norm": 0.30640605092048645, |
|
"learning_rate": 2.1746606716533907e-05, |
|
"loss": 0.7009, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.552958914436487, |
|
"grad_norm": 0.4351046681404114, |
|
"learning_rate": 2.1656519723554643e-05, |
|
"loss": 0.7124, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.554089709762533, |
|
"grad_norm": 0.3515176773071289, |
|
"learning_rate": 2.1566476915031013e-05, |
|
"loss": 0.7086, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.555220505088579, |
|
"grad_norm": 0.35644426941871643, |
|
"learning_rate": 2.1476479480888545e-05, |
|
"loss": 0.7245, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.556351300414625, |
|
"grad_norm": 0.49966442584991455, |
|
"learning_rate": 2.1386528610453104e-05, |
|
"loss": 0.7511, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.557482095740671, |
|
"grad_norm": 0.3358660340309143, |
|
"learning_rate": 2.129662549243523e-05, |
|
"loss": 0.6579, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.558612891066717, |
|
"grad_norm": 0.392120361328125, |
|
"learning_rate": 2.120677131491442e-05, |
|
"loss": 0.7838, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.559743686392763, |
|
"grad_norm": 0.3123244047164917, |
|
"learning_rate": 2.11169672653234e-05, |
|
"loss": 0.6543, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5608744817188089, |
|
"grad_norm": 0.3226960301399231, |
|
"learning_rate": 2.1027214530432465e-05, |
|
"loss": 0.6582, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5620052770448549, |
|
"grad_norm": 0.3497219681739807, |
|
"learning_rate": 2.0937514296333754e-05, |
|
"loss": 0.6815, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.5631360723709009, |
|
"grad_norm": 0.39245134592056274, |
|
"learning_rate": 2.0847867748425648e-05, |
|
"loss": 0.7226, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5642668676969469, |
|
"grad_norm": 0.3870549499988556, |
|
"learning_rate": 2.0758276071397012e-05, |
|
"loss": 0.7073, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.5653976630229929, |
|
"grad_norm": 0.40596914291381836, |
|
"learning_rate": 2.0668740449211605e-05, |
|
"loss": 0.6929, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5665284583490389, |
|
"grad_norm": 0.3204245865345001, |
|
"learning_rate": 2.0579262065092423e-05, |
|
"loss": 0.7193, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.5676592536750849, |
|
"grad_norm": 0.30433857440948486, |
|
"learning_rate": 2.048984210150604e-05, |
|
"loss": 0.6859, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.5687900490011308, |
|
"grad_norm": 0.392553448677063, |
|
"learning_rate": 2.0400481740147022e-05, |
|
"loss": 0.7217, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.5699208443271768, |
|
"grad_norm": 0.3402389585971832, |
|
"learning_rate": 2.0311182161922237e-05, |
|
"loss": 0.6868, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5710516396532228, |
|
"grad_norm": 0.42901313304901123, |
|
"learning_rate": 2.022194454693536e-05, |
|
"loss": 0.6861, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.5721824349792688, |
|
"grad_norm": 0.34680864214897156, |
|
"learning_rate": 2.013277007447117e-05, |
|
"loss": 0.7805, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5733132303053148, |
|
"grad_norm": 0.30028700828552246, |
|
"learning_rate": 2.0043659922980005e-05, |
|
"loss": 0.6454, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.5744440256313608, |
|
"grad_norm": 0.3310604691505432, |
|
"learning_rate": 1.995461527006225e-05, |
|
"loss": 0.6193, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5755748209574068, |
|
"grad_norm": 0.3977152407169342, |
|
"learning_rate": 1.9865637292452636e-05, |
|
"loss": 0.7275, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.5767056162834527, |
|
"grad_norm": 0.42726007103919983, |
|
"learning_rate": 1.977672716600486e-05, |
|
"loss": 0.7321, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5778364116094987, |
|
"grad_norm": 0.4253356158733368, |
|
"learning_rate": 1.968788606567589e-05, |
|
"loss": 0.7107, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.5789672069355447, |
|
"grad_norm": 0.3486230969429016, |
|
"learning_rate": 1.9599115165510544e-05, |
|
"loss": 0.6859, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5800980022615907, |
|
"grad_norm": 0.3471638560295105, |
|
"learning_rate": 1.9510415638625932e-05, |
|
"loss": 0.656, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.5812287975876367, |
|
"grad_norm": 0.37314942479133606, |
|
"learning_rate": 1.942178865719593e-05, |
|
"loss": 0.6545, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.5823595929136827, |
|
"grad_norm": 0.3019452393054962, |
|
"learning_rate": 1.9333235392435774e-05, |
|
"loss": 0.6422, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5834903882397287, |
|
"grad_norm": 0.30790606141090393, |
|
"learning_rate": 1.9244757014586458e-05, |
|
"loss": 0.6182, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5846211835657746, |
|
"grad_norm": 0.3539668917655945, |
|
"learning_rate": 1.9156354692899405e-05, |
|
"loss": 0.6835, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.5857519788918206, |
|
"grad_norm": 0.3208529055118561, |
|
"learning_rate": 1.9068029595620884e-05, |
|
"loss": 0.6619, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.5868827742178666, |
|
"grad_norm": 0.49773553013801575, |
|
"learning_rate": 1.897978288997669e-05, |
|
"loss": 0.7187, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.5880135695439126, |
|
"grad_norm": 0.3386790156364441, |
|
"learning_rate": 1.889161574215663e-05, |
|
"loss": 0.6659, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5891443648699586, |
|
"grad_norm": 0.3373807370662689, |
|
"learning_rate": 1.880352931729914e-05, |
|
"loss": 0.6461, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.5902751601960046, |
|
"grad_norm": 0.4661053717136383, |
|
"learning_rate": 1.8715524779475944e-05, |
|
"loss": 0.6994, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.5914059555220506, |
|
"grad_norm": 0.38146570324897766, |
|
"learning_rate": 1.862760329167655e-05, |
|
"loss": 0.6413, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.5925367508480965, |
|
"grad_norm": 0.3764294981956482, |
|
"learning_rate": 1.8539766015793006e-05, |
|
"loss": 0.6617, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.5936675461741425, |
|
"grad_norm": 0.35271722078323364, |
|
"learning_rate": 1.845201411260446e-05, |
|
"loss": 0.7036, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.5947983415001885, |
|
"grad_norm": 0.3613468110561371, |
|
"learning_rate": 1.8364348741761867e-05, |
|
"loss": 0.7361, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.5959291368262345, |
|
"grad_norm": 0.34245991706848145, |
|
"learning_rate": 1.8276771061772647e-05, |
|
"loss": 0.7073, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.5970599321522805, |
|
"grad_norm": 0.32761844992637634, |
|
"learning_rate": 1.8189282229985345e-05, |
|
"loss": 0.7661, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5981907274783265, |
|
"grad_norm": 0.3382299542427063, |
|
"learning_rate": 1.8101883402574415e-05, |
|
"loss": 0.6813, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.5993215228043725, |
|
"grad_norm": 0.30160099267959595, |
|
"learning_rate": 1.8014575734524865e-05, |
|
"loss": 0.7183, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6004523181304184, |
|
"grad_norm": 0.3124518096446991, |
|
"learning_rate": 1.7927360379617024e-05, |
|
"loss": 0.6506, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.6015831134564644, |
|
"grad_norm": 0.3907219469547272, |
|
"learning_rate": 1.78402384904113e-05, |
|
"loss": 0.6575, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6027139087825104, |
|
"grad_norm": 0.35735592246055603, |
|
"learning_rate": 1.7753211218232938e-05, |
|
"loss": 0.6877, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.6038447041085564, |
|
"grad_norm": 0.40482988953590393, |
|
"learning_rate": 1.7666279713156815e-05, |
|
"loss": 0.6788, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6049754994346024, |
|
"grad_norm": 0.40024474263191223, |
|
"learning_rate": 1.757944512399221e-05, |
|
"loss": 0.7644, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.6061062947606484, |
|
"grad_norm": 0.4042050242424011, |
|
"learning_rate": 1.7492708598267683e-05, |
|
"loss": 0.7347, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6072370900866944, |
|
"grad_norm": 0.38071730732917786, |
|
"learning_rate": 1.7406071282215854e-05, |
|
"loss": 0.6841, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.6083678854127403, |
|
"grad_norm": 0.4327053427696228, |
|
"learning_rate": 1.7319534320758284e-05, |
|
"loss": 0.7712, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6094986807387863, |
|
"grad_norm": 0.41496187448501587, |
|
"learning_rate": 1.7233098857490325e-05, |
|
"loss": 0.7306, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.6106294760648323, |
|
"grad_norm": 0.48277217149734497, |
|
"learning_rate": 1.714676603466605e-05, |
|
"loss": 0.6843, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6117602713908783, |
|
"grad_norm": 0.4226689338684082, |
|
"learning_rate": 1.7060536993183084e-05, |
|
"loss": 0.6336, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.6128910667169243, |
|
"grad_norm": 0.3646908104419708, |
|
"learning_rate": 1.6974412872567597e-05, |
|
"loss": 0.6637, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6140218620429703, |
|
"grad_norm": 0.30986544489860535, |
|
"learning_rate": 1.688839481095922e-05, |
|
"loss": 0.6905, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.6151526573690163, |
|
"grad_norm": 0.416892409324646, |
|
"learning_rate": 1.680248394509599e-05, |
|
"loss": 0.7408, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6162834526950622, |
|
"grad_norm": 0.3360169231891632, |
|
"learning_rate": 1.6716681410299348e-05, |
|
"loss": 0.7591, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.6174142480211082, |
|
"grad_norm": 0.40622156858444214, |
|
"learning_rate": 1.6630988340459128e-05, |
|
"loss": 0.6792, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6185450433471542, |
|
"grad_norm": 0.37062937021255493, |
|
"learning_rate": 1.654540586801858e-05, |
|
"loss": 0.6656, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.6196758386732002, |
|
"grad_norm": 0.29908493161201477, |
|
"learning_rate": 1.645993512395938e-05, |
|
"loss": 0.6576, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6208066339992462, |
|
"grad_norm": 0.3012191951274872, |
|
"learning_rate": 1.6374577237786703e-05, |
|
"loss": 0.6174, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.6219374293252922, |
|
"grad_norm": 0.322457879781723, |
|
"learning_rate": 1.628933333751432e-05, |
|
"loss": 0.6562, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6230682246513382, |
|
"grad_norm": 0.3631836771965027, |
|
"learning_rate": 1.6204204549649628e-05, |
|
"loss": 0.6264, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.6241990199773841, |
|
"grad_norm": 0.30250152945518494, |
|
"learning_rate": 1.6119191999178847e-05, |
|
"loss": 0.7027, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6253298153034301, |
|
"grad_norm": 0.31643325090408325, |
|
"learning_rate": 1.6034296809552047e-05, |
|
"loss": 0.6767, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.6264606106294761, |
|
"grad_norm": 0.3336418569087982, |
|
"learning_rate": 1.594952010266843e-05, |
|
"loss": 0.67, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6275914059555221, |
|
"grad_norm": 0.33178821206092834, |
|
"learning_rate": 1.5864862998861384e-05, |
|
"loss": 0.6477, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6287222012815681, |
|
"grad_norm": 0.3760960102081299, |
|
"learning_rate": 1.5780326616883745e-05, |
|
"loss": 0.6692, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6298529966076141, |
|
"grad_norm": 0.38543248176574707, |
|
"learning_rate": 1.5695912073893006e-05, |
|
"loss": 0.6762, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.63098379193366, |
|
"grad_norm": 0.39795583486557007, |
|
"learning_rate": 1.561162048543653e-05, |
|
"loss": 0.6861, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.632114587259706, |
|
"grad_norm": 0.30678924918174744, |
|
"learning_rate": 1.552745296543684e-05, |
|
"loss": 0.7045, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.633245382585752, |
|
"grad_norm": 0.39268332719802856, |
|
"learning_rate": 1.544341062617685e-05, |
|
"loss": 0.6791, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.634376177911798, |
|
"grad_norm": 0.3372342586517334, |
|
"learning_rate": 1.535949457828525e-05, |
|
"loss": 0.6737, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.635506973237844, |
|
"grad_norm": 0.3903445303440094, |
|
"learning_rate": 1.527570593072172e-05, |
|
"loss": 0.7094, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.63663776856389, |
|
"grad_norm": 0.3412375748157501, |
|
"learning_rate": 1.5192045790762354e-05, |
|
"loss": 0.7126, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.637768563889936, |
|
"grad_norm": 0.37893742322921753, |
|
"learning_rate": 1.5108515263985018e-05, |
|
"loss": 0.739, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.638899359215982, |
|
"grad_norm": 0.3894254267215729, |
|
"learning_rate": 1.502511545425469e-05, |
|
"loss": 0.7108, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6400301545420279, |
|
"grad_norm": 0.3613717257976532, |
|
"learning_rate": 1.4941847463708958e-05, |
|
"loss": 0.672, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6411609498680739, |
|
"grad_norm": 0.2811620235443115, |
|
"learning_rate": 1.4858712392743352e-05, |
|
"loss": 0.7129, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.6422917451941199, |
|
"grad_norm": 0.411286324262619, |
|
"learning_rate": 1.4775711339996896e-05, |
|
"loss": 0.6747, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6434225405201659, |
|
"grad_norm": 0.39716291427612305, |
|
"learning_rate": 1.4692845402337523e-05, |
|
"loss": 0.7217, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.6445533358462119, |
|
"grad_norm": 0.3730713725090027, |
|
"learning_rate": 1.4610115674847619e-05, |
|
"loss": 0.6249, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6456841311722579, |
|
"grad_norm": 0.3958978056907654, |
|
"learning_rate": 1.4527523250809545e-05, |
|
"loss": 0.6599, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.6468149264983039, |
|
"grad_norm": 0.2955171763896942, |
|
"learning_rate": 1.4445069221691148e-05, |
|
"loss": 0.6542, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6479457218243498, |
|
"grad_norm": 0.45475757122039795, |
|
"learning_rate": 1.436275467713141e-05, |
|
"loss": 0.8182, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.6490765171503958, |
|
"grad_norm": 0.40360134840011597, |
|
"learning_rate": 1.428058070492599e-05, |
|
"loss": 0.6866, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.6502073124764418, |
|
"grad_norm": 0.3490990996360779, |
|
"learning_rate": 1.4198548391012878e-05, |
|
"loss": 0.6879, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6513381078024878, |
|
"grad_norm": 0.3447786569595337, |
|
"learning_rate": 1.4116658819458025e-05, |
|
"loss": 0.6206, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6524689031285338, |
|
"grad_norm": 0.4018050730228424, |
|
"learning_rate": 1.4034913072441015e-05, |
|
"loss": 0.6705, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.6535996984545798, |
|
"grad_norm": 0.31316670775413513, |
|
"learning_rate": 1.3953312230240801e-05, |
|
"loss": 0.7058, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6547304937806258, |
|
"grad_norm": 0.4424934387207031, |
|
"learning_rate": 1.3871857371221389e-05, |
|
"loss": 0.6871, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.6558612891066717, |
|
"grad_norm": 0.36514902114868164, |
|
"learning_rate": 1.3790549571817615e-05, |
|
"loss": 0.6632, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6569920844327177, |
|
"grad_norm": 0.3037571609020233, |
|
"learning_rate": 1.3709389906520875e-05, |
|
"loss": 0.6516, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.6581228797587637, |
|
"grad_norm": 0.3230195641517639, |
|
"learning_rate": 1.3628379447864997e-05, |
|
"loss": 0.7393, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6592536750848097, |
|
"grad_norm": 0.425601601600647, |
|
"learning_rate": 1.3547519266411985e-05, |
|
"loss": 0.6665, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.6603844704108557, |
|
"grad_norm": 0.39113959670066833, |
|
"learning_rate": 1.3466810430737941e-05, |
|
"loss": 0.6772, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6615152657369017, |
|
"grad_norm": 0.3463885188102722, |
|
"learning_rate": 1.3386254007418928e-05, |
|
"loss": 0.7132, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6626460610629477, |
|
"grad_norm": 0.29994767904281616, |
|
"learning_rate": 1.3305851061016821e-05, |
|
"loss": 0.7092, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6637768563889936, |
|
"grad_norm": 0.3709157407283783, |
|
"learning_rate": 1.3225602654065323e-05, |
|
"loss": 0.6795, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.6649076517150396, |
|
"grad_norm": 0.36443623900413513, |
|
"learning_rate": 1.3145509847055837e-05, |
|
"loss": 0.6979, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.6660384470410856, |
|
"grad_norm": 0.3367445468902588, |
|
"learning_rate": 1.3065573698423558e-05, |
|
"loss": 0.7412, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.6671692423671316, |
|
"grad_norm": 0.3487666845321655, |
|
"learning_rate": 1.2985795264533372e-05, |
|
"loss": 0.8255, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6683000376931776, |
|
"grad_norm": 0.3769291937351227, |
|
"learning_rate": 1.2906175599665949e-05, |
|
"loss": 0.6697, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.6694308330192236, |
|
"grad_norm": 0.3350309431552887, |
|
"learning_rate": 1.2826715756003846e-05, |
|
"loss": 0.7478, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.6705616283452696, |
|
"grad_norm": 0.30802276730537415, |
|
"learning_rate": 1.2747416783617511e-05, |
|
"loss": 0.6233, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.6716924236713155, |
|
"grad_norm": 0.5399759411811829, |
|
"learning_rate": 1.2668279730451535e-05, |
|
"loss": 0.7359, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6728232189973615, |
|
"grad_norm": 0.38919568061828613, |
|
"learning_rate": 1.2589305642310651e-05, |
|
"loss": 0.6935, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6739540143234075, |
|
"grad_norm": 0.30821794271469116, |
|
"learning_rate": 1.2510495562846053e-05, |
|
"loss": 0.7083, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.6750848096494535, |
|
"grad_norm": 0.28666800260543823, |
|
"learning_rate": 1.2431850533541487e-05, |
|
"loss": 0.6569, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.6762156049754995, |
|
"grad_norm": 0.36479493975639343, |
|
"learning_rate": 1.2353371593699592e-05, |
|
"loss": 0.6867, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.6773464003015455, |
|
"grad_norm": 0.3713551461696625, |
|
"learning_rate": 1.22750597804281e-05, |
|
"loss": 0.68, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.6784771956275915, |
|
"grad_norm": 0.3654766380786896, |
|
"learning_rate": 1.2196916128626126e-05, |
|
"loss": 0.73, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6796079909536374, |
|
"grad_norm": 0.2997152507305145, |
|
"learning_rate": 1.2118941670970551e-05, |
|
"loss": 0.6777, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.6807387862796834, |
|
"grad_norm": 0.3098806142807007, |
|
"learning_rate": 1.2041137437902297e-05, |
|
"loss": 0.709, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6818695816057294, |
|
"grad_norm": 0.32077932357788086, |
|
"learning_rate": 1.1963504457612781e-05, |
|
"loss": 0.6451, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.6830003769317754, |
|
"grad_norm": 0.33692800998687744, |
|
"learning_rate": 1.1886043756030294e-05, |
|
"loss": 0.6855, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6841311722578214, |
|
"grad_norm": 0.4159882664680481, |
|
"learning_rate": 1.1808756356806411e-05, |
|
"loss": 0.6746, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6852619675838674, |
|
"grad_norm": 0.4914894104003906, |
|
"learning_rate": 1.1731643281302548e-05, |
|
"loss": 0.7548, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.6863927629099134, |
|
"grad_norm": 0.354419082403183, |
|
"learning_rate": 1.1654705548576364e-05, |
|
"loss": 0.7227, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.6875235582359593, |
|
"grad_norm": 0.42316100001335144, |
|
"learning_rate": 1.157794417536838e-05, |
|
"loss": 0.709, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.6886543535620053, |
|
"grad_norm": 0.35025471448898315, |
|
"learning_rate": 1.1501360176088494e-05, |
|
"loss": 0.6336, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.6897851488880513, |
|
"grad_norm": 0.33178970217704773, |
|
"learning_rate": 1.1424954562802598e-05, |
|
"loss": 0.616, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6909159442140973, |
|
"grad_norm": 0.32804471254348755, |
|
"learning_rate": 1.1348728345219176e-05, |
|
"loss": 0.6617, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.6920467395401433, |
|
"grad_norm": 0.29233989119529724, |
|
"learning_rate": 1.127268253067598e-05, |
|
"loss": 0.6296, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.6931775348661893, |
|
"grad_norm": 0.3966659605503082, |
|
"learning_rate": 1.1196818124126729e-05, |
|
"loss": 0.6721, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.6943083301922353, |
|
"grad_norm": 0.34669914841651917, |
|
"learning_rate": 1.1121136128127812e-05, |
|
"loss": 0.6118, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.6954391255182812, |
|
"grad_norm": 0.33230316638946533, |
|
"learning_rate": 1.104563754282505e-05, |
|
"loss": 0.6855, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6965699208443272, |
|
"grad_norm": 0.3585960865020752, |
|
"learning_rate": 1.0970323365940444e-05, |
|
"loss": 0.6893, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.6977007161703732, |
|
"grad_norm": 0.4158352315425873, |
|
"learning_rate": 1.0895194592759042e-05, |
|
"loss": 0.7072, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.6988315114964192, |
|
"grad_norm": 0.43162232637405396, |
|
"learning_rate": 1.082025221611577e-05, |
|
"loss": 0.7138, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.6999623068224652, |
|
"grad_norm": 0.3278350830078125, |
|
"learning_rate": 1.0745497226382267e-05, |
|
"loss": 0.6111, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.7010931021485112, |
|
"grad_norm": 0.44768843054771423, |
|
"learning_rate": 1.0670930611453874e-05, |
|
"loss": 0.6449, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7022238974745572, |
|
"grad_norm": 0.48205995559692383, |
|
"learning_rate": 1.0596553356736507e-05, |
|
"loss": 0.6902, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.7033546928006031, |
|
"grad_norm": 0.3945559561252594, |
|
"learning_rate": 1.0522366445133686e-05, |
|
"loss": 0.6727, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7044854881266491, |
|
"grad_norm": 0.33756589889526367, |
|
"learning_rate": 1.044837085703352e-05, |
|
"loss": 0.6969, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.7056162834526951, |
|
"grad_norm": 0.2790575325489044, |
|
"learning_rate": 1.0374567570295766e-05, |
|
"loss": 0.625, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7067470787787411, |
|
"grad_norm": 0.4053255021572113, |
|
"learning_rate": 1.0300957560238875e-05, |
|
"loss": 0.7338, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.7078778741047871, |
|
"grad_norm": 0.3397720158100128, |
|
"learning_rate": 1.0227541799627136e-05, |
|
"loss": 0.6771, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7090086694308331, |
|
"grad_norm": 0.3540814518928528, |
|
"learning_rate": 1.015432125865782e-05, |
|
"loss": 0.6582, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.7101394647568791, |
|
"grad_norm": 0.3383145034313202, |
|
"learning_rate": 1.0081296904948342e-05, |
|
"loss": 0.5987, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.711270260082925, |
|
"grad_norm": 0.4115695357322693, |
|
"learning_rate": 1.0008469703523493e-05, |
|
"loss": 0.6981, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.712401055408971, |
|
"grad_norm": 0.3034178912639618, |
|
"learning_rate": 9.935840616802645e-06, |
|
"loss": 0.6991, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.713531850735017, |
|
"grad_norm": 0.32083261013031006, |
|
"learning_rate": 9.863410604587095e-06, |
|
"loss": 0.6806, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.714662646061063, |
|
"grad_norm": 0.5665989518165588, |
|
"learning_rate": 9.791180624047322e-06, |
|
"loss": 0.7539, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.715793441387109, |
|
"grad_norm": 0.4178657829761505, |
|
"learning_rate": 9.719151629710386e-06, |
|
"loss": 0.6961, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.716924236713155, |
|
"grad_norm": 0.36418062448501587, |
|
"learning_rate": 9.647324573447291e-06, |
|
"loss": 0.7055, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.718055032039201, |
|
"grad_norm": 0.32820287346839905, |
|
"learning_rate": 9.575700404460386e-06, |
|
"loss": 0.6329, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7191858273652469, |
|
"grad_norm": 0.5332444906234741, |
|
"learning_rate": 9.504280069270871e-06, |
|
"loss": 0.723, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7203166226912929, |
|
"grad_norm": 0.5376641154289246, |
|
"learning_rate": 9.433064511706225e-06, |
|
"loss": 0.7362, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.7214474180173389, |
|
"grad_norm": 0.32007166743278503, |
|
"learning_rate": 9.362054672887819e-06, |
|
"loss": 0.6754, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7225782133433849, |
|
"grad_norm": 0.37915733456611633, |
|
"learning_rate": 9.291251491218387e-06, |
|
"loss": 0.6565, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.7237090086694309, |
|
"grad_norm": 0.35389769077301025, |
|
"learning_rate": 9.220655902369665e-06, |
|
"loss": 0.6775, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7248398039954769, |
|
"grad_norm": 0.5367900133132935, |
|
"learning_rate": 9.150268839270055e-06, |
|
"loss": 0.7366, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.7259705993215229, |
|
"grad_norm": 0.4069572985172272, |
|
"learning_rate": 9.080091232092247e-06, |
|
"loss": 0.6873, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7271013946475688, |
|
"grad_norm": 0.5752056837081909, |
|
"learning_rate": 9.01012400824097e-06, |
|
"loss": 0.7199, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.7282321899736148, |
|
"grad_norm": 0.29914751648902893, |
|
"learning_rate": 8.940368092340682e-06, |
|
"loss": 0.7129, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7293629852996608, |
|
"grad_norm": 0.33143705129623413, |
|
"learning_rate": 8.870824406223416e-06, |
|
"loss": 0.6581, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7304937806257068, |
|
"grad_norm": 0.4050018787384033, |
|
"learning_rate": 8.801493868916536e-06, |
|
"loss": 0.6941, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7316245759517528, |
|
"grad_norm": 0.33594897389411926, |
|
"learning_rate": 8.732377396630642e-06, |
|
"loss": 0.6639, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.7327553712777988, |
|
"grad_norm": 0.4058912694454193, |
|
"learning_rate": 8.663475902747445e-06, |
|
"loss": 0.7139, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7338861666038448, |
|
"grad_norm": 0.5566866397857666, |
|
"learning_rate": 8.594790297807667e-06, |
|
"loss": 0.6765, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.7350169619298907, |
|
"grad_norm": 0.36217668652534485, |
|
"learning_rate": 8.526321489499067e-06, |
|
"loss": 0.6592, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7361477572559367, |
|
"grad_norm": 0.3255480229854584, |
|
"learning_rate": 8.458070382644382e-06, |
|
"loss": 0.7567, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.7372785525819827, |
|
"grad_norm": 0.4506484866142273, |
|
"learning_rate": 8.390037879189422e-06, |
|
"loss": 0.6732, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7384093479080287, |
|
"grad_norm": 0.4981943368911743, |
|
"learning_rate": 8.322224878191126e-06, |
|
"loss": 0.6665, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.7395401432340747, |
|
"grad_norm": 0.36179500818252563, |
|
"learning_rate": 8.25463227580567e-06, |
|
"loss": 0.6821, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7406709385601207, |
|
"grad_norm": 0.34908345341682434, |
|
"learning_rate": 8.187260965276666e-06, |
|
"loss": 0.6194, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7418017338861667, |
|
"grad_norm": 0.3363327980041504, |
|
"learning_rate": 8.120111836923283e-06, |
|
"loss": 0.6294, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7429325292122126, |
|
"grad_norm": 0.34136900305747986, |
|
"learning_rate": 8.053185778128594e-06, |
|
"loss": 0.6208, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.7440633245382586, |
|
"grad_norm": 0.37522128224372864, |
|
"learning_rate": 7.986483673327724e-06, |
|
"loss": 0.6751, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.7451941198643046, |
|
"grad_norm": 0.34423232078552246, |
|
"learning_rate": 7.92000640399626e-06, |
|
"loss": 0.733, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.7463249151903506, |
|
"grad_norm": 0.4137992858886719, |
|
"learning_rate": 7.853754848638542e-06, |
|
"loss": 0.7044, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7474557105163966, |
|
"grad_norm": 0.36555016040802, |
|
"learning_rate": 7.787729882776065e-06, |
|
"loss": 0.6735, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.7485865058424426, |
|
"grad_norm": 0.33875149488449097, |
|
"learning_rate": 7.721932378935973e-06, |
|
"loss": 0.732, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7497173011684886, |
|
"grad_norm": 0.32351580262184143, |
|
"learning_rate": 7.656363206639409e-06, |
|
"loss": 0.7191, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.7508480964945344, |
|
"grad_norm": 0.48030319809913635, |
|
"learning_rate": 7.591023232390138e-06, |
|
"loss": 0.6972, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7519788918205804, |
|
"grad_norm": 0.381740540266037, |
|
"learning_rate": 7.525913319663011e-06, |
|
"loss": 0.6752, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7531096871466264, |
|
"grad_norm": 0.3707197308540344, |
|
"learning_rate": 7.461034328892621e-06, |
|
"loss": 0.6924, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.7542404824726724, |
|
"grad_norm": 0.4179406762123108, |
|
"learning_rate": 7.3963871174618945e-06, |
|
"loss": 0.6774, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.7553712777987184, |
|
"grad_norm": 0.3096112012863159, |
|
"learning_rate": 7.3319725396907485e-06, |
|
"loss": 0.6671, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.7565020731247644, |
|
"grad_norm": 0.37638741731643677, |
|
"learning_rate": 7.267791446824854e-06, |
|
"loss": 0.739, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.7576328684508103, |
|
"grad_norm": 0.33642110228538513, |
|
"learning_rate": 7.2038446870243195e-06, |
|
"loss": 0.6591, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7587636637768563, |
|
"grad_norm": 0.3964068591594696, |
|
"learning_rate": 7.140133105352545e-06, |
|
"loss": 0.6936, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.7598944591029023, |
|
"grad_norm": 0.42048409581184387, |
|
"learning_rate": 7.076657543765008e-06, |
|
"loss": 0.729, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.7610252544289483, |
|
"grad_norm": 0.3949214518070221, |
|
"learning_rate": 7.013418841098174e-06, |
|
"loss": 0.7064, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.7621560497549943, |
|
"grad_norm": 0.38450565934181213, |
|
"learning_rate": 6.95041783305837e-06, |
|
"loss": 0.6666, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7632868450810403, |
|
"grad_norm": 0.33812659978866577, |
|
"learning_rate": 6.887655352210765e-06, |
|
"loss": 0.6572, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7644176404070863, |
|
"grad_norm": 0.373017281293869, |
|
"learning_rate": 6.825132227968378e-06, |
|
"loss": 0.7411, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7655484357331322, |
|
"grad_norm": 0.36028534173965454, |
|
"learning_rate": 6.7628492865810995e-06, |
|
"loss": 0.6234, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.7666792310591782, |
|
"grad_norm": 0.3726188838481903, |
|
"learning_rate": 6.700807351124785e-06, |
|
"loss": 0.6261, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.7678100263852242, |
|
"grad_norm": 0.32167547941207886, |
|
"learning_rate": 6.639007241490347e-06, |
|
"loss": 0.7218, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.7689408217112702, |
|
"grad_norm": 0.3346633315086365, |
|
"learning_rate": 6.5774497743729734e-06, |
|
"loss": 0.6264, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7700716170373162, |
|
"grad_norm": 0.5438939929008484, |
|
"learning_rate": 6.5161357632612745e-06, |
|
"loss": 0.6799, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.7712024123633622, |
|
"grad_norm": 0.7162203192710876, |
|
"learning_rate": 6.4550660184265866e-06, |
|
"loss": 0.7282, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.7723332076894082, |
|
"grad_norm": 0.3571074306964874, |
|
"learning_rate": 6.394241346912236e-06, |
|
"loss": 0.7061, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.7734640030154541, |
|
"grad_norm": 0.37110117077827454, |
|
"learning_rate": 6.333662552522865e-06, |
|
"loss": 0.6464, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.7745947983415001, |
|
"grad_norm": 0.42400380969047546, |
|
"learning_rate": 6.273330435813837e-06, |
|
"loss": 0.6814, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7757255936675461, |
|
"grad_norm": 0.41441938281059265, |
|
"learning_rate": 6.213245794080641e-06, |
|
"loss": 0.6435, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.7768563889935921, |
|
"grad_norm": 0.38226518034935, |
|
"learning_rate": 6.153409421348358e-06, |
|
"loss": 0.6979, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.7779871843196381, |
|
"grad_norm": 0.28945887088775635, |
|
"learning_rate": 6.093822108361163e-06, |
|
"loss": 0.6509, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.7791179796456841, |
|
"grad_norm": 0.3852415680885315, |
|
"learning_rate": 6.034484642571866e-06, |
|
"loss": 0.7581, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.7802487749717301, |
|
"grad_norm": 0.36283373832702637, |
|
"learning_rate": 5.975397808131549e-06, |
|
"loss": 0.6021, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.781379570297776, |
|
"grad_norm": 0.3490721583366394, |
|
"learning_rate": 5.916562385879151e-06, |
|
"loss": 0.6571, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.782510365623822, |
|
"grad_norm": 0.32459717988967896, |
|
"learning_rate": 5.857979153331189e-06, |
|
"loss": 0.6211, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.783641160949868, |
|
"grad_norm": 0.37339502573013306, |
|
"learning_rate": 5.799648884671441e-06, |
|
"loss": 0.6819, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.784771956275914, |
|
"grad_norm": 0.35320839285850525, |
|
"learning_rate": 5.741572350740768e-06, |
|
"loss": 0.7348, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.78590275160196, |
|
"grad_norm": 0.2820529043674469, |
|
"learning_rate": 5.68375031902687e-06, |
|
"loss": 0.6302, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.787033546928006, |
|
"grad_norm": 0.35477685928344727, |
|
"learning_rate": 5.626183553654194e-06, |
|
"loss": 0.6241, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.788164342254052, |
|
"grad_norm": 0.36558765172958374, |
|
"learning_rate": 5.5688728153738155e-06, |
|
"loss": 0.6594, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.789295137580098, |
|
"grad_norm": 0.3570399880409241, |
|
"learning_rate": 5.511818861553364e-06, |
|
"loss": 0.6271, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.7904259329061439, |
|
"grad_norm": 0.4297529458999634, |
|
"learning_rate": 5.45502244616706e-06, |
|
"loss": 0.7279, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.7915567282321899, |
|
"grad_norm": 0.3277917504310608, |
|
"learning_rate": 5.398484319785688e-06, |
|
"loss": 0.7204, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7926875235582359, |
|
"grad_norm": 0.35679319500923157, |
|
"learning_rate": 5.342205229566774e-06, |
|
"loss": 0.6979, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.7938183188842819, |
|
"grad_norm": 0.5490666627883911, |
|
"learning_rate": 5.286185919244599e-06, |
|
"loss": 0.7884, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.7949491142103279, |
|
"grad_norm": 0.3300570845603943, |
|
"learning_rate": 5.230427129120441e-06, |
|
"loss": 0.6661, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.7960799095363739, |
|
"grad_norm": 0.34464097023010254, |
|
"learning_rate": 5.174929596052791e-06, |
|
"loss": 0.729, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.7972107048624198, |
|
"grad_norm": 0.36439618468284607, |
|
"learning_rate": 5.119694053447566e-06, |
|
"loss": 0.6483, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.7983415001884658, |
|
"grad_norm": 0.3646329939365387, |
|
"learning_rate": 5.064721231248498e-06, |
|
"loss": 0.6497, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.7994722955145118, |
|
"grad_norm": 0.42587414383888245, |
|
"learning_rate": 5.010011855927393e-06, |
|
"loss": 0.6638, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.8006030908405578, |
|
"grad_norm": 0.3738311529159546, |
|
"learning_rate": 4.955566650474616e-06, |
|
"loss": 0.806, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8017338861666038, |
|
"grad_norm": 0.4998151659965515, |
|
"learning_rate": 4.90138633438946e-06, |
|
"loss": 0.6658, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.8028646814926498, |
|
"grad_norm": 0.39495596289634705, |
|
"learning_rate": 4.847471623670713e-06, |
|
"loss": 0.7759, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8039954768186958, |
|
"grad_norm": 0.38152778148651123, |
|
"learning_rate": 4.79382323080714e-06, |
|
"loss": 0.6445, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.8051262721447418, |
|
"grad_norm": 0.5026568174362183, |
|
"learning_rate": 4.740441864768086e-06, |
|
"loss": 0.7176, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8062570674707877, |
|
"grad_norm": 0.3014233112335205, |
|
"learning_rate": 4.687328230994118e-06, |
|
"loss": 0.6597, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.8073878627968337, |
|
"grad_norm": 0.4386585056781769, |
|
"learning_rate": 4.634483031387676e-06, |
|
"loss": 0.7718, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8085186581228797, |
|
"grad_norm": 0.3882271647453308, |
|
"learning_rate": 4.581906964303825e-06, |
|
"loss": 0.6668, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.8096494534489257, |
|
"grad_norm": 0.3510667681694031, |
|
"learning_rate": 4.529600724541022e-06, |
|
"loss": 0.7296, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8107802487749717, |
|
"grad_norm": 0.5134342908859253, |
|
"learning_rate": 4.477565003331904e-06, |
|
"loss": 0.7208, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.8119110441010177, |
|
"grad_norm": 0.32369402050971985, |
|
"learning_rate": 4.4258004883342e-06, |
|
"loss": 0.6951, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8130418394270637, |
|
"grad_norm": 0.4089120030403137, |
|
"learning_rate": 4.3743078636215935e-06, |
|
"loss": 0.6571, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.8141726347531096, |
|
"grad_norm": 0.3248507082462311, |
|
"learning_rate": 4.323087809674733e-06, |
|
"loss": 0.6267, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8153034300791556, |
|
"grad_norm": 0.3590109348297119, |
|
"learning_rate": 4.2721410033722014e-06, |
|
"loss": 0.6919, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.8164342254052016, |
|
"grad_norm": 0.3924254775047302, |
|
"learning_rate": 4.221468117981592e-06, |
|
"loss": 0.6, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8175650207312476, |
|
"grad_norm": 0.42247772216796875, |
|
"learning_rate": 4.1710698231505975e-06, |
|
"loss": 0.6375, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.8186958160572936, |
|
"grad_norm": 0.3658187985420227, |
|
"learning_rate": 4.120946784898156e-06, |
|
"loss": 0.7743, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8198266113833396, |
|
"grad_norm": 0.39758992195129395, |
|
"learning_rate": 4.071099665605682e-06, |
|
"loss": 0.6259, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.8209574067093856, |
|
"grad_norm": 0.45203524827957153, |
|
"learning_rate": 4.021529124008278e-06, |
|
"loss": 0.7297, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8220882020354315, |
|
"grad_norm": 0.43119361996650696, |
|
"learning_rate": 3.9722358151860515e-06, |
|
"loss": 0.6612, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.8232189973614775, |
|
"grad_norm": 0.41796061396598816, |
|
"learning_rate": 3.923220390555432e-06, |
|
"loss": 0.7526, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8243497926875235, |
|
"grad_norm": 0.33241549134254456, |
|
"learning_rate": 3.87448349786059e-06, |
|
"loss": 0.6832, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.8254805880135695, |
|
"grad_norm": 0.3728543817996979, |
|
"learning_rate": 3.826025781164874e-06, |
|
"loss": 0.6604, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8266113833396155, |
|
"grad_norm": 0.297720342874527, |
|
"learning_rate": 3.7778478808422753e-06, |
|
"loss": 0.7111, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.8277421786656615, |
|
"grad_norm": 0.3133184015750885, |
|
"learning_rate": 3.7299504335689905e-06, |
|
"loss": 0.6552, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8288729739917075, |
|
"grad_norm": 0.3344557583332062, |
|
"learning_rate": 3.682334072314994e-06, |
|
"loss": 0.6516, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.8300037693177534, |
|
"grad_norm": 0.33505749702453613, |
|
"learning_rate": 3.6349994263356806e-06, |
|
"loss": 0.6788, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8311345646437994, |
|
"grad_norm": 0.32801946997642517, |
|
"learning_rate": 3.587947121163551e-06, |
|
"loss": 0.6627, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.8322653599698454, |
|
"grad_norm": 0.3641601800918579, |
|
"learning_rate": 3.541177778599944e-06, |
|
"loss": 0.6904, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8333961552958914, |
|
"grad_norm": 0.3527655005455017, |
|
"learning_rate": 3.494692016706799e-06, |
|
"loss": 0.7227, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.8345269506219374, |
|
"grad_norm": 0.32480356097221375, |
|
"learning_rate": 3.4484904497985167e-06, |
|
"loss": 0.6718, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8356577459479834, |
|
"grad_norm": 0.40660572052001953, |
|
"learning_rate": 3.4025736884338326e-06, |
|
"loss": 0.7252, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.8367885412740294, |
|
"grad_norm": 0.3736969530582428, |
|
"learning_rate": 3.356942339407748e-06, |
|
"loss": 0.6344, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8379193366000753, |
|
"grad_norm": 0.3524364233016968, |
|
"learning_rate": 3.311597005743508e-06, |
|
"loss": 0.6561, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.8390501319261213, |
|
"grad_norm": 0.346884548664093, |
|
"learning_rate": 3.26653828668462e-06, |
|
"loss": 0.7203, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8401809272521673, |
|
"grad_norm": 0.3504559099674225, |
|
"learning_rate": 3.2217667776869716e-06, |
|
"loss": 0.6846, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.8413117225782133, |
|
"grad_norm": 0.4117507338523865, |
|
"learning_rate": 3.1772830704109108e-06, |
|
"loss": 0.7109, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8424425179042593, |
|
"grad_norm": 0.35699552297592163, |
|
"learning_rate": 3.133087752713479e-06, |
|
"loss": 0.7086, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.8435733132303053, |
|
"grad_norm": 0.4353185296058655, |
|
"learning_rate": 3.089181408640612e-06, |
|
"loss": 0.6974, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8447041085563513, |
|
"grad_norm": 0.35224634408950806, |
|
"learning_rate": 3.0455646184194137e-06, |
|
"loss": 0.695, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.8458349038823972, |
|
"grad_norm": 0.3479955792427063, |
|
"learning_rate": 3.0022379584505212e-06, |
|
"loss": 0.7459, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8469656992084432, |
|
"grad_norm": 0.33437180519104004, |
|
"learning_rate": 2.9592020013004455e-06, |
|
"loss": 0.6236, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.8480964945344892, |
|
"grad_norm": 0.3484211266040802, |
|
"learning_rate": 2.9164573156940654e-06, |
|
"loss": 0.6564, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8492272898605352, |
|
"grad_norm": 0.42290642857551575, |
|
"learning_rate": 2.874004466507041e-06, |
|
"loss": 0.8202, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.8503580851865812, |
|
"grad_norm": 0.3348793089389801, |
|
"learning_rate": 2.8318440147583862e-06, |
|
"loss": 0.6083, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8514888805126272, |
|
"grad_norm": 0.45830124616622925, |
|
"learning_rate": 2.7899765176030627e-06, |
|
"loss": 0.6741, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.8526196758386732, |
|
"grad_norm": 0.40784764289855957, |
|
"learning_rate": 2.7484025283246034e-06, |
|
"loss": 0.6632, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.8537504711647191, |
|
"grad_norm": 0.31340643763542175, |
|
"learning_rate": 2.707122596327805e-06, |
|
"loss": 0.6891, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.8548812664907651, |
|
"grad_norm": 0.5138049721717834, |
|
"learning_rate": 2.6661372671314493e-06, |
|
"loss": 0.7407, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.8560120618168111, |
|
"grad_norm": 0.3300493359565735, |
|
"learning_rate": 2.6254470823611323e-06, |
|
"loss": 0.7163, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.4111888110637665, |
|
"learning_rate": 2.585052579742059e-06, |
|
"loss": 0.7343, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.8582736524689031, |
|
"grad_norm": 0.3648470938205719, |
|
"learning_rate": 2.5449542930919864e-06, |
|
"loss": 0.6905, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.8594044477949491, |
|
"grad_norm": 0.3930950164794922, |
|
"learning_rate": 2.5051527523141356e-06, |
|
"loss": 0.6164, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.860535243120995, |
|
"grad_norm": 0.35205841064453125, |
|
"learning_rate": 2.465648483390193e-06, |
|
"loss": 0.6893, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.861666038447041, |
|
"grad_norm": 0.3441345989704132, |
|
"learning_rate": 2.4264420083733807e-06, |
|
"loss": 0.6441, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.862796833773087, |
|
"grad_norm": 0.3523414134979248, |
|
"learning_rate": 2.387533845381518e-06, |
|
"loss": 0.7179, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.863927629099133, |
|
"grad_norm": 0.4754193425178528, |
|
"learning_rate": 2.3489245085902194e-06, |
|
"loss": 0.7682, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.865058424425179, |
|
"grad_norm": 0.3973066508769989, |
|
"learning_rate": 2.310614508226078e-06, |
|
"loss": 0.6431, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.866189219751225, |
|
"grad_norm": 0.49921613931655884, |
|
"learning_rate": 2.2726043505599036e-06, |
|
"loss": 0.7379, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.867320015077271, |
|
"grad_norm": 0.3483542203903198, |
|
"learning_rate": 2.2348945379000783e-06, |
|
"loss": 0.6746, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.868450810403317, |
|
"grad_norm": 0.409015417098999, |
|
"learning_rate": 2.1974855685858663e-06, |
|
"loss": 0.6205, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.8695816057293629, |
|
"grad_norm": 0.38850805163383484, |
|
"learning_rate": 2.1603779369808757e-06, |
|
"loss": 0.6971, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.8707124010554089, |
|
"grad_norm": 0.39465731382369995, |
|
"learning_rate": 2.123572133466495e-06, |
|
"loss": 0.6327, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8718431963814549, |
|
"grad_norm": 0.3345524072647095, |
|
"learning_rate": 2.087068644435425e-06, |
|
"loss": 0.6426, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.8729739917075009, |
|
"grad_norm": 0.28288835287094116, |
|
"learning_rate": 2.050867952285243e-06, |
|
"loss": 0.5873, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8741047870335469, |
|
"grad_norm": 0.3995983898639679, |
|
"learning_rate": 2.0149705354120224e-06, |
|
"loss": 0.6867, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.8752355823595929, |
|
"grad_norm": 0.4070720076560974, |
|
"learning_rate": 1.9793768682040524e-06, |
|
"loss": 0.726, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8763663776856389, |
|
"grad_norm": 0.37636858224868774, |
|
"learning_rate": 1.9440874210355065e-06, |
|
"loss": 0.6516, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8774971730116848, |
|
"grad_norm": 0.2892749309539795, |
|
"learning_rate": 1.909102660260273e-06, |
|
"loss": 0.6692, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8786279683377308, |
|
"grad_norm": 0.3219640851020813, |
|
"learning_rate": 1.8744230482057673e-06, |
|
"loss": 0.7656, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.8797587636637768, |
|
"grad_norm": 0.4004978835582733, |
|
"learning_rate": 1.8400490431668387e-06, |
|
"loss": 0.7057, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.8808895589898228, |
|
"grad_norm": 0.3124302327632904, |
|
"learning_rate": 1.805981099399709e-06, |
|
"loss": 0.6377, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.8820203543158688, |
|
"grad_norm": 0.3707364797592163, |
|
"learning_rate": 1.7722196671159542e-06, |
|
"loss": 0.6751, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8831511496419148, |
|
"grad_norm": 0.38595885038375854, |
|
"learning_rate": 1.7387651924765796e-06, |
|
"loss": 0.6968, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.8842819449679608, |
|
"grad_norm": 0.3807552754878998, |
|
"learning_rate": 1.7056181175861025e-06, |
|
"loss": 0.7338, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.8854127402940067, |
|
"grad_norm": 0.40677499771118164, |
|
"learning_rate": 1.6727788804867277e-06, |
|
"loss": 0.713, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.8865435356200527, |
|
"grad_norm": 0.5259581804275513, |
|
"learning_rate": 1.6402479151525458e-06, |
|
"loss": 0.6833, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.8876743309460987, |
|
"grad_norm": 0.3447456359863281, |
|
"learning_rate": 1.6080256514838077e-06, |
|
"loss": 0.6712, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.8888051262721447, |
|
"grad_norm": 0.31114619970321655, |
|
"learning_rate": 1.5761125153012312e-06, |
|
"loss": 0.73, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.8899359215981907, |
|
"grad_norm": 0.29841819405555725, |
|
"learning_rate": 1.5445089283403768e-06, |
|
"loss": 0.6782, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.8910667169242367, |
|
"grad_norm": 0.45541536808013916, |
|
"learning_rate": 1.5132153082460908e-06, |
|
"loss": 0.7093, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.8921975122502827, |
|
"grad_norm": 0.31067731976509094, |
|
"learning_rate": 1.482232068566966e-06, |
|
"loss": 0.6212, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.8933283075763286, |
|
"grad_norm": 0.3661406934261322, |
|
"learning_rate": 1.4515596187498898e-06, |
|
"loss": 0.6728, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.8944591029023746, |
|
"grad_norm": 0.31056010723114014, |
|
"learning_rate": 1.4211983641346154e-06, |
|
"loss": 0.64, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.8955898982284206, |
|
"grad_norm": 0.3716438114643097, |
|
"learning_rate": 1.3911487059484362e-06, |
|
"loss": 0.7058, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.8967206935544666, |
|
"grad_norm": 0.3605138659477234, |
|
"learning_rate": 1.3614110413008474e-06, |
|
"loss": 0.7142, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.8978514888805126, |
|
"grad_norm": 0.39523765444755554, |
|
"learning_rate": 1.3319857631783227e-06, |
|
"loss": 0.667, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.8989822842065586, |
|
"grad_norm": 0.4720902144908905, |
|
"learning_rate": 1.302873260439122e-06, |
|
"loss": 0.7009, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.9001130795326046, |
|
"grad_norm": 0.39917027950286865, |
|
"learning_rate": 1.2740739178081274e-06, |
|
"loss": 0.6236, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9012438748586505, |
|
"grad_norm": 0.38968703150749207, |
|
"learning_rate": 1.2455881158717874e-06, |
|
"loss": 0.6108, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.9023746701846965, |
|
"grad_norm": 0.3744681775569916, |
|
"learning_rate": 1.2174162310730764e-06, |
|
"loss": 0.674, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9035054655107425, |
|
"grad_norm": 0.41147854924201965, |
|
"learning_rate": 1.1895586357065197e-06, |
|
"loss": 0.6971, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.9046362608367885, |
|
"grad_norm": 0.4496522843837738, |
|
"learning_rate": 1.1620156979132685e-06, |
|
"loss": 0.7027, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9057670561628345, |
|
"grad_norm": 0.38566187024116516, |
|
"learning_rate": 1.134787781676236e-06, |
|
"loss": 0.6488, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.9068978514888805, |
|
"grad_norm": 0.3715657591819763, |
|
"learning_rate": 1.1078752468153042e-06, |
|
"loss": 0.6727, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9080286468149265, |
|
"grad_norm": 0.3041117787361145, |
|
"learning_rate": 1.0812784489825507e-06, |
|
"loss": 0.6763, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.9091594421409724, |
|
"grad_norm": 0.40202027559280396, |
|
"learning_rate": 1.054997739657551e-06, |
|
"loss": 0.652, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.9102902374670184, |
|
"grad_norm": 0.41109445691108704, |
|
"learning_rate": 1.029033466142737e-06, |
|
"loss": 0.7183, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.9114210327930644, |
|
"grad_norm": 0.31253260374069214, |
|
"learning_rate": 1.0033859715588122e-06, |
|
"loss": 0.6929, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9125518281191104, |
|
"grad_norm": 0.4093742072582245, |
|
"learning_rate": 9.780555948401994e-07, |
|
"loss": 0.7043, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.9136826234451564, |
|
"grad_norm": 0.3625013828277588, |
|
"learning_rate": 9.530426707305918e-07, |
|
"loss": 0.7268, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9148134187712024, |
|
"grad_norm": 0.34964805841445923, |
|
"learning_rate": 9.283475297785005e-07, |
|
"loss": 0.6746, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.9159442140972484, |
|
"grad_norm": 0.3727870285511017, |
|
"learning_rate": 9.039704983328984e-07, |
|
"loss": 0.6868, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9170750094232943, |
|
"grad_norm": 0.5575105547904968, |
|
"learning_rate": 8.799118985389126e-07, |
|
"loss": 0.7606, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.9182058047493403, |
|
"grad_norm": 0.38986077904701233, |
|
"learning_rate": 8.561720483335478e-07, |
|
"loss": 0.6885, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9193366000753863, |
|
"grad_norm": 0.35799407958984375, |
|
"learning_rate": 8.327512614415195e-07, |
|
"loss": 0.6676, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.9204673954014323, |
|
"grad_norm": 0.3649112284183502, |
|
"learning_rate": 8.09649847371069e-07, |
|
"loss": 0.6487, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9215981907274783, |
|
"grad_norm": 0.4163808226585388, |
|
"learning_rate": 7.868681114098914e-07, |
|
"loss": 0.6342, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.9227289860535243, |
|
"grad_norm": 0.34704506397247314, |
|
"learning_rate": 7.644063546211167e-07, |
|
"loss": 0.6623, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9238597813795703, |
|
"grad_norm": 0.3512720465660095, |
|
"learning_rate": 7.422648738392934e-07, |
|
"loss": 0.6688, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.9249905767056162, |
|
"grad_norm": 0.33337023854255676, |
|
"learning_rate": 7.204439616665115e-07, |
|
"loss": 0.6587, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9261213720316622, |
|
"grad_norm": 0.35821977257728577, |
|
"learning_rate": 6.989439064684911e-07, |
|
"loss": 0.6823, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.9272521673577082, |
|
"grad_norm": 0.46636807918548584, |
|
"learning_rate": 6.777649923708024e-07, |
|
"loss": 0.7261, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9283829626837542, |
|
"grad_norm": 0.4258100688457489, |
|
"learning_rate": 6.569074992551022e-07, |
|
"loss": 0.6615, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.9295137580098002, |
|
"grad_norm": 0.39648309350013733, |
|
"learning_rate": 6.363717027554256e-07, |
|
"loss": 0.7147, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9306445533358462, |
|
"grad_norm": 0.41724279522895813, |
|
"learning_rate": 6.161578742545665e-07, |
|
"loss": 0.6852, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.9317753486618922, |
|
"grad_norm": 0.3736780285835266, |
|
"learning_rate": 5.962662808804587e-07, |
|
"loss": 0.717, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9329061439879381, |
|
"grad_norm": 0.3304630517959595, |
|
"learning_rate": 5.766971855026809e-07, |
|
"loss": 0.6539, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.9340369393139841, |
|
"grad_norm": 0.39884060621261597, |
|
"learning_rate": 5.574508467289518e-07, |
|
"loss": 0.7029, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9351677346400301, |
|
"grad_norm": 0.49207261204719543, |
|
"learning_rate": 5.385275189017353e-07, |
|
"loss": 0.7092, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.9362985299660761, |
|
"grad_norm": 0.3424831032752991, |
|
"learning_rate": 5.199274520948677e-07, |
|
"loss": 0.6355, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9374293252921221, |
|
"grad_norm": 0.32665055990219116, |
|
"learning_rate": 5.01650892110253e-07, |
|
"loss": 0.6958, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.9385601206181681, |
|
"grad_norm": 0.38883742690086365, |
|
"learning_rate": 4.836980804746261e-07, |
|
"loss": 0.6334, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9396909159442141, |
|
"grad_norm": 0.3798101842403412, |
|
"learning_rate": 4.660692544363382e-07, |
|
"loss": 0.781, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.94082171127026, |
|
"grad_norm": 0.4334189295768738, |
|
"learning_rate": 4.487646469622464e-07, |
|
"loss": 0.7235, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.941952506596306, |
|
"grad_norm": 0.34717586636543274, |
|
"learning_rate": 4.31784486734621e-07, |
|
"loss": 0.669, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.943083301922352, |
|
"grad_norm": 0.3831476867198944, |
|
"learning_rate": 4.1512899814813156e-07, |
|
"loss": 0.6536, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.944214097248398, |
|
"grad_norm": 0.35673925280570984, |
|
"learning_rate": 3.9879840130686576e-07, |
|
"loss": 0.6853, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.945344892574444, |
|
"grad_norm": 0.48463523387908936, |
|
"learning_rate": 3.82792912021443e-07, |
|
"loss": 0.7454, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.94647568790049, |
|
"grad_norm": 0.36075058579444885, |
|
"learning_rate": 3.6711274180614153e-07, |
|
"loss": 0.6806, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.947606483226536, |
|
"grad_norm": 0.42450064420700073, |
|
"learning_rate": 3.517580978761148e-07, |
|
"loss": 0.7356, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9487372785525819, |
|
"grad_norm": 0.36473605036735535, |
|
"learning_rate": 3.3672918314466007e-07, |
|
"loss": 0.717, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.9498680738786279, |
|
"grad_norm": 0.4312973916530609, |
|
"learning_rate": 3.220261962205179e-07, |
|
"loss": 0.6991, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9509988692046739, |
|
"grad_norm": 0.3392401933670044, |
|
"learning_rate": 3.0764933140525475e-07, |
|
"loss": 0.6468, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.9521296645307199, |
|
"grad_norm": 0.426111102104187, |
|
"learning_rate": 2.935987786907124e-07, |
|
"loss": 0.7004, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.9532604598567659, |
|
"grad_norm": 0.3565954864025116, |
|
"learning_rate": 2.7987472375646804e-07, |
|
"loss": 0.7199, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.9543912551828119, |
|
"grad_norm": 0.3929762542247772, |
|
"learning_rate": 2.664773479674032e-07, |
|
"loss": 0.7348, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.9555220505088579, |
|
"grad_norm": 0.4081243872642517, |
|
"learning_rate": 2.5340682837129146e-07, |
|
"loss": 0.662, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.9566528458349038, |
|
"grad_norm": 0.3799093961715698, |
|
"learning_rate": 2.406633376964784e-07, |
|
"loss": 0.6571, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.9577836411609498, |
|
"grad_norm": 0.35904359817504883, |
|
"learning_rate": 2.2824704434957766e-07, |
|
"loss": 0.7287, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.9589144364869958, |
|
"grad_norm": 0.38494235277175903, |
|
"learning_rate": 2.1615811241325613e-07, |
|
"loss": 0.71, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.9600452318130418, |
|
"grad_norm": 0.4001871943473816, |
|
"learning_rate": 2.0439670164406345e-07, |
|
"loss": 0.7414, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.9611760271390878, |
|
"grad_norm": 0.33206847310066223, |
|
"learning_rate": 1.929629674703226e-07, |
|
"loss": 0.6673, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9623068224651338, |
|
"grad_norm": 0.3849842846393585, |
|
"learning_rate": 1.8185706099007883e-07, |
|
"loss": 0.7487, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.9634376177911798, |
|
"grad_norm": 0.33372604846954346, |
|
"learning_rate": 1.7107912896908995e-07, |
|
"loss": 0.6522, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.9645684131172257, |
|
"grad_norm": 0.4485720992088318, |
|
"learning_rate": 1.6062931383890312e-07, |
|
"loss": 0.6499, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.9656992084432717, |
|
"grad_norm": 0.5238537788391113, |
|
"learning_rate": 1.5050775369495895e-07, |
|
"loss": 0.6708, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.9668300037693177, |
|
"grad_norm": 0.31111645698547363, |
|
"learning_rate": 1.4071458229478196e-07, |
|
"loss": 0.6394, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.9679607990953637, |
|
"grad_norm": 0.3432201147079468, |
|
"learning_rate": 1.3124992905619028e-07, |
|
"loss": 0.7097, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.9690915944214097, |
|
"grad_norm": 0.38018399477005005, |
|
"learning_rate": 1.2211391905561086e-07, |
|
"loss": 0.711, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.9702223897474557, |
|
"grad_norm": 0.36301785707473755, |
|
"learning_rate": 1.1330667302641151e-07, |
|
"loss": 0.664, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.9713531850735017, |
|
"grad_norm": 0.3279567360877991, |
|
"learning_rate": 1.0482830735730198e-07, |
|
"loss": 0.6311, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.9724839803995476, |
|
"grad_norm": 0.31279444694519043, |
|
"learning_rate": 9.66789340908103e-08, |
|
"loss": 0.6588, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9736147757255936, |
|
"grad_norm": 0.38352543115615845, |
|
"learning_rate": 8.885866092178952e-08, |
|
"loss": 0.6798, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.9747455710516396, |
|
"grad_norm": 0.3573386073112488, |
|
"learning_rate": 8.136759119600213e-08, |
|
"loss": 0.6686, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.9758763663776856, |
|
"grad_norm": 0.3751896917819977, |
|
"learning_rate": 7.42058239087462e-08, |
|
"loss": 0.7003, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.9770071617037316, |
|
"grad_norm": 0.425658643245697, |
|
"learning_rate": 6.737345370355919e-08, |
|
"loss": 0.7152, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.9781379570297776, |
|
"grad_norm": 0.34428244829177856, |
|
"learning_rate": 6.087057087095504e-08, |
|
"loss": 0.7485, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.9792687523558236, |
|
"grad_norm": 0.4021622836589813, |
|
"learning_rate": 5.469726134723907e-08, |
|
"loss": 0.7458, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.9803995476818695, |
|
"grad_norm": 0.364872545003891, |
|
"learning_rate": 4.885360671336714e-08, |
|
"loss": 0.7113, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.9815303430079155, |
|
"grad_norm": 0.4006316363811493, |
|
"learning_rate": 4.3339684193871576e-08, |
|
"loss": 0.7263, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.9826611383339615, |
|
"grad_norm": 0.3257283866405487, |
|
"learning_rate": 3.8155566655839746e-08, |
|
"loss": 0.6425, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.9837919336600075, |
|
"grad_norm": 0.37064969539642334, |
|
"learning_rate": 3.330132260794538e-08, |
|
"loss": 0.678, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9849227289860535, |
|
"grad_norm": 0.4389401078224182, |
|
"learning_rate": 2.8777016199554863e-08, |
|
"loss": 0.651, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.9860535243120995, |
|
"grad_norm": 0.4058115482330322, |
|
"learning_rate": 2.4582707219866772e-08, |
|
"loss": 0.7288, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.9871843196381455, |
|
"grad_norm": 0.36480987071990967, |
|
"learning_rate": 2.0718451097134773e-08, |
|
"loss": 0.7054, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.9883151149641914, |
|
"grad_norm": 0.33493801951408386, |
|
"learning_rate": 1.718429889792095e-08, |
|
"loss": 0.6635, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.9894459102902374, |
|
"grad_norm": 0.4736829698085785, |
|
"learning_rate": 1.3980297326432468e-08, |
|
"loss": 0.6585, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.9905767056162834, |
|
"grad_norm": 0.4238462746143341, |
|
"learning_rate": 1.110648872389708e-08, |
|
"loss": 0.6614, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.9917075009423294, |
|
"grad_norm": 0.32537785172462463, |
|
"learning_rate": 8.56291106801077e-09, |
|
"loss": 0.6681, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.9928382962683754, |
|
"grad_norm": 0.4619493782520294, |
|
"learning_rate": 6.349597972424293e-09, |
|
"loss": 0.6741, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.9939690915944214, |
|
"grad_norm": 0.35761797428131104, |
|
"learning_rate": 4.4665786863185014e-09, |
|
"loss": 0.6885, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.9950998869204674, |
|
"grad_norm": 0.3071589767932892, |
|
"learning_rate": 2.913878093990796e-09, |
|
"loss": 0.6676, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9962306822465133, |
|
"grad_norm": 0.3391641080379486, |
|
"learning_rate": 1.6915167145525878e-09, |
|
"loss": 0.6981, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.9973614775725593, |
|
"grad_norm": 0.41276663541793823, |
|
"learning_rate": 7.995107016406378e-10, |
|
"loss": 0.6594, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.9984922728986053, |
|
"grad_norm": 0.4570743143558502, |
|
"learning_rate": 2.3787184321444335e-10, |
|
"loss": 0.7059, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.9996230682246513, |
|
"grad_norm": 0.3156117796897888, |
|
"learning_rate": 6.607561386928751e-12, |
|
"loss": 0.6465, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.9998492272898606, |
|
"step": 4421, |
|
"total_flos": 4.746119130111803e+18, |
|
"train_loss": 0.6650473316231887, |
|
"train_runtime": 22780.4973, |
|
"train_samples_per_second": 9.316, |
|
"train_steps_per_second": 0.194 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4421, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.746119130111803e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|