{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9121164846593866,
  "eval_steps": 500,
  "global_step": 7000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020800832033281333,
      "grad_norm": 2.962590217590332,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 3.7411,
      "step": 5
    },
    {
      "epoch": 0.004160166406656267,
      "grad_norm": 2.7364747524261475,
      "learning_rate": 5.000000000000001e-07,
      "loss": 3.8199,
      "step": 10
    },
    {
      "epoch": 0.0062402496099844,
      "grad_norm": 2.7037243843078613,
      "learning_rate": 7.5e-07,
      "loss": 3.6922,
      "step": 15
    },
    {
      "epoch": 0.008320332813312533,
      "grad_norm": 2.5150601863861084,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 3.6965,
      "step": 20
    },
    {
      "epoch": 0.010400416016640665,
      "grad_norm": 2.7693450450897217,
      "learning_rate": 1.25e-06,
      "loss": 3.7636,
      "step": 25
    },
    {
      "epoch": 0.0124804992199688,
      "grad_norm": 2.946901321411133,
      "learning_rate": 1.5e-06,
      "loss": 3.5892,
      "step": 30
    },
    {
      "epoch": 0.014560582423296931,
      "grad_norm": 2.705421209335327,
      "learning_rate": 1.7500000000000002e-06,
      "loss": 3.6004,
      "step": 35
    },
    {
      "epoch": 0.016640665626625067,
      "grad_norm": 2.718595504760742,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 3.4766,
      "step": 40
    },
    {
      "epoch": 0.0187207488299532,
      "grad_norm": 3.268495798110962,
      "learning_rate": 2.25e-06,
      "loss": 3.4046,
      "step": 45
    },
    {
      "epoch": 0.02080083203328133,
      "grad_norm": 3.0189549922943115,
      "learning_rate": 2.5e-06,
      "loss": 3.4256,
      "step": 50
    },
    {
      "epoch": 0.022880915236609463,
      "grad_norm": 3.067143201828003,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 3.4206,
      "step": 55
    },
    {
      "epoch": 0.0249609984399376,
      "grad_norm": 2.973191261291504,
      "learning_rate": 3e-06,
      "loss": 3.2975,
      "step": 60
    },
    {
      "epoch": 0.02704108164326573,
      "grad_norm": 3.4896562099456787,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 3.1883,
      "step": 65
    },
    {
      "epoch": 0.029121164846593862,
      "grad_norm": 2.4497926235198975,
      "learning_rate": 3.5000000000000004e-06,
      "loss": 3.0682,
      "step": 70
    },
    {
      "epoch": 0.031201248049921998,
      "grad_norm": 3.044771909713745,
      "learning_rate": 3.75e-06,
      "loss": 2.9224,
      "step": 75
    },
    {
      "epoch": 0.033281331253250133,
      "grad_norm": 2.6263840198516846,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.9386,
      "step": 80
    },
    {
      "epoch": 0.03536141445657826,
      "grad_norm": 1.6827579736709595,
      "learning_rate": 4.250000000000001e-06,
      "loss": 2.8187,
      "step": 85
    },
    {
      "epoch": 0.0374414976599064,
      "grad_norm": 1.493446707725525,
      "learning_rate": 4.5e-06,
      "loss": 2.7529,
      "step": 90
    },
    {
      "epoch": 0.039521580863234526,
      "grad_norm": 1.1336227655410767,
      "learning_rate": 4.75e-06,
      "loss": 2.7076,
      "step": 95
    },
    {
      "epoch": 0.04160166406656266,
      "grad_norm": 0.8525938391685486,
      "learning_rate": 5e-06,
      "loss": 2.678,
      "step": 100
    },
    {
      "epoch": 0.0436817472698908,
      "grad_norm": 0.9276126027107239,
      "learning_rate": 5.25e-06,
      "loss": 2.6097,
      "step": 105
    },
    {
      "epoch": 0.045761830473218926,
      "grad_norm": 0.7816782593727112,
      "learning_rate": 5.500000000000001e-06,
      "loss": 2.614,
      "step": 110
    },
    {
      "epoch": 0.04784191367654706,
      "grad_norm": 0.8164133429527283,
      "learning_rate": 5.750000000000001e-06,
      "loss": 2.6019,
      "step": 115
    },
    {
      "epoch": 0.0499219968798752,
      "grad_norm": 0.555113673210144,
      "learning_rate": 6e-06,
      "loss": 2.5088,
      "step": 120
    },
    {
      "epoch": 0.052002080083203325,
      "grad_norm": 0.45174235105514526,
      "learning_rate": 6.25e-06,
      "loss": 2.5117,
      "step": 125
    },
    {
      "epoch": 0.05408216328653146,
      "grad_norm": 0.7130635380744934,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 2.5214,
      "step": 130
    },
    {
      "epoch": 0.056162246489859596,
      "grad_norm": 0.5437763333320618,
      "learning_rate": 6.750000000000001e-06,
      "loss": 2.5454,
      "step": 135
    },
    {
      "epoch": 0.058242329693187725,
      "grad_norm": 0.48792022466659546,
      "learning_rate": 7.000000000000001e-06,
      "loss": 2.5159,
      "step": 140
    },
    {
      "epoch": 0.06032241289651586,
      "grad_norm": 0.7019992470741272,
      "learning_rate": 7.25e-06,
      "loss": 2.5082,
      "step": 145
    },
    {
      "epoch": 0.062402496099843996,
      "grad_norm": 0.5933384895324707,
      "learning_rate": 7.5e-06,
      "loss": 2.5135,
      "step": 150
    },
    {
      "epoch": 0.06448257930317212,
      "grad_norm": 0.4854763150215149,
      "learning_rate": 7.75e-06,
      "loss": 2.4585,
      "step": 155
    },
    {
      "epoch": 0.06656266250650027,
      "grad_norm": 0.4506765902042389,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.5187,
      "step": 160
    },
    {
      "epoch": 0.0686427457098284,
      "grad_norm": 0.6778927445411682,
      "learning_rate": 8.25e-06,
      "loss": 2.4645,
      "step": 165
    },
    {
      "epoch": 0.07072282891315652,
      "grad_norm": 0.6200412511825562,
      "learning_rate": 8.500000000000002e-06,
      "loss": 2.5131,
      "step": 170
    },
    {
      "epoch": 0.07280291211648465,
      "grad_norm": 0.6752357482910156,
      "learning_rate": 8.75e-06,
      "loss": 2.5049,
      "step": 175
    },
    {
      "epoch": 0.0748829953198128,
      "grad_norm": 0.5805301070213318,
      "learning_rate": 9e-06,
      "loss": 2.5014,
      "step": 180
    },
    {
      "epoch": 0.07696307852314092,
      "grad_norm": 1.1979331970214844,
      "learning_rate": 9.25e-06,
      "loss": 2.4334,
      "step": 185
    },
    {
      "epoch": 0.07904316172646905,
      "grad_norm": 0.8396961688995361,
      "learning_rate": 9.5e-06,
      "loss": 2.4531,
      "step": 190
    },
    {
      "epoch": 0.0811232449297972,
      "grad_norm": 0.6947128772735596,
      "learning_rate": 9.750000000000002e-06,
      "loss": 2.4758,
      "step": 195
    },
    {
      "epoch": 0.08320332813312532,
      "grad_norm": 0.46556374430656433,
      "learning_rate": 1e-05,
      "loss": 2.4426,
      "step": 200
    },
    {
      "epoch": 0.08528341133645345,
      "grad_norm": 0.6042707562446594,
      "learning_rate": 1.025e-05,
      "loss": 2.3977,
      "step": 205
    },
    {
      "epoch": 0.0873634945397816,
      "grad_norm": 0.5161399245262146,
      "learning_rate": 1.05e-05,
      "loss": 2.4229,
      "step": 210
    },
    {
      "epoch": 0.08944357774310972,
      "grad_norm": 0.9716496467590332,
      "learning_rate": 1.075e-05,
      "loss": 2.4166,
      "step": 215
    },
    {
      "epoch": 0.09152366094643785,
      "grad_norm": 0.754511296749115,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 2.4372,
      "step": 220
    },
    {
      "epoch": 0.093603744149766,
      "grad_norm": 0.6887955665588379,
      "learning_rate": 1.125e-05,
      "loss": 2.4147,
      "step": 225
    },
    {
      "epoch": 0.09568382735309412,
      "grad_norm": 0.7467107772827148,
      "learning_rate": 1.1500000000000002e-05,
      "loss": 2.4222,
      "step": 230
    },
    {
      "epoch": 0.09776391055642225,
      "grad_norm": 0.945798397064209,
      "learning_rate": 1.175e-05,
      "loss": 2.4285,
      "step": 235
    },
    {
      "epoch": 0.0998439937597504,
      "grad_norm": 0.6666924953460693,
      "learning_rate": 1.2e-05,
      "loss": 2.3843,
      "step": 240
    },
    {
      "epoch": 0.10192407696307852,
      "grad_norm": 0.6966888308525085,
      "learning_rate": 1.225e-05,
      "loss": 2.4212,
      "step": 245
    },
    {
      "epoch": 0.10400416016640665,
      "grad_norm": 0.6751601099967957,
      "learning_rate": 1.25e-05,
      "loss": 2.3562,
      "step": 250
    },
    {
      "epoch": 0.1060842433697348,
      "grad_norm": 0.7203898429870605,
      "learning_rate": 1.2750000000000002e-05,
      "loss": 2.3661,
      "step": 255
    },
    {
      "epoch": 0.10816432657306292,
      "grad_norm": 0.8724287748336792,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 2.4047,
      "step": 260
    },
    {
      "epoch": 0.11024440977639105,
      "grad_norm": 0.8295998573303223,
      "learning_rate": 1.3250000000000002e-05,
      "loss": 2.387,
      "step": 265
    },
    {
      "epoch": 0.11232449297971919,
      "grad_norm": 0.7787670493125916,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 2.3892,
      "step": 270
    },
    {
      "epoch": 0.11440457618304732,
      "grad_norm": 0.5952211022377014,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 2.3992,
      "step": 275
    },
    {
      "epoch": 0.11648465938637545,
      "grad_norm": 0.8523284792900085,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 2.3621,
      "step": 280
    },
    {
      "epoch": 0.11856474258970359,
      "grad_norm": 0.7084488868713379,
      "learning_rate": 1.4249999999999999e-05,
      "loss": 2.3823,
      "step": 285
    },
    {
      "epoch": 0.12064482579303172,
      "grad_norm": 0.7749157547950745,
      "learning_rate": 1.45e-05,
      "loss": 2.3772,
      "step": 290
    },
    {
      "epoch": 0.12272490899635985,
      "grad_norm": 0.6760996580123901,
      "learning_rate": 1.475e-05,
      "loss": 2.3897,
      "step": 295
    },
    {
      "epoch": 0.12480499219968799,
      "grad_norm": 0.7566614151000977,
      "learning_rate": 1.5e-05,
      "loss": 2.3939,
      "step": 300
    },
    {
      "epoch": 0.12688507540301613,
      "grad_norm": 0.7718506455421448,
      "learning_rate": 1.525e-05,
      "loss": 2.4124,
      "step": 305
    },
    {
      "epoch": 0.12896515860634425,
      "grad_norm": 0.6160978078842163,
      "learning_rate": 1.55e-05,
      "loss": 2.4009,
      "step": 310
    },
    {
      "epoch": 0.1310452418096724,
      "grad_norm": 1.0341984033584595,
      "learning_rate": 1.575e-05,
      "loss": 2.3489,
      "step": 315
    },
    {
      "epoch": 0.13312532501300053,
      "grad_norm": 0.7184290289878845,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.3588,
      "step": 320
    },
    {
      "epoch": 0.13520540821632865,
      "grad_norm": 0.6868571639060974,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 2.427,
      "step": 325
    },
    {
      "epoch": 0.1372854914196568,
      "grad_norm": 0.837578535079956,
      "learning_rate": 1.65e-05,
      "loss": 2.3661,
      "step": 330
    },
    {
      "epoch": 0.1393655746229849,
      "grad_norm": 0.6868174076080322,
      "learning_rate": 1.675e-05,
      "loss": 2.3912,
      "step": 335
    },
    {
      "epoch": 0.14144565782631305,
      "grad_norm": 0.625311017036438,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 2.3164,
      "step": 340
    },
    {
      "epoch": 0.1435257410296412,
      "grad_norm": 0.6201218962669373,
      "learning_rate": 1.725e-05,
      "loss": 2.3786,
      "step": 345
    },
    {
      "epoch": 0.1456058242329693,
      "grad_norm": 0.7219041585922241,
      "learning_rate": 1.75e-05,
      "loss": 2.3528,
      "step": 350
    },
    {
      "epoch": 0.14768590743629745,
      "grad_norm": 0.6239330172538757,
      "learning_rate": 1.775e-05,
      "loss": 2.3686,
      "step": 355
    },
    {
      "epoch": 0.1497659906396256,
      "grad_norm": 0.7319772839546204,
      "learning_rate": 1.8e-05,
      "loss": 2.3694,
      "step": 360
    },
    {
      "epoch": 0.1518460738429537,
      "grad_norm": 0.6200202703475952,
      "learning_rate": 1.825e-05,
      "loss": 2.3627,
      "step": 365
    },
    {
      "epoch": 0.15392615704628185,
      "grad_norm": 0.8407759666442871,
      "learning_rate": 1.85e-05,
      "loss": 2.3931,
      "step": 370
    },
    {
      "epoch": 0.15600624024961,
      "grad_norm": 1.3796571493148804,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 2.3787,
      "step": 375
    },
    {
      "epoch": 0.1580863234529381,
      "grad_norm": 0.6906828880310059,
      "learning_rate": 1.9e-05,
      "loss": 2.3771,
      "step": 380
    },
    {
      "epoch": 0.16016640665626625,
      "grad_norm": 0.6497045159339905,
      "learning_rate": 1.925e-05,
      "loss": 2.3581,
      "step": 385
    },
    {
      "epoch": 0.1622464898595944,
      "grad_norm": 0.5141230821609497,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 2.3419,
      "step": 390
    },
    {
      "epoch": 0.1643265730629225,
      "grad_norm": 0.9040182828903198,
      "learning_rate": 1.9750000000000002e-05,
      "loss": 2.3513,
      "step": 395
    },
    {
      "epoch": 0.16640665626625065,
      "grad_norm": 0.7217531204223633,
      "learning_rate": 2e-05,
      "loss": 2.3448,
      "step": 400
    },
    {
      "epoch": 0.1684867394695788,
      "grad_norm": 0.8354098200798035,
      "learning_rate": 2.025e-05,
      "loss": 2.3951,
      "step": 405
    },
    {
      "epoch": 0.1705668226729069,
      "grad_norm": 0.5832729935646057,
      "learning_rate": 2.05e-05,
      "loss": 2.323,
      "step": 410
    },
    {
      "epoch": 0.17264690587623505,
      "grad_norm": 0.6963520050048828,
      "learning_rate": 2.075e-05,
      "loss": 2.3438,
      "step": 415
    },
    {
      "epoch": 0.1747269890795632,
      "grad_norm": 1.1836349964141846,
      "learning_rate": 2.1e-05,
      "loss": 2.3125,
      "step": 420
    },
    {
      "epoch": 0.1768070722828913,
      "grad_norm": 0.7848880887031555,
      "learning_rate": 2.125e-05,
      "loss": 2.401,
      "step": 425
    },
    {
      "epoch": 0.17888715548621945,
      "grad_norm": 0.757739782333374,
      "learning_rate": 2.15e-05,
      "loss": 2.3487,
      "step": 430
    },
    {
      "epoch": 0.1809672386895476,
      "grad_norm": 0.5723095536231995,
      "learning_rate": 2.175e-05,
      "loss": 2.346,
      "step": 435
    },
    {
      "epoch": 0.1830473218928757,
      "grad_norm": 0.7125130891799927,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 2.3236,
      "step": 440
    },
    {
      "epoch": 0.18512740509620385,
      "grad_norm": 0.6598831415176392,
      "learning_rate": 2.2250000000000002e-05,
      "loss": 2.3828,
      "step": 445
    },
    {
      "epoch": 0.187207488299532,
      "grad_norm": 0.6140534281730652,
      "learning_rate": 2.25e-05,
      "loss": 2.3335,
      "step": 450
    },
    {
      "epoch": 0.1892875715028601,
      "grad_norm": 0.6711081266403198,
      "learning_rate": 2.275e-05,
      "loss": 2.3336,
      "step": 455
    },
    {
      "epoch": 0.19136765470618824,
      "grad_norm": 0.6882185339927673,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 2.3423,
      "step": 460
    },
    {
      "epoch": 0.1934477379095164,
      "grad_norm": 0.7247514724731445,
      "learning_rate": 2.3250000000000003e-05,
      "loss": 2.3326,
      "step": 465
    },
    {
      "epoch": 0.1955278211128445,
      "grad_norm": 0.6698940992355347,
      "learning_rate": 2.35e-05,
      "loss": 2.2953,
      "step": 470
    },
    {
      "epoch": 0.19760790431617264,
      "grad_norm": 0.7808359265327454,
      "learning_rate": 2.375e-05,
      "loss": 2.3261,
      "step": 475
    },
    {
      "epoch": 0.1996879875195008,
      "grad_norm": 0.7009552717208862,
      "learning_rate": 2.4e-05,
      "loss": 2.3248,
      "step": 480
    },
    {
      "epoch": 0.2017680707228289,
      "grad_norm": 0.6713016033172607,
      "learning_rate": 2.425e-05,
      "loss": 2.3566,
      "step": 485
    },
    {
      "epoch": 0.20384815392615704,
      "grad_norm": 0.784695565700531,
      "learning_rate": 2.45e-05,
      "loss": 2.3474,
      "step": 490
    },
    {
      "epoch": 0.2059282371294852,
      "grad_norm": 0.7487632632255554,
      "learning_rate": 2.4750000000000002e-05,
      "loss": 2.3279,
      "step": 495
    },
    {
      "epoch": 0.2080083203328133,
      "grad_norm": 0.5700154304504395,
      "learning_rate": 2.5e-05,
      "loss": 2.3325,
      "step": 500
    },
    {
      "epoch": 0.21008840353614144,
      "grad_norm": 0.7952355146408081,
      "learning_rate": 2.525e-05,
      "loss": 2.3383,
      "step": 505
    },
    {
      "epoch": 0.2121684867394696,
      "grad_norm": 0.6064152717590332,
      "learning_rate": 2.5500000000000003e-05,
      "loss": 2.2919,
      "step": 510
    },
    {
      "epoch": 0.2142485699427977,
      "grad_norm": 0.6273530721664429,
      "learning_rate": 2.5750000000000002e-05,
      "loss": 2.3059,
      "step": 515
    },
    {
      "epoch": 0.21632865314612584,
      "grad_norm": 0.683093786239624,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 2.2865,
      "step": 520
    },
    {
      "epoch": 0.21840873634945399,
      "grad_norm": 0.8195337653160095,
      "learning_rate": 2.625e-05,
      "loss": 2.3474,
      "step": 525
    },
    {
      "epoch": 0.2204888195527821,
      "grad_norm": 0.7622310519218445,
      "learning_rate": 2.6500000000000004e-05,
      "loss": 2.3112,
      "step": 530
    },
    {
      "epoch": 0.22256890275611024,
      "grad_norm": 0.7957525253295898,
      "learning_rate": 2.6750000000000003e-05,
      "loss": 2.3254,
      "step": 535
    },
    {
      "epoch": 0.22464898595943839,
      "grad_norm": 0.5769463181495667,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 2.3433,
      "step": 540
    },
    {
      "epoch": 0.2267290691627665,
      "grad_norm": 0.6178082823753357,
      "learning_rate": 2.725e-05,
      "loss": 2.3502,
      "step": 545
    },
    {
      "epoch": 0.22880915236609464,
      "grad_norm": 0.633885383605957,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 2.3422,
      "step": 550
    },
    {
      "epoch": 0.23088923556942278,
      "grad_norm": 0.48240986466407776,
      "learning_rate": 2.7750000000000004e-05,
      "loss": 2.2952,
      "step": 555
    },
    {
      "epoch": 0.2329693187727509,
      "grad_norm": 0.7513511180877686,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 2.2972,
      "step": 560
    },
    {
      "epoch": 0.23504940197607904,
      "grad_norm": 0.5911456942558289,
      "learning_rate": 2.825e-05,
      "loss": 2.3214,
      "step": 565
    },
    {
      "epoch": 0.23712948517940718,
      "grad_norm": 0.6111375689506531,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 2.3611,
      "step": 570
    },
    {
      "epoch": 0.2392095683827353,
      "grad_norm": 0.6953846216201782,
      "learning_rate": 2.8749999999999997e-05,
      "loss": 2.2539,
      "step": 575
    },
    {
      "epoch": 0.24128965158606344,
      "grad_norm": 0.5785839557647705,
      "learning_rate": 2.9e-05,
      "loss": 2.3212,
      "step": 580
    },
    {
      "epoch": 0.24336973478939158,
      "grad_norm": 0.6091140508651733,
      "learning_rate": 2.925e-05,
      "loss": 2.2971,
      "step": 585
    },
    {
      "epoch": 0.2454498179927197,
      "grad_norm": 0.5193526744842529,
      "learning_rate": 2.95e-05,
      "loss": 2.293,
      "step": 590
    },
    {
      "epoch": 0.24752990119604784,
      "grad_norm": 0.7062333226203918,
      "learning_rate": 2.975e-05,
      "loss": 2.3222,
      "step": 595
    },
    {
      "epoch": 0.24960998439937598,
      "grad_norm": 1.2192779779434204,
      "learning_rate": 3e-05,
      "loss": 2.3591,
      "step": 600
    },
    {
      "epoch": 0.2516900676027041,
      "grad_norm": 0.6763813495635986,
      "learning_rate": 3.025e-05,
      "loss": 2.2771,
      "step": 605
    },
    {
      "epoch": 0.25377015080603227,
      "grad_norm": 0.6813860535621643,
      "learning_rate": 3.05e-05,
      "loss": 2.3141,
      "step": 610
    },
    {
      "epoch": 0.25585023400936036,
      "grad_norm": 0.8562334179878235,
      "learning_rate": 3.075e-05,
      "loss": 2.3094,
      "step": 615
    },
    {
      "epoch": 0.2579303172126885,
      "grad_norm": 0.7040572762489319,
      "learning_rate": 3.1e-05,
      "loss": 2.3033,
      "step": 620
    },
    {
      "epoch": 0.26001040041601664,
      "grad_norm": 0.6882712841033936,
      "learning_rate": 3.125e-05,
      "loss": 2.2834,
      "step": 625
    },
    {
      "epoch": 0.2620904836193448,
      "grad_norm": 0.8077874779701233,
      "learning_rate": 3.15e-05,
      "loss": 2.3071,
      "step": 630
    },
    {
      "epoch": 0.2641705668226729,
      "grad_norm": 0.6731362342834473,
      "learning_rate": 3.175e-05,
      "loss": 2.3127,
      "step": 635
    },
    {
      "epoch": 0.26625065002600107,
      "grad_norm": 0.5459744334220886,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 2.3146,
      "step": 640
    },
    {
      "epoch": 0.26833073322932915,
      "grad_norm": 0.6492711901664734,
      "learning_rate": 3.2250000000000005e-05,
      "loss": 2.3144,
      "step": 645
    },
    {
      "epoch": 0.2704108164326573,
      "grad_norm": 1.2923798561096191,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 2.3319,
      "step": 650
    },
    {
      "epoch": 0.27249089963598544,
      "grad_norm": 0.6995902061462402,
      "learning_rate": 3.275e-05,
      "loss": 2.3251,
      "step": 655
    },
    {
      "epoch": 0.2745709828393136,
      "grad_norm": 0.598090410232544,
      "learning_rate": 3.3e-05,
      "loss": 2.3024,
      "step": 660
    },
    {
      "epoch": 0.2766510660426417,
      "grad_norm": 0.5931279063224792,
      "learning_rate": 3.325e-05,
      "loss": 2.289,
      "step": 665
    },
    {
      "epoch": 0.2787311492459698,
      "grad_norm": 0.5426341891288757,
      "learning_rate": 3.35e-05,
      "loss": 2.3031,
      "step": 670
    },
    {
      "epoch": 0.28081123244929795,
      "grad_norm": 0.6066926717758179,
      "learning_rate": 3.375000000000001e-05,
      "loss": 2.3116,
      "step": 675
    },
    {
      "epoch": 0.2828913156526261,
      "grad_norm": 0.7575869560241699,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 2.256,
      "step": 680
    },
    {
      "epoch": 0.28497139885595424,
      "grad_norm": 0.6038545370101929,
      "learning_rate": 3.4250000000000006e-05,
      "loss": 2.3257,
      "step": 685
    },
    {
      "epoch": 0.2870514820592824,
      "grad_norm": 0.5261275768280029,
      "learning_rate": 3.45e-05,
      "loss": 2.287,
      "step": 690
    },
    {
      "epoch": 0.2891315652626105,
      "grad_norm": 0.6376118659973145,
      "learning_rate": 3.475e-05,
      "loss": 2.2707,
      "step": 695
    },
    {
      "epoch": 0.2912116484659386,
      "grad_norm": 0.6691327095031738,
      "learning_rate": 3.5e-05,
      "loss": 2.3172,
      "step": 700
    },
    {
      "epoch": 0.29329173166926675,
      "grad_norm": 0.8731220364570618,
      "learning_rate": 3.525e-05,
      "loss": 2.2722,
      "step": 705
    },
    {
      "epoch": 0.2953718148725949,
      "grad_norm": 0.6707152724266052,
      "learning_rate": 3.55e-05,
      "loss": 2.3535,
      "step": 710
    },
    {
      "epoch": 0.29745189807592304,
      "grad_norm": 0.6515153646469116,
      "learning_rate": 3.575e-05,
      "loss": 2.297,
      "step": 715
    },
    {
      "epoch": 0.2995319812792512,
      "grad_norm": 0.5436397790908813,
      "learning_rate": 3.6e-05,
      "loss": 2.3256,
      "step": 720
    },
    {
      "epoch": 0.3016120644825793,
      "grad_norm": 0.4850907027721405,
      "learning_rate": 3.625e-05,
      "loss": 2.2774,
      "step": 725
    },
    {
      "epoch": 0.3036921476859074,
      "grad_norm": 0.559877872467041,
      "learning_rate": 3.65e-05,
      "loss": 2.2895,
      "step": 730
    },
    {
      "epoch": 0.30577223088923555,
      "grad_norm": 0.6224697232246399,
      "learning_rate": 3.675e-05,
      "loss": 2.2715,
      "step": 735
    },
    {
      "epoch": 0.3078523140925637,
      "grad_norm": 0.5158293843269348,
      "learning_rate": 3.7e-05,
      "loss": 2.2788,
      "step": 740
    },
    {
      "epoch": 0.30993239729589184,
      "grad_norm": 0.6136394143104553,
      "learning_rate": 3.7250000000000004e-05,
      "loss": 2.3017,
      "step": 745
    },
    {
      "epoch": 0.31201248049922,
      "grad_norm": 0.6287189722061157,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.2602,
      "step": 750
    },
    {
      "epoch": 0.3140925637025481,
      "grad_norm": 0.6049214601516724,
      "learning_rate": 3.775e-05,
      "loss": 2.2554,
      "step": 755
    },
    {
      "epoch": 0.3161726469058762,
      "grad_norm": 0.7491621375083923,
      "learning_rate": 3.8e-05,
      "loss": 2.2689,
      "step": 760
    },
    {
      "epoch": 0.31825273010920435,
      "grad_norm": 0.6048611402511597,
      "learning_rate": 3.825e-05,
      "loss": 2.2535,
      "step": 765
    },
    {
      "epoch": 0.3203328133125325,
      "grad_norm": 0.9862955808639526,
      "learning_rate": 3.85e-05,
      "loss": 2.2708,
      "step": 770
    },
    {
      "epoch": 0.32241289651586064,
      "grad_norm": 0.7605366706848145,
      "learning_rate": 3.875e-05,
      "loss": 2.2885,
      "step": 775
    },
    {
      "epoch": 0.3244929797191888,
      "grad_norm": 1.3617628812789917,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 2.2877,
      "step": 780
    },
    {
      "epoch": 0.3265730629225169,
      "grad_norm": 0.7338688373565674,
      "learning_rate": 3.9250000000000005e-05,
      "loss": 2.3489,
      "step": 785
    },
    {
      "epoch": 0.328653146125845,
      "grad_norm": 0.8271191716194153,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 2.2867,
      "step": 790
    },
    {
      "epoch": 0.33073322932917315,
      "grad_norm": 0.6508281230926514,
      "learning_rate": 3.9750000000000004e-05,
      "loss": 2.2863,
      "step": 795
    },
    {
      "epoch": 0.3328133125325013,
      "grad_norm": 0.635067880153656,
      "learning_rate": 4e-05,
      "loss": 2.2428,
      "step": 800
    },
    {
      "epoch": 0.33489339573582944,
      "grad_norm": 0.6569282412528992,
      "learning_rate": 4.025e-05,
      "loss": 2.2202,
      "step": 805
    },
    {
      "epoch": 0.3369734789391576,
      "grad_norm": 0.529431164264679,
      "learning_rate": 4.05e-05,
      "loss": 2.2532,
      "step": 810
    },
    {
      "epoch": 0.3390535621424857,
      "grad_norm": 0.5580635070800781,
      "learning_rate": 4.075e-05,
      "loss": 2.2774,
      "step": 815
    },
    {
      "epoch": 0.3411336453458138,
      "grad_norm": 0.794660210609436,
      "learning_rate": 4.1e-05,
      "loss": 2.3053,
      "step": 820
    },
    {
      "epoch": 0.34321372854914195,
      "grad_norm": 0.5378262996673584,
      "learning_rate": 4.125e-05,
      "loss": 2.3062,
      "step": 825
    },
    {
      "epoch": 0.3452938117524701,
      "grad_norm": 0.660877525806427,
      "learning_rate": 4.15e-05,
      "loss": 2.2977,
      "step": 830
    },
    {
      "epoch": 0.34737389495579823,
      "grad_norm": 0.6711246371269226,
      "learning_rate": 4.175e-05,
      "loss": 2.2724,
      "step": 835
    },
    {
      "epoch": 0.3494539781591264,
      "grad_norm": 0.5555285215377808,
      "learning_rate": 4.2e-05,
      "loss": 2.2536,
      "step": 840
    },
    {
      "epoch": 0.3515340613624545,
      "grad_norm": 0.5838858485221863,
      "learning_rate": 4.2250000000000004e-05,
      "loss": 2.2838,
      "step": 845
    },
    {
      "epoch": 0.3536141445657826,
      "grad_norm": 0.8371697068214417,
      "learning_rate": 4.25e-05,
      "loss": 2.2882,
      "step": 850
    },
    {
      "epoch": 0.35569422776911075,
      "grad_norm": 0.6019457578659058,
      "learning_rate": 4.275e-05,
      "loss": 2.2534,
      "step": 855
    },
    {
      "epoch": 0.3577743109724389,
      "grad_norm": 0.5931807160377502,
      "learning_rate": 4.3e-05,
      "loss": 2.2513,
      "step": 860
    },
    {
      "epoch": 0.35985439417576703,
      "grad_norm": 0.6282745003700256,
      "learning_rate": 4.325e-05,
      "loss": 2.2739,
      "step": 865
    },
    {
      "epoch": 0.3619344773790952,
      "grad_norm": 0.605859100818634,
      "learning_rate": 4.35e-05,
      "loss": 2.2923,
      "step": 870
    },
    {
      "epoch": 0.3640145605824233,
      "grad_norm": 0.5225040912628174,
      "learning_rate": 4.375e-05,
      "loss": 2.2917,
      "step": 875
    },
    {
      "epoch": 0.3660946437857514,
      "grad_norm": 0.638031005859375,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 2.3202,
      "step": 880
    },
    {
      "epoch": 0.36817472698907955,
      "grad_norm": 0.5370813608169556,
      "learning_rate": 4.4250000000000005e-05,
      "loss": 2.2645,
      "step": 885
    },
    {
      "epoch": 0.3702548101924077,
      "grad_norm": 0.5657123327255249,
      "learning_rate": 4.4500000000000004e-05,
      "loss": 2.2923,
      "step": 890
    },
    {
      "epoch": 0.37233489339573583,
      "grad_norm": 0.7133671045303345,
      "learning_rate": 4.4750000000000004e-05,
      "loss": 2.2233,
      "step": 895
    },
    {
      "epoch": 0.374414976599064,
      "grad_norm": 0.7067397236824036,
      "learning_rate": 4.5e-05,
      "loss": 2.2615,
      "step": 900
    },
    {
      "epoch": 0.3764950598023921,
      "grad_norm": 0.5847836136817932,
      "learning_rate": 4.525e-05,
      "loss": 2.2535,
      "step": 905
    },
    {
      "epoch": 0.3785751430057202,
      "grad_norm": 0.616258442401886,
      "learning_rate": 4.55e-05,
      "loss": 2.2308,
      "step": 910
    },
    {
      "epoch": 0.38065522620904835,
      "grad_norm": 0.5688422918319702,
      "learning_rate": 4.575e-05,
      "loss": 2.2858,
      "step": 915
    },
    {
      "epoch": 0.3827353094123765,
      "grad_norm": 0.7827875018119812,
      "learning_rate": 4.600000000000001e-05,
      "loss": 2.2429,
      "step": 920
    },
    {
      "epoch": 0.38481539261570463,
      "grad_norm": 0.6340644359588623,
      "learning_rate": 4.6250000000000006e-05,
      "loss": 2.2596,
      "step": 925
    },
    {
      "epoch": 0.3868954758190328,
      "grad_norm": 0.6311989426612854,
      "learning_rate": 4.6500000000000005e-05,
      "loss": 2.2857,
      "step": 930
    },
    {
      "epoch": 0.3889755590223609,
      "grad_norm": 0.6470209956169128,
      "learning_rate": 4.6750000000000005e-05,
      "loss": 2.252,
      "step": 935
    },
    {
      "epoch": 0.391055642225689,
      "grad_norm": 0.7288528084754944,
      "learning_rate": 4.7e-05,
      "loss": 2.2766,
      "step": 940
    },
    {
      "epoch": 0.39313572542901715,
      "grad_norm": 0.5812355875968933,
      "learning_rate": 4.7249999999999997e-05,
      "loss": 2.3149,
      "step": 945
    },
    {
      "epoch": 0.3952158086323453,
      "grad_norm": 0.6074076294898987,
      "learning_rate": 4.75e-05,
      "loss": 2.2789,
      "step": 950
    },
    {
      "epoch": 0.39729589183567343,
      "grad_norm": 0.6139565110206604,
      "learning_rate": 4.775e-05,
      "loss": 2.3189,
      "step": 955
    },
    {
      "epoch": 0.3993759750390016,
      "grad_norm": 0.7194942235946655,
      "learning_rate": 4.8e-05,
      "loss": 2.2049,
      "step": 960
    },
    {
      "epoch": 0.4014560582423297,
      "grad_norm": 0.5972425937652588,
      "learning_rate": 4.825e-05,
      "loss": 2.2476,
      "step": 965
    },
    {
      "epoch": 0.4035361414456578,
      "grad_norm": 0.557567298412323,
      "learning_rate": 4.85e-05,
      "loss": 2.2489,
      "step": 970
    },
    {
      "epoch": 0.40561622464898595,
      "grad_norm": 0.5463237762451172,
      "learning_rate": 4.875e-05,
      "loss": 2.2416,
      "step": 975
    },
    {
      "epoch": 0.4076963078523141,
      "grad_norm": 0.5343475341796875,
      "learning_rate": 4.9e-05,
      "loss": 2.2684,
      "step": 980
    },
    {
      "epoch": 0.40977639105564223,
      "grad_norm": 0.5676887035369873,
      "learning_rate": 4.9250000000000004e-05,
      "loss": 2.2401,
      "step": 985
    },
    {
      "epoch": 0.4118564742589704,
      "grad_norm": 0.6001689434051514,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 2.2077,
      "step": 990
    },
    {
      "epoch": 0.4139365574622985,
      "grad_norm": 0.6760561466217041,
      "learning_rate": 4.975e-05,
      "loss": 2.3,
      "step": 995
    },
    {
      "epoch": 0.4160166406656266,
      "grad_norm": 0.7530073523521423,
      "learning_rate": 5e-05,
      "loss": 2.2847,
      "step": 1000
    },
    {
      "epoch": 0.41809672386895474,
      "grad_norm": 0.6120262742042542,
      "learning_rate": 4.9999919997010506e-05,
      "loss": 2.2774,
      "step": 1005
    },
    {
      "epoch": 0.4201768070722829,
      "grad_norm": 0.6917073130607605,
      "learning_rate": 4.9999679988554024e-05,
      "loss": 2.2252,
      "step": 1010
    },
    {
      "epoch": 0.42225689027561103,
      "grad_norm": 0.6386430859565735,
      "learning_rate": 4.999927997616671e-05,
      "loss": 2.2213,
      "step": 1015
    },
    {
      "epoch": 0.4243369734789392,
      "grad_norm": 0.7094517946243286,
      "learning_rate": 4.99987199624087e-05,
      "loss": 2.245,
      "step": 1020
    },
    {
      "epoch": 0.4264170566822673,
      "grad_norm": 0.7361143231391907,
      "learning_rate": 4.999799995086424e-05,
      "loss": 2.2545,
      "step": 1025
    },
    {
      "epoch": 0.4284971398855954,
      "grad_norm": 0.6875327825546265,
      "learning_rate": 4.999711994614157e-05,
      "loss": 2.2491,
      "step": 1030
    },
    {
      "epoch": 0.43057722308892354,
      "grad_norm": 0.6336541175842285,
      "learning_rate": 4.999607995387292e-05,
      "loss": 2.2433,
      "step": 1035
    },
    {
      "epoch": 0.4326573062922517,
      "grad_norm": 0.6160376667976379,
      "learning_rate": 4.9994879980714507e-05,
      "loss": 2.2845,
      "step": 1040
    },
    {
      "epoch": 0.43473738949557983,
      "grad_norm": 0.5946584939956665,
      "learning_rate": 4.999352003434643e-05,
      "loss": 2.2479,
      "step": 1045
    },
    {
      "epoch": 0.43681747269890797,
      "grad_norm": 0.6029415130615234,
      "learning_rate": 4.9992000123472676e-05,
      "loss": 2.2685,
      "step": 1050
    },
    {
      "epoch": 0.4388975559022361,
      "grad_norm": 0.7253440618515015,
      "learning_rate": 4.999032025782104e-05,
      "loss": 2.2398,
      "step": 1055
    },
    {
      "epoch": 0.4409776391055642,
      "grad_norm": 0.6144683957099915,
      "learning_rate": 4.998848044814307e-05,
      "loss": 2.2668,
      "step": 1060
    },
    {
      "epoch": 0.44305772230889234,
      "grad_norm": 0.5762149095535278,
      "learning_rate": 4.998648070621398e-05,
      "loss": 2.2581,
      "step": 1065
    },
    {
      "epoch": 0.4451378055122205,
      "grad_norm": 0.5672757625579834,
      "learning_rate": 4.9984321044832606e-05,
      "loss": 2.2518,
      "step": 1070
    },
    {
      "epoch": 0.44721788871554863,
      "grad_norm": 0.7117498517036438,
      "learning_rate": 4.998200147782128e-05,
      "loss": 2.2842,
      "step": 1075
    },
    {
      "epoch": 0.44929797191887677,
      "grad_norm": 0.6527178883552551,
      "learning_rate": 4.9979522020025795e-05,
      "loss": 2.2687,
      "step": 1080
    },
    {
      "epoch": 0.4513780551222049,
      "grad_norm": 0.7282842397689819,
      "learning_rate": 4.997688268731528e-05,
      "loss": 2.2443,
      "step": 1085
    },
    {
      "epoch": 0.453458138325533,
      "grad_norm": 0.7003041505813599,
      "learning_rate": 4.997408349658209e-05,
      "loss": 2.2616,
      "step": 1090
    },
    {
      "epoch": 0.45553822152886114,
      "grad_norm": 0.58209228515625,
      "learning_rate": 4.9971124465741716e-05,
      "loss": 2.2741,
      "step": 1095
    },
    {
      "epoch": 0.4576183047321893,
      "grad_norm": 0.5157697200775146,
      "learning_rate": 4.996800561373266e-05,
      "loss": 2.2557,
      "step": 1100
    },
    {
      "epoch": 0.4596983879355174,
      "grad_norm": 0.7559351325035095,
      "learning_rate": 4.996472696051632e-05,
      "loss": 2.2787,
      "step": 1105
    },
    {
      "epoch": 0.46177847113884557,
      "grad_norm": 0.5898042917251587,
      "learning_rate": 4.996128852707687e-05,
      "loss": 2.2248,
      "step": 1110
    },
    {
      "epoch": 0.4638585543421737,
      "grad_norm": 0.6440080404281616,
      "learning_rate": 4.9957690335421094e-05,
      "loss": 2.2747,
      "step": 1115
    },
    {
      "epoch": 0.4659386375455018,
      "grad_norm": 0.5132443308830261,
      "learning_rate": 4.9953932408578286e-05,
      "loss": 2.2839,
      "step": 1120
    },
    {
      "epoch": 0.46801872074882994,
      "grad_norm": 0.688789963722229,
      "learning_rate": 4.9950014770600075e-05,
      "loss": 2.2402,
      "step": 1125
    },
    {
      "epoch": 0.4700988039521581,
      "grad_norm": 0.6494508385658264,
      "learning_rate": 4.994593744656029e-05,
      "loss": 2.2473,
      "step": 1130
    },
    {
      "epoch": 0.4721788871554862,
      "grad_norm": 0.5445535778999329,
      "learning_rate": 4.994170046255476e-05,
      "loss": 2.2316,
      "step": 1135
    },
    {
      "epoch": 0.47425897035881437,
      "grad_norm": 0.6383847594261169,
      "learning_rate": 4.993730384570121e-05,
      "loss": 2.2671,
      "step": 1140
    },
    {
      "epoch": 0.4763390535621425,
      "grad_norm": 0.6905817985534668,
      "learning_rate": 4.9932747624139045e-05,
      "loss": 2.2444,
      "step": 1145
    },
    {
      "epoch": 0.4784191367654706,
      "grad_norm": 0.6420992612838745,
      "learning_rate": 4.992803182702916e-05,
      "loss": 2.2843,
      "step": 1150
    },
    {
      "epoch": 0.48049921996879874,
      "grad_norm": 0.619663655757904,
      "learning_rate": 4.992315648455379e-05,
      "loss": 2.2571,
      "step": 1155
    },
    {
      "epoch": 0.4825793031721269,
      "grad_norm": 0.6715114116668701,
      "learning_rate": 4.9918121627916294e-05,
      "loss": 2.2266,
      "step": 1160
    },
    {
      "epoch": 0.484659386375455,
      "grad_norm": 0.611705482006073,
      "learning_rate": 4.991292728934095e-05,
      "loss": 2.2844,
      "step": 1165
    },
    {
      "epoch": 0.48673946957878317,
      "grad_norm": 0.5942493677139282,
      "learning_rate": 4.990757350207278e-05,
      "loss": 2.2116,
      "step": 1170
    },
    {
      "epoch": 0.4888195527821113,
      "grad_norm": 0.7864916920661926,
      "learning_rate": 4.990206030037729e-05,
      "loss": 2.262,
      "step": 1175
    },
    {
      "epoch": 0.4908996359854394,
      "grad_norm": 0.7583130598068237,
      "learning_rate": 4.98963877195403e-05,
      "loss": 2.2963,
      "step": 1180
    },
    {
      "epoch": 0.49297971918876754,
      "grad_norm": 0.5617021322250366,
      "learning_rate": 4.9890555795867675e-05,
      "loss": 2.2711,
      "step": 1185
    },
    {
      "epoch": 0.4950598023920957,
      "grad_norm": 0.7314319610595703,
      "learning_rate": 4.9884564566685135e-05,
      "loss": 2.2533,
      "step": 1190
    },
    {
      "epoch": 0.4971398855954238,
      "grad_norm": 0.5248362421989441,
      "learning_rate": 4.9878414070337967e-05,
      "loss": 2.2582,
      "step": 1195
    },
    {
      "epoch": 0.49921996879875197,
      "grad_norm": 0.5933838486671448,
      "learning_rate": 4.9872104346190826e-05,
      "loss": 2.2481,
      "step": 1200
    },
    {
      "epoch": 0.50130005200208,
      "grad_norm": 0.6278464794158936,
      "learning_rate": 4.986563543462745e-05,
      "loss": 2.288,
      "step": 1205
    },
    {
      "epoch": 0.5033801352054083,
      "grad_norm": 0.6378714442253113,
      "learning_rate": 4.985900737705041e-05,
      "loss": 2.2828,
      "step": 1210
    },
    {
      "epoch": 0.5054602184087363,
      "grad_norm": 0.575777530670166,
      "learning_rate": 4.9852220215880893e-05,
      "loss": 2.2452,
      "step": 1215
    },
    {
      "epoch": 0.5075403016120645,
      "grad_norm": 0.6294274926185608,
      "learning_rate": 4.984527399455832e-05,
      "loss": 2.254,
      "step": 1220
    },
    {
      "epoch": 0.5096203848153926,
      "grad_norm": 0.8891189098358154,
      "learning_rate": 4.983816875754018e-05,
      "loss": 2.2727,
      "step": 1225
    },
    {
      "epoch": 0.5117004680187207,
      "grad_norm": 0.6461197137832642,
      "learning_rate": 4.9830904550301695e-05,
      "loss": 2.2339,
      "step": 1230
    },
    {
      "epoch": 0.5137805512220489,
      "grad_norm": 0.6093956232070923,
      "learning_rate": 4.982348141933553e-05,
      "loss": 2.2483,
      "step": 1235
    },
    {
      "epoch": 0.515860634425377,
      "grad_norm": 0.6348010897636414,
      "learning_rate": 4.9815899412151476e-05,
      "loss": 2.2093,
      "step": 1240
    },
    {
      "epoch": 0.5179407176287052,
      "grad_norm": 0.7893829941749573,
      "learning_rate": 4.9808158577276224e-05,
      "loss": 2.2995,
      "step": 1245
    },
    {
      "epoch": 0.5200208008320333,
      "grad_norm": 0.528293788433075,
      "learning_rate": 4.9800258964252946e-05,
      "loss": 2.2726,
      "step": 1250
    },
    {
      "epoch": 0.5221008840353614,
      "grad_norm": 0.6920540928840637,
      "learning_rate": 4.9792200623641066e-05,
      "loss": 2.2444,
      "step": 1255
    },
    {
      "epoch": 0.5241809672386896,
      "grad_norm": 0.7353582978248596,
      "learning_rate": 4.9783983607015885e-05,
      "loss": 2.2409,
      "step": 1260
    },
    {
      "epoch": 0.5262610504420177,
      "grad_norm": 0.7552638649940491,
      "learning_rate": 4.977560796696828e-05,
      "loss": 2.2735,
      "step": 1265
    },
    {
      "epoch": 0.5283411336453459,
      "grad_norm": 0.6445308327674866,
      "learning_rate": 4.9767073757104346e-05,
      "loss": 2.2176,
      "step": 1270
    },
    {
      "epoch": 0.5304212168486739,
      "grad_norm": 0.5857995748519897,
      "learning_rate": 4.975838103204506e-05,
      "loss": 2.2663,
      "step": 1275
    },
    {
      "epoch": 0.5325013000520021,
      "grad_norm": 0.6020021438598633,
      "learning_rate": 4.974952984742596e-05,
      "loss": 2.2685,
      "step": 1280
    },
    {
      "epoch": 0.5345813832553302,
      "grad_norm": 0.9829360246658325,
      "learning_rate": 4.974052025989673e-05,
      "loss": 2.1992,
      "step": 1285
    },
    {
      "epoch": 0.5366614664586583,
      "grad_norm": 0.6377243995666504,
      "learning_rate": 4.9731352327120883e-05,
      "loss": 2.2451,
      "step": 1290
    },
    {
      "epoch": 0.5387415496619865,
      "grad_norm": 0.5337876081466675,
      "learning_rate": 4.97220261077754e-05,
      "loss": 2.2365,
      "step": 1295
    },
    {
      "epoch": 0.5408216328653146,
      "grad_norm": 0.6845943927764893,
      "learning_rate": 4.97125416615503e-05,
      "loss": 2.2317,
      "step": 1300
    },
    {
      "epoch": 0.5429017160686428,
      "grad_norm": 0.6941018104553223,
      "learning_rate": 4.97028990491483e-05,
      "loss": 2.2123,
      "step": 1305
    },
    {
      "epoch": 0.5449817992719709,
      "grad_norm": 0.626502275466919,
      "learning_rate": 4.969309833228444e-05,
      "loss": 2.2336,
      "step": 1310
    },
    {
      "epoch": 0.547061882475299,
      "grad_norm": 0.5843268036842346,
      "learning_rate": 4.968313957368564e-05,
      "loss": 2.2062,
      "step": 1315
    },
    {
      "epoch": 0.5491419656786272,
      "grad_norm": 0.687279999256134,
      "learning_rate": 4.967302283709036e-05,
      "loss": 2.2564,
      "step": 1320
    },
    {
      "epoch": 0.5512220488819553,
      "grad_norm": 0.5501798987388611,
      "learning_rate": 4.966274818724811e-05,
      "loss": 2.2667,
      "step": 1325
    },
    {
      "epoch": 0.5533021320852834,
      "grad_norm": 0.6118011474609375,
      "learning_rate": 4.9652315689919117e-05,
      "loss": 2.2369,
      "step": 1330
    },
    {
      "epoch": 0.5553822152886115,
      "grad_norm": 0.6296612024307251,
      "learning_rate": 4.9641725411873854e-05,
      "loss": 2.2542,
      "step": 1335
    },
    {
      "epoch": 0.5574622984919396,
      "grad_norm": 0.7094495296478271,
      "learning_rate": 4.963097742089263e-05,
      "loss": 2.2562,
      "step": 1340
    },
    {
      "epoch": 0.5595423816952678,
      "grad_norm": 0.6220189929008484,
      "learning_rate": 4.962007178576517e-05,
      "loss": 2.2373,
      "step": 1345
    },
    {
      "epoch": 0.5616224648985959,
      "grad_norm": 0.6659913659095764,
      "learning_rate": 4.9609008576290135e-05,
      "loss": 2.2629,
      "step": 1350
    },
    {
      "epoch": 0.5637025481019241,
      "grad_norm": 0.5892766714096069,
      "learning_rate": 4.9597787863274715e-05,
      "loss": 2.2143,
      "step": 1355
    },
    {
      "epoch": 0.5657826313052522,
      "grad_norm": 0.5834632515907288,
      "learning_rate": 4.958640971853417e-05,
      "loss": 2.219,
      "step": 1360
    },
    {
      "epoch": 0.5678627145085804,
      "grad_norm": 0.7557886242866516,
      "learning_rate": 4.957487421489132e-05,
      "loss": 2.2199,
      "step": 1365
    },
    {
      "epoch": 0.5699427977119085,
      "grad_norm": 0.7835619449615479,
      "learning_rate": 4.956318142617617e-05,
      "loss": 2.2092,
      "step": 1370
    },
    {
      "epoch": 0.5720228809152366,
      "grad_norm": 0.6275124549865723,
      "learning_rate": 4.955133142722536e-05,
      "loss": 2.2569,
      "step": 1375
    },
    {
      "epoch": 0.5741029641185648,
      "grad_norm": 0.6475622057914734,
      "learning_rate": 4.953932429388171e-05,
      "loss": 2.2476,
      "step": 1380
    },
    {
      "epoch": 0.5761830473218928,
      "grad_norm": 0.6012747287750244,
      "learning_rate": 4.952716010299375e-05,
      "loss": 2.1796,
      "step": 1385
    },
    {
      "epoch": 0.578263130525221,
      "grad_norm": 0.5755728483200073,
      "learning_rate": 4.9514838932415216e-05,
      "loss": 2.2479,
      "step": 1390
    },
    {
      "epoch": 0.5803432137285491,
      "grad_norm": 0.5810622572898865,
      "learning_rate": 4.950236086100454e-05,
      "loss": 2.2458,
      "step": 1395
    },
    {
      "epoch": 0.5824232969318772,
      "grad_norm": 0.5510530471801758,
      "learning_rate": 4.9489725968624354e-05,
      "loss": 2.2085,
      "step": 1400
    },
    {
      "epoch": 0.5845033801352054,
      "grad_norm": 0.5717112421989441,
      "learning_rate": 4.9476934336141014e-05,
      "loss": 2.2497,
      "step": 1405
    },
    {
      "epoch": 0.5865834633385335,
      "grad_norm": 0.5643584132194519,
      "learning_rate": 4.9463986045424006e-05,
      "loss": 2.2368,
      "step": 1410
    },
    {
      "epoch": 0.5886635465418617,
      "grad_norm": 0.7527519464492798,
      "learning_rate": 4.94508811793455e-05,
      "loss": 2.2424,
      "step": 1415
    },
    {
      "epoch": 0.5907436297451898,
      "grad_norm": 0.7746401429176331,
      "learning_rate": 4.9437619821779766e-05,
      "loss": 2.2678,
      "step": 1420
    },
    {
      "epoch": 0.592823712948518,
      "grad_norm": 0.6275284886360168,
      "learning_rate": 4.9424202057602664e-05,
      "loss": 2.2083,
      "step": 1425
    },
    {
      "epoch": 0.5949037961518461,
      "grad_norm": 0.5806483626365662,
      "learning_rate": 4.94106279726911e-05,
      "loss": 2.2478,
      "step": 1430
    },
    {
      "epoch": 0.5969838793551742,
      "grad_norm": 0.6382482647895813,
      "learning_rate": 4.939689765392246e-05,
      "loss": 2.225,
      "step": 1435
    },
    {
      "epoch": 0.5990639625585024,
      "grad_norm": 0.712761402130127,
      "learning_rate": 4.938301118917407e-05,
      "loss": 2.2336,
      "step": 1440
    },
    {
      "epoch": 0.6011440457618304,
      "grad_norm": 0.6290826201438904,
      "learning_rate": 4.936896866732262e-05,
      "loss": 2.2524,
      "step": 1445
    },
    {
      "epoch": 0.6032241289651586,
      "grad_norm": 0.676964521408081,
      "learning_rate": 4.935477017824361e-05,
      "loss": 2.203,
      "step": 1450
    },
    {
      "epoch": 0.6053042121684867,
      "grad_norm": 0.648561418056488,
      "learning_rate": 4.934041581281078e-05,
      "loss": 2.2604,
      "step": 1455
    },
    {
      "epoch": 0.6073842953718148,
      "grad_norm": 0.6181101202964783,
      "learning_rate": 4.9325905662895474e-05,
      "loss": 2.2018,
      "step": 1460
    },
    {
      "epoch": 0.609464378575143,
      "grad_norm": 0.654924750328064,
      "learning_rate": 4.931123982136615e-05,
      "loss": 2.243,
      "step": 1465
    },
    {
      "epoch": 0.6115444617784711,
      "grad_norm": 0.6459677219390869,
      "learning_rate": 4.929641838208768e-05,
      "loss": 2.1945,
      "step": 1470
    },
    {
      "epoch": 0.6136245449817993,
      "grad_norm": 0.7053624391555786,
      "learning_rate": 4.928144143992083e-05,
      "loss": 2.2052,
      "step": 1475
    },
    {
      "epoch": 0.6157046281851274,
      "grad_norm": 0.7262877225875854,
      "learning_rate": 4.926630909072161e-05,
      "loss": 2.2281,
      "step": 1480
    },
    {
      "epoch": 0.6177847113884556,
      "grad_norm": 0.6927151083946228,
      "learning_rate": 4.925102143134068e-05,
      "loss": 2.2062,
      "step": 1485
    },
    {
      "epoch": 0.6198647945917837,
      "grad_norm": 0.623932957649231,
      "learning_rate": 4.92355785596227e-05,
      "loss": 2.2255,
      "step": 1490
    },
    {
      "epoch": 0.6219448777951118,
      "grad_norm": 0.706231415271759,
      "learning_rate": 4.921998057440576e-05,
      "loss": 2.2341,
      "step": 1495
    },
    {
      "epoch": 0.62402496099844,
      "grad_norm": 0.737194299697876,
      "learning_rate": 4.920422757552069e-05,
      "loss": 2.2169,
      "step": 1500
    },
    {
      "epoch": 0.626105044201768,
      "grad_norm": 0.9163033962249756,
      "learning_rate": 4.918831966379044e-05,
      "loss": 2.2055,
      "step": 1505
    },
    {
      "epoch": 0.6281851274050962,
      "grad_norm": 0.5982114672660828,
      "learning_rate": 4.917225694102947e-05,
      "loss": 2.2223,
      "step": 1510
    },
    {
      "epoch": 0.6302652106084243,
      "grad_norm": 0.6721086502075195,
      "learning_rate": 4.9156039510043025e-05,
      "loss": 2.2271,
      "step": 1515
    },
    {
      "epoch": 0.6323452938117524,
      "grad_norm": 0.6458872556686401,
      "learning_rate": 4.913966747462656e-05,
      "loss": 2.2341,
      "step": 1520
    },
    {
      "epoch": 0.6344253770150806,
      "grad_norm": 0.8973822593688965,
      "learning_rate": 4.9123140939565e-05,
      "loss": 2.2489,
      "step": 1525
    },
    {
      "epoch": 0.6365054602184087,
      "grad_norm": 0.6235558390617371,
      "learning_rate": 4.9106460010632146e-05,
      "loss": 2.2126,
      "step": 1530
    },
    {
      "epoch": 0.6385855434217369,
      "grad_norm": 0.6284027695655823,
      "learning_rate": 4.908962479458991e-05,
      "loss": 2.2565,
      "step": 1535
    },
    {
      "epoch": 0.640665626625065,
      "grad_norm": 0.663979172706604,
      "learning_rate": 4.907263539918771e-05,
      "loss": 2.2357,
      "step": 1540
    },
    {
      "epoch": 0.6427457098283932,
      "grad_norm": 0.7080653309822083,
      "learning_rate": 4.905549193316174e-05,
      "loss": 2.2386,
      "step": 1545
    },
    {
      "epoch": 0.6448257930317213,
      "grad_norm": 0.5746604800224304,
      "learning_rate": 4.903819450623428e-05,
      "loss": 2.218,
      "step": 1550
    },
    {
      "epoch": 0.6469058762350494,
      "grad_norm": 0.5668492913246155,
      "learning_rate": 4.9020743229113e-05,
      "loss": 2.1871,
      "step": 1555
    },
    {
      "epoch": 0.6489859594383776,
      "grad_norm": 0.8596884608268738,
      "learning_rate": 4.900313821349025e-05,
      "loss": 2.2023,
      "step": 1560
    },
    {
      "epoch": 0.6510660426417056,
      "grad_norm": 0.6699584722518921,
      "learning_rate": 4.898537957204234e-05,
      "loss": 2.2124,
      "step": 1565
    },
    {
      "epoch": 0.6531461258450338,
      "grad_norm": 0.7021228075027466,
      "learning_rate": 4.8967467418428826e-05,
      "loss": 2.218,
      "step": 1570
    },
    {
      "epoch": 0.6552262090483619,
      "grad_norm": 0.6994513869285583,
      "learning_rate": 4.894940186729176e-05,
      "loss": 2.2321,
      "step": 1575
    },
    {
      "epoch": 0.65730629225169,
      "grad_norm": 0.6525283455848694,
      "learning_rate": 4.8931183034255e-05,
      "loss": 2.2323,
      "step": 1580
    },
    {
      "epoch": 0.6593863754550182,
      "grad_norm": 0.6755690574645996,
      "learning_rate": 4.891281103592344e-05,
      "loss": 2.2491,
      "step": 1585
    },
    {
      "epoch": 0.6614664586583463,
      "grad_norm": 0.7468230724334717,
      "learning_rate": 4.889428598988226e-05,
      "loss": 2.2817,
      "step": 1590
    },
    {
      "epoch": 0.6635465418616745,
      "grad_norm": 0.6302746534347534,
      "learning_rate": 4.887560801469617e-05,
      "loss": 2.2178,
      "step": 1595
    },
    {
      "epoch": 0.6656266250650026,
      "grad_norm": 0.5519952774047852,
      "learning_rate": 4.88567722990887e-05,
      "loss": 2.2314,
      "step": 1600
    },
    {
      "epoch": 0.6677067082683308,
      "grad_norm": 0.706723153591156,
      "learning_rate": 4.8837793756041364e-05,
      "loss": 2.2246,
      "step": 1605
    },
    {
      "epoch": 0.6697867914716589,
      "grad_norm": 0.6226012110710144,
      "learning_rate": 4.881865771459294e-05,
      "loss": 2.2303,
      "step": 1610
    },
    {
      "epoch": 0.671866874674987,
      "grad_norm": 0.6458874344825745,
      "learning_rate": 4.879936922803867e-05,
      "loss": 2.2443,
      "step": 1615
    },
    {
      "epoch": 0.6739469578783152,
      "grad_norm": 0.6402535438537598,
      "learning_rate": 4.8779928419829475e-05,
      "loss": 2.2227,
      "step": 1620
    },
    {
      "epoch": 0.6760270410816432,
      "grad_norm": 0.6376984715461731,
      "learning_rate": 4.876033541439118e-05,
      "loss": 2.2644,
      "step": 1625
    },
    {
      "epoch": 0.6781071242849714,
      "grad_norm": 0.7197051048278809,
      "learning_rate": 4.874059033712371e-05,
      "loss": 2.225,
      "step": 1630
    },
    {
      "epoch": 0.6801872074882995,
      "grad_norm": 0.6593906283378601,
      "learning_rate": 4.872069331440028e-05,
      "loss": 2.224,
      "step": 1635
    },
    {
      "epoch": 0.6822672906916276,
      "grad_norm": 0.6552969217300415,
      "learning_rate": 4.870064447356658e-05,
      "loss": 2.2376,
      "step": 1640
    },
    {
      "epoch": 0.6843473738949558,
      "grad_norm": 0.665170431137085,
      "learning_rate": 4.8680443942940014e-05,
      "loss": 2.2294,
      "step": 1645
    },
    {
      "epoch": 0.6864274570982839,
      "grad_norm": 0.6637657880783081,
      "learning_rate": 4.8660091851808784e-05,
      "loss": 2.2462,
      "step": 1650
    },
    {
      "epoch": 0.6885075403016121,
      "grad_norm": 0.5620794892311096,
      "learning_rate": 4.863958833043115e-05,
      "loss": 2.2032,
      "step": 1655
    },
    {
      "epoch": 0.6905876235049402,
      "grad_norm": 0.7143023014068604,
      "learning_rate": 4.861893351003456e-05,
      "loss": 2.2158,
      "step": 1660
    },
    {
      "epoch": 0.6926677067082684,
      "grad_norm": 0.6956245303153992,
      "learning_rate": 4.859812752281479e-05,
      "loss": 2.185,
      "step": 1665
    },
    {
      "epoch": 0.6947477899115965,
      "grad_norm": 0.6536686420440674,
      "learning_rate": 4.857717050193514e-05,
      "loss": 2.2095,
      "step": 1670
    },
    {
      "epoch": 0.6968278731149246,
      "grad_norm": 0.728190004825592,
      "learning_rate": 4.855606258152556e-05,
      "loss": 2.2691,
      "step": 1675
    },
    {
      "epoch": 0.6989079563182528,
      "grad_norm": 0.7523576617240906,
      "learning_rate": 4.853480389668179e-05,
      "loss": 2.2348,
      "step": 1680
    },
    {
      "epoch": 0.7009880395215808,
      "grad_norm": 0.6006982922554016,
      "learning_rate": 4.851339458346449e-05,
      "loss": 2.2242,
      "step": 1685
    },
    {
      "epoch": 0.703068122724909,
      "grad_norm": 0.6585814356803894,
      "learning_rate": 4.8491834778898385e-05,
      "loss": 2.2263,
      "step": 1690
    },
    {
      "epoch": 0.7051482059282371,
      "grad_norm": 0.7236796617507935,
      "learning_rate": 4.847012462097139e-05,
      "loss": 2.2498,
      "step": 1695
    },
    {
      "epoch": 0.7072282891315652,
      "grad_norm": 0.8331758379936218,
      "learning_rate": 4.84482642486337e-05,
      "loss": 2.2006,
      "step": 1700
    },
    {
      "epoch": 0.7093083723348934,
      "grad_norm": 0.7638906836509705,
      "learning_rate": 4.8426253801796914e-05,
      "loss": 2.2025,
      "step": 1705
    },
    {
      "epoch": 0.7113884555382215,
      "grad_norm": 0.6874154210090637,
      "learning_rate": 4.840409342133318e-05,
      "loss": 2.2119,
      "step": 1710
    },
    {
      "epoch": 0.7134685387415497,
      "grad_norm": 0.7299637198448181,
      "learning_rate": 4.8381783249074224e-05,
      "loss": 2.2233,
      "step": 1715
    },
    {
      "epoch": 0.7155486219448778,
      "grad_norm": 0.8418067097663879,
      "learning_rate": 4.8359323427810476e-05,
      "loss": 2.2407,
      "step": 1720
    },
    {
      "epoch": 0.717628705148206,
      "grad_norm": 0.6470432281494141,
      "learning_rate": 4.833671410129018e-05,
      "loss": 2.2548,
      "step": 1725
    },
    {
      "epoch": 0.7197087883515341,
      "grad_norm": 0.6883252263069153,
      "learning_rate": 4.831395541421841e-05,
      "loss": 2.2434,
      "step": 1730
    },
    {
      "epoch": 0.7217888715548622,
      "grad_norm": 0.7531642317771912,
      "learning_rate": 4.8291047512256223e-05,
      "loss": 2.2151,
      "step": 1735
    },
    {
      "epoch": 0.7238689547581904,
      "grad_norm": 0.6417682766914368,
      "learning_rate": 4.826799054201967e-05,
      "loss": 2.2081,
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7259490379615184, | |
| "grad_norm": 0.8133646845817566, | |
| "learning_rate": 4.824478465107887e-05, | |
| "loss": 2.2138, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.7280291211648466, | |
| "grad_norm": 0.7256624698638916, | |
| "learning_rate": 4.8221429987957076e-05, | |
| "loss": 2.2553, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7301092043681747, | |
| "grad_norm": 0.7030817866325378, | |
| "learning_rate": 4.819792670212971e-05, | |
| "loss": 2.2055, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.7321892875715028, | |
| "grad_norm": 0.6028294563293457, | |
| "learning_rate": 4.817427494402344e-05, | |
| "loss": 2.2026, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.734269370774831, | |
| "grad_norm": 0.628900945186615, | |
| "learning_rate": 4.815047486501515e-05, | |
| "loss": 2.2114, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.7363494539781591, | |
| "grad_norm": 0.6979473233222961, | |
| "learning_rate": 4.8126526617431065e-05, | |
| "loss": 2.2334, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7384295371814873, | |
| "grad_norm": 0.6751022934913635, | |
| "learning_rate": 4.810243035454568e-05, | |
| "loss": 2.2157, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.7405096203848154, | |
| "grad_norm": 0.6779219508171082, | |
| "learning_rate": 4.8078186230580845e-05, | |
| "loss": 2.2429, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7425897035881436, | |
| "grad_norm": 0.7016996741294861, | |
| "learning_rate": 4.805379440070475e-05, | |
| "loss": 2.2412, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.7446697867914717, | |
| "grad_norm": 0.6258619427680969, | |
| "learning_rate": 4.802925502103094e-05, | |
| "loss": 2.2488, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7467498699947998, | |
| "grad_norm": 0.622600793838501, | |
| "learning_rate": 4.800456824861731e-05, | |
| "loss": 2.2435, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.748829953198128, | |
| "grad_norm": 0.5709562301635742, | |
| "learning_rate": 4.797973424146512e-05, | |
| "loss": 2.1844, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.750910036401456, | |
| "grad_norm": 0.7474341988563538, | |
| "learning_rate": 4.795475315851795e-05, | |
| "loss": 2.2236, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.7529901196047842, | |
| "grad_norm": 0.6307352781295776, | |
| "learning_rate": 4.7929625159660694e-05, | |
| "loss": 2.2427, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7550702028081123, | |
| "grad_norm": 0.792239248752594, | |
| "learning_rate": 4.7904350405718555e-05, | |
| "loss": 2.23, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.7571502860114404, | |
| "grad_norm": 0.5931370258331299, | |
| "learning_rate": 4.7878929058456027e-05, | |
| "loss": 2.2158, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7592303692147686, | |
| "grad_norm": 0.6610358357429504, | |
| "learning_rate": 4.7853361280575786e-05, | |
| "loss": 2.1798, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.7613104524180967, | |
| "grad_norm": 0.7434505224227905, | |
| "learning_rate": 4.782764723571774e-05, | |
| "loss": 2.2291, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7633905356214249, | |
| "grad_norm": 0.6113109588623047, | |
| "learning_rate": 4.780178708845792e-05, | |
| "loss": 2.2246, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.765470618824753, | |
| "grad_norm": 0.6270626783370972, | |
| "learning_rate": 4.7775781004307446e-05, | |
| "loss": 2.2527, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7675507020280812, | |
| "grad_norm": 0.8917273879051208, | |
| "learning_rate": 4.7749629149711495e-05, | |
| "loss": 2.2343, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.7696307852314093, | |
| "grad_norm": 0.6628624200820923, | |
| "learning_rate": 4.7723331692048174e-05, | |
| "loss": 2.2066, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7717108684347374, | |
| "grad_norm": 0.7599259614944458, | |
| "learning_rate": 4.76968887996275e-05, | |
| "loss": 2.2003, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.7737909516380655, | |
| "grad_norm": 0.7547385692596436, | |
| "learning_rate": 4.767030064169034e-05, | |
| "loss": 2.209, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7758710348413936, | |
| "grad_norm": 0.5945778489112854, | |
| "learning_rate": 4.764356738840722e-05, | |
| "loss": 2.1807, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.7779511180447218, | |
| "grad_norm": 0.6278342008590698, | |
| "learning_rate": 4.7616689210877374e-05, | |
| "loss": 2.2512, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7800312012480499, | |
| "grad_norm": 0.6003873944282532, | |
| "learning_rate": 4.7589666281127575e-05, | |
| "loss": 2.2352, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.782111284451378, | |
| "grad_norm": 0.6956392526626587, | |
| "learning_rate": 4.756249877211102e-05, | |
| "loss": 2.2109, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7841913676547062, | |
| "grad_norm": 0.724892795085907, | |
| "learning_rate": 4.7535186857706274e-05, | |
| "loss": 2.2142, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.7862714508580343, | |
| "grad_norm": 0.6417645215988159, | |
| "learning_rate": 4.750773071271612e-05, | |
| "loss": 2.2437, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7883515340613625, | |
| "grad_norm": 0.7107153534889221, | |
| "learning_rate": 4.748013051286646e-05, | |
| "loss": 2.2092, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.7904316172646906, | |
| "grad_norm": 0.7158809304237366, | |
| "learning_rate": 4.7452386434805154e-05, | |
| "loss": 2.1911, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7925117004680188, | |
| "grad_norm": 0.5407947301864624, | |
| "learning_rate": 4.7424498656100954e-05, | |
| "loss": 2.2399, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.7945917836713469, | |
| "grad_norm": 0.638559103012085, | |
| "learning_rate": 4.73964673552423e-05, | |
| "loss": 2.2175, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.796671866874675, | |
| "grad_norm": 0.6588721871376038, | |
| "learning_rate": 4.736829271163624e-05, | |
| "loss": 2.1737, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.7987519500780031, | |
| "grad_norm": 1.2813347578048706, | |
| "learning_rate": 4.7339974905607206e-05, | |
| "loss": 2.214, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8008320332813312, | |
| "grad_norm": 0.6472169756889343, | |
| "learning_rate": 4.731151411839596e-05, | |
| "loss": 2.2123, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.8029121164846594, | |
| "grad_norm": 0.6989557147026062, | |
| "learning_rate": 4.728291053215832e-05, | |
| "loss": 2.266, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.8049921996879875, | |
| "grad_norm": 0.5853009223937988, | |
| "learning_rate": 4.725416432996409e-05, | |
| "loss": 2.2408, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.8070722828913156, | |
| "grad_norm": 0.6867830157279968, | |
| "learning_rate": 4.722527569579584e-05, | |
| "loss": 2.2089, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.8091523660946438, | |
| "grad_norm": 0.5697247385978699, | |
| "learning_rate": 4.719624481454773e-05, | |
| "loss": 2.2268, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.8112324492979719, | |
| "grad_norm": 0.7628327012062073, | |
| "learning_rate": 4.716707187202436e-05, | |
| "loss": 2.2137, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.8133125325013001, | |
| "grad_norm": 0.6538941264152527, | |
| "learning_rate": 4.7137757054939516e-05, | |
| "loss": 2.2173, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.8153926157046282, | |
| "grad_norm": 0.66108638048172, | |
| "learning_rate": 4.710830055091506e-05, | |
| "loss": 2.2273, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.8174726989079563, | |
| "grad_norm": 0.8627687692642212, | |
| "learning_rate": 4.707870254847965e-05, | |
| "loss": 2.2459, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.8195527821112845, | |
| "grad_norm": 1.0859020948410034, | |
| "learning_rate": 4.7048963237067576e-05, | |
| "loss": 2.2171, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.8216328653146125, | |
| "grad_norm": 0.5735368132591248, | |
| "learning_rate": 4.7019082807017555e-05, | |
| "loss": 2.201, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.8237129485179407, | |
| "grad_norm": 0.7044029831886292, | |
| "learning_rate": 4.698906144957148e-05, | |
| "loss": 2.1652, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8257930317212688, | |
| "grad_norm": 0.6482775211334229, | |
| "learning_rate": 4.695889935687322e-05, | |
| "loss": 2.227, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.827873114924597, | |
| "grad_norm": 0.6682475209236145, | |
| "learning_rate": 4.692859672196738e-05, | |
| "loss": 2.1841, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8299531981279251, | |
| "grad_norm": 0.925491988658905, | |
| "learning_rate": 4.689815373879808e-05, | |
| "loss": 2.2046, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.8320332813312532, | |
| "grad_norm": 0.6961370706558228, | |
| "learning_rate": 4.686757060220768e-05, | |
| "loss": 2.1795, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8341133645345814, | |
| "grad_norm": 0.669620931148529, | |
| "learning_rate": 4.6836847507935566e-05, | |
| "loss": 2.2046, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.8361934477379095, | |
| "grad_norm": 1.242880940437317, | |
| "learning_rate": 4.6805984652616905e-05, | |
| "loss": 2.2444, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8382735309412377, | |
| "grad_norm": 0.6134272217750549, | |
| "learning_rate": 4.677498223378134e-05, | |
| "loss": 2.2067, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.8403536141445658, | |
| "grad_norm": 0.6093041896820068, | |
| "learning_rate": 4.674384044985177e-05, | |
| "loss": 2.2191, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8424336973478939, | |
| "grad_norm": 0.7351178526878357, | |
| "learning_rate": 4.6712559500143064e-05, | |
| "loss": 2.197, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.8445137805512221, | |
| "grad_norm": 0.7967379093170166, | |
| "learning_rate": 4.668113958486077e-05, | |
| "loss": 2.1565, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8465938637545501, | |
| "grad_norm": 0.7329113483428955, | |
| "learning_rate": 4.6649580905099875e-05, | |
| "loss": 2.2587, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.8486739469578783, | |
| "grad_norm": 0.6913650631904602, | |
| "learning_rate": 4.6617883662843464e-05, | |
| "loss": 2.1888, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8507540301612064, | |
| "grad_norm": 0.6449567675590515, | |
| "learning_rate": 4.658604806096147e-05, | |
| "loss": 2.1799, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.8528341133645346, | |
| "grad_norm": 0.6242998838424683, | |
| "learning_rate": 4.655407430320937e-05, | |
| "loss": 2.2185, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8549141965678627, | |
| "grad_norm": 0.7295005917549133, | |
| "learning_rate": 4.652196259422685e-05, | |
| "loss": 2.1757, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.8569942797711908, | |
| "grad_norm": 0.6830917000770569, | |
| "learning_rate": 4.648971313953654e-05, | |
| "loss": 2.216, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.859074362974519, | |
| "grad_norm": 0.6306335926055908, | |
| "learning_rate": 4.645732614554264e-05, | |
| "loss": 2.1863, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.8611544461778471, | |
| "grad_norm": 0.7190198302268982, | |
| "learning_rate": 4.642480181952967e-05, | |
| "loss": 2.1922, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8632345293811753, | |
| "grad_norm": 0.6659998893737793, | |
| "learning_rate": 4.6392140369661104e-05, | |
| "loss": 2.2265, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.8653146125845034, | |
| "grad_norm": 0.8210431337356567, | |
| "learning_rate": 4.6359342004978016e-05, | |
| "loss": 2.2259, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8673946957878315, | |
| "grad_norm": 0.7920681834220886, | |
| "learning_rate": 4.6326406935397797e-05, | |
| "loss": 2.222, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.8694747789911597, | |
| "grad_norm": 0.7032017707824707, | |
| "learning_rate": 4.629333537171277e-05, | |
| "loss": 2.2289, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8715548621944877, | |
| "grad_norm": 0.648613691329956, | |
| "learning_rate": 4.6260127525588824e-05, | |
| "loss": 2.2021, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.8736349453978159, | |
| "grad_norm": 0.6248600482940674, | |
| "learning_rate": 4.622678360956415e-05, | |
| "loss": 2.2162, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.875715028601144, | |
| "grad_norm": 0.6035733222961426, | |
| "learning_rate": 4.619330383704778e-05, | |
| "loss": 2.2385, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.8777951118044722, | |
| "grad_norm": 0.6163062453269958, | |
| "learning_rate": 4.615968842231825e-05, | |
| "loss": 2.222, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8798751950078003, | |
| "grad_norm": 0.6982393860816956, | |
| "learning_rate": 4.612593758052227e-05, | |
| "loss": 2.191, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.8819552782111284, | |
| "grad_norm": 0.9503993391990662, | |
| "learning_rate": 4.609205152767329e-05, | |
| "loss": 2.1852, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8840353614144566, | |
| "grad_norm": 0.6817579865455627, | |
| "learning_rate": 4.605803048065014e-05, | |
| "loss": 2.2085, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.8861154446177847, | |
| "grad_norm": 0.621665358543396, | |
| "learning_rate": 4.6023874657195686e-05, | |
| "loss": 2.2261, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8881955278211129, | |
| "grad_norm": 0.7342547178268433, | |
| "learning_rate": 4.5989584275915345e-05, | |
| "loss": 2.2056, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.890275611024441, | |
| "grad_norm": 0.7103343605995178, | |
| "learning_rate": 4.595515955627576e-05, | |
| "loss": 2.1456, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8923556942277691, | |
| "grad_norm": 0.7423024773597717, | |
| "learning_rate": 4.592060071860339e-05, | |
| "loss": 2.2315, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.8944357774310973, | |
| "grad_norm": 0.6223635077476501, | |
| "learning_rate": 4.5885907984083034e-05, | |
| "loss": 2.2524, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8965158606344253, | |
| "grad_norm": 0.6376893520355225, | |
| "learning_rate": 4.5851081574756504e-05, | |
| "loss": 2.2102, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.8985959438377535, | |
| "grad_norm": 0.6602868437767029, | |
| "learning_rate": 4.5816121713521155e-05, | |
| "loss": 2.2622, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.9006760270410816, | |
| "grad_norm": 0.5940150022506714, | |
| "learning_rate": 4.578102862412844e-05, | |
| "loss": 2.241, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.9027561102444098, | |
| "grad_norm": 0.7278428673744202, | |
| "learning_rate": 4.5745802531182544e-05, | |
| "loss": 2.1557, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.9048361934477379, | |
| "grad_norm": 0.6823807954788208, | |
| "learning_rate": 4.5710443660138874e-05, | |
| "loss": 2.1973, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.906916276651066, | |
| "grad_norm": 0.6403205990791321, | |
| "learning_rate": 4.5674952237302664e-05, | |
| "loss": 2.2092, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.9089963598543942, | |
| "grad_norm": 0.8217492699623108, | |
| "learning_rate": 4.563932848982752e-05, | |
| "loss": 2.1835, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.9110764430577223, | |
| "grad_norm": 0.6273100972175598, | |
| "learning_rate": 4.560357264571392e-05, | |
| "loss": 2.1914, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.9131565262610505, | |
| "grad_norm": 0.7473320364952087, | |
| "learning_rate": 4.5567684933807844e-05, | |
| "loss": 2.2302, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.9152366094643786, | |
| "grad_norm": 0.6911905407905579, | |
| "learning_rate": 4.553166558379922e-05, | |
| "loss": 2.1685, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9173166926677067, | |
| "grad_norm": 0.817228376865387, | |
| "learning_rate": 4.54955148262205e-05, | |
| "loss": 2.1914, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.9193967758710349, | |
| "grad_norm": 0.934036910533905, | |
| "learning_rate": 4.545923289244517e-05, | |
| "loss": 2.2018, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.9214768590743629, | |
| "grad_norm": 0.6456049680709839, | |
| "learning_rate": 4.542282001468631e-05, | |
| "loss": 2.2071, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.9235569422776911, | |
| "grad_norm": 0.9103360772132874, | |
| "learning_rate": 4.5386276425995025e-05, | |
| "loss": 2.2058, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.9256370254810192, | |
| "grad_norm": 0.7046015858650208, | |
| "learning_rate": 4.5349602360259026e-05, | |
| "loss": 2.231, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.9277171086843474, | |
| "grad_norm": 0.7220612168312073, | |
| "learning_rate": 4.531279805220111e-05, | |
| "loss": 2.2151, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.9297971918876755, | |
| "grad_norm": 0.759607195854187, | |
| "learning_rate": 4.5275863737377644e-05, | |
| "loss": 2.2082, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.9318772750910036, | |
| "grad_norm": 0.6310170888900757, | |
| "learning_rate": 4.523879965217708e-05, | |
| "loss": 2.2005, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9339573582943318, | |
| "grad_norm": 0.8374559879302979, | |
| "learning_rate": 4.520160603381842e-05, | |
| "loss": 2.2175, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.9360374414976599, | |
| "grad_norm": 0.6279659271240234, | |
| "learning_rate": 4.516428312034974e-05, | |
| "loss": 2.1931, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9381175247009881, | |
| "grad_norm": 0.6658344864845276, | |
| "learning_rate": 4.512683115064658e-05, | |
| "loss": 2.2423, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.9401976079043162, | |
| "grad_norm": 0.6160857081413269, | |
| "learning_rate": 4.508925036441053e-05, | |
| "loss": 2.1999, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9422776911076443, | |
| "grad_norm": 0.685691773891449, | |
| "learning_rate": 4.505154100216759e-05, | |
| "loss": 2.2019, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.9443577743109725, | |
| "grad_norm": 0.5475696325302124, | |
| "learning_rate": 4.501370330526671e-05, | |
| "loss": 2.1944, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9464378575143005, | |
| "grad_norm": 0.6216719746589661, | |
| "learning_rate": 4.497573751587819e-05, | |
| "loss": 2.2182, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.9485179407176287, | |
| "grad_norm": 0.708109438419342, | |
| "learning_rate": 4.4937643876992176e-05, | |
| "loss": 2.1936, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9505980239209568, | |
| "grad_norm": 0.8892818093299866, | |
| "learning_rate": 4.489942263241705e-05, | |
| "loss": 2.194, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.952678107124285, | |
| "grad_norm": 0.6160807013511658, | |
| "learning_rate": 4.4861074026777936e-05, | |
| "loss": 2.2116, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9547581903276131, | |
| "grad_norm": 0.7309338450431824, | |
| "learning_rate": 4.482259830551507e-05, | |
| "loss": 2.2298, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.9568382735309412, | |
| "grad_norm": 0.6691438555717468, | |
| "learning_rate": 4.4783995714882265e-05, | |
| "loss": 2.2271, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9589183567342694, | |
| "grad_norm": 0.7534273266792297, | |
| "learning_rate": 4.474526650194535e-05, | |
| "loss": 2.195, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.9609984399375975, | |
| "grad_norm": 0.7900533676147461, | |
| "learning_rate": 4.4706410914580535e-05, | |
| "loss": 2.1975, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9630785231409257, | |
| "grad_norm": 0.8033674359321594, | |
| "learning_rate": 4.4667429201472876e-05, | |
| "loss": 2.2036, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.9651586063442538, | |
| "grad_norm": 0.6255626678466797, | |
| "learning_rate": 4.4628321612114666e-05, | |
| "loss": 2.2145, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9672386895475819, | |
| "grad_norm": 0.7248416543006897, | |
| "learning_rate": 4.458908839680382e-05, | |
| "loss": 2.2367, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.96931877275091, | |
| "grad_norm": 0.5884549021720886, | |
| "learning_rate": 4.454972980664231e-05, | |
| "loss": 2.2555, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9713988559542381, | |
| "grad_norm": 0.7502307891845703, | |
| "learning_rate": 4.451024609353451e-05, | |
| "loss": 2.2157, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.9734789391575663, | |
| "grad_norm": 0.6624306440353394, | |
| "learning_rate": 4.447063751018565e-05, | |
| "loss": 2.2068, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9755590223608944, | |
| "grad_norm": 0.8079151511192322, | |
| "learning_rate": 4.4430904310100117e-05, | |
| "loss": 2.1948, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.9776391055642226, | |
| "grad_norm": 0.6674748659133911, | |
| "learning_rate": 4.4391046747579903e-05, | |
| "loss": 2.2253, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9797191887675507, | |
| "grad_norm": 0.644355297088623, | |
| "learning_rate": 4.435106507772294e-05, | |
| "loss": 2.2232, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.9817992719708788, | |
| "grad_norm": 0.7964586615562439, | |
| "learning_rate": 4.431095955642147e-05, | |
| "loss": 2.1854, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.983879355174207, | |
| "grad_norm": 0.6387479305267334, | |
| "learning_rate": 4.4270730440360434e-05, | |
| "loss": 2.1848, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.9859594383775351, | |
| "grad_norm": 0.7305927872657776, | |
| "learning_rate": 4.4230377987015773e-05, | |
| "loss": 2.1617, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9880395215808633, | |
| "grad_norm": 0.7234925031661987, | |
| "learning_rate": 4.418990245465286e-05, | |
| "loss": 2.2035, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.9901196047841914, | |
| "grad_norm": 0.9234485030174255, | |
| "learning_rate": 4.4149304102324784e-05, | |
| "loss": 2.1871, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9921996879875195, | |
| "grad_norm": 0.6665201783180237, | |
| "learning_rate": 4.41085831898707e-05, | |
| "loss": 2.2183, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.9942797711908476, | |
| "grad_norm": 0.6990284323692322, | |
| "learning_rate": 4.406773997791418e-05, | |
| "loss": 2.1597, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9963598543941757, | |
| "grad_norm": 0.7203196883201599, | |
| "learning_rate": 4.402677472786156e-05, | |
| "loss": 2.203, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.9984399375975039, | |
| "grad_norm": 0.9164974093437195, | |
| "learning_rate": 4.398568770190025e-05, | |
| "loss": 2.2222, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.000520020800832, | |
| "grad_norm": 0.655780553817749, | |
| "learning_rate": 4.394447916299701e-05, | |
| "loss": 2.188, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 1.00260010400416, | |
| "grad_norm": 0.7037012577056885, | |
| "learning_rate": 4.3903149374896366e-05, | |
| "loss": 2.1934, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.0046801872074882, | |
| "grad_norm": 0.6656583547592163, | |
| "learning_rate": 4.386169860211884e-05, | |
| "loss": 2.2366, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 1.0067602704108165, | |
| "grad_norm": 0.6743558049201965, | |
| "learning_rate": 4.3820127109959294e-05, | |
| "loss": 2.1893, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.0088403536141446, | |
| "grad_norm": 0.6413071751594543, | |
| "learning_rate": 4.3778435164485216e-05, | |
| "loss": 2.1776, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.0109204368174727, | |
| "grad_norm": 0.7319051623344421, | |
| "learning_rate": 4.373662303253504e-05, | |
| "loss": 2.2236, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.0130005200208008, | |
| "grad_norm": 0.7512479424476624, | |
| "learning_rate": 4.369469098171639e-05, | |
| "loss": 2.1662, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 1.015080603224129, | |
| "grad_norm": 0.961911678314209, | |
| "learning_rate": 4.365263928040444e-05, | |
| "loss": 2.1793, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.0171606864274572, | |
| "grad_norm": 0.7037209272384644, | |
| "learning_rate": 4.361046819774012e-05, | |
| "loss": 2.2268, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 1.0192407696307852, | |
| "grad_norm": 0.6789669394493103, | |
| "learning_rate": 4.356817800362846e-05, | |
| "loss": 2.1593, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.0213208528341133, | |
| "grad_norm": 0.7595046758651733, | |
| "learning_rate": 4.35257689687368e-05, | |
| "loss": 2.1979, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 1.0234009360374414, | |
| "grad_norm": 0.6408656239509583, | |
| "learning_rate": 4.348324136449311e-05, | |
| "loss": 2.1928, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.0254810192407697, | |
| "grad_norm": 0.7805280685424805, | |
| "learning_rate": 4.344059546308424e-05, | |
| "loss": 2.2062, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 1.0275611024440978, | |
| "grad_norm": 0.6017510294914246, | |
| "learning_rate": 4.3397831537454146e-05, | |
| "loss": 2.1883, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.029641185647426, | |
| "grad_norm": 0.6209552884101868, | |
| "learning_rate": 4.335494986130219e-05, | |
| "loss": 2.2042, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.031721268850754, | |
| "grad_norm": 0.641849935054779, | |
| "learning_rate": 4.331195070908134e-05, | |
| "loss": 2.2132, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.033801352054082, | |
| "grad_norm": 0.6367346048355103, | |
| "learning_rate": 4.326883435599646e-05, | |
| "loss": 2.1853, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.0358814352574104, | |
| "grad_norm": 0.745715856552124, | |
| "learning_rate": 4.322560107800253e-05, | |
| "loss": 2.1998, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.0379615184607385, | |
| "grad_norm": 0.9358944296836853, | |
| "learning_rate": 4.318225115180287e-05, | |
| "loss": 2.2148, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 1.0400416016640666, | |
| "grad_norm": 0.771369457244873, | |
| "learning_rate": 4.313878485484735e-05, | |
| "loss": 2.2095, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0421216848673946, | |
| "grad_norm": 0.8903271555900574, | |
| "learning_rate": 4.3095202465330695e-05, | |
| "loss": 2.1885, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 1.0442017680707227, | |
| "grad_norm": 0.83650141954422, | |
| "learning_rate": 4.305150426219061e-05, | |
| "loss": 2.1791, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.046281851274051, | |
| "grad_norm": 0.8761057257652283, | |
| "learning_rate": 4.300769052510604e-05, | |
| "loss": 2.2408, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 1.0483619344773791, | |
| "grad_norm": 0.8988538384437561, | |
| "learning_rate": 4.296376153449539e-05, | |
| "loss": 2.2054, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.0504420176807072, | |
| "grad_norm": 0.7735057473182678, | |
| "learning_rate": 4.29197175715147e-05, | |
| "loss": 2.2135, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.0525221008840353, | |
| "grad_norm": 0.7575799226760864, | |
| "learning_rate": 4.287555891805587e-05, | |
| "loss": 2.1696, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.0546021840873634, | |
| "grad_norm": 0.8359115719795227, | |
| "learning_rate": 4.283128585674485e-05, | |
| "loss": 2.2099, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 1.0566822672906917, | |
| "grad_norm": 0.8799722194671631, | |
| "learning_rate": 4.27868986709398e-05, | |
| "loss": 2.173, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.0587623504940198, | |
| "grad_norm": 0.6700918078422546, | |
| "learning_rate": 4.274239764472935e-05, | |
| "loss": 2.2171, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 1.0608424336973479, | |
| "grad_norm": 0.6470414400100708, | |
| "learning_rate": 4.269778306293068e-05, | |
| "loss": 2.2247, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.062922516900676, | |
| "grad_norm": 0.7337380051612854, | |
| "learning_rate": 4.2653055211087824e-05, | |
| "loss": 2.19, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 1.065002600104004, | |
| "grad_norm": 0.6618953347206116, | |
| "learning_rate": 4.26082143754697e-05, | |
| "loss": 2.2051, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.0670826833073324, | |
| "grad_norm": 0.6239338517189026, | |
| "learning_rate": 4.256326084306839e-05, | |
| "loss": 2.2373, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 1.0691627665106604, | |
| "grad_norm": 0.6498297452926636, | |
| "learning_rate": 4.2518194901597244e-05, | |
| "loss": 2.155, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.0712428497139885, | |
| "grad_norm": 0.7128412127494812, | |
| "learning_rate": 4.2473016839489084e-05, | |
| "loss": 2.2039, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.0733229329173166, | |
| "grad_norm": 0.63873291015625, | |
| "learning_rate": 4.2427726945894294e-05, | |
| "loss": 2.2305, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.075403016120645, | |
| "grad_norm": 0.6624719500541687, | |
| "learning_rate": 4.2382325510679034e-05, | |
| "loss": 2.1742, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 1.077483099323973, | |
| "grad_norm": 0.7572752833366394, | |
| "learning_rate": 4.2336812824423345e-05, | |
| "loss": 2.2121, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.079563182527301, | |
| "grad_norm": 0.7270757555961609, | |
| "learning_rate": 4.229118917841931e-05, | |
| "loss": 2.201, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 1.0816432657306292, | |
| "grad_norm": 0.7370373606681824, | |
| "learning_rate": 4.224545486466916e-05, | |
| "loss": 2.1859, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.0837233489339573, | |
| "grad_norm": 0.7897046208381653, | |
| "learning_rate": 4.219961017588345e-05, | |
| "loss": 2.1436, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 1.0858034321372856, | |
| "grad_norm": 0.7892904281616211, | |
| "learning_rate": 4.215365540547916e-05, | |
| "loss": 2.2309, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.0878835153406137, | |
| "grad_norm": 0.67086261510849, | |
| "learning_rate": 4.2107590847577795e-05, | |
| "loss": 2.2064, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 1.0899635985439418, | |
| "grad_norm": 0.7036137580871582, | |
| "learning_rate": 4.2061416797003563e-05, | |
| "loss": 2.1563, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.0920436817472698, | |
| "grad_norm": 0.678588330745697, | |
| "learning_rate": 4.2015133549281405e-05, | |
| "loss": 2.2293, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.094123764950598, | |
| "grad_norm": 0.6367912888526917, | |
| "learning_rate": 4.196874140063519e-05, | |
| "loss": 2.1538, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.0962038481539262, | |
| "grad_norm": 0.8265711665153503, | |
| "learning_rate": 4.192224064798577e-05, | |
| "loss": 2.2269, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 1.0982839313572543, | |
| "grad_norm": 0.7148616909980774, | |
| "learning_rate": 4.187563158894907e-05, | |
| "loss": 2.1895, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.1003640145605824, | |
| "grad_norm": 0.7453672289848328, | |
| "learning_rate": 4.182891452183423e-05, | |
| "loss": 2.2019, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 1.1024440977639105, | |
| "grad_norm": 0.7494534254074097, | |
| "learning_rate": 4.178208974564164e-05, | |
| "loss": 2.1909, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.1045241809672386, | |
| "grad_norm": 0.7747659683227539, | |
| "learning_rate": 4.173515756006107e-05, | |
| "loss": 2.1631, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 1.106604264170567, | |
| "grad_norm": 0.7446553707122803, | |
| "learning_rate": 4.168811826546972e-05, | |
| "loss": 2.2373, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.108684347373895, | |
| "grad_norm": 1.0015227794647217, | |
| "learning_rate": 4.164097216293035e-05, | |
| "loss": 2.1514, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 1.110764430577223, | |
| "grad_norm": 0.7084265351295471, | |
| "learning_rate": 4.159371955418928e-05, | |
| "loss": 2.1887, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.1128445137805512, | |
| "grad_norm": 0.8212253451347351, | |
| "learning_rate": 4.15463607416745e-05, | |
| "loss": 2.1795, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.1149245969838795, | |
| "grad_norm": 0.6273921728134155, | |
| "learning_rate": 4.149889602849375e-05, | |
| "loss": 2.2028, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.1170046801872076, | |
| "grad_norm": 0.7842991948127747, | |
| "learning_rate": 4.145132571843253e-05, | |
| "loss": 2.2196, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 1.1190847633905356, | |
| "grad_norm": 1.2877519130706787, | |
| "learning_rate": 4.140365011595222e-05, | |
| "loss": 2.1534, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.1211648465938637, | |
| "grad_norm": 0.8273409008979797, | |
| "learning_rate": 4.1355869526188065e-05, | |
| "loss": 2.2098, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 1.1232449297971918, | |
| "grad_norm": 0.7299179434776306, | |
| "learning_rate": 4.130798425494726e-05, | |
| "loss": 2.1747, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.12532501300052, | |
| "grad_norm": 0.6302483677864075, | |
| "learning_rate": 4.125999460870701e-05, | |
| "loss": 2.1866, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 1.1274050962038482, | |
| "grad_norm": 0.6543394327163696, | |
| "learning_rate": 4.121190089461252e-05, | |
| "loss": 2.2179, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.1294851794071763, | |
| "grad_norm": 0.6725994944572449, | |
| "learning_rate": 4.1163703420475065e-05, | |
| "loss": 2.1915, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 1.1315652626105044, | |
| "grad_norm": 0.6935145854949951, | |
| "learning_rate": 4.111540249476999e-05, | |
| "loss": 2.133, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.1336453458138325, | |
| "grad_norm": 0.6305291652679443, | |
| "learning_rate": 4.106699842663481e-05, | |
| "loss": 2.2123, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.1357254290171608, | |
| "grad_norm": 0.6931216716766357, | |
| "learning_rate": 4.10184915258671e-05, | |
| "loss": 2.2118, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.1378055122204889, | |
| "grad_norm": 0.6725122928619385, | |
| "learning_rate": 4.096988210292264e-05, | |
| "loss": 2.1921, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 1.139885595423817, | |
| "grad_norm": 0.6598119139671326, | |
| "learning_rate": 4.092117046891336e-05, | |
| "loss": 2.2118, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.141965678627145, | |
| "grad_norm": 0.8654290437698364, | |
| "learning_rate": 4.0872356935605365e-05, | |
| "loss": 2.1693, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 1.1440457618304731, | |
| "grad_norm": 0.7804104089736938, | |
| "learning_rate": 4.082344181541695e-05, | |
| "loss": 2.1692, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.1461258450338014, | |
| "grad_norm": 0.725069522857666, | |
| "learning_rate": 4.0774425421416586e-05, | |
| "loss": 2.2162, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 1.1482059282371295, | |
| "grad_norm": 0.8145197629928589, | |
| "learning_rate": 4.07253080673209e-05, | |
| "loss": 2.1679, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.1502860114404576, | |
| "grad_norm": 0.785518229007721, | |
| "learning_rate": 4.0676090067492725e-05, | |
| "loss": 2.2055, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 1.1523660946437857, | |
| "grad_norm": 0.8365340232849121, | |
| "learning_rate": 4.062677173693901e-05, | |
| "loss": 2.1862, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.154446177847114, | |
| "grad_norm": 0.79314786195755, | |
| "learning_rate": 4.057735339130888e-05, | |
| "loss": 2.2076, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.156526261050442, | |
| "grad_norm": 0.8674628138542175, | |
| "learning_rate": 4.052783534689157e-05, | |
| "loss": 2.1995, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.1586063442537702, | |
| "grad_norm": 0.8610255122184753, | |
| "learning_rate": 4.047821792061439e-05, | |
| "loss": 2.1898, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 1.1606864274570983, | |
| "grad_norm": 0.6575633883476257, | |
| "learning_rate": 4.042850143004075e-05, | |
| "loss": 2.1737, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.1627665106604264, | |
| "grad_norm": 0.6737598776817322, | |
| "learning_rate": 4.0378686193368076e-05, | |
| "loss": 2.2065, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 1.1648465938637544, | |
| "grad_norm": 0.6962065100669861, | |
| "learning_rate": 4.0328772529425796e-05, | |
| "loss": 2.1664, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.1669266770670828, | |
| "grad_norm": 0.6436595320701599, | |
| "learning_rate": 4.027876075767329e-05, | |
| "loss": 2.171, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.1690067602704108, | |
| "grad_norm": 0.6421999335289001, | |
| "learning_rate": 4.0228651198197865e-05, | |
| "loss": 2.1698, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.171086843473739, | |
| "grad_norm": 0.8119395971298218, | |
| "learning_rate": 4.017844417171269e-05, | |
| "loss": 2.2039, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.173166926677067, | |
| "grad_norm": 0.7124528884887695, | |
| "learning_rate": 4.012813999955473e-05, | |
| "loss": 2.1838, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.1752470098803953, | |
| "grad_norm": 0.7611419558525085, | |
| "learning_rate": 4.007773900368272e-05, | |
| "loss": 2.2004, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.1773270930837234, | |
| "grad_norm": 0.7459845542907715, | |
| "learning_rate": 4.002724150667509e-05, | |
| "loss": 2.248, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.1794071762870515, | |
| "grad_norm": 0.6019890904426575, | |
| "learning_rate": 3.997664783172792e-05, | |
| "loss": 2.1439, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.1814872594903796, | |
| "grad_norm": 0.7191320061683655, | |
| "learning_rate": 3.992595830265279e-05, | |
| "loss": 2.1846, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.1835673426937077, | |
| "grad_norm": 0.7781062722206116, | |
| "learning_rate": 3.987517324387483e-05, | |
| "loss": 2.1967, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.185647425897036, | |
| "grad_norm": 0.7874257564544678, | |
| "learning_rate": 3.982429298043057e-05, | |
| "loss": 2.206, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.187727509100364, | |
| "grad_norm": 0.6833536624908447, | |
| "learning_rate": 3.977331783796584e-05, | |
| "loss": 2.1726, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.1898075923036922, | |
| "grad_norm": 0.7719831466674805, | |
| "learning_rate": 3.972224814273377e-05, | |
| "loss": 2.1507, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.1918876755070202, | |
| "grad_norm": 0.784296989440918, | |
| "learning_rate": 3.9671084221592604e-05, | |
| "loss": 2.1744, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.1939677587103483, | |
| "grad_norm": 0.8013231754302979, | |
| "learning_rate": 3.961982640200368e-05, | |
| "loss": 2.2045, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.1960478419136766, | |
| "grad_norm": 0.648163914680481, | |
| "learning_rate": 3.95684750120293e-05, | |
| "loss": 2.1638, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.1981279251170047, | |
| "grad_norm": 0.7477614879608154, | |
| "learning_rate": 3.951703038033066e-05, | |
| "loss": 2.1993, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.2002080083203328, | |
| "grad_norm": 0.6471365094184875, | |
| "learning_rate": 3.9465492836165665e-05, | |
| "loss": 2.1837, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.202288091523661, | |
| "grad_norm": 0.708446204662323, | |
| "learning_rate": 3.9413862709386964e-05, | |
| "loss": 2.2097, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.204368174726989, | |
| "grad_norm": 0.5960673689842224, | |
| "learning_rate": 3.93621403304397e-05, | |
| "loss": 2.2038, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.2064482579303173, | |
| "grad_norm": 0.7760262489318848, | |
| "learning_rate": 3.931032603035947e-05, | |
| "loss": 2.2158, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.2085283411336454, | |
| "grad_norm": 0.833264172077179, | |
| "learning_rate": 3.925842014077018e-05, | |
| "loss": 2.1335, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.2106084243369735, | |
| "grad_norm": 0.6717672944068909, | |
| "learning_rate": 3.920642299388194e-05, | |
| "loss": 2.1679, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.2126885075403016, | |
| "grad_norm": 0.7024863362312317, | |
| "learning_rate": 3.915433492248894e-05, | |
| "loss": 2.2027, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.2147685907436299, | |
| "grad_norm": 0.6785237789154053, | |
| "learning_rate": 3.910215625996727e-05, | |
| "loss": 2.1512, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.216848673946958, | |
| "grad_norm": 0.6357817649841309, | |
| "learning_rate": 3.904988734027287e-05, | |
| "loss": 2.1536, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.218928757150286, | |
| "grad_norm": 0.7310034036636353, | |
| "learning_rate": 3.899752849793932e-05, | |
| "loss": 2.156, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.2210088403536141, | |
| "grad_norm": 0.8692020773887634, | |
| "learning_rate": 3.8945080068075726e-05, | |
| "loss": 2.2005, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.2230889235569422, | |
| "grad_norm": 0.7720586657524109, | |
| "learning_rate": 3.8892542386364594e-05, | |
| "loss": 2.2019, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.2251690067602703, | |
| "grad_norm": 0.8210708498954773, | |
| "learning_rate": 3.8839915789059636e-05, | |
| "loss": 2.1664, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.2272490899635986, | |
| "grad_norm": 0.813205361366272, | |
| "learning_rate": 3.8787200612983683e-05, | |
| "loss": 2.2265, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.2293291731669267, | |
| "grad_norm": 0.7682729363441467, | |
| "learning_rate": 3.873439719552645e-05, | |
| "loss": 2.1928, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.2314092563702548, | |
| "grad_norm": 0.8033297061920166, | |
| "learning_rate": 3.8681505874642446e-05, | |
| "loss": 2.1884, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.2334893395735829, | |
| "grad_norm": 0.7240457534790039, | |
| "learning_rate": 3.8628526988848776e-05, | |
| "loss": 2.1819, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.2355694227769112, | |
| "grad_norm": 0.7186746597290039, | |
| "learning_rate": 3.857546087722297e-05, | |
| "loss": 2.184, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.2376495059802393, | |
| "grad_norm": 0.8010230660438538, | |
| "learning_rate": 3.8522307879400835e-05, | |
| "loss": 2.1917, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.2397295891835673, | |
| "grad_norm": 0.8483126759529114, | |
| "learning_rate": 3.846906833557429e-05, | |
| "loss": 2.1907, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.2418096723868954, | |
| "grad_norm": 0.8604680895805359, | |
| "learning_rate": 3.841574258648912e-05, | |
| "loss": 2.1904, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.2438897555902235, | |
| "grad_norm": 0.7877135276794434, | |
| "learning_rate": 3.836233097344288e-05, | |
| "loss": 2.1713, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.2459698387935518, | |
| "grad_norm": 0.7193028926849365, | |
| "learning_rate": 3.8308833838282696e-05, | |
| "loss": 2.1867, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.24804992199688, | |
| "grad_norm": 0.7264684438705444, | |
| "learning_rate": 3.825525152340298e-05, | |
| "loss": 2.1526, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.250130005200208, | |
| "grad_norm": 0.6914860606193542, | |
| "learning_rate": 3.82015843717434e-05, | |
| "loss": 2.1867, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.252210088403536, | |
| "grad_norm": 0.6980414986610413, | |
| "learning_rate": 3.814783272678654e-05, | |
| "loss": 2.1812, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.2542901716068644, | |
| "grad_norm": 0.6947855949401855, | |
| "learning_rate": 3.809399693255579e-05, | |
| "loss": 2.1888, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.2563702548101925, | |
| "grad_norm": 0.8331231474876404, | |
| "learning_rate": 3.8040077333613114e-05, | |
| "loss": 2.1927, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.2584503380135206, | |
| "grad_norm": 0.6281008720397949, | |
| "learning_rate": 3.798607427505684e-05, | |
| "loss": 2.1284, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.2605304212168487, | |
| "grad_norm": 0.6286195516586304, | |
| "learning_rate": 3.7931988102519436e-05, | |
| "loss": 2.1813, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.2626105044201767, | |
| "grad_norm": 0.6835711002349854, | |
| "learning_rate": 3.7877819162165376e-05, | |
| "loss": 2.1774, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.2646905876235048, | |
| "grad_norm": 0.7094857692718506, | |
| "learning_rate": 3.7823567800688805e-05, | |
| "loss": 2.1944, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.2667706708268331, | |
| "grad_norm": 0.7638453841209412, | |
| "learning_rate": 3.776923436531142e-05, | |
| "loss": 2.1965, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.2688507540301612, | |
| "grad_norm": 0.8362658023834229, | |
| "learning_rate": 3.7714819203780215e-05, | |
| "loss": 2.2046, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.2709308372334893, | |
| "grad_norm": 0.7839365601539612, | |
| "learning_rate": 3.7660322664365226e-05, | |
| "loss": 2.1612, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.2730109204368174, | |
| "grad_norm": 0.7160779237747192, | |
| "learning_rate": 3.760574509585734e-05, | |
| "loss": 2.1517, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.2750910036401457, | |
| "grad_norm": 0.6370715498924255, | |
| "learning_rate": 3.7551086847566045e-05, | |
| "loss": 2.1783, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.2771710868434738, | |
| "grad_norm": 0.7071606516838074, | |
| "learning_rate": 3.74963482693172e-05, | |
| "loss": 2.2303, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.2792511700468019, | |
| "grad_norm": 0.6695096492767334, | |
| "learning_rate": 3.744152971145081e-05, | |
| "loss": 2.2031, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.28133125325013, | |
| "grad_norm": 0.8058128356933594, | |
| "learning_rate": 3.738663152481875e-05, | |
| "loss": 2.1971, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.283411336453458, | |
| "grad_norm": 0.7660072445869446, | |
| "learning_rate": 3.733165406078254e-05, | |
| "loss": 2.1417, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.2854914196567861, | |
| "grad_norm": 0.639193058013916, | |
| "learning_rate": 3.727659767121109e-05, | |
| "loss": 2.2019, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.2875715028601145, | |
| "grad_norm": 0.7742004990577698, | |
| "learning_rate": 3.722146270847848e-05, | |
| "loss": 2.1982, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.2896515860634425, | |
| "grad_norm": 0.7002267241477966, | |
| "learning_rate": 3.716624952546166e-05, | |
| "loss": 2.1751, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2917316692667706, | |
| "grad_norm": 0.7309245467185974, | |
| "learning_rate": 3.711095847553817e-05, | |
| "loss": 2.1215, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.293811752470099, | |
| "grad_norm": 0.7643809914588928, | |
| "learning_rate": 3.7055589912583995e-05, | |
| "loss": 2.1208, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.295891835673427, | |
| "grad_norm": 0.6457206606864929, | |
| "learning_rate": 3.700014419097115e-05, | |
| "loss": 2.112, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.2979719188767551, | |
| "grad_norm": 0.7436161041259766, | |
| "learning_rate": 3.694462166556554e-05, | |
| "loss": 2.1684, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.3000520020800832, | |
| "grad_norm": 0.8125039339065552, | |
| "learning_rate": 3.688902269172458e-05, | |
| "loss": 2.1971, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.3021320852834113, | |
| "grad_norm": 0.7858961224555969, | |
| "learning_rate": 3.6833347625295016e-05, | |
| "loss": 2.1598, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.3042121684867394, | |
| "grad_norm": 0.7834795117378235, | |
| "learning_rate": 3.677759682261058e-05, | |
| "loss": 2.1854, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.3062922516900677, | |
| "grad_norm": 0.6971118450164795, | |
| "learning_rate": 3.672177064048976e-05, | |
| "loss": 2.1651, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.3083723348933958, | |
| "grad_norm": 0.7360151410102844, | |
| "learning_rate": 3.6665869436233446e-05, | |
| "loss": 2.2039, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.3104524180967239, | |
| "grad_norm": 0.7403413653373718, | |
| "learning_rate": 3.6609893567622735e-05, | |
| "loss": 2.1726, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.312532501300052, | |
| "grad_norm": 0.6509247422218323, | |
| "learning_rate": 3.655384339291657e-05, | |
| "loss": 2.1739, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.3146125845033803, | |
| "grad_norm": 0.7621864080429077, | |
| "learning_rate": 3.6497719270849464e-05, | |
| "loss": 2.2427, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.3166926677067083, | |
| "grad_norm": 0.851795494556427, | |
| "learning_rate": 3.6441521560629225e-05, | |
| "loss": 2.1947, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.3187727509100364, | |
| "grad_norm": 0.7552398443222046, | |
| "learning_rate": 3.6385250621934655e-05, | |
| "loss": 2.1688, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.3208528341133645, | |
| "grad_norm": 0.824918270111084, | |
| "learning_rate": 3.6328906814913194e-05, | |
| "loss": 2.149, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.3229329173166926, | |
| "grad_norm": 1.407629370689392, | |
| "learning_rate": 3.62724905001787e-05, | |
| "loss": 2.1911, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.3250130005200207, | |
| "grad_norm": 0.7478744983673096, | |
| "learning_rate": 3.621600203880907e-05, | |
| "loss": 2.1778, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.327093083723349, | |
| "grad_norm": 0.7519647479057312, | |
| "learning_rate": 3.615944179234397e-05, | |
| "loss": 2.1899, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.329173166926677, | |
| "grad_norm": 0.8431859612464905, | |
| "learning_rate": 3.610281012278252e-05, | |
| "loss": 2.1521, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.3312532501300052, | |
| "grad_norm": 0.6993555426597595, | |
| "learning_rate": 3.604610739258091e-05, | |
| "loss": 2.1763, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.7812565565109253, | |
| "learning_rate": 3.5989333964650216e-05, | |
| "loss": 2.1523, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.3354134165366616, | |
| "grad_norm": 0.6882584691047668, | |
| "learning_rate": 3.593249020235393e-05, | |
| "loss": 2.1866, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.3374934997399897, | |
| "grad_norm": 0.7424536943435669, | |
| "learning_rate": 3.5875576469505735e-05, | |
| "loss": 2.2083, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.3395735829433177, | |
| "grad_norm": 0.7519948482513428, | |
| "learning_rate": 3.581859313036712e-05, | |
| "loss": 2.1977, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.3416536661466458, | |
| "grad_norm": 0.759537398815155, | |
| "learning_rate": 3.576154054964511e-05, | |
| "loss": 2.1824, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.343733749349974, | |
| "grad_norm": 0.8406732678413391, | |
| "learning_rate": 3.570441909248984e-05, | |
| "loss": 2.1667, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.345813832553302, | |
| "grad_norm": 0.7489810585975647, | |
| "learning_rate": 3.564722912449231e-05, | |
| "loss": 2.1611, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.3478939157566303, | |
| "grad_norm": 0.6325241327285767, | |
| "learning_rate": 3.558997101168199e-05, | |
| "loss": 2.1867, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.3499739989599584, | |
| "grad_norm": 1.2815579175949097, | |
| "learning_rate": 3.553264512052449e-05, | |
| "loss": 2.2166, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.3520540821632865, | |
| "grad_norm": 0.7271225452423096, | |
| "learning_rate": 3.5475251817919234e-05, | |
| "loss": 2.1725, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.3541341653666148, | |
| "grad_norm": 0.7447043657302856, | |
| "learning_rate": 3.5417791471197083e-05, | |
| "loss": 2.1804, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.3562142485699429, | |
| "grad_norm": 0.8641957640647888, | |
| "learning_rate": 3.5360264448117986e-05, | |
| "loss": 2.2064, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.358294331773271, | |
| "grad_norm": 0.8609848618507385, | |
| "learning_rate": 3.530267111686867e-05, | |
| "loss": 2.2272, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.360374414976599, | |
| "grad_norm": 0.7370724678039551, | |
| "learning_rate": 3.52450118460602e-05, | |
| "loss": 2.1857, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.3624544981799271, | |
| "grad_norm": 0.7609270215034485, | |
| "learning_rate": 3.518728700472573e-05, | |
| "loss": 2.189, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.3645345813832552, | |
| "grad_norm": 0.6914607286453247, | |
| "learning_rate": 3.512949696231804e-05, | |
| "loss": 2.18, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.3666146645865835, | |
| "grad_norm": 0.8150199055671692, | |
| "learning_rate": 3.507164208870721e-05, | |
| "loss": 2.1867, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.3686947477899116, | |
| "grad_norm": 0.6963294148445129, | |
| "learning_rate": 3.501372275417828e-05, | |
| "loss": 2.1847, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.3707748309932397, | |
| "grad_norm": 0.8265756368637085, | |
| "learning_rate": 3.495573932942884e-05, | |
| "loss": 2.1786, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.3728549141965678, | |
| "grad_norm": 0.7163086533546448, | |
| "learning_rate": 3.489769218556667e-05, | |
| "loss": 2.2068, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.374934997399896, | |
| "grad_norm": 0.7726351618766785, | |
| "learning_rate": 3.483958169410738e-05, | |
| "loss": 2.1578, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.3770150806032242, | |
| "grad_norm": 0.9150016903877258, | |
| "learning_rate": 3.478140822697202e-05, | |
| "loss": 2.1928, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.3790951638065523, | |
| "grad_norm": 0.7856032848358154, | |
| "learning_rate": 3.472317215648467e-05, | |
| "loss": 2.1963, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.3811752470098804, | |
| "grad_norm": 0.8869948387145996, | |
| "learning_rate": 3.466487385537013e-05, | |
| "loss": 2.1658, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.3832553302132085, | |
| "grad_norm": 0.7349131107330322, | |
| "learning_rate": 3.460651369675147e-05, | |
| "loss": 2.1698, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.3853354134165365, | |
| "grad_norm": 0.6802077889442444, | |
| "learning_rate": 3.4548092054147645e-05, | |
| "loss": 2.1982, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.3874154966198649, | |
| "grad_norm": 0.6826719045639038, | |
| "learning_rate": 3.448960930147115e-05, | |
| "loss": 2.1806, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.389495579823193, | |
| "grad_norm": 0.7698726654052734, | |
| "learning_rate": 3.44310658130256e-05, | |
| "loss": 2.1971, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.391575663026521, | |
| "grad_norm": 0.7256355285644531, | |
| "learning_rate": 3.4372461963503294e-05, | |
| "loss": 2.1719, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.3936557462298491, | |
| "grad_norm": 0.8313025832176208, | |
| "learning_rate": 3.431379812798291e-05, | |
| "loss": 2.1915, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.3957358294331774, | |
| "grad_norm": 0.8327659368515015, | |
| "learning_rate": 3.425507468192702e-05, | |
| "loss": 2.1955, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.3978159126365055, | |
| "grad_norm": 0.6724772453308105, | |
| "learning_rate": 3.419629200117972e-05, | |
| "loss": 2.1378, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.3998959958398336, | |
| "grad_norm": 0.6691730618476868, | |
| "learning_rate": 3.4137450461964213e-05, | |
| "loss": 2.2271, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.4019760790431617, | |
| "grad_norm": 0.8396449685096741, | |
| "learning_rate": 3.407855044088045e-05, | |
| "loss": 2.2151, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.4040561622464898, | |
| "grad_norm": 0.9861184358596802, | |
| "learning_rate": 3.401959231490263e-05, | |
| "loss": 2.1944, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.4061362454498179, | |
| "grad_norm": 0.6781579256057739, | |
| "learning_rate": 3.396057646137687e-05, | |
| "loss": 2.1622, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.4082163286531462, | |
| "grad_norm": 0.7667004466056824, | |
| "learning_rate": 3.390150325801874e-05, | |
| "loss": 2.2198, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.4102964118564743, | |
| "grad_norm": 0.7576628923416138, | |
| "learning_rate": 3.3842373082910884e-05, | |
| "loss": 2.1466, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.4123764950598023, | |
| "grad_norm": 0.7718074917793274, | |
| "learning_rate": 3.378318631450055e-05, | |
| "loss": 2.2033, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.4144565782631306, | |
| "grad_norm": 0.5833465456962585, | |
| "learning_rate": 3.3723943331597205e-05, | |
| "loss": 2.2106, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.4165366614664587, | |
| "grad_norm": 0.7592945098876953, | |
| "learning_rate": 3.366464451337012e-05, | |
| "loss": 2.1829, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.4186167446697868, | |
| "grad_norm": 0.8660997748374939, | |
| "learning_rate": 3.360529023934592e-05, | |
| "loss": 2.1769, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.420696827873115, | |
| "grad_norm": 0.6722576022148132, | |
| "learning_rate": 3.354588088940614e-05, | |
| "loss": 2.1786, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.422776911076443, | |
| "grad_norm": 0.689155638217926, | |
| "learning_rate": 3.348641684378483e-05, | |
| "loss": 2.1651, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.424856994279771, | |
| "grad_norm": 0.8105888366699219, | |
| "learning_rate": 3.342689848306611e-05, | |
| "loss": 2.1733, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.4269370774830994, | |
| "grad_norm": 0.8982015252113342, | |
| "learning_rate": 3.3367326188181725e-05, | |
| "loss": 2.1921, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.4290171606864275, | |
| "grad_norm": 0.7339545488357544, | |
| "learning_rate": 3.3307700340408596e-05, | |
| "loss": 2.1979, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.4310972438897556, | |
| "grad_norm": 0.8030118942260742, | |
| "learning_rate": 3.324802132136642e-05, | |
| "loss": 2.2051, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.4331773270930837, | |
| "grad_norm": 0.7789422869682312, | |
| "learning_rate": 3.318828951301519e-05, | |
| "loss": 2.1387, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.435257410296412, | |
| "grad_norm": 0.7332764863967896, | |
| "learning_rate": 3.3128505297652765e-05, | |
| "loss": 2.1851, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.43733749349974, | |
| "grad_norm": 0.7092201709747314, | |
| "learning_rate": 3.306866905791242e-05, | |
| "loss": 2.201, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.4394175767030681, | |
| "grad_norm": 0.7219560146331787, | |
| "learning_rate": 3.30087811767604e-05, | |
| "loss": 2.1875, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.4414976599063962, | |
| "grad_norm": 0.7875995635986328, | |
| "learning_rate": 3.2948842037493466e-05, | |
| "loss": 2.1303, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.4435777431097243, | |
| "grad_norm": 1.0195878744125366, | |
| "learning_rate": 3.288885202373644e-05, | |
| "loss": 2.189, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.4456578263130524, | |
| "grad_norm": 0.7329500913619995, | |
| "learning_rate": 3.282881151943977e-05, | |
| "loss": 2.2193, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.4477379095163807, | |
| "grad_norm": 0.6737660765647888, | |
| "learning_rate": 3.276872090887702e-05, | |
| "loss": 2.2346, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.4498179927197088, | |
| "grad_norm": 0.8194296360015869, | |
| "learning_rate": 3.270858057664251e-05, | |
| "loss": 2.177, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.4518980759230369, | |
| "grad_norm": 0.696855366230011, | |
| "learning_rate": 3.264839090764871e-05, | |
| "loss": 2.1669, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.4539781591263652, | |
| "grad_norm": 0.7700757384300232, | |
| "learning_rate": 3.2588152287123904e-05, | |
| "loss": 2.1862, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.4560582423296933, | |
| "grad_norm": 0.6540327668190002, | |
| "learning_rate": 3.252786510060969e-05, | |
| "loss": 2.2274, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4581383255330214, | |
| "grad_norm": 0.5958786606788635, | |
| "learning_rate": 3.246752973395846e-05, | |
| "loss": 2.1997, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.4602184087363494, | |
| "grad_norm": 0.7021071314811707, | |
| "learning_rate": 3.2407146573331e-05, | |
| "loss": 2.1759, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.4622984919396775, | |
| "grad_norm": 0.6807276606559753, | |
| "learning_rate": 3.234671600519397e-05, | |
| "loss": 2.2311, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.4643785751430056, | |
| "grad_norm": 0.7907546162605286, | |
| "learning_rate": 3.228623841631747e-05, | |
| "loss": 2.2092, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.466458658346334, | |
| "grad_norm": 0.8120606541633606, | |
| "learning_rate": 3.2225714193772526e-05, | |
| "loss": 2.1309, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.468538741549662, | |
| "grad_norm": 0.7305984497070312, | |
| "learning_rate": 3.216514372492864e-05, | |
| "loss": 2.1823, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.47061882475299, | |
| "grad_norm": 0.737861156463623, | |
| "learning_rate": 3.210452739745129e-05, | |
| "loss": 2.1817, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.4726989079563182, | |
| "grad_norm": 0.6479620933532715, | |
| "learning_rate": 3.2043865599299484e-05, | |
| "loss": 2.1553, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.4747789911596465, | |
| "grad_norm": 0.8242900371551514, | |
| "learning_rate": 3.1983158718723225e-05, | |
| "loss": 2.1756, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.4768590743629746, | |
| "grad_norm": 1.2284351587295532, | |
| "learning_rate": 3.192240714426108e-05, | |
| "loss": 2.1861, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.4789391575663027, | |
| "grad_norm": 0.8496856093406677, | |
| "learning_rate": 3.1861611264737644e-05, | |
| "loss": 2.1947, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.4810192407696308, | |
| "grad_norm": 0.6714478135108948, | |
| "learning_rate": 3.180077146926109e-05, | |
| "loss": 2.1857, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.4830993239729588, | |
| "grad_norm": 0.7734602689743042, | |
| "learning_rate": 3.173988814722065e-05, | |
| "loss": 2.1711, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.485179407176287, | |
| "grad_norm": 0.6525964736938477, | |
| "learning_rate": 3.167896168828417e-05, | |
| "loss": 2.1995, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.4872594903796152, | |
| "grad_norm": 0.9553260803222656, | |
| "learning_rate": 3.161799248239553e-05, | |
| "loss": 2.1814, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.4893395735829433, | |
| "grad_norm": 0.8433247804641724, | |
| "learning_rate": 3.155698091977224e-05, | |
| "loss": 2.1783, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.4914196567862714, | |
| "grad_norm": 0.706148087978363, | |
| "learning_rate": 3.1495927390902905e-05, | |
| "loss": 2.1987, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.4934997399895995, | |
| "grad_norm": 0.7170814275741577, | |
| "learning_rate": 3.14348322865447e-05, | |
| "loss": 2.152, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.4955798231929278, | |
| "grad_norm": 0.726800799369812, | |
| "learning_rate": 3.1373695997720895e-05, | |
| "loss": 2.1854, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.497659906396256, | |
| "grad_norm": 0.7688819169998169, | |
| "learning_rate": 3.131251891571839e-05, | |
| "loss": 2.1488, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.499739989599584, | |
| "grad_norm": 0.7639352679252625, | |
| "learning_rate": 3.1251301432085106e-05, | |
| "loss": 2.1583, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.501820072802912, | |
| "grad_norm": 0.6791670322418213, | |
| "learning_rate": 3.11900439386276e-05, | |
| "loss": 2.1783, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.5039001560062402, | |
| "grad_norm": 0.6605120897293091, | |
| "learning_rate": 3.112874682740847e-05, | |
| "loss": 2.1583, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.5059802392095682, | |
| "grad_norm": 0.7585316896438599, | |
| "learning_rate": 3.10674104907439e-05, | |
| "loss": 2.1527, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.5080603224128966, | |
| "grad_norm": 0.7344695329666138, | |
| "learning_rate": 3.10060353212011e-05, | |
| "loss": 2.1664, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.5101404056162246, | |
| "grad_norm": 0.7378405332565308, | |
| "learning_rate": 3.094462171159584e-05, | |
| "loss": 2.1505, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.5122204888195527, | |
| "grad_norm": 0.7696133255958557, | |
| "learning_rate": 3.088317005498991e-05, | |
| "loss": 2.1686, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.514300572022881, | |
| "grad_norm": 0.6149158477783203, | |
| "learning_rate": 3.082168074468861e-05, | |
| "loss": 2.17, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.5163806552262091, | |
| "grad_norm": 0.7041980028152466, | |
| "learning_rate": 3.0760154174238226e-05, | |
| "loss": 2.1967, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.5184607384295372, | |
| "grad_norm": 0.8049356937408447, | |
| "learning_rate": 3.069859073742352e-05, | |
| "loss": 2.1727, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.5205408216328653, | |
| "grad_norm": 0.75400710105896, | |
| "learning_rate": 3.0636990828265236e-05, | |
| "loss": 2.1826, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.5226209048361934, | |
| "grad_norm": 0.8219313621520996, | |
| "learning_rate": 3.0575354841017495e-05, | |
| "loss": 2.1961, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.5247009880395215, | |
| "grad_norm": 0.8475666046142578, | |
| "learning_rate": 3.051368317016537e-05, | |
| "loss": 2.1641, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.5267810712428496, | |
| "grad_norm": 0.7787706851959229, | |
| "learning_rate": 3.0451976210422307e-05, | |
| "loss": 2.17, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.5288611544461779, | |
| "grad_norm": 0.8453835844993591, | |
| "learning_rate": 3.03902343567276e-05, | |
| "loss": 2.2105, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.530941237649506, | |
| "grad_norm": 0.7797660231590271, | |
| "learning_rate": 3.0328458004243877e-05, | |
| "loss": 2.1681, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.5330213208528343, | |
| "grad_norm": 0.7047582864761353, | |
| "learning_rate": 3.0266647548354576e-05, | |
| "loss": 2.1242, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.5351014040561624, | |
| "grad_norm": 0.6521434187889099, | |
| "learning_rate": 3.0204803384661386e-05, | |
| "loss": 2.2045, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.5371814872594904, | |
| "grad_norm": 0.7803774476051331, | |
| "learning_rate": 3.0142925908981756e-05, | |
| "loss": 2.1639, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.5392615704628185, | |
| "grad_norm": 0.7463592886924744, | |
| "learning_rate": 3.0081015517346328e-05, | |
| "loss": 2.1969, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.5413416536661466, | |
| "grad_norm": 0.689757764339447, | |
| "learning_rate": 3.0019072605996412e-05, | |
| "loss": 2.2282, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.5434217368694747, | |
| "grad_norm": 0.9387602806091309, | |
| "learning_rate": 2.9957097571381453e-05, | |
| "loss": 2.1989, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.5455018200728028, | |
| "grad_norm": 0.749153196811676, | |
| "learning_rate": 2.98950908101565e-05, | |
| "loss": 2.1492, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.547581903276131, | |
| "grad_norm": 0.7032657265663147, | |
| "learning_rate": 2.983305271917965e-05, | |
| "loss": 2.1923, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.5496619864794592, | |
| "grad_norm": 0.640876054763794, | |
| "learning_rate": 2.9770983695509517e-05, | |
| "loss": 2.1863, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.5517420696827873, | |
| "grad_norm": 0.6808728575706482, | |
| "learning_rate": 2.9708884136402715e-05, | |
| "loss": 2.1861, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.5538221528861156, | |
| "grad_norm": 0.6913876533508301, | |
| "learning_rate": 2.9646754439311252e-05, | |
| "loss": 2.1534, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.5559022360894437, | |
| "grad_norm": 0.8387054800987244, | |
| "learning_rate": 2.9584595001880065e-05, | |
| "loss": 2.1821, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.5579823192927718, | |
| "grad_norm": 0.7485213279724121, | |
| "learning_rate": 2.9522406221944415e-05, | |
| "loss": 2.1615, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.5600624024960998, | |
| "grad_norm": 0.6283704042434692, | |
| "learning_rate": 2.9460188497527363e-05, | |
| "loss": 2.11, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.562142485699428, | |
| "grad_norm": 0.8199918270111084, | |
| "learning_rate": 2.9397942226837222e-05, | |
| "loss": 2.182, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.564222568902756, | |
| "grad_norm": 0.7533789873123169, | |
| "learning_rate": 2.9335667808265023e-05, | |
| "loss": 2.1962, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.566302652106084, | |
| "grad_norm": 1.0285567045211792, | |
| "learning_rate": 2.9273365640381924e-05, | |
| "loss": 2.1573, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.5683827353094124, | |
| "grad_norm": 0.7705976366996765, | |
| "learning_rate": 2.921103612193672e-05, | |
| "loss": 2.1847, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.5704628185127405, | |
| "grad_norm": 0.7277804613113403, | |
| "learning_rate": 2.9148679651853212e-05, | |
| "loss": 2.1516, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.5725429017160688, | |
| "grad_norm": 0.7203589677810669, | |
| "learning_rate": 2.9086296629227738e-05, | |
| "loss": 2.1295, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.574622984919397, | |
| "grad_norm": 0.7318481802940369, | |
| "learning_rate": 2.9023887453326554e-05, | |
| "loss": 2.1474, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.576703068122725, | |
| "grad_norm": 0.7423680424690247, | |
| "learning_rate": 2.8961452523583322e-05, | |
| "loss": 2.2318, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.578783151326053, | |
| "grad_norm": 0.6626203060150146, | |
| "learning_rate": 2.8898992239596507e-05, | |
| "loss": 2.1331, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.5808632345293812, | |
| "grad_norm": 0.6937516331672668, | |
| "learning_rate": 2.883650700112689e-05, | |
| "loss": 2.1473, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5829433177327092, | |
| "grad_norm": 0.7485554814338684, | |
| "learning_rate": 2.8773997208094912e-05, | |
| "loss": 2.1692, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.5850234009360373, | |
| "grad_norm": 0.7731395363807678, | |
| "learning_rate": 2.8711463260578214e-05, | |
| "loss": 2.2108, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.5871034841393654, | |
| "grad_norm": 0.6718974709510803, | |
| "learning_rate": 2.864890555880902e-05, | |
| "loss": 2.1694, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.5891835673426937, | |
| "grad_norm": 0.7455317974090576, | |
| "learning_rate": 2.8586324503171574e-05, | |
| "loss": 2.1801, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.5912636505460218, | |
| "grad_norm": 0.6923863887786865, | |
| "learning_rate": 2.8523720494199595e-05, | |
| "loss": 2.2099, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.5933437337493501, | |
| "grad_norm": 0.7777950167655945, | |
| "learning_rate": 2.8461093932573736e-05, | |
| "loss": 2.1788, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.5954238169526782, | |
| "grad_norm": 0.7518746256828308, | |
| "learning_rate": 2.8398445219118935e-05, | |
| "loss": 2.1768, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.5975039001560063, | |
| "grad_norm": 0.6466406583786011, | |
| "learning_rate": 2.8335774754801965e-05, | |
| "loss": 2.1786, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.5995839833593344, | |
| "grad_norm": 0.9300071001052856, | |
| "learning_rate": 2.8273082940728784e-05, | |
| "loss": 2.13, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.6016640665626625, | |
| "grad_norm": 0.668032169342041, | |
| "learning_rate": 2.8210370178141987e-05, | |
| "loss": 2.1955, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.6037441497659906, | |
| "grad_norm": 0.713634192943573, | |
| "learning_rate": 2.814763686841826e-05, | |
| "loss": 2.2051, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.6058242329693186, | |
| "grad_norm": 0.7603687644004822, | |
| "learning_rate": 2.808488341306578e-05, | |
| "loss": 2.1569, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.607904316172647, | |
| "grad_norm": 0.7058757543563843, | |
| "learning_rate": 2.8022110213721688e-05, | |
| "loss": 2.1072, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.609984399375975, | |
| "grad_norm": 0.649702787399292, | |
| "learning_rate": 2.7959317672149444e-05, | |
| "loss": 2.1538, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.6120644825793031, | |
| "grad_norm": 0.7990389466285706, | |
| "learning_rate": 2.789650619023636e-05, | |
| "loss": 2.1701, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.6141445657826314, | |
| "grad_norm": 0.814145028591156, | |
| "learning_rate": 2.783367616999092e-05, | |
| "loss": 2.1796, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.6162246489859595, | |
| "grad_norm": 0.8015204071998596, | |
| "learning_rate": 2.7770828013540294e-05, | |
| "loss": 2.1958, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.6183047321892876, | |
| "grad_norm": 0.7484777569770813, | |
| "learning_rate": 2.7707962123127707e-05, | |
| "loss": 2.156, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.6203848153926157, | |
| "grad_norm": 1.0027931928634644, | |
| "learning_rate": 2.7645078901109893e-05, | |
| "loss": 2.1935, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.6224648985959438, | |
| "grad_norm": 0.9040629863739014, | |
| "learning_rate": 2.7582178749954523e-05, | |
| "loss": 2.1512, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.6245449817992719, | |
| "grad_norm": 0.6937136054039001, | |
| "learning_rate": 2.7519262072237594e-05, | |
| "loss": 2.1613, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.6266250650026, | |
| "grad_norm": 0.7338537573814392, | |
| "learning_rate": 2.745632927064089e-05, | |
| "loss": 2.1566, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.6287051482059283, | |
| "grad_norm": 0.7064099907875061, | |
| "learning_rate": 2.739338074794941e-05, | |
| "loss": 2.1996, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.6307852314092564, | |
| "grad_norm": 0.7720285654067993, | |
| "learning_rate": 2.7330416907048727e-05, | |
| "loss": 2.1751, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.6328653146125847, | |
| "grad_norm": 1.0123454332351685, | |
| "learning_rate": 2.7267438150922508e-05, | |
| "loss": 2.1707, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.6349453978159127, | |
| "grad_norm": 0.7546709179878235, | |
| "learning_rate": 2.720444488264984e-05, | |
| "loss": 2.1563, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.6370254810192408, | |
| "grad_norm": 0.6726992726325989, | |
| "learning_rate": 2.7141437505402705e-05, | |
| "loss": 2.2035, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.639105564222569, | |
| "grad_norm": 0.8486371040344238, | |
| "learning_rate": 2.7078416422443386e-05, | |
| "loss": 2.1483, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.641185647425897, | |
| "grad_norm": 0.8228679299354553, | |
| "learning_rate": 2.7015382037121896e-05, | |
| "loss": 2.1901, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.643265730629225, | |
| "grad_norm": 0.7839644551277161, | |
| "learning_rate": 2.695233475287336e-05, | |
| "loss": 2.1339, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.6453458138325532, | |
| "grad_norm": 0.8633984923362732, | |
| "learning_rate": 2.6889274973215495e-05, | |
| "loss": 2.2077, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.6474258970358813, | |
| "grad_norm": 0.8162491321563721, | |
| "learning_rate": 2.6826203101745956e-05, | |
| "loss": 2.1532, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.6495059802392096, | |
| "grad_norm": 0.7576460242271423, | |
| "learning_rate": 2.6763119542139813e-05, | |
| "loss": 2.1764, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.6515860634425377, | |
| "grad_norm": 0.7479214668273926, | |
| "learning_rate": 2.670002469814693e-05, | |
| "loss": 2.1551, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.653666146645866, | |
| "grad_norm": 0.6720142960548401, | |
| "learning_rate": 2.6636918973589402e-05, | |
| "loss": 2.1667, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.655746229849194, | |
| "grad_norm": 0.8261317014694214, | |
| "learning_rate": 2.6573802772358965e-05, | |
| "loss": 2.1691, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.6578263130525221, | |
| "grad_norm": 0.7510902881622314, | |
| "learning_rate": 2.6510676498414377e-05, | |
| "loss": 2.2111, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.6599063962558502, | |
| "grad_norm": 0.7204596996307373, | |
| "learning_rate": 2.644754055577892e-05, | |
| "loss": 2.168, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.6619864794591783, | |
| "grad_norm": 0.6878061890602112, | |
| "learning_rate": 2.6384395348537704e-05, | |
| "loss": 2.1643, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.6640665626625064, | |
| "grad_norm": 0.6784379482269287, | |
| "learning_rate": 2.6321241280835173e-05, | |
| "loss": 2.1648, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6661466458658345, | |
| "grad_norm": 0.6045475006103516, | |
| "learning_rate": 2.6258078756872445e-05, | |
| "loss": 2.1654, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.6682267290691628, | |
| "grad_norm": 0.7655680775642395, | |
| "learning_rate": 2.6194908180904798e-05, | |
| "loss": 2.1774, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.670306812272491, | |
| "grad_norm": 0.9217324256896973, | |
| "learning_rate": 2.613172995723902e-05, | |
| "loss": 2.1726, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.672386895475819, | |
| "grad_norm": 0.8260428309440613, | |
| "learning_rate": 2.6068544490230852e-05, | |
| "loss": 2.2147, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.6744669786791473, | |
| "grad_norm": 0.7907747030258179, | |
| "learning_rate": 2.6005352184282384e-05, | |
| "loss": 2.2013, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.6765470618824754, | |
| "grad_norm": 0.8157804012298584, | |
| "learning_rate": 2.5942153443839506e-05, | |
| "loss": 2.1584, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.6786271450858035, | |
| "grad_norm": 0.9395479559898376, | |
| "learning_rate": 2.5878948673389254e-05, | |
| "loss": 2.1244, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.6807072282891315, | |
| "grad_norm": 0.7687943577766418, | |
| "learning_rate": 2.5815738277457285e-05, | |
| "loss": 2.1902, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.6827873114924596, | |
| "grad_norm": 0.6989742517471313, | |
| "learning_rate": 2.575252266060525e-05, | |
| "loss": 2.208, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.6848673946957877, | |
| "grad_norm": 1.0736007690429688, | |
| "learning_rate": 2.5689302227428215e-05, | |
| "loss": 2.1795, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.6869474778991158, | |
| "grad_norm": 0.7165335416793823, | |
| "learning_rate": 2.5626077382552072e-05, | |
| "loss": 2.1777, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.6890275611024441, | |
| "grad_norm": 0.6504753828048706, | |
| "learning_rate": 2.5562848530630945e-05, | |
| "loss": 2.1933, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.6911076443057722, | |
| "grad_norm": 1.0563914775848389, | |
| "learning_rate": 2.5499616076344607e-05, | |
| "loss": 2.15, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.6931877275091005, | |
| "grad_norm": 0.8301395177841187, | |
| "learning_rate": 2.5436380424395895e-05, | |
| "loss": 2.1229, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.6952678107124286, | |
| "grad_norm": 0.697247326374054, | |
| "learning_rate": 2.5373141979508102e-05, | |
| "loss": 2.1382, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.6973478939157567, | |
| "grad_norm": 0.6969221830368042, | |
| "learning_rate": 2.5309901146422404e-05, | |
| "loss": 2.1861, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.6994279771190848, | |
| "grad_norm": 0.7223391532897949, | |
| "learning_rate": 2.5246658329895252e-05, | |
| "loss": 2.1625, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.7015080603224129, | |
| "grad_norm": 0.7388386726379395, | |
| "learning_rate": 2.5183413934695794e-05, | |
| "loss": 2.1558, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.703588143525741, | |
| "grad_norm": 0.8030023574829102, | |
| "learning_rate": 2.5120168365603292e-05, | |
| "loss": 2.1696, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.705668226729069, | |
| "grad_norm": 0.7399486899375916, | |
| "learning_rate": 2.50569220274045e-05, | |
| "loss": 2.192, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.7077483099323973, | |
| "grad_norm": 0.7578115463256836, | |
| "learning_rate": 2.4993675324891135e-05, | |
| "loss": 2.1734, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.7098283931357254, | |
| "grad_norm": 0.8409146070480347, | |
| "learning_rate": 2.493042866285719e-05, | |
| "loss": 2.2099, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.7119084763390535, | |
| "grad_norm": 0.756033718585968, | |
| "learning_rate": 2.486718244609645e-05, | |
| "loss": 2.1975, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.7139885595423818, | |
| "grad_norm": 0.7186440229415894, | |
| "learning_rate": 2.480393707939981e-05, | |
| "loss": 2.1507, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.71606864274571, | |
| "grad_norm": 0.7971068620681763, | |
| "learning_rate": 2.4740692967552773e-05, | |
| "loss": 2.1812, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.718148725949038, | |
| "grad_norm": 0.7994965314865112, | |
| "learning_rate": 2.467745051533274e-05, | |
| "loss": 2.1485, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.720228809152366, | |
| "grad_norm": 0.868439257144928, | |
| "learning_rate": 2.4614210127506556e-05, | |
| "loss": 2.1645, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.7223088923556942, | |
| "grad_norm": 0.7531114220619202, | |
| "learning_rate": 2.4550972208827817e-05, | |
| "loss": 2.1822, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.7243889755590223, | |
| "grad_norm": 0.7823972702026367, | |
| "learning_rate": 2.4487737164034338e-05, | |
| "loss": 2.1444, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.7264690587623504, | |
| "grad_norm": 0.6993070840835571, | |
| "learning_rate": 2.4424505397845517e-05, | |
| "loss": 2.1499, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.7285491419656787, | |
| "grad_norm": 0.9332249164581299, | |
| "learning_rate": 2.4361277314959796e-05, | |
| "loss": 2.169, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.7306292251690067, | |
| "grad_norm": 0.8146462440490723, | |
| "learning_rate": 2.4298053320052004e-05, | |
| "loss": 2.1949, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.732709308372335, | |
| "grad_norm": 0.7273747324943542, | |
| "learning_rate": 2.4234833817770846e-05, | |
| "loss": 2.1759, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.7347893915756631, | |
| "grad_norm": 0.7499719858169556, | |
| "learning_rate": 2.417161921273625e-05, | |
| "loss": 2.2312, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.7368694747789912, | |
| "grad_norm": 0.6487704515457153, | |
| "learning_rate": 2.4108409909536805e-05, | |
| "loss": 2.167, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.7389495579823193, | |
| "grad_norm": 0.6822001338005066, | |
| "learning_rate": 2.4045206312727184e-05, | |
| "loss": 2.1986, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.7410296411856474, | |
| "grad_norm": 0.8364559412002563, | |
| "learning_rate": 2.3982008826825503e-05, | |
| "loss": 2.2144, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.7431097243889755, | |
| "grad_norm": 0.6755366921424866, | |
| "learning_rate": 2.3918817856310786e-05, | |
| "loss": 2.1672, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.7451898075923036, | |
| "grad_norm": 0.6534518003463745, | |
| "learning_rate": 2.3855633805620374e-05, | |
| "loss": 2.1611, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.7472698907956317, | |
| "grad_norm": 0.8979050517082214, | |
| "learning_rate": 2.3792457079147286e-05, | |
| "loss": 2.2152, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.74934997399896, | |
| "grad_norm": 0.8764121532440186, | |
| "learning_rate": 2.3729288081237687e-05, | |
| "loss": 2.1696, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.751430057202288, | |
| "grad_norm": 0.7482860088348389, | |
| "learning_rate": 2.3666127216188284e-05, | |
| "loss": 2.1707, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.7535101404056164, | |
| "grad_norm": 0.6165511608123779, | |
| "learning_rate": 2.36029748882437e-05, | |
| "loss": 2.1893, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.7555902236089445, | |
| "grad_norm": 0.7991209626197815, | |
| "learning_rate": 2.3539831501593944e-05, | |
| "loss": 2.1379, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.7576703068122725, | |
| "grad_norm": 0.7029572129249573, | |
| "learning_rate": 2.3476697460371785e-05, | |
| "loss": 2.1528, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.7597503900156006, | |
| "grad_norm": 0.7116166353225708, | |
| "learning_rate": 2.3413573168650198e-05, | |
| "loss": 2.1759, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.7618304732189287, | |
| "grad_norm": 0.8789666295051575, | |
| "learning_rate": 2.335045903043974e-05, | |
| "loss": 2.1882, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.7639105564222568, | |
| "grad_norm": 1.1269465684890747, | |
| "learning_rate": 2.3287355449686004e-05, | |
| "loss": 2.1826, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.765990639625585, | |
| "grad_norm": 0.744796097278595, | |
| "learning_rate": 2.322426283026697e-05, | |
| "loss": 2.1916, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.7680707228289132, | |
| "grad_norm": 0.7507933378219604, | |
| "learning_rate": 2.3161181575990518e-05, | |
| "loss": 2.1924, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.7701508060322413, | |
| "grad_norm": 0.9487372636795044, | |
| "learning_rate": 2.3098112090591744e-05, | |
| "loss": 2.163, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.7722308892355694, | |
| "grad_norm": 0.6878231763839722, | |
| "learning_rate": 2.303505477773045e-05, | |
| "loss": 2.1803, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.7743109724388977, | |
| "grad_norm": 0.686570405960083, | |
| "learning_rate": 2.2972010040988518e-05, | |
| "loss": 2.1865, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.7763910556422258, | |
| "grad_norm": 0.8939157724380493, | |
| "learning_rate": 2.290897828386734e-05, | |
| "loss": 2.1612, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.7784711388455539, | |
| "grad_norm": 0.7810360193252563, | |
| "learning_rate": 2.2845959909785226e-05, | |
| "loss": 2.1517, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.780551222048882, | |
| "grad_norm": 0.8274974822998047, | |
| "learning_rate": 2.2782955322074855e-05, | |
| "loss": 2.1616, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.78263130525221, | |
| "grad_norm": 0.7559753060340881, | |
| "learning_rate": 2.2719964923980653e-05, | |
| "loss": 2.1183, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.7847113884555381, | |
| "grad_norm": 0.8210632801055908, | |
| "learning_rate": 2.2656989118656224e-05, | |
| "loss": 2.1911, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.7867914716588662, | |
| "grad_norm": 0.7994166016578674, | |
| "learning_rate": 2.2594028309161802e-05, | |
| "loss": 2.1927, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.7888715548621945, | |
| "grad_norm": 0.805383026599884, | |
| "learning_rate": 2.253108289846161e-05, | |
| "loss": 2.186, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.7909516380655226, | |
| "grad_norm": 0.872366189956665, | |
| "learning_rate": 2.246815328942133e-05, | |
| "loss": 2.1307, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.793031721268851, | |
| "grad_norm": 0.7008072137832642, | |
| "learning_rate": 2.240523988480551e-05, | |
| "loss": 2.2187, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.795111804472179, | |
| "grad_norm": 0.799379289150238, | |
| "learning_rate": 2.2342343087275e-05, | |
| "loss": 2.1588, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.797191887675507, | |
| "grad_norm": 1.337504267692566, | |
| "learning_rate": 2.227946329938433e-05, | |
| "loss": 2.1518, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.7992719708788352, | |
| "grad_norm": 0.7516017556190491, | |
| "learning_rate": 2.2216600923579196e-05, | |
| "loss": 2.149, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.8013520540821633, | |
| "grad_norm": 0.6863098740577698, | |
| "learning_rate": 2.2153756362193827e-05, | |
| "loss": 2.1886, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.8034321372854913, | |
| "grad_norm": 0.6174860596656799, | |
| "learning_rate": 2.209093001744845e-05, | |
| "loss": 2.181, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.8055122204888194, | |
| "grad_norm": 0.7695885300636292, | |
| "learning_rate": 2.2028122291446687e-05, | |
| "loss": 2.1137, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.8075923036921477, | |
| "grad_norm": 0.8168510794639587, | |
| "learning_rate": 2.1965333586173022e-05, | |
| "loss": 2.1785, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.8096723868954758, | |
| "grad_norm": 0.7163233757019043, | |
| "learning_rate": 2.1902564303490168e-05, | |
| "loss": 2.1422, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.811752470098804, | |
| "grad_norm": 0.8038668036460876, | |
| "learning_rate": 2.183981484513657e-05, | |
| "loss": 2.2291, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.8138325533021322, | |
| "grad_norm": 0.8842986226081848, | |
| "learning_rate": 2.177708561272374e-05, | |
| "loss": 2.1491, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.8159126365054603, | |
| "grad_norm": 0.7526798844337463, | |
| "learning_rate": 2.1714377007733787e-05, | |
| "loss": 2.1554, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.8179927197087884, | |
| "grad_norm": 0.8559210896492004, | |
| "learning_rate": 2.165168943151677e-05, | |
| "loss": 2.1444, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.8200728029121165, | |
| "grad_norm": 0.6576410531997681, | |
| "learning_rate": 2.1589023285288177e-05, | |
| "loss": 2.1322, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.8221528861154446, | |
| "grad_norm": 0.9053552746772766, | |
| "learning_rate": 2.152637897012633e-05, | |
| "loss": 2.2095, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.8242329693187727, | |
| "grad_norm": 0.7630800008773804, | |
| "learning_rate": 2.1463756886969828e-05, | |
| "loss": 2.1478, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.8263130525221007, | |
| "grad_norm": 0.6651878952980042, | |
| "learning_rate": 2.140115743661497e-05, | |
| "loss": 2.1831, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.828393135725429, | |
| "grad_norm": 0.7167788147926331, | |
| "learning_rate": 2.1338581019713225e-05, | |
| "loss": 2.1871, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.8304732189287571, | |
| "grad_norm": 0.7219182252883911, | |
| "learning_rate": 2.1276028036768617e-05, | |
| "loss": 2.1765, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.8325533021320854, | |
| "grad_norm": 0.911848783493042, | |
| "learning_rate": 2.121349888813519e-05, | |
| "loss": 2.1678, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.8346333853354135, | |
| "grad_norm": 0.7422452569007874, | |
| "learning_rate": 2.1150993974014477e-05, | |
| "loss": 2.1797, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.8367134685387416, | |
| "grad_norm": 0.7973139882087708, | |
| "learning_rate": 2.1088513694452852e-05, | |
| "loss": 2.1423, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.8387935517420697, | |
| "grad_norm": 0.6837089657783508, | |
| "learning_rate": 2.1026058449339053e-05, | |
| "loss": 2.1601, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.8408736349453978, | |
| "grad_norm": 0.7220534086227417, | |
| "learning_rate": 2.0963628638401584e-05, | |
| "loss": 2.1673, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.8429537181487259, | |
| "grad_norm": 0.7278773188591003, | |
| "learning_rate": 2.090122466120617e-05, | |
| "loss": 2.1831, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.845033801352054, | |
| "grad_norm": 0.7825272679328918, | |
| "learning_rate": 2.0838846917153184e-05, | |
| "loss": 2.1568, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.847113884555382, | |
| "grad_norm": 0.9204423427581787, | |
| "learning_rate": 2.0776495805475125e-05, | |
| "loss": 2.187, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.8491939677587104, | |
| "grad_norm": 0.7809643149375916, | |
| "learning_rate": 2.0714171725233993e-05, | |
| "loss": 2.1516, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 1.8512740509620385, | |
| "grad_norm": 0.7957209944725037, | |
| "learning_rate": 2.0651875075318823e-05, | |
| "loss": 2.1706, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.8533541341653668, | |
| "grad_norm": 0.751357913017273, | |
| "learning_rate": 2.0589606254443066e-05, | |
| "loss": 2.1797, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 1.8554342173686948, | |
| "grad_norm": 0.7734564542770386, | |
| "learning_rate": 2.0527365661142074e-05, | |
| "loss": 2.1572, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.857514300572023, | |
| "grad_norm": 0.8376662135124207, | |
| "learning_rate": 2.046515369377052e-05, | |
| "loss": 2.1688, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 1.859594383775351, | |
| "grad_norm": 0.7141360640525818, | |
| "learning_rate": 2.04029707504999e-05, | |
| "loss": 2.1694, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.861674466978679, | |
| "grad_norm": 0.8017005920410156, | |
| "learning_rate": 2.0340817229315888e-05, | |
| "loss": 2.1641, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.8637545501820072, | |
| "grad_norm": 0.8669304847717285, | |
| "learning_rate": 2.02786935280159e-05, | |
| "loss": 2.1787, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.8658346333853353, | |
| "grad_norm": 0.8036349415779114, | |
| "learning_rate": 2.021660004420648e-05, | |
| "loss": 2.198, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 1.8679147165886636, | |
| "grad_norm": 0.7105252146720886, | |
| "learning_rate": 2.015453717530078e-05, | |
| "loss": 2.1836, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.8699947997919917, | |
| "grad_norm": 1.4521474838256836, | |
| "learning_rate": 2.0092505318515998e-05, | |
| "loss": 2.1859, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 1.8720748829953198, | |
| "grad_norm": 0.8009501099586487, | |
| "learning_rate": 2.003050487087086e-05, | |
| "loss": 2.1799, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.874154966198648, | |
| "grad_norm": 0.7517442107200623, | |
| "learning_rate": 1.9968536229183045e-05, | |
| "loss": 2.1736, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 1.8762350494019762, | |
| "grad_norm": 0.7923987507820129, | |
| "learning_rate": 1.9906599790066696e-05, | |
| "loss": 2.1947, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.8783151326053042, | |
| "grad_norm": 0.8453426957130432, | |
| "learning_rate": 1.9844695949929825e-05, | |
| "loss": 2.1611, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 1.8803952158086323, | |
| "grad_norm": 1.034262776374817, | |
| "learning_rate": 1.9782825104971815e-05, | |
| "loss": 2.1851, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.8824752990119604, | |
| "grad_norm": 0.7277210354804993, | |
| "learning_rate": 1.9720987651180886e-05, | |
| "loss": 2.142, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.8845553822152885, | |
| "grad_norm": 0.7940502166748047, | |
| "learning_rate": 1.9659183984331513e-05, | |
| "loss": 2.1891, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.8866354654186166, | |
| "grad_norm": 0.7815389633178711, | |
| "learning_rate": 1.959741449998195e-05, | |
| "loss": 2.1806, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 1.888715548621945, | |
| "grad_norm": 0.759210467338562, | |
| "learning_rate": 1.9535679593471665e-05, | |
| "loss": 2.1939, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.890795631825273, | |
| "grad_norm": 0.6869644522666931, | |
| "learning_rate": 1.9473979659918835e-05, | |
| "loss": 2.1908, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 1.8928757150286013, | |
| "grad_norm": 0.782366156578064, | |
| "learning_rate": 1.941231509421778e-05, | |
| "loss": 2.1411, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.8949557982319294, | |
| "grad_norm": 0.7898021340370178, | |
| "learning_rate": 1.935068629103649e-05, | |
| "loss": 2.1908, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 1.8970358814352575, | |
| "grad_norm": 0.6880964636802673, | |
| "learning_rate": 1.9289093644814015e-05, | |
| "loss": 2.1845, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.8991159646385856, | |
| "grad_norm": 0.6600199937820435, | |
| "learning_rate": 1.9227537549758037e-05, | |
| "loss": 2.1671, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 1.9011960478419136, | |
| "grad_norm": 0.7955625057220459, | |
| "learning_rate": 1.9166018399842277e-05, | |
| "loss": 2.1635, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.9032761310452417, | |
| "grad_norm": 0.9278563261032104, | |
| "learning_rate": 1.910453658880402e-05, | |
| "loss": 2.2131, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.9053562142485698, | |
| "grad_norm": 0.6783947348594666, | |
| "learning_rate": 1.904309251014156e-05, | |
| "loss": 2.1891, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.907436297451898, | |
| "grad_norm": 0.7580657005310059, | |
| "learning_rate": 1.8981686557111696e-05, | |
| "loss": 2.1862, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 1.9095163806552262, | |
| "grad_norm": 0.7643154263496399, | |
| "learning_rate": 1.892031912272719e-05, | |
| "loss": 2.1516, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.9115964638585543, | |
| "grad_norm": 0.7217808365821838, | |
| "learning_rate": 1.8858990599754326e-05, | |
| "loss": 2.1513, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 1.9136765470618826, | |
| "grad_norm": 0.8804484605789185, | |
| "learning_rate": 1.87977013807103e-05, | |
| "loss": 2.1691, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.9157566302652107, | |
| "grad_norm": 0.9107383489608765, | |
| "learning_rate": 1.8736451857860788e-05, | |
| "loss": 2.1448, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 1.9178367134685388, | |
| "grad_norm": 0.7312045097351074, | |
| "learning_rate": 1.8675242423217375e-05, | |
| "loss": 2.1895, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.9199167966718669, | |
| "grad_norm": 0.71600341796875, | |
| "learning_rate": 1.8614073468535094e-05, | |
| "loss": 2.1567, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 1.921996879875195, | |
| "grad_norm": 0.8909045457839966, | |
| "learning_rate": 1.855294538530986e-05, | |
| "loss": 2.178, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.924076963078523, | |
| "grad_norm": 0.7625308632850647, | |
| "learning_rate": 1.8491858564776043e-05, | |
| "loss": 2.1696, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.9261570462818511, | |
| "grad_norm": 0.7876774668693542, | |
| "learning_rate": 1.84308133979039e-05, | |
| "loss": 2.1827, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.9282371294851794, | |
| "grad_norm": 0.768500804901123, | |
| "learning_rate": 1.836981027539709e-05, | |
| "loss": 2.1463, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 1.9303172126885075, | |
| "grad_norm": 0.7600011229515076, | |
| "learning_rate": 1.8308849587690213e-05, | |
| "loss": 2.18, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.9323972958918356, | |
| "grad_norm": 0.8618823885917664, | |
| "learning_rate": 1.8247931724946223e-05, | |
| "loss": 2.1846, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 1.934477379095164, | |
| "grad_norm": 0.8330581784248352, | |
| "learning_rate": 1.818705707705402e-05, | |
| "loss": 2.1882, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.936557462298492, | |
| "grad_norm": 0.7751420140266418, | |
| "learning_rate": 1.81262260336259e-05, | |
| "loss": 2.1909, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 1.93863754550182, | |
| "grad_norm": 1.0599454641342163, | |
| "learning_rate": 1.8065438983995107e-05, | |
| "loss": 2.1608, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.9407176287051482, | |
| "grad_norm": 0.7828746438026428, | |
| "learning_rate": 1.8004696317213283e-05, | |
| "loss": 2.1639, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 1.9427977119084763, | |
| "grad_norm": 0.7516394257545471, | |
| "learning_rate": 1.7943998422048038e-05, | |
| "loss": 2.1625, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.9448777951118044, | |
| "grad_norm": 0.6707669496536255, | |
| "learning_rate": 1.7883345686980392e-05, | |
| "loss": 2.1772, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 1.9469578783151325, | |
| "grad_norm": 0.7721826434135437, | |
| "learning_rate": 1.782273850020238e-05, | |
| "loss": 2.1463, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.9490379615184608, | |
| "grad_norm": 0.6671859622001648, | |
| "learning_rate": 1.776217724961447e-05, | |
| "loss": 2.1573, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 1.9511180447217888, | |
| "grad_norm": 0.8338425159454346, | |
| "learning_rate": 1.7701662322823172e-05, | |
| "loss": 2.1525, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.9531981279251172, | |
| "grad_norm": 0.7207419276237488, | |
| "learning_rate": 1.7641194107138477e-05, | |
| "loss": 2.1281, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 1.9552782111284452, | |
| "grad_norm": 0.8658615350723267, | |
| "learning_rate": 1.7580772989571434e-05, | |
| "loss": 2.1775, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.9573582943317733, | |
| "grad_norm": 1.124961495399475, | |
| "learning_rate": 1.7520399356831636e-05, | |
| "loss": 2.1508, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 1.9594383775351014, | |
| "grad_norm": 0.7705496549606323, | |
| "learning_rate": 1.7460073595324776e-05, | |
| "loss": 2.1657, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.9615184607384295, | |
| "grad_norm": 0.80336594581604, | |
| "learning_rate": 1.7399796091150155e-05, | |
| "loss": 2.1883, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 1.9635985439417576, | |
| "grad_norm": 0.904358446598053, | |
| "learning_rate": 1.733956723009822e-05, | |
| "loss": 2.2069, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.9656786271450857, | |
| "grad_norm": 0.8221344947814941, | |
| "learning_rate": 1.7279387397648084e-05, | |
| "loss": 2.1593, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.967758710348414, | |
| "grad_norm": 0.6962605714797974, | |
| "learning_rate": 1.721925697896507e-05, | |
| "loss": 2.1805, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.969838793551742, | |
| "grad_norm": 0.7350068092346191, | |
| "learning_rate": 1.715917635889823e-05, | |
| "loss": 2.1726, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 1.9719188767550702, | |
| "grad_norm": 0.8693460822105408, | |
| "learning_rate": 1.7099145921977904e-05, | |
| "loss": 2.121, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.9739989599583985, | |
| "grad_norm": 0.9144797325134277, | |
| "learning_rate": 1.703916605241325e-05, | |
| "loss": 2.168, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 1.9760790431617266, | |
| "grad_norm": 0.7593632340431213, | |
| "learning_rate": 1.697923713408977e-05, | |
| "loss": 2.1659, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.9781591263650546, | |
| "grad_norm": 0.7203252911567688, | |
| "learning_rate": 1.6919359550566886e-05, | |
| "loss": 2.1566, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 1.9802392095683827, | |
| "grad_norm": 0.6899543404579163, | |
| "learning_rate": 1.6859533685075447e-05, | |
| "loss": 2.1703, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.9823192927717108, | |
| "grad_norm": 0.7112452387809753, | |
| "learning_rate": 1.6799759920515294e-05, | |
| "loss": 2.1541, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 1.984399375975039, | |
| "grad_norm": 0.7511719465255737, | |
| "learning_rate": 1.6740038639452822e-05, | |
| "loss": 2.1698, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.986479459178367, | |
| "grad_norm": 0.9203821420669556, | |
| "learning_rate": 1.668037022411851e-05, | |
| "loss": 2.1095, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 1.9885595423816953, | |
| "grad_norm": 0.7733403444290161, | |
| "learning_rate": 1.6620755056404485e-05, | |
| "loss": 2.191, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.9906396255850234, | |
| "grad_norm": 1.0877577066421509, | |
| "learning_rate": 1.6561193517862097e-05, | |
| "loss": 2.167, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 1.9927197087883517, | |
| "grad_norm": 0.7318416237831116, | |
| "learning_rate": 1.6501685989699405e-05, | |
| "loss": 2.1868, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.9947997919916798, | |
| "grad_norm": 0.7766571640968323, | |
| "learning_rate": 1.6442232852778843e-05, | |
| "loss": 2.1417, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 1.9968798751950079, | |
| "grad_norm": 0.8204246163368225, | |
| "learning_rate": 1.6382834487614694e-05, | |
| "loss": 2.0969, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.998959958398336, | |
| "grad_norm": 0.7147572040557861, | |
| "learning_rate": 1.632349127437072e-05, | |
| "loss": 2.1594, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 2.001040041601664, | |
| "grad_norm": 0.8010472655296326, | |
| "learning_rate": 1.6264203592857656e-05, | |
| "loss": 2.1423, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.003120124804992, | |
| "grad_norm": 0.7142998576164246, | |
| "learning_rate": 1.6204971822530858e-05, | |
| "loss": 2.1746, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 2.00520020800832, | |
| "grad_norm": 0.8411352634429932, | |
| "learning_rate": 1.614579634248781e-05, | |
| "loss": 2.1706, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.0072802912116483, | |
| "grad_norm": 0.8868602514266968, | |
| "learning_rate": 1.6086677531465747e-05, | |
| "loss": 2.1658, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 2.0093603744149764, | |
| "grad_norm": 0.6709080934524536, | |
| "learning_rate": 1.6027615767839195e-05, | |
| "loss": 2.1805, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.011440457618305, | |
| "grad_norm": 0.8166576027870178, | |
| "learning_rate": 1.596861142961756e-05, | |
| "loss": 2.1314, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 2.013520540821633, | |
| "grad_norm": 0.7754188179969788, | |
| "learning_rate": 1.590966489444273e-05, | |
| "loss": 2.154, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.015600624024961, | |
| "grad_norm": 0.7570802569389343, | |
| "learning_rate": 1.5850776539586627e-05, | |
| "loss": 2.1362, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 2.017680707228289, | |
| "grad_norm": 0.7763701677322388, | |
| "learning_rate": 1.579194674194879e-05, | |
| "loss": 2.141, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.0197607904316173, | |
| "grad_norm": 0.8779370784759521, | |
| "learning_rate": 1.573317587805401e-05, | |
| "loss": 2.1578, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 2.0218408736349454, | |
| "grad_norm": 0.6604492664337158, | |
| "learning_rate": 1.5674464324049864e-05, | |
| "loss": 2.1501, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.0239209568382734, | |
| "grad_norm": 0.8826847672462463, | |
| "learning_rate": 1.561581245570434e-05, | |
| "loss": 2.2047, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 2.0260010400416015, | |
| "grad_norm": 0.6858493685722351, | |
| "learning_rate": 1.5557220648403432e-05, | |
| "loss": 2.1161, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.0280811232449296, | |
| "grad_norm": 0.8504419326782227, | |
| "learning_rate": 1.5498689277148704e-05, | |
| "loss": 2.1241, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 2.030161206448258, | |
| "grad_norm": 0.857164740562439, | |
| "learning_rate": 1.544021871655494e-05, | |
| "loss": 2.141, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.0322412896515862, | |
| "grad_norm": 0.8137455582618713, | |
| "learning_rate": 1.53818093408477e-05, | |
| "loss": 2.1258, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 2.0343213728549143, | |
| "grad_norm": 0.7521177530288696, | |
| "learning_rate": 1.5323461523860977e-05, | |
| "loss": 2.1515, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.0364014560582424, | |
| "grad_norm": 0.8833959698677063, | |
| "learning_rate": 1.5265175639034736e-05, | |
| "loss": 2.1538, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 2.0384815392615705, | |
| "grad_norm": 0.8551697134971619, | |
| "learning_rate": 1.5206952059412604e-05, | |
| "loss": 2.1609, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.0405616224648986, | |
| "grad_norm": 0.9351072311401367, | |
| "learning_rate": 1.5148791157639386e-05, | |
| "loss": 2.1616, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 2.0426417056682267, | |
| "grad_norm": 0.7209817171096802, | |
| "learning_rate": 1.5090693305958779e-05, | |
| "loss": 2.1216, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.0447217888715548, | |
| "grad_norm": 0.7822704911231995, | |
| "learning_rate": 1.503265887621092e-05, | |
| "loss": 2.1041, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 2.046801872074883, | |
| "grad_norm": 0.7391334772109985, | |
| "learning_rate": 1.497468823983005e-05, | |
| "loss": 2.1683, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.048881955278211, | |
| "grad_norm": 0.8245681524276733, | |
| "learning_rate": 1.4916781767842103e-05, | |
| "loss": 2.1609, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 2.0509620384815395, | |
| "grad_norm": 0.7062039375305176, | |
| "learning_rate": 1.4858939830862347e-05, | |
| "loss": 2.1479, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.0530421216848675, | |
| "grad_norm": 0.9019640684127808, | |
| "learning_rate": 1.4801162799093004e-05, | |
| "loss": 2.141, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 2.0551222048881956, | |
| "grad_norm": 0.7071494460105896, | |
| "learning_rate": 1.4743451042320905e-05, | |
| "loss": 2.1558, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.0572022880915237, | |
| "grad_norm": 0.7502083778381348, | |
| "learning_rate": 1.4685804929915098e-05, | |
| "loss": 2.1755, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 2.059282371294852, | |
| "grad_norm": 0.8539254665374756, | |
| "learning_rate": 1.4628224830824478e-05, | |
| "loss": 2.1479, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.06136245449818, | |
| "grad_norm": 0.8018076419830322, | |
| "learning_rate": 1.4570711113575457e-05, | |
| "loss": 2.1345, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 2.063442537701508, | |
| "grad_norm": 0.7207697629928589, | |
| "learning_rate": 1.451326414626959e-05, | |
| "loss": 2.1527, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.065522620904836, | |
| "grad_norm": 0.8118232488632202, | |
| "learning_rate": 1.4455884296581185e-05, | |
| "loss": 2.1186, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 2.067602704108164, | |
| "grad_norm": 0.8361445665359497, | |
| "learning_rate": 1.4398571931755023e-05, | |
| "loss": 2.148, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.0696827873114922, | |
| "grad_norm": 0.9550055861473083, | |
| "learning_rate": 1.4341327418603931e-05, | |
| "loss": 2.1479, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 2.0717628705148208, | |
| "grad_norm": 0.8788464069366455, | |
| "learning_rate": 1.428415112350649e-05, | |
| "loss": 2.1226, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.073842953718149, | |
| "grad_norm": 0.7504823803901672, | |
| "learning_rate": 1.4227043412404669e-05, | |
| "loss": 2.1977, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 2.075923036921477, | |
| "grad_norm": 0.9366262555122375, | |
| "learning_rate": 1.4170004650801472e-05, | |
| "loss": 2.147, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.078003120124805, | |
| "grad_norm": 0.9149728417396545, | |
| "learning_rate": 1.4113035203758606e-05, | |
| "loss": 2.1187, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 2.080083203328133, | |
| "grad_norm": 0.7827330827713013, | |
| "learning_rate": 1.4056135435894163e-05, | |
| "loss": 2.1989, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.082163286531461, | |
| "grad_norm": 0.7674335837364197, | |
| "learning_rate": 1.3999305711380267e-05, | |
| "loss": 2.1156, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 2.0842433697347893, | |
| "grad_norm": 0.7534267902374268, | |
| "learning_rate": 1.3942546393940758e-05, | |
| "loss": 2.1608, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.0863234529381174, | |
| "grad_norm": 1.0169003009796143, | |
| "learning_rate": 1.3885857846848829e-05, | |
| "loss": 2.1333, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 2.0884035361414455, | |
| "grad_norm": 0.7536617517471313, | |
| "learning_rate": 1.3829240432924734e-05, | |
| "loss": 2.2094, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.090483619344774, | |
| "grad_norm": 0.8216843605041504, | |
| "learning_rate": 1.3772694514533464e-05, | |
| "loss": 2.1178, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 2.092563702548102, | |
| "grad_norm": 0.8296759724617004, | |
| "learning_rate": 1.371622045358244e-05, | |
| "loss": 2.1405, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.09464378575143, | |
| "grad_norm": 0.7530525326728821, | |
| "learning_rate": 1.3659818611519131e-05, | |
| "loss": 2.1602, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 2.0967238689547583, | |
| "grad_norm": 0.7887389659881592, | |
| "learning_rate": 1.360348934932883e-05, | |
| "loss": 2.1249, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.0988039521580864, | |
| "grad_norm": 0.8775638341903687, | |
| "learning_rate": 1.3547233027532291e-05, | |
| "loss": 2.1087, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 2.1008840353614144, | |
| "grad_norm": 0.7218084335327148, | |
| "learning_rate": 1.3491050006183425e-05, | |
| "loss": 2.1855, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.1029641185647425, | |
| "grad_norm": 0.7829450368881226, | |
| "learning_rate": 1.3434940644866994e-05, | |
| "loss": 2.1544, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 2.1050442017680706, | |
| "grad_norm": 0.9921479225158691, | |
| "learning_rate": 1.3378905302696338e-05, | |
| "loss": 2.1401, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.1071242849713987, | |
| "grad_norm": 0.7223407030105591, | |
| "learning_rate": 1.3322944338311056e-05, | |
| "loss": 2.1623, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 2.109204368174727, | |
| "grad_norm": 0.786841869354248, | |
| "learning_rate": 1.3267058109874683e-05, | |
| "loss": 2.127, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.1112844513780553, | |
| "grad_norm": 0.8238504528999329, | |
| "learning_rate": 1.3211246975072473e-05, | |
| "loss": 2.1636, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.1133645345813834, | |
| "grad_norm": 0.8730514645576477, | |
| "learning_rate": 1.3155511291109013e-05, | |
| "loss": 2.1809, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.1154446177847115, | |
| "grad_norm": 0.7120848894119263, | |
| "learning_rate": 1.3099851414706027e-05, | |
| "loss": 2.1496, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 2.1175247009880396, | |
| "grad_norm": 0.8902744650840759, | |
| "learning_rate": 1.304426770210002e-05, | |
| "loss": 2.1899, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.1196047841913677, | |
| "grad_norm": 0.7903064489364624, | |
| "learning_rate": 1.2988760509040058e-05, | |
| "loss": 2.1446, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 2.1216848673946958, | |
| "grad_norm": 0.8039788603782654, | |
| "learning_rate": 1.2933330190785444e-05, | |
| "loss": 2.1314, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.123764950598024, | |
| "grad_norm": 0.8553371429443359, | |
| "learning_rate": 1.28779771021035e-05, | |
| "loss": 2.1604, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 2.125845033801352, | |
| "grad_norm": 0.7643943428993225, | |
| "learning_rate": 1.2822701597267185e-05, | |
| "loss": 2.1628, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.12792511700468, | |
| "grad_norm": 0.8471932411193848, | |
| "learning_rate": 1.2767504030052973e-05, | |
| "loss": 2.1831, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 2.130005200208008, | |
| "grad_norm": 0.9007987380027771, | |
| "learning_rate": 1.2712384753738499e-05, | |
| "loss": 2.1761, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.1320852834113366, | |
| "grad_norm": 0.7271192073822021, | |
| "learning_rate": 1.2657344121100314e-05, | |
| "loss": 2.173, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.1341653666146647, | |
| "grad_norm": 0.761782705783844, | |
| "learning_rate": 1.260238248441163e-05, | |
| "loss": 2.1227, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.136245449817993, | |
| "grad_norm": 0.8150344491004944, | |
| "learning_rate": 1.2547500195440049e-05, | |
| "loss": 2.1529, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 2.138325533021321, | |
| "grad_norm": 0.7628713250160217, | |
| "learning_rate": 1.2492697605445361e-05, | |
| "loss": 2.1491, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.140405616224649, | |
| "grad_norm": 0.7385218739509583, | |
| "learning_rate": 1.2437975065177258e-05, | |
| "loss": 2.1381, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 2.142485699427977, | |
| "grad_norm": 0.8682646155357361, | |
| "learning_rate": 1.2383332924873062e-05, | |
| "loss": 2.151, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.144565782631305, | |
| "grad_norm": 0.9769818782806396, | |
| "learning_rate": 1.232877153425555e-05, | |
| "loss": 2.2263, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 2.1466458658346332, | |
| "grad_norm": 0.6793572902679443, | |
| "learning_rate": 1.2274291242530685e-05, | |
| "loss": 2.1157, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.1487259490379613, | |
| "grad_norm": 0.7732495069503784, | |
| "learning_rate": 1.2219892398385351e-05, | |
| "loss": 2.1697, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 2.15080603224129, | |
| "grad_norm": 0.8827196359634399, | |
| "learning_rate": 1.2165575349985151e-05, | |
| "loss": 2.1536, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.152886115444618, | |
| "grad_norm": 0.7778597474098206, | |
| "learning_rate": 1.2111340444972194e-05, | |
| "loss": 2.1682, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 2.154966198647946, | |
| "grad_norm": 0.7099940776824951, | |
| "learning_rate": 1.2057188030462851e-05, | |
| "loss": 2.1749, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.157046281851274, | |
| "grad_norm": 0.864341139793396, | |
| "learning_rate": 1.2003118453045512e-05, | |
| "loss": 2.1742, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 2.159126365054602, | |
| "grad_norm": 0.7807241082191467, | |
| "learning_rate": 1.194913205877842e-05, | |
| "loss": 2.1381, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.1612064482579303, | |
| "grad_norm": 0.8154935240745544, | |
| "learning_rate": 1.1895229193187387e-05, | |
| "loss": 2.1717, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 2.1632865314612584, | |
| "grad_norm": 0.745764970779419, | |
| "learning_rate": 1.184141020126367e-05, | |
| "loss": 2.1242, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.1653666146645865, | |
| "grad_norm": 0.8805513978004456, | |
| "learning_rate": 1.1787675427461664e-05, | |
| "loss": 2.1491, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 2.1674466978679146, | |
| "grad_norm": 0.7791693806648254, | |
| "learning_rate": 1.1734025215696784e-05, | |
| "loss": 2.1188, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.1695267810712426, | |
| "grad_norm": 0.902431309223175, | |
| "learning_rate": 1.1680459909343219e-05, | |
| "loss": 2.1735, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 2.171606864274571, | |
| "grad_norm": 0.728276789188385, | |
| "learning_rate": 1.1626979851231756e-05, | |
| "loss": 2.1625, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.1736869474778993, | |
| "grad_norm": 0.9273778796195984, | |
| "learning_rate": 1.157358538364752e-05, | |
| "loss": 2.126, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 2.1757670306812273, | |
| "grad_norm": 0.7193277478218079, | |
| "learning_rate": 1.1520276848327893e-05, | |
| "loss": 2.1437, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.1778471138845554, | |
| "grad_norm": 0.8120283484458923, | |
| "learning_rate": 1.1467054586460249e-05, | |
| "loss": 2.1432, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 2.1799271970878835, | |
| "grad_norm": 0.8220019340515137, | |
| "learning_rate": 1.1413918938679805e-05, | |
| "loss": 2.1332, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.1820072802912116, | |
| "grad_norm": 0.8082548379898071, | |
| "learning_rate": 1.13608702450674e-05, | |
| "loss": 2.1843, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 2.1840873634945397, | |
| "grad_norm": 0.8674708604812622, | |
| "learning_rate": 1.1307908845147358e-05, | |
| "loss": 2.1804, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.1861674466978678, | |
| "grad_norm": 0.7203066349029541, | |
| "learning_rate": 1.1255035077885307e-05, | |
| "loss": 2.1674, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 2.188247529901196, | |
| "grad_norm": 0.9036044478416443, | |
| "learning_rate": 1.1202249281686018e-05, | |
| "loss": 2.1792, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.1903276131045244, | |
| "grad_norm": 0.9539422988891602, | |
| "learning_rate": 1.1149551794391186e-05, | |
| "loss": 2.1372, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 2.1924076963078525, | |
| "grad_norm": 0.8710148930549622, | |
| "learning_rate": 1.1096942953277347e-05, | |
| "loss": 2.1819, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.1944877795111806, | |
| "grad_norm": 0.6914278864860535, | |
| "learning_rate": 1.1044423095053677e-05, | |
| "loss": 2.1202, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 2.1965678627145087, | |
| "grad_norm": 0.6613463163375854, | |
| "learning_rate": 1.0991992555859814e-05, | |
| "loss": 2.1623, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.1986479459178367, | |
| "grad_norm": 0.6368273496627808, | |
| "learning_rate": 1.0939651671263745e-05, | |
| "loss": 2.1809, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 2.200728029121165, | |
| "grad_norm": 0.7360798716545105, | |
| "learning_rate": 1.0887400776259655e-05, | |
| "loss": 2.1576, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.202808112324493, | |
| "grad_norm": 0.8459822535514832, | |
| "learning_rate": 1.0835240205265775e-05, | |
| "loss": 2.1608, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 2.204888195527821, | |
| "grad_norm": 0.686772882938385, | |
| "learning_rate": 1.0783170292122222e-05, | |
| "loss": 2.1849, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.206968278731149, | |
| "grad_norm": 0.797569751739502, | |
| "learning_rate": 1.0731191370088905e-05, | |
| "loss": 2.1912, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 2.209048361934477, | |
| "grad_norm": 0.7473933100700378, | |
| "learning_rate": 1.0679303771843343e-05, | |
| "loss": 2.1224, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.2111284451378057, | |
| "grad_norm": 0.7206450700759888, | |
| "learning_rate": 1.0627507829478595e-05, | |
| "loss": 2.1904, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 2.213208528341134, | |
| "grad_norm": 0.774996817111969, | |
| "learning_rate": 1.0575803874501053e-05, | |
| "loss": 2.157, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.215288611544462, | |
| "grad_norm": 0.880527138710022, | |
| "learning_rate": 1.0524192237828406e-05, | |
| "loss": 2.1832, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 2.21736869474779, | |
| "grad_norm": 0.8387317061424255, | |
| "learning_rate": 1.0472673249787477e-05, | |
| "loss": 2.1494, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.219448777951118, | |
| "grad_norm": 0.7557438611984253, | |
| "learning_rate": 1.0421247240112126e-05, | |
| "loss": 2.1711, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 2.221528861154446, | |
| "grad_norm": 0.8698292970657349, | |
| "learning_rate": 1.0369914537941076e-05, | |
| "loss": 2.1497, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.2236089443577742, | |
| "grad_norm": 0.7102160453796387, | |
| "learning_rate": 1.031867547181592e-05, | |
| "loss": 2.1101, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 2.2256890275611023, | |
| "grad_norm": 0.852909505367279, | |
| "learning_rate": 1.0267530369678929e-05, | |
| "loss": 2.1418, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.2277691107644304, | |
| "grad_norm": 0.871283233165741, | |
| "learning_rate": 1.0216479558871004e-05, | |
| "loss": 2.196, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 2.229849193967759, | |
| "grad_norm": 0.7697460055351257, | |
| "learning_rate": 1.0165523366129531e-05, | |
| "loss": 2.1592, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.231929277171087, | |
| "grad_norm": 0.8391153216362, | |
| "learning_rate": 1.0114662117586321e-05, | |
| "loss": 2.1135, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 2.234009360374415, | |
| "grad_norm": 0.739166259765625, | |
| "learning_rate": 1.0063896138765541e-05, | |
| "loss": 2.161, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.236089443577743, | |
| "grad_norm": 0.9063655138015747, | |
| "learning_rate": 1.0013225754581601e-05, | |
| "loss": 2.155, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 2.2381695267810713, | |
| "grad_norm": 0.9477112293243408, | |
| "learning_rate": 9.962651289337063e-06, | |
| "loss": 2.1512, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.2402496099843994, | |
| "grad_norm": 0.7602328062057495, | |
| "learning_rate": 9.91217306672061e-06, | |
| "loss": 2.1378, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 2.2423296931877275, | |
| "grad_norm": 0.9798824191093445, | |
| "learning_rate": 9.861791409804946e-06, | |
| "loss": 2.1466, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.2444097763910555, | |
| "grad_norm": 0.9144386053085327, | |
| "learning_rate": 9.811506641044715e-06, | |
| "loss": 2.1672, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 2.2464898595943836, | |
| "grad_norm": 0.7295798659324646, | |
| "learning_rate": 9.761319082274456e-06, | |
| "loss": 2.1446, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.2485699427977117, | |
| "grad_norm": 1.134780764579773, | |
| "learning_rate": 9.711229054706558e-06, | |
| "loss": 2.1593, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 2.25065002600104, | |
| "grad_norm": 0.7548600435256958, | |
| "learning_rate": 9.661236878929184e-06, | |
| "loss": 2.1742, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.2527301092043683, | |
| "grad_norm": 0.8188340663909912, | |
| "learning_rate": 9.611342874904194e-06, | |
| "loss": 2.1521, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 2.2548101924076964, | |
| "grad_norm": 0.7602256536483765, | |
| "learning_rate": 9.561547361965173e-06, | |
| "loss": 2.1661, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.2568902756110245, | |
| "grad_norm": 0.7601512670516968, | |
| "learning_rate": 9.511850658815285e-06, | |
| "loss": 2.1678, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 2.2589703588143526, | |
| "grad_norm": 0.7207624912261963, | |
| "learning_rate": 9.46225308352534e-06, | |
| "loss": 2.1191, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.2610504420176807, | |
| "grad_norm": 0.6282790899276733, | |
| "learning_rate": 9.412754953531663e-06, | |
| "loss": 2.1543, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 2.2631305252210088, | |
| "grad_norm": 0.7994824647903442, | |
| "learning_rate": 9.363356585634133e-06, | |
| "loss": 2.1642, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.265210608424337, | |
| "grad_norm": 0.7656511068344116, | |
| "learning_rate": 9.314058295994116e-06, | |
| "loss": 2.1403, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 2.267290691627665, | |
| "grad_norm": 0.8570970296859741, | |
| "learning_rate": 9.264860400132475e-06, | |
| "loss": 2.1625, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.2693707748309935, | |
| "grad_norm": 0.788067102432251, | |
| "learning_rate": 9.215763212927476e-06, | |
| "loss": 2.1396, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 2.2714508580343216, | |
| "grad_norm": 0.9106987714767456, | |
| "learning_rate": 9.166767048612872e-06, | |
| "loss": 2.1419, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.2735309412376496, | |
| "grad_norm": 0.7237809300422668, | |
| "learning_rate": 9.117872220775839e-06, | |
| "loss": 2.1413, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 2.2756110244409777, | |
| "grad_norm": 0.8435239791870117, | |
| "learning_rate": 9.069079042354975e-06, | |
| "loss": 2.1533, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.277691107644306, | |
| "grad_norm": 1.157760500907898, | |
| "learning_rate": 9.02038782563828e-06, | |
| "loss": 2.1467, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 2.279771190847634, | |
| "grad_norm": 0.8838199973106384, | |
| "learning_rate": 8.971798882261182e-06, | |
| "loss": 2.1843, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.281851274050962, | |
| "grad_norm": 0.7448551058769226, | |
| "learning_rate": 8.923312523204541e-06, | |
| "loss": 2.1238, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 2.28393135725429, | |
| "grad_norm": 0.8447023630142212, | |
| "learning_rate": 8.874929058792667e-06, | |
| "loss": 2.1628, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.286011440457618, | |
| "grad_norm": 0.7749513983726501, | |
| "learning_rate": 8.826648798691284e-06, | |
| "loss": 2.1367, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 2.2880915236609463, | |
| "grad_norm": 0.7845672965049744, | |
| "learning_rate": 8.778472051905609e-06, | |
| "loss": 2.1882, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.2901716068642743, | |
| "grad_norm": 0.7977719306945801, | |
| "learning_rate": 8.730399126778355e-06, | |
| "loss": 2.1734, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 2.292251690067603, | |
| "grad_norm": 0.8288791179656982, | |
| "learning_rate": 8.682430330987732e-06, | |
| "loss": 2.1488, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.294331773270931, | |
| "grad_norm": 0.7984702587127686, | |
| "learning_rate": 8.63456597154549e-06, | |
| "loss": 2.1762, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 2.296411856474259, | |
| "grad_norm": 0.88454270362854, | |
| "learning_rate": 8.586806354794997e-06, | |
| "loss": 2.1475, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.298491939677587, | |
| "grad_norm": 0.8620187044143677, | |
| "learning_rate": 8.539151786409223e-06, | |
| "loss": 2.1913, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 2.3005720228809152, | |
| "grad_norm": 1.0124173164367676, | |
| "learning_rate": 8.491602571388784e-06, | |
| "loss": 2.1124, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.3026521060842433, | |
| "grad_norm": 0.8168820738792419, | |
| "learning_rate": 8.444159014060051e-06, | |
| "loss": 2.1298, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 2.3047321892875714, | |
| "grad_norm": 0.8259533643722534, | |
| "learning_rate": 8.396821418073118e-06, | |
| "loss": 2.1422, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.3068122724908995, | |
| "grad_norm": 0.8243906497955322, | |
| "learning_rate": 8.349590086399934e-06, | |
| "loss": 2.1251, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 2.308892355694228, | |
| "grad_norm": 0.8051894903182983, | |
| "learning_rate": 8.302465321332306e-06, | |
| "loss": 2.1547, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.310972438897556, | |
| "grad_norm": 0.8569920063018799, | |
| "learning_rate": 8.255447424480007e-06, | |
| "loss": 2.107, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 2.313052522100884, | |
| "grad_norm": 0.9085841178894043, | |
| "learning_rate": 8.208536696768823e-06, | |
| "loss": 2.1564, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.3151326053042123, | |
| "grad_norm": 0.7567054033279419, | |
| "learning_rate": 8.161733438438643e-06, | |
| "loss": 2.1805, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 2.3172126885075404, | |
| "grad_norm": 1.1547774076461792, | |
| "learning_rate": 8.115037949041488e-06, | |
| "loss": 2.175, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.3192927717108685, | |
| "grad_norm": 0.7055802941322327, | |
| "learning_rate": 8.068450527439667e-06, | |
| "loss": 2.1464, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 2.3213728549141965, | |
| "grad_norm": 0.8332855105400085, | |
| "learning_rate": 8.02197147180382e-06, | |
| "loss": 2.138, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.3234529381175246, | |
| "grad_norm": 0.8682128190994263, | |
| "learning_rate": 7.975601079611036e-06, | |
| "loss": 2.1292, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 2.3255330213208527, | |
| "grad_norm": 0.8856552839279175, | |
| "learning_rate": 7.929339647642898e-06, | |
| "loss": 2.2076, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.327613104524181, | |
| "grad_norm": 0.9237544536590576, | |
| "learning_rate": 7.88318747198363e-06, | |
| "loss": 2.1674, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 2.329693187727509, | |
| "grad_norm": 0.8059023022651672, | |
| "learning_rate": 7.837144848018203e-06, | |
| "loss": 2.1626, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.3317732709308374, | |
| "grad_norm": 0.792470395565033, | |
| "learning_rate": 7.791212070430426e-06, | |
| "loss": 2.12, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 2.3338533541341655, | |
| "grad_norm": 0.8884658813476562, | |
| "learning_rate": 7.745389433201047e-06, | |
| "loss": 2.1297, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.3359334373374936, | |
| "grad_norm": 0.8813104033470154, | |
| "learning_rate": 7.699677229605914e-06, | |
| "loss": 2.1471, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 2.3380135205408217, | |
| "grad_norm": 0.746537983417511, | |
| "learning_rate": 7.654075752214065e-06, | |
| "loss": 2.1614, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.3400936037441498, | |
| "grad_norm": 1.192823886871338, | |
| "learning_rate": 7.608585292885862e-06, | |
| "loss": 2.1381, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 2.342173686947478, | |
| "grad_norm": 0.6864091157913208, | |
| "learning_rate": 7.563206142771106e-06, | |
| "loss": 2.1509, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.344253770150806, | |
| "grad_norm": 0.7745160460472107, | |
| "learning_rate": 7.517938592307225e-06, | |
| "loss": 2.1405, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 2.346333853354134, | |
| "grad_norm": 0.6769249439239502, | |
| "learning_rate": 7.472782931217373e-06, | |
| "loss": 2.1317, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.348413936557462, | |
| "grad_norm": 0.8352982997894287, | |
| "learning_rate": 7.427739448508566e-06, | |
| "loss": 2.1467, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 2.3504940197607906, | |
| "grad_norm": 0.9535309672355652, | |
| "learning_rate": 7.382808432469885e-06, | |
| "loss": 2.162, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.3525741029641187, | |
| "grad_norm": 0.8944627046585083, | |
| "learning_rate": 7.337990170670556e-06, | |
| "loss": 2.1402, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 2.354654186167447, | |
| "grad_norm": 0.7682763338088989, | |
| "learning_rate": 7.293284949958193e-06, | |
| "loss": 2.1346, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.356734269370775, | |
| "grad_norm": 0.6998905539512634, | |
| "learning_rate": 7.248693056456882e-06, | |
| "loss": 2.1127, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 2.358814352574103, | |
| "grad_norm": 0.8731864094734192, | |
| "learning_rate": 7.2042147755654185e-06, | |
| "loss": 2.1329, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.360894435777431, | |
| "grad_norm": 0.6891990900039673, | |
| "learning_rate": 7.159850391955441e-06, | |
| "loss": 2.1495, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 2.362974518980759, | |
| "grad_norm": 0.7515982389450073, | |
| "learning_rate": 7.11560018956961e-06, | |
| "loss": 2.1439, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.3650546021840873, | |
| "grad_norm": 0.7395572662353516, | |
| "learning_rate": 7.071464451619794e-06, | |
| "loss": 2.1448, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 2.3671346853874153, | |
| "grad_norm": 0.7845259308815002, | |
| "learning_rate": 7.027443460585278e-06, | |
| "loss": 2.1667, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.3692147685907434, | |
| "grad_norm": 0.8673036098480225, | |
| "learning_rate": 6.983537498210938e-06, | |
| "loss": 2.1466, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 2.371294851794072, | |
| "grad_norm": 0.8610036969184875, | |
| "learning_rate": 6.939746845505435e-06, | |
| "loss": 2.1419, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.3733749349974, | |
| "grad_norm": 0.7803443074226379, | |
| "learning_rate": 6.896071782739416e-06, | |
| "loss": 2.1728, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 2.375455018200728, | |
| "grad_norm": 0.8815957903862, | |
| "learning_rate": 6.852512589443719e-06, | |
| "loss": 2.1333, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.377535101404056, | |
| "grad_norm": 0.7982692718505859, | |
| "learning_rate": 6.8090695444076035e-06, | |
| "loss": 2.1605, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 2.3796151846073843, | |
| "grad_norm": 0.889788806438446, | |
| "learning_rate": 6.76574292567696e-06, | |
| "loss": 2.161, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.3816952678107124, | |
| "grad_norm": 0.7610637545585632, | |
| "learning_rate": 6.722533010552492e-06, | |
| "loss": 2.153, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 2.3837753510140405, | |
| "grad_norm": 0.876675009727478, | |
| "learning_rate": 6.679440075588001e-06, | |
| "loss": 2.1666, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.3858554342173686, | |
| "grad_norm": 0.8992761969566345, | |
| "learning_rate": 6.636464396588582e-06, | |
| "loss": 2.1821, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 2.3879355174206967, | |
| "grad_norm": 0.779009997844696, | |
| "learning_rate": 6.5936062486088495e-06, | |
| "loss": 2.1368, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.390015600624025, | |
| "grad_norm": 0.7470064163208008, | |
| "learning_rate": 6.550865905951198e-06, | |
| "loss": 2.1409, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 2.3920956838273533, | |
| "grad_norm": 0.7569335699081421, | |
| "learning_rate": 6.508243642164044e-06, | |
| "loss": 2.1626, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.3941757670306814, | |
| "grad_norm": 0.8971595764160156, | |
| "learning_rate": 6.465739730040082e-06, | |
| "loss": 2.2096, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 2.3962558502340094, | |
| "grad_norm": 0.7085235118865967, | |
| "learning_rate": 6.423354441614496e-06, | |
| "loss": 2.1093, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.3983359334373375, | |
| "grad_norm": 0.8055812120437622, | |
| "learning_rate": 6.381088048163286e-06, | |
| "loss": 2.1378, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 2.4004160166406656, | |
| "grad_norm": 0.743766725063324, | |
| "learning_rate": 6.338940820201464e-06, | |
| "loss": 2.1661, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.4024960998439937, | |
| "grad_norm": 0.7603965997695923, | |
| "learning_rate": 6.2969130274813796e-06, | |
| "loss": 2.1367, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 2.404576183047322, | |
| "grad_norm": 0.7308275103569031, | |
| "learning_rate": 6.255004938990949e-06, | |
| "loss": 2.1867, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.40665626625065, | |
| "grad_norm": 0.690366268157959, | |
| "learning_rate": 6.2132168229519646e-06, | |
| "loss": 2.1444, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 2.408736349453978, | |
| "grad_norm": 0.8872700929641724, | |
| "learning_rate": 6.17154894681837e-06, | |
| "loss": 2.1508, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.410816432657306, | |
| "grad_norm": 0.7977089285850525, | |
| "learning_rate": 6.1300015772745326e-06, | |
| "loss": 2.1729, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 2.4128965158606346, | |
| "grad_norm": 0.8401610851287842, | |
| "learning_rate": 6.088574980233546e-06, | |
| "loss": 2.1363, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.4149765990639627, | |
| "grad_norm": 0.8002891540527344, | |
| "learning_rate": 6.0472694208355465e-06, | |
| "loss": 2.1188, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 2.4170566822672908, | |
| "grad_norm": 0.7504671216011047, | |
| "learning_rate": 6.006085163445993e-06, | |
| "loss": 2.1852, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.419136765470619, | |
| "grad_norm": 0.7341333627700806, | |
| "learning_rate": 5.965022471653989e-06, | |
| "loss": 2.1733, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 2.421216848673947, | |
| "grad_norm": 0.7916009426116943, | |
| "learning_rate": 5.924081608270574e-06, | |
| "loss": 2.1516, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.423296931877275, | |
| "grad_norm": 0.7159552574157715, | |
| "learning_rate": 5.883262835327058e-06, | |
| "loss": 2.1762, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 2.425377015080603, | |
| "grad_norm": 0.8512945175170898, | |
| "learning_rate": 5.842566414073361e-06, | |
| "loss": 2.1495, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.427457098283931, | |
| "grad_norm": 0.8425369262695312, | |
| "learning_rate": 5.801992604976317e-06, | |
| "loss": 2.1454, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 2.4295371814872597, | |
| "grad_norm": 0.7385132312774658, | |
| "learning_rate": 5.76154166771799e-06, | |
| "loss": 2.1219, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.431617264690588, | |
| "grad_norm": 0.7568185925483704, | |
| "learning_rate": 5.721213861194066e-06, | |
| "loss": 2.1488, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 2.433697347893916, | |
| "grad_norm": 0.7648590803146362, | |
| "learning_rate": 5.681009443512156e-06, | |
| "loss": 2.1374, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.435777431097244, | |
| "grad_norm": 0.7592807412147522, | |
| "learning_rate": 5.640928671990139e-06, | |
| "loss": 2.1475, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 2.437857514300572, | |
| "grad_norm": 0.7430097460746765, | |
| "learning_rate": 5.600971803154534e-06, | |
| "loss": 2.1509, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.4399375975039, | |
| "grad_norm": 0.7668802738189697, | |
| "learning_rate": 5.561139092738865e-06, | |
| "loss": 2.1284, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 2.4420176807072282, | |
| "grad_norm": 1.1163736581802368, | |
| "learning_rate": 5.521430795682012e-06, | |
| "loss": 2.1686, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.4440977639105563, | |
| "grad_norm": 0.7597137093544006, | |
| "learning_rate": 5.481847166126555e-06, | |
| "loss": 2.1846, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 2.4461778471138844, | |
| "grad_norm": 0.7747199535369873, | |
| "learning_rate": 5.442388457417211e-06, | |
| "loss": 2.1709, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.4482579303172125, | |
| "grad_norm": 0.8134779334068298, | |
| "learning_rate": 5.403054922099132e-06, | |
| "loss": 2.1754, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 2.4503380135205406, | |
| "grad_norm": 0.8847607970237732, | |
| "learning_rate": 5.3638468119163675e-06, | |
| "loss": 2.1396, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.452418096723869, | |
| "grad_norm": 0.8032739758491516, | |
| "learning_rate": 5.324764377810187e-06, | |
| "loss": 2.1771, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 2.454498179927197, | |
| "grad_norm": 0.768173336982727, | |
| "learning_rate": 5.285807869917522e-06, | |
| "loss": 2.1521, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.4565782631305253, | |
| "grad_norm": 0.9260585904121399, | |
| "learning_rate": 5.246977537569345e-06, | |
| "loss": 2.125, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 2.4586583463338534, | |
| "grad_norm": 0.7803197503089905, | |
| "learning_rate": 5.208273629289065e-06, | |
| "loss": 2.1739, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.4607384295371815, | |
| "grad_norm": 0.7892303466796875, | |
| "learning_rate": 5.169696392790946e-06, | |
| "loss": 2.1638, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 2.4628185127405096, | |
| "grad_norm": 0.719467043876648, | |
| "learning_rate": 5.13124607497853e-06, | |
| "loss": 2.1767, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.4648985959438376, | |
| "grad_norm": 0.8412065505981445, | |
| "learning_rate": 5.0929229219430556e-06, | |
| "loss": 2.1636, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 2.4669786791471657, | |
| "grad_norm": 0.8725780844688416, | |
| "learning_rate": 5.054727178961854e-06, | |
| "loss": 2.1908, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.4690587623504943, | |
| "grad_norm": 0.9785857796669006, | |
| "learning_rate": 5.016659090496833e-06, | |
| "loss": 2.1268, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 2.4711388455538223, | |
| "grad_norm": 0.7336744070053101, | |
| "learning_rate": 4.978718900192841e-06, | |
| "loss": 2.1662, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.4732189287571504, | |
| "grad_norm": 0.819547176361084, | |
| "learning_rate": 4.940906850876184e-06, | |
| "loss": 2.136, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 2.4752990119604785, | |
| "grad_norm": 0.8629989624023438, | |
| "learning_rate": 4.903223184553027e-06, | |
| "loss": 2.1465, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.4773790951638066, | |
| "grad_norm": 0.9366332292556763, | |
| "learning_rate": 4.865668142407828e-06, | |
| "loss": 2.1505, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 2.4794591783671347, | |
| "grad_norm": 0.8467020988464355, | |
| "learning_rate": 4.828241964801847e-06, | |
| "loss": 2.1357, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.481539261570463, | |
| "grad_norm": 0.9429585337638855, | |
| "learning_rate": 4.790944891271581e-06, | |
| "loss": 2.1352, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 2.483619344773791, | |
| "grad_norm": 0.7886870503425598, | |
| "learning_rate": 4.753777160527215e-06, | |
| "loss": 2.1286, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.485699427977119, | |
| "grad_norm": 0.9287928342819214, | |
| "learning_rate": 4.716739010451102e-06, | |
| "loss": 2.1432, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 2.487779511180447, | |
| "grad_norm": 0.7071095705032349, | |
| "learning_rate": 4.679830678096272e-06, | |
| "loss": 2.1476, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.489859594383775, | |
| "grad_norm": 0.7225685715675354, | |
| "learning_rate": 4.643052399684886e-06, | |
| "loss": 2.1121, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 2.4919396775871037, | |
| "grad_norm": 0.7947995066642761, | |
| "learning_rate": 4.6064044106067045e-06, | |
| "loss": 2.1618, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.4940197607904318, | |
| "grad_norm": 0.7835599780082703, | |
| "learning_rate": 4.569886945417639e-06, | |
| "loss": 2.1221, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 2.49609984399376, | |
| "grad_norm": 0.7121095061302185, | |
| "learning_rate": 4.533500237838187e-06, | |
| "loss": 2.1202, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.498179927197088, | |
| "grad_norm": 0.9667412042617798, | |
| "learning_rate": 4.4972445207519895e-06, | |
| "loss": 2.1949, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 2.500260010400416, | |
| "grad_norm": 0.7887002825737, | |
| "learning_rate": 4.461120026204299e-06, | |
| "loss": 2.1333, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.502340093603744, | |
| "grad_norm": 0.7729286551475525, | |
| "learning_rate": 4.425126985400521e-06, | |
| "loss": 2.1112, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 2.504420176807072, | |
| "grad_norm": 0.8352200388908386, | |
| "learning_rate": 4.389265628704734e-06, | |
| "loss": 2.1195, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.5065002600104003, | |
| "grad_norm": 0.7960278987884521, | |
| "learning_rate": 4.353536185638188e-06, | |
| "loss": 2.1827, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 2.508580343213729, | |
| "grad_norm": 0.8067042827606201, | |
| "learning_rate": 4.317938884877862e-06, | |
| "loss": 2.1796, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.510660426417057, | |
| "grad_norm": 0.7853888273239136, | |
| "learning_rate": 4.282473954255e-06, | |
| "loss": 2.1338, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 2.512740509620385, | |
| "grad_norm": 0.7720993161201477, | |
| "learning_rate": 4.247141620753642e-06, | |
| "loss": 2.1328, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.514820592823713, | |
| "grad_norm": 0.7566680908203125, | |
| "learning_rate": 4.211942110509165e-06, | |
| "loss": 2.1656, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 2.516900676027041, | |
| "grad_norm": 0.7984460592269897, | |
| "learning_rate": 4.17687564880686e-06, | |
| "loss": 2.1554, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.5189807592303692, | |
| "grad_norm": 0.7508406639099121, | |
| "learning_rate": 4.141942460080461e-06, | |
| "loss": 2.1899, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 2.5210608424336973, | |
| "grad_norm": 0.8357878923416138, | |
| "learning_rate": 4.107142767910741e-06, | |
| "loss": 2.1807, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.5231409256370254, | |
| "grad_norm": 0.7380987405776978, | |
| "learning_rate": 4.0724767950240415e-06, | |
| "loss": 2.1567, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 2.5252210088403535, | |
| "grad_norm": 0.8467985391616821, | |
| "learning_rate": 4.037944763290879e-06, | |
| "loss": 2.1115, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.5273010920436816, | |
| "grad_norm": 0.7928602695465088, | |
| "learning_rate": 4.0035468937245245e-06, | |
| "loss": 2.1322, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 2.5293811752470097, | |
| "grad_norm": 0.9464954733848572, | |
| "learning_rate": 3.9692834064795735e-06, | |
| "loss": 2.1607, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.5314612584503378, | |
| "grad_norm": 0.7425093054771423, | |
| "learning_rate": 3.935154520850529e-06, | |
| "loss": 2.1538, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 2.5335413416536663, | |
| "grad_norm": 0.8105461001396179, | |
| "learning_rate": 3.901160455270416e-06, | |
| "loss": 2.1784, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.5356214248569944, | |
| "grad_norm": 0.8401845097541809, | |
| "learning_rate": 3.8673014273093945e-06, | |
| "loss": 2.1461, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 2.5377015080603225, | |
| "grad_norm": 0.9266782402992249, | |
| "learning_rate": 3.833577653673346e-06, | |
| "loss": 2.1359, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.5397815912636506, | |
| "grad_norm": 0.754927933216095, | |
| "learning_rate": 3.7999893502024707e-06, | |
| "loss": 2.0896, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 2.5418616744669786, | |
| "grad_norm": 0.7852912545204163, | |
| "learning_rate": 3.7665367318699602e-06, | |
| "loss": 2.1487, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.5439417576703067, | |
| "grad_norm": 0.8186183571815491, | |
| "learning_rate": 3.7332200127805585e-06, | |
| "loss": 2.1314, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 2.546021840873635, | |
| "grad_norm": 0.9804696440696716, | |
| "learning_rate": 3.700039406169248e-06, | |
| "loss": 2.1519, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.5481019240769633, | |
| "grad_norm": 0.7977464199066162, | |
| "learning_rate": 3.666995124399836e-06, | |
| "loss": 2.1604, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 2.5501820072802914, | |
| "grad_norm": 0.802650511264801, | |
| "learning_rate": 3.63408737896363e-06, | |
| "loss": 2.1328, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.5522620904836195, | |
| "grad_norm": 0.8455849289894104, | |
| "learning_rate": 3.6013163804780843e-06, | |
| "loss": 2.1521, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 2.5543421736869476, | |
| "grad_norm": 0.8368895649909973, | |
| "learning_rate": 3.568682338685414e-06, | |
| "loss": 2.1753, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.5564222568902757, | |
| "grad_norm": 0.7585736513137817, | |
| "learning_rate": 3.5361854624512912e-06, | |
| "loss": 2.1695, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 2.5585023400936038, | |
| "grad_norm": 0.8306118845939636, | |
| "learning_rate": 3.503825959763496e-06, | |
| "loss": 2.1526, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.560582423296932, | |
| "grad_norm": 0.9081049561500549, | |
| "learning_rate": 3.4716040377305944e-06, | |
| "loss": 2.1503, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 2.56266250650026, | |
| "grad_norm": 0.8425130844116211, | |
| "learning_rate": 3.439519902580582e-06, | |
| "loss": 2.1075, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.564742589703588, | |
| "grad_norm": 0.8468633890151978, | |
| "learning_rate": 3.4075737596596074e-06, | |
| "loss": 2.1245, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 2.566822672906916, | |
| "grad_norm": 1.2079002857208252, | |
| "learning_rate": 3.375765813430612e-06, | |
| "loss": 2.1524, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.568902756110244, | |
| "grad_norm": 0.7111079096794128, | |
| "learning_rate": 3.3440962674720743e-06, | |
| "loss": 2.1511, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 2.5709828393135723, | |
| "grad_norm": 1.0822231769561768, | |
| "learning_rate": 3.312565324476649e-06, | |
| "loss": 2.1155, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.573062922516901, | |
| "grad_norm": 0.8851024508476257, | |
| "learning_rate": 3.2811731862499166e-06, | |
| "loss": 2.1426, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 2.575143005720229, | |
| "grad_norm": 0.7527185678482056, | |
| "learning_rate": 3.249920053709074e-06, | |
| "loss": 2.1514, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.577223088923557, | |
| "grad_norm": 0.7749047875404358, | |
| "learning_rate": 3.218806126881643e-06, | |
| "loss": 2.1163, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 2.579303172126885, | |
| "grad_norm": 0.6856330037117004, | |
| "learning_rate": 3.1878316049041984e-06, | |
| "loss": 2.1971, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.581383255330213, | |
| "grad_norm": 0.8586908578872681, | |
| "learning_rate": 3.156996686021077e-06, | |
| "loss": 2.1033, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 2.5834633385335413, | |
| "grad_norm": 0.7209339141845703, | |
| "learning_rate": 3.1263015675831427e-06, | |
| "loss": 2.1671, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.5855434217368694, | |
| "grad_norm": 0.8630194664001465, | |
| "learning_rate": 3.095746446046499e-06, | |
| "loss": 2.1427, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 2.587623504940198, | |
| "grad_norm": 0.727620005607605, | |
| "learning_rate": 3.0653315169712203e-06, | |
| "loss": 2.1219, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.589703588143526, | |
| "grad_norm": 0.7987605333328247, | |
| "learning_rate": 3.0350569750201368e-06, | |
| "loss": 2.1473, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 2.591783671346854, | |
| "grad_norm": 0.8267928957939148, | |
| "learning_rate": 3.00492301395755e-06, | |
| "loss": 2.1306, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.593863754550182, | |
| "grad_norm": 0.7957183718681335, | |
| "learning_rate": 2.9749298266480264e-06, | |
| "loss": 2.105, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 2.5959438377535102, | |
| "grad_norm": 0.7759562730789185, | |
| "learning_rate": 2.945077605055127e-06, | |
| "loss": 2.1411, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.5980239209568383, | |
| "grad_norm": 0.8441088795661926, | |
| "learning_rate": 2.9153665402402137e-06, | |
| "loss": 2.126, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 2.6001040041601664, | |
| "grad_norm": 0.798534631729126, | |
| "learning_rate": 2.8857968223612143e-06, | |
| "loss": 2.1506, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.6021840873634945, | |
| "grad_norm": 0.8515554070472717, | |
| "learning_rate": 2.8563686406713863e-06, | |
| "loss": 2.1549, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 2.6042641705668226, | |
| "grad_norm": 1.0475796461105347, | |
| "learning_rate": 2.8270821835181316e-06, | |
| "loss": 2.1054, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.6063442537701507, | |
| "grad_norm": 0.8114381432533264, | |
| "learning_rate": 2.7979376383417798e-06, | |
| "loss": 2.1381, | |
| "step": 6265 | |
| }, | |
| { | |
| "epoch": 2.6084243369734788, | |
| "grad_norm": 0.7577459216117859, | |
| "learning_rate": 2.768935191674396e-06, | |
| "loss": 2.1364, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.610504420176807, | |
| "grad_norm": 0.7933220267295837, | |
| "learning_rate": 2.7400750291385697e-06, | |
| "loss": 2.1323, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 2.6125845033801354, | |
| "grad_norm": 0.7705368399620056, | |
| "learning_rate": 2.711357335446246e-06, | |
| "loss": 2.1612, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.6146645865834635, | |
| "grad_norm": 0.8709501028060913, | |
| "learning_rate": 2.682782294397529e-06, | |
| "loss": 2.1322, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 2.6167446697867915, | |
| "grad_norm": 0.9047673344612122, | |
| "learning_rate": 2.654350088879523e-06, | |
| "loss": 2.1423, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.6188247529901196, | |
| "grad_norm": 0.8506908416748047, | |
| "learning_rate": 2.626060900865132e-06, | |
| "loss": 2.1844, | |
| "step": 6295 | |
| }, | |
| { | |
| "epoch": 2.6209048361934477, | |
| "grad_norm": 0.7736124992370605, | |
| "learning_rate": 2.5979149114119334e-06, | |
| "loss": 2.1991, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.622984919396776, | |
| "grad_norm": 0.8622713088989258, | |
| "learning_rate": 2.569912300660987e-06, | |
| "loss": 2.1664, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 2.625065002600104, | |
| "grad_norm": 0.821495771408081, | |
| "learning_rate": 2.54205324783571e-06, | |
| "loss": 2.1621, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.627145085803432, | |
| "grad_norm": 0.7039251923561096, | |
| "learning_rate": 2.5143379312406847e-06, | |
| "loss": 2.1118, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 2.6292251690067605, | |
| "grad_norm": 1.277297019958496, | |
| "learning_rate": 2.4867665282605755e-06, | |
| "loss": 2.1417, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.6313052522100886, | |
| "grad_norm": 0.711395263671875, | |
| "learning_rate": 2.459339215358955e-06, | |
| "loss": 2.0946, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 2.6333853354134167, | |
| "grad_norm": 0.7947670817375183, | |
| "learning_rate": 2.4320561680771874e-06, | |
| "loss": 2.1806, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.6354654186167448, | |
| "grad_norm": 0.7751787900924683, | |
| "learning_rate": 2.4049175610332957e-06, | |
| "loss": 2.1688, | |
| "step": 6335 | |
| }, | |
| { | |
| "epoch": 2.637545501820073, | |
| "grad_norm": 0.978768527507782, | |
| "learning_rate": 2.377923567920862e-06, | |
| "loss": 2.1478, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.639625585023401, | |
| "grad_norm": 0.7635921239852905, | |
| "learning_rate": 2.351074361507888e-06, | |
| "loss": 2.1733, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 2.641705668226729, | |
| "grad_norm": 0.7769209146499634, | |
| "learning_rate": 2.3243701136357266e-06, | |
| "loss": 2.1009, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.643785751430057, | |
| "grad_norm": 0.9034749865531921, | |
| "learning_rate": 2.2978109952179416e-06, | |
| "loss": 2.1271, | |
| "step": 6355 | |
| }, | |
| { | |
| "epoch": 2.645865834633385, | |
| "grad_norm": 0.9482102990150452, | |
| "learning_rate": 2.2713971762392456e-06, | |
| "loss": 2.1541, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.6479459178367133, | |
| "grad_norm": 0.8525805473327637, | |
| "learning_rate": 2.245128825754406e-06, | |
| "loss": 2.1331, | |
| "step": 6365 | |
| }, | |
| { | |
| "epoch": 2.6500260010400414, | |
| "grad_norm": 0.7677263021469116, | |
| "learning_rate": 2.2190061118871396e-06, | |
| "loss": 2.177, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.6521060842433695, | |
| "grad_norm": 0.7640902400016785, | |
| "learning_rate": 2.193029201829061e-06, | |
| "loss": 2.1332, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 2.654186167446698, | |
| "grad_norm": 0.9157296419143677, | |
| "learning_rate": 2.1671982618386098e-06, | |
| "loss": 2.126, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.656266250650026, | |
| "grad_norm": 0.8202362656593323, | |
| "learning_rate": 2.1415134572399824e-06, | |
| "loss": 2.1443, | |
| "step": 6385 | |
| }, | |
| { | |
| "epoch": 2.658346333853354, | |
| "grad_norm": 0.881324052810669, | |
| "learning_rate": 2.115974952422067e-06, | |
| "loss": 2.1435, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.6604264170566823, | |
| "grad_norm": 0.7640511393547058, | |
| "learning_rate": 2.0905829108374077e-06, | |
| "loss": 2.1499, | |
| "step": 6395 | |
| }, | |
| { | |
| "epoch": 2.6625065002600103, | |
| "grad_norm": 0.6942290663719177, | |
| "learning_rate": 2.065337495001135e-06, | |
| "loss": 2.1699, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.6645865834633384, | |
| "grad_norm": 0.7413431406021118, | |
| "learning_rate": 2.0402388664899574e-06, | |
| "loss": 2.1568, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.7595916986465454, | |
| "learning_rate": 2.015287185941089e-06, | |
| "loss": 2.1116, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.668746749869995, | |
| "grad_norm": 0.7836700081825256, | |
| "learning_rate": 1.9904826130512618e-06, | |
| "loss": 2.1544, | |
| "step": 6415 | |
| }, | |
| { | |
| "epoch": 2.670826833073323, | |
| "grad_norm": 0.8121596574783325, | |
| "learning_rate": 1.9658253065756694e-06, | |
| "loss": 2.1396, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.6729069162766512, | |
| "grad_norm": 0.7615113854408264, | |
| "learning_rate": 1.941315424326984e-06, | |
| "loss": 2.1623, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 2.6749869994799793, | |
| "grad_norm": 0.6900429725646973, | |
| "learning_rate": 1.9169531231742892e-06, | |
| "loss": 2.1518, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.6770670826833074, | |
| "grad_norm": 0.7657844424247742, | |
| "learning_rate": 1.8927385590421565e-06, | |
| "loss": 2.1401, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 2.6791471658866355, | |
| "grad_norm": 0.7979406714439392, | |
| "learning_rate": 1.8686718869095815e-06, | |
| "loss": 2.1908, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.6812272490899636, | |
| "grad_norm": 0.6649172306060791, | |
| "learning_rate": 1.8447532608090261e-06, | |
| "loss": 2.1519, | |
| "step": 6445 | |
| }, | |
| { | |
| "epoch": 2.6833073322932917, | |
| "grad_norm": 0.7565729022026062, | |
| "learning_rate": 1.8209828338254132e-06, | |
| "loss": 2.1739, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.6853874154966197, | |
| "grad_norm": 0.8752434849739075, | |
| "learning_rate": 1.797360758095165e-06, | |
| "loss": 2.11, | |
| "step": 6455 | |
| }, | |
| { | |
| "epoch": 2.687467498699948, | |
| "grad_norm": 0.6268664598464966, | |
| "learning_rate": 1.7738871848052092e-06, | |
| "loss": 2.1259, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.689547581903276, | |
| "grad_norm": 0.7275916337966919, | |
| "learning_rate": 1.750562264192035e-06, | |
| "loss": 2.1678, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 2.691627665106604, | |
| "grad_norm": 0.9467329382896423, | |
| "learning_rate": 1.7273861455407075e-06, | |
| "loss": 2.1566, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.6937077483099325, | |
| "grad_norm": 0.7651445865631104, | |
| "learning_rate": 1.7043589771839314e-06, | |
| "loss": 2.1423, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 2.6957878315132606, | |
| "grad_norm": 0.7837608456611633, | |
| "learning_rate": 1.6814809065010927e-06, | |
| "loss": 2.1355, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.6978679147165887, | |
| "grad_norm": 0.7050347328186035, | |
| "learning_rate": 1.6587520799173168e-06, | |
| "loss": 2.1632, | |
| "step": 6485 | |
| }, | |
| { | |
| "epoch": 2.699947997919917, | |
| "grad_norm": 0.8150919079780579, | |
| "learning_rate": 1.6361726429025227e-06, | |
| "loss": 2.1455, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.702028081123245, | |
| "grad_norm": 0.7656033039093018, | |
| "learning_rate": 1.6137427399705113e-06, | |
| "loss": 2.1248, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 2.704108164326573, | |
| "grad_norm": 0.8316398859024048, | |
| "learning_rate": 1.5914625146780299e-06, | |
| "loss": 2.1594, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.706188247529901, | |
| "grad_norm": 0.7980731725692749, | |
| "learning_rate": 1.569332109623839e-06, | |
| "loss": 2.1578, | |
| "step": 6505 | |
| }, | |
| { | |
| "epoch": 2.7082683307332296, | |
| "grad_norm": 0.8120521903038025, | |
| "learning_rate": 1.5473516664478354e-06, | |
| "loss": 2.1358, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.7103484139365577, | |
| "grad_norm": 0.6915965676307678, | |
| "learning_rate": 1.5255213258301037e-06, | |
| "loss": 2.1685, | |
| "step": 6515 | |
| }, | |
| { | |
| "epoch": 2.7124284971398858, | |
| "grad_norm": 0.8055347800254822, | |
| "learning_rate": 1.5038412274900493e-06, | |
| "loss": 2.1332, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.714508580343214, | |
| "grad_norm": 1.0255202054977417, | |
| "learning_rate": 1.4823115101854829e-06, | |
| "loss": 2.1319, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 2.716588663546542, | |
| "grad_norm": 0.7742552757263184, | |
| "learning_rate": 1.4609323117117434e-06, | |
| "loss": 2.1698, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.71866874674987, | |
| "grad_norm": 0.9451241493225098, | |
| "learning_rate": 1.4397037689008186e-06, | |
| "loss": 2.133, | |
| "step": 6535 | |
| }, | |
| { | |
| "epoch": 2.720748829953198, | |
| "grad_norm": 0.8512309193611145, | |
| "learning_rate": 1.4186260176204668e-06, | |
| "loss": 2.1614, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.722828913156526, | |
| "grad_norm": 0.7536253929138184, | |
| "learning_rate": 1.397699192773319e-06, | |
| "loss": 2.0921, | |
| "step": 6545 | |
| }, | |
| { | |
| "epoch": 2.7249089963598543, | |
| "grad_norm": 0.8096660375595093, | |
| "learning_rate": 1.3769234282960702e-06, | |
| "loss": 2.1722, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.7269890795631824, | |
| "grad_norm": 0.7829006910324097, | |
| "learning_rate": 1.3562988571585777e-06, | |
| "loss": 2.1508, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 2.7290691627665105, | |
| "grad_norm": 0.7823526263237, | |
| "learning_rate": 1.3358256113630369e-06, | |
| "loss": 2.1571, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.7311492459698385, | |
| "grad_norm": 0.6584709882736206, | |
| "learning_rate": 1.3155038219431065e-06, | |
| "loss": 2.1743, | |
| "step": 6565 | |
| }, | |
| { | |
| "epoch": 2.733229329173167, | |
| "grad_norm": 0.7424419522285461, | |
| "learning_rate": 1.2953336189631098e-06, | |
| "loss": 2.1726, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.735309412376495, | |
| "grad_norm": 1.0195063352584839, | |
| "learning_rate": 1.2753151315171602e-06, | |
| "loss": 2.1497, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 2.7373894955798233, | |
| "grad_norm": 0.7543803453445435, | |
| "learning_rate": 1.2554484877283724e-06, | |
| "loss": 2.1554, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.7394695787831513, | |
| "grad_norm": 0.7631281614303589, | |
| "learning_rate": 1.2357338147480107e-06, | |
| "loss": 2.1328, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 2.7415496619864794, | |
| "grad_norm": 0.7183612585067749, | |
| "learning_rate": 1.2161712387547014e-06, | |
| "loss": 2.1774, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.7436297451898075, | |
| "grad_norm": 0.7182380557060242, | |
| "learning_rate": 1.1967608849536127e-06, | |
| "loss": 2.2034, | |
| "step": 6595 | |
| }, | |
| { | |
| "epoch": 2.7457098283931356, | |
| "grad_norm": 0.9577491283416748, | |
| "learning_rate": 1.177502877575648e-06, | |
| "loss": 2.2012, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.747789911596464, | |
| "grad_norm": 0.7523165941238403, | |
| "learning_rate": 1.1583973398766573e-06, | |
| "loss": 2.1192, | |
| "step": 6605 | |
| }, | |
| { | |
| "epoch": 2.749869994799792, | |
| "grad_norm": 0.9832723140716553, | |
| "learning_rate": 1.1394443941366518e-06, | |
| "loss": 2.1164, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.7519500780031203, | |
| "grad_norm": 0.6826837658882141, | |
| "learning_rate": 1.1206441616590235e-06, | |
| "loss": 2.1478, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 2.7540301612064484, | |
| "grad_norm": 0.8534629344940186, | |
| "learning_rate": 1.1019967627697498e-06, | |
| "loss": 2.1572, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.7561102444097765, | |
| "grad_norm": 0.7205258011817932, | |
| "learning_rate": 1.0835023168166452e-06, | |
| "loss": 2.1686, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 2.7581903276131046, | |
| "grad_norm": 0.8041960597038269, | |
| "learning_rate": 1.065160942168586e-06, | |
| "loss": 2.1548, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.7602704108164327, | |
| "grad_norm": 0.8386279940605164, | |
| "learning_rate": 1.046972756214762e-06, | |
| "loss": 2.1106, | |
| "step": 6635 | |
| }, | |
| { | |
| "epoch": 2.7623504940197607, | |
| "grad_norm": 0.8722918629646301, | |
| "learning_rate": 1.0289378753639055e-06, | |
| "loss": 2.1529, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.764430577223089, | |
| "grad_norm": 0.7601779103279114, | |
| "learning_rate": 1.0110564150435709e-06, | |
| "loss": 2.1476, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 2.766510660426417, | |
| "grad_norm": 0.7956823706626892, | |
| "learning_rate": 9.93328489699377e-07, | |
| "loss": 2.1767, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.768590743629745, | |
| "grad_norm": 0.7255099415779114, | |
| "learning_rate": 9.757542127942998e-07, | |
| "loss": 2.1429, | |
| "step": 6655 | |
| }, | |
| { | |
| "epoch": 2.770670826833073, | |
| "grad_norm": 0.9191991686820984, | |
| "learning_rate": 9.583336968078948e-07, | |
| "loss": 2.1431, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.7727509100364016, | |
| "grad_norm": 0.7821683287620544, | |
| "learning_rate": 9.410670532356419e-07, | |
| "loss": 2.0939, | |
| "step": 6665 | |
| }, | |
| { | |
| "epoch": 2.7748309932397297, | |
| "grad_norm": 0.7056489586830139, | |
| "learning_rate": 9.23954392588186e-07, | |
| "loss": 2.1588, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.776911076443058, | |
| "grad_norm": 0.6995697021484375, | |
| "learning_rate": 9.069958243906524e-07, | |
| "loss": 2.1238, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 2.778991159646386, | |
| "grad_norm": 0.7750363945960999, | |
| "learning_rate": 8.901914571819298e-07, | |
| "loss": 2.156, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.781071242849714, | |
| "grad_norm": 0.8692283034324646, | |
| "learning_rate": 8.735413985139884e-07, | |
| "loss": 2.1586, | |
| "step": 6685 | |
| }, | |
| { | |
| "epoch": 2.783151326053042, | |
| "grad_norm": 0.8263838291168213, | |
| "learning_rate": 8.570457549511802e-07, | |
| "loss": 2.1698, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.78523140925637, | |
| "grad_norm": 0.7581605911254883, | |
| "learning_rate": 8.407046320695805e-07, | |
| "loss": 2.1863, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 2.7873114924596982, | |
| "grad_norm": 0.7336562871932983, | |
| "learning_rate": 8.24518134456273e-07, | |
| "loss": 2.1528, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.7893915756630268, | |
| "grad_norm": 0.7941420078277588, | |
| "learning_rate": 8.084863657087189e-07, | |
| "loss": 2.1556, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 2.791471658866355, | |
| "grad_norm": 0.7126220464706421, | |
| "learning_rate": 7.926094284340713e-07, | |
| "loss": 2.1559, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.793551742069683, | |
| "grad_norm": 0.6795457601547241, | |
| "learning_rate": 7.768874242485291e-07, | |
| "loss": 2.1256, | |
| "step": 6715 | |
| }, | |
| { | |
| "epoch": 2.795631825273011, | |
| "grad_norm": 0.7890847325325012, | |
| "learning_rate": 7.613204537766704e-07, | |
| "loss": 2.1345, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.797711908476339, | |
| "grad_norm": 0.8842664957046509, | |
| "learning_rate": 7.459086166508367e-07, | |
| "loss": 2.1474, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 2.799791991679667, | |
| "grad_norm": 0.9438645243644714, | |
| "learning_rate": 7.306520115104743e-07, | |
| "loss": 2.1808, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.8018720748829953, | |
| "grad_norm": 0.8111101388931274, | |
| "learning_rate": 7.155507360014941e-07, | |
| "loss": 2.167, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 2.8039521580863234, | |
| "grad_norm": 0.7519661784172058, | |
| "learning_rate": 7.006048867756798e-07, | |
| "loss": 2.1375, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.8060322412896515, | |
| "grad_norm": 0.776961088180542, | |
| "learning_rate": 6.858145594900389e-07, | |
| "loss": 2.146, | |
| "step": 6745 | |
| }, | |
| { | |
| "epoch": 2.8081123244929795, | |
| "grad_norm": 0.8284921050071716, | |
| "learning_rate": 6.711798488062027e-07, | |
| "loss": 2.1459, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.8101924076963076, | |
| "grad_norm": 0.7575844526290894, | |
| "learning_rate": 6.567008483898185e-07, | |
| "loss": 2.1788, | |
| "step": 6755 | |
| }, | |
| { | |
| "epoch": 2.8122724908996357, | |
| "grad_norm": 0.7377430200576782, | |
| "learning_rate": 6.423776509099505e-07, | |
| "loss": 2.1239, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.8143525741029642, | |
| "grad_norm": 0.8331411480903625, | |
| "learning_rate": 6.28210348038491e-07, | |
| "loss": 2.1618, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 2.8164326573062923, | |
| "grad_norm": 0.7900028824806213, | |
| "learning_rate": 6.14199030449572e-07, | |
| "loss": 2.142, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.8185127405096204, | |
| "grad_norm": 0.7936934232711792, | |
| "learning_rate": 6.003437878189661e-07, | |
| "loss": 2.1309, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 2.8205928237129485, | |
| "grad_norm": 0.7084915041923523, | |
| "learning_rate": 5.866447088235444e-07, | |
| "loss": 2.1436, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.8226729069162766, | |
| "grad_norm": 0.894076406955719, | |
| "learning_rate": 5.731018811406891e-07, | |
| "loss": 2.1469, | |
| "step": 6785 | |
| }, | |
| { | |
| "epoch": 2.8247529901196047, | |
| "grad_norm": 0.8683717250823975, | |
| "learning_rate": 5.597153914477376e-07, | |
| "loss": 2.174, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.8268330733229328, | |
| "grad_norm": 0.6982574462890625, | |
| "learning_rate": 5.464853254214225e-07, | |
| "loss": 2.1291, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 2.8289131565262613, | |
| "grad_norm": 0.7772095799446106, | |
| "learning_rate": 5.334117677373352e-07, | |
| "loss": 2.1343, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.8309932397295894, | |
| "grad_norm": 0.893601655960083, | |
| "learning_rate": 5.204948020693657e-07, | |
| "loss": 2.1457, | |
| "step": 6805 | |
| }, | |
| { | |
| "epoch": 2.8330733229329175, | |
| "grad_norm": 0.7480981349945068, | |
| "learning_rate": 5.07734511089189e-07, | |
| "loss": 2.1251, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.8351534061362456, | |
| "grad_norm": 0.7743918299674988, | |
| "learning_rate": 4.951309764657131e-07, | |
| "loss": 2.1248, | |
| "step": 6815 | |
| }, | |
| { | |
| "epoch": 2.8372334893395736, | |
| "grad_norm": 0.7804460525512695, | |
| "learning_rate": 4.826842788645758e-07, | |
| "loss": 2.1898, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.8393135725429017, | |
| "grad_norm": 0.8208061456680298, | |
| "learning_rate": 4.703944979476238e-07, | |
| "loss": 2.1471, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 2.84139365574623, | |
| "grad_norm": 0.9219505786895752, | |
| "learning_rate": 4.5826171237239035e-07, | |
| "loss": 2.1078, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.843473738949558, | |
| "grad_norm": 0.838716447353363, | |
| "learning_rate": 4.4628599979160136e-07, | |
| "loss": 2.175, | |
| "step": 6835 | |
| }, | |
| { | |
| "epoch": 2.845553822152886, | |
| "grad_norm": 0.7236716151237488, | |
| "learning_rate": 4.344674368526841e-07, | |
| "loss": 2.1288, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.847633905356214, | |
| "grad_norm": 0.7392603158950806, | |
| "learning_rate": 4.2280609919727323e-07, | |
| "loss": 2.1579, | |
| "step": 6845 | |
| }, | |
| { | |
| "epoch": 2.849713988559542, | |
| "grad_norm": 0.6753377914428711, | |
| "learning_rate": 4.1130206146071106e-07, | |
| "loss": 2.1519, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.8517940717628703, | |
| "grad_norm": 0.9331068396568298, | |
| "learning_rate": 3.999553972715925e-07, | |
| "loss": 2.1456, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 2.853874154966199, | |
| "grad_norm": 0.7977923154830933, | |
| "learning_rate": 3.887661792512848e-07, | |
| "loss": 2.1992, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.855954238169527, | |
| "grad_norm": 0.8902707099914551, | |
| "learning_rate": 3.777344790134585e-07, | |
| "loss": 2.1761, | |
| "step": 6865 | |
| }, | |
| { | |
| "epoch": 2.858034321372855, | |
| "grad_norm": 0.8167480230331421, | |
| "learning_rate": 3.668603671636295e-07, | |
| "loss": 2.1186, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.860114404576183, | |
| "grad_norm": 0.7843384742736816, | |
| "learning_rate": 3.5614391329871487e-07, | |
| "loss": 2.1964, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 2.862194487779511, | |
| "grad_norm": 0.9206196665763855, | |
| "learning_rate": 3.4558518600658893e-07, | |
| "loss": 2.1679, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.864274570982839, | |
| "grad_norm": 0.7699995636940002, | |
| "learning_rate": 3.3518425286562795e-07, | |
| "loss": 2.158, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 2.8663546541861673, | |
| "grad_norm": 0.860309898853302, | |
| "learning_rate": 3.249411804442881e-07, | |
| "loss": 2.1557, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.868434737389496, | |
| "grad_norm": 0.8029336333274841, | |
| "learning_rate": 3.1485603430068676e-07, | |
| "loss": 2.1117, | |
| "step": 6895 | |
| }, | |
| { | |
| "epoch": 2.870514820592824, | |
| "grad_norm": 0.7837362885475159, | |
| "learning_rate": 3.049288789821664e-07, | |
| "loss": 2.1301, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.872594903796152, | |
| "grad_norm": 0.7675833106040955, | |
| "learning_rate": 2.9515977802490324e-07, | |
| "loss": 2.1545, | |
| "step": 6905 | |
| }, | |
| { | |
| "epoch": 2.87467498699948, | |
| "grad_norm": 0.820162296295166, | |
| "learning_rate": 2.8554879395347177e-07, | |
| "loss": 2.1776, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.876755070202808, | |
| "grad_norm": 1.1358041763305664, | |
| "learning_rate": 2.760959882804753e-07, | |
| "loss": 2.1434, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 2.8788351534061363, | |
| "grad_norm": 0.8005056381225586, | |
| "learning_rate": 2.668014215061243e-07, | |
| "loss": 2.1697, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.8809152366094644, | |
| "grad_norm": 0.81009840965271, | |
| "learning_rate": 2.576651531178725e-07, | |
| "loss": 2.1597, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 2.8829953198127924, | |
| "grad_norm": 0.9089166522026062, | |
| "learning_rate": 2.4868724159002323e-07, | |
| "loss": 2.1189, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.8850754030161205, | |
| "grad_norm": 0.9949043989181519, | |
| "learning_rate": 2.398677443833569e-07, | |
| "loss": 2.1673, | |
| "step": 6935 | |
| }, | |
| { | |
| "epoch": 2.8871554862194486, | |
| "grad_norm": 0.7992036938667297, | |
| "learning_rate": 2.3120671794476522e-07, | |
| "loss": 2.1761, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.8892355694227767, | |
| "grad_norm": 0.8436097502708435, | |
| "learning_rate": 2.2270421770688722e-07, | |
| "loss": 2.1527, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 2.891315652626105, | |
| "grad_norm": 0.8040410280227661, | |
| "learning_rate": 2.143602980877596e-07, | |
| "loss": 2.1215, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.8933957358294333, | |
| "grad_norm": 0.9088870882987976, | |
| "learning_rate": 2.0617501249046156e-07, | |
| "loss": 2.1204, | |
| "step": 6955 | |
| }, | |
| { | |
| "epoch": 2.8954758190327614, | |
| "grad_norm": 0.6987228989601135, | |
| "learning_rate": 1.9814841330277889e-07, | |
| "loss": 2.1617, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.8975559022360895, | |
| "grad_norm": 1.0931040048599243, | |
| "learning_rate": 1.902805518968681e-07, | |
| "loss": 2.145, | |
| "step": 6965 | |
| }, | |
| { | |
| "epoch": 2.8996359854394176, | |
| "grad_norm": 0.8285526633262634, | |
| "learning_rate": 1.8257147862892065e-07, | |
| "loss": 2.1255, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.9017160686427457, | |
| "grad_norm": 0.7968695163726807, | |
| "learning_rate": 1.7502124283885478e-07, | |
| "loss": 2.0997, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 2.9037961518460738, | |
| "grad_norm": 0.8100786805152893, | |
| "learning_rate": 1.6762989284997975e-07, | |
| "loss": 2.1215, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.905876235049402, | |
| "grad_norm": 0.7216346859931946, | |
| "learning_rate": 1.6039747596870437e-07, | |
| "loss": 2.1376, | |
| "step": 6985 | |
| }, | |
| { | |
| "epoch": 2.9079563182527304, | |
| "grad_norm": 0.7840524911880493, | |
| "learning_rate": 1.5332403848422606e-07, | |
| "loss": 2.1328, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.9100364014560585, | |
| "grad_norm": 0.9314194917678833, | |
| "learning_rate": 1.464096256682368e-07, | |
| "loss": 2.1194, | |
| "step": 6995 | |
| }, | |
| { | |
| "epoch": 2.9121164846593866, | |
| "grad_norm": 0.6695131063461304, | |
| "learning_rate": 1.3965428177463712e-07, | |
| "loss": 2.1324, | |
| "step": 7000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 7209, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.7817058483577553e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |