|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 62447, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016013579515429084, |
|
"grad_norm": 19.96757698059082, |
|
"learning_rate": 4.003442960946414e-08, |
|
"loss": 6.6824, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0032027159030858167, |
|
"grad_norm": 20.086483001708984, |
|
"learning_rate": 8.006885921892828e-08, |
|
"loss": 6.6684, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004804073854628725, |
|
"grad_norm": 15.201075553894043, |
|
"learning_rate": 1.2010328882839244e-07, |
|
"loss": 6.5818, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006405431806171633, |
|
"grad_norm": 12.548062324523926, |
|
"learning_rate": 1.6013771843785657e-07, |
|
"loss": 6.409, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008006789757714542, |
|
"grad_norm": 10.694886207580566, |
|
"learning_rate": 2.0017214804732072e-07, |
|
"loss": 6.1352, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.00960814770925745, |
|
"grad_norm": 8.358333587646484, |
|
"learning_rate": 2.402065776567849e-07, |
|
"loss": 5.7786, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.011209505660800359, |
|
"grad_norm": 6.130667686462402, |
|
"learning_rate": 2.80241007266249e-07, |
|
"loss": 5.3755, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.012810863612343267, |
|
"grad_norm": 3.611398935317993, |
|
"learning_rate": 3.2027543687571313e-07, |
|
"loss": 5.1389, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.014412221563886175, |
|
"grad_norm": 2.8347551822662354, |
|
"learning_rate": 3.603098664851773e-07, |
|
"loss": 4.9266, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.016013579515429085, |
|
"grad_norm": 21.159616470336914, |
|
"learning_rate": 4.0034429609464144e-07, |
|
"loss": 4.8398, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.017614937466971993, |
|
"grad_norm": 6.242217540740967, |
|
"learning_rate": 4.4037872570410557e-07, |
|
"loss": 4.7241, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0192162954185149, |
|
"grad_norm": 3.52362322807312, |
|
"learning_rate": 4.804131553135698e-07, |
|
"loss": 4.6735, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.02081765337005781, |
|
"grad_norm": 2.831575393676758, |
|
"learning_rate": 5.204475849230339e-07, |
|
"loss": 4.5398, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.022419011321600717, |
|
"grad_norm": 11.5364351272583, |
|
"learning_rate": 5.60482014532498e-07, |
|
"loss": 4.5072, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.024020369273143626, |
|
"grad_norm": 3.1574575901031494, |
|
"learning_rate": 6.005164441419621e-07, |
|
"loss": 4.4694, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.025621727224686534, |
|
"grad_norm": 2.988778829574585, |
|
"learning_rate": 6.405508737514263e-07, |
|
"loss": 4.451, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.027223085176229442, |
|
"grad_norm": 2.388536214828491, |
|
"learning_rate": 6.805853033608904e-07, |
|
"loss": 4.3784, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.02882444312777235, |
|
"grad_norm": 8.523998260498047, |
|
"learning_rate": 7.206197329703546e-07, |
|
"loss": 4.3341, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.030425801079315258, |
|
"grad_norm": 5.110918045043945, |
|
"learning_rate": 7.606541625798188e-07, |
|
"loss": 4.2974, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.03202715903085817, |
|
"grad_norm": 2.1784770488739014, |
|
"learning_rate": 8.006885921892829e-07, |
|
"loss": 4.2403, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.033628516982401074, |
|
"grad_norm": 2.2727229595184326, |
|
"learning_rate": 8.407230217987469e-07, |
|
"loss": 4.1771, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.035229874933943986, |
|
"grad_norm": 2.410856008529663, |
|
"learning_rate": 8.807574514082111e-07, |
|
"loss": 4.1723, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.03683123288548689, |
|
"grad_norm": 7.233943462371826, |
|
"learning_rate": 9.207918810176753e-07, |
|
"loss": 4.1031, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.0384325908370298, |
|
"grad_norm": 9.451576232910156, |
|
"learning_rate": 9.608263106271395e-07, |
|
"loss": 4.0296, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.04003394878857271, |
|
"grad_norm": 5.198200225830078, |
|
"learning_rate": 1.0008607402366035e-06, |
|
"loss": 3.9371, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04163530674011562, |
|
"grad_norm": 11.912164688110352, |
|
"learning_rate": 1.0408951698460678e-06, |
|
"loss": 3.8349, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.04323666469165852, |
|
"grad_norm": 6.008382320404053, |
|
"learning_rate": 1.0809295994555318e-06, |
|
"loss": 3.7505, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.044838022643201435, |
|
"grad_norm": 3.3153979778289795, |
|
"learning_rate": 1.120964029064996e-06, |
|
"loss": 3.6149, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.046439380594744346, |
|
"grad_norm": 8.011855125427246, |
|
"learning_rate": 1.16099845867446e-06, |
|
"loss": 3.5414, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.04804073854628725, |
|
"grad_norm": 3.550476312637329, |
|
"learning_rate": 1.2010328882839243e-06, |
|
"loss": 3.4248, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04964209649783016, |
|
"grad_norm": 3.9144866466522217, |
|
"learning_rate": 1.2410673178933883e-06, |
|
"loss": 3.3224, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.05124345444937307, |
|
"grad_norm": 3.6054248809814453, |
|
"learning_rate": 1.2811017475028525e-06, |
|
"loss": 3.2983, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.05284481240091598, |
|
"grad_norm": 4.165266990661621, |
|
"learning_rate": 1.3211361771123166e-06, |
|
"loss": 3.1677, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.054446170352458884, |
|
"grad_norm": 4.654821872711182, |
|
"learning_rate": 1.3611706067217808e-06, |
|
"loss": 3.14, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.056047528304001795, |
|
"grad_norm": 3.641819715499878, |
|
"learning_rate": 1.4012050363312448e-06, |
|
"loss": 3.0439, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0576488862555447, |
|
"grad_norm": 3.61091947555542, |
|
"learning_rate": 1.4412394659407093e-06, |
|
"loss": 2.9522, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.05925024420708761, |
|
"grad_norm": 22.04112434387207, |
|
"learning_rate": 1.4812738955501733e-06, |
|
"loss": 2.9255, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.060851602158630516, |
|
"grad_norm": 5.0808892250061035, |
|
"learning_rate": 1.5213083251596375e-06, |
|
"loss": 2.8402, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.06245296011017343, |
|
"grad_norm": 9.055444717407227, |
|
"learning_rate": 1.5613427547691015e-06, |
|
"loss": 2.8354, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.06405431806171634, |
|
"grad_norm": 3.44482684135437, |
|
"learning_rate": 1.6013771843785658e-06, |
|
"loss": 2.7592, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06565567601325924, |
|
"grad_norm": 2.7728819847106934, |
|
"learning_rate": 1.6414116139880298e-06, |
|
"loss": 2.7746, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.06725703396480215, |
|
"grad_norm": 1.9306970834732056, |
|
"learning_rate": 1.6814460435974938e-06, |
|
"loss": 2.7233, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.06885839191634506, |
|
"grad_norm": 1.8614246845245361, |
|
"learning_rate": 1.7214804732069583e-06, |
|
"loss": 2.7021, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.07045974986788797, |
|
"grad_norm": 3.224013566970825, |
|
"learning_rate": 1.7615149028164223e-06, |
|
"loss": 2.6586, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.07206110781943088, |
|
"grad_norm": 4.159784317016602, |
|
"learning_rate": 1.8015493324258865e-06, |
|
"loss": 2.6666, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.07366246577097378, |
|
"grad_norm": 2.2219038009643555, |
|
"learning_rate": 1.8415837620353505e-06, |
|
"loss": 2.6465, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.07526382372251669, |
|
"grad_norm": 14.757235527038574, |
|
"learning_rate": 1.8816181916448148e-06, |
|
"loss": 2.6125, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.0768651816740596, |
|
"grad_norm": 1.881609559059143, |
|
"learning_rate": 1.921652621254279e-06, |
|
"loss": 2.5652, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.07846653962560252, |
|
"grad_norm": 1.9000244140625, |
|
"learning_rate": 1.9616870508637432e-06, |
|
"loss": 2.5676, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.08006789757714541, |
|
"grad_norm": 3.4342846870422363, |
|
"learning_rate": 2.001721480473207e-06, |
|
"loss": 2.5934, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08166925552868833, |
|
"grad_norm": 3.2394461631774902, |
|
"learning_rate": 2.0417559100826713e-06, |
|
"loss": 2.5371, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.08327061348023124, |
|
"grad_norm": 2.726757287979126, |
|
"learning_rate": 2.0817903396921355e-06, |
|
"loss": 2.5211, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.08487197143177415, |
|
"grad_norm": 1.8385337591171265, |
|
"learning_rate": 2.1218247693015993e-06, |
|
"loss": 2.5449, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.08647332938331705, |
|
"grad_norm": 1.7317003011703491, |
|
"learning_rate": 2.1618591989110636e-06, |
|
"loss": 2.5368, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.08807468733485996, |
|
"grad_norm": 1.8202093839645386, |
|
"learning_rate": 2.201893628520528e-06, |
|
"loss": 2.4703, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.08967604528640287, |
|
"grad_norm": 1.627389669418335, |
|
"learning_rate": 2.241928058129992e-06, |
|
"loss": 2.4741, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.09127740323794578, |
|
"grad_norm": 3.039496660232544, |
|
"learning_rate": 2.2819624877394563e-06, |
|
"loss": 2.4966, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.09287876118948869, |
|
"grad_norm": 5.223389148712158, |
|
"learning_rate": 2.32199691734892e-06, |
|
"loss": 2.4383, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.09448011914103159, |
|
"grad_norm": 1.7681688070297241, |
|
"learning_rate": 2.3620313469583843e-06, |
|
"loss": 2.4656, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.0960814770925745, |
|
"grad_norm": 4.00803804397583, |
|
"learning_rate": 2.4020657765678486e-06, |
|
"loss": 2.481, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.09768283504411741, |
|
"grad_norm": 14.015419960021973, |
|
"learning_rate": 2.4421002061773128e-06, |
|
"loss": 2.4758, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.09928419299566033, |
|
"grad_norm": 3.860048294067383, |
|
"learning_rate": 2.4821346357867766e-06, |
|
"loss": 2.4548, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.10088555094720322, |
|
"grad_norm": 1.4068512916564941, |
|
"learning_rate": 2.5221690653962413e-06, |
|
"loss": 2.4428, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.10248690889874613, |
|
"grad_norm": 3.721557855606079, |
|
"learning_rate": 2.562203495005705e-06, |
|
"loss": 2.3956, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.10408826685028905, |
|
"grad_norm": 2.806149482727051, |
|
"learning_rate": 2.6022379246151693e-06, |
|
"loss": 2.3903, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.10568962480183196, |
|
"grad_norm": 2.8240647315979004, |
|
"learning_rate": 2.642272354224633e-06, |
|
"loss": 2.395, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.10729098275337486, |
|
"grad_norm": 1.7092350721359253, |
|
"learning_rate": 2.6823067838340978e-06, |
|
"loss": 2.4076, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.10889234070491777, |
|
"grad_norm": 1.814175009727478, |
|
"learning_rate": 2.7223412134435616e-06, |
|
"loss": 2.4055, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.11049369865646068, |
|
"grad_norm": 2.08941650390625, |
|
"learning_rate": 2.762375643053026e-06, |
|
"loss": 2.4097, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.11209505660800359, |
|
"grad_norm": 2.0335028171539307, |
|
"learning_rate": 2.8024100726624896e-06, |
|
"loss": 2.3769, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.1136964145595465, |
|
"grad_norm": 14.262283325195312, |
|
"learning_rate": 2.8424445022719543e-06, |
|
"loss": 2.3706, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.1152977725110894, |
|
"grad_norm": 2.324890375137329, |
|
"learning_rate": 2.8824789318814185e-06, |
|
"loss": 2.3688, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.11689913046263231, |
|
"grad_norm": 2.6902220249176025, |
|
"learning_rate": 2.9225133614908823e-06, |
|
"loss": 2.3829, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.11850048841417522, |
|
"grad_norm": 3.410318613052368, |
|
"learning_rate": 2.9625477911003466e-06, |
|
"loss": 2.3687, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.12010184636571813, |
|
"grad_norm": 1.4391207695007324, |
|
"learning_rate": 3.0025822207098104e-06, |
|
"loss": 2.3909, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.12170320431726103, |
|
"grad_norm": 11.690342903137207, |
|
"learning_rate": 3.042616650319275e-06, |
|
"loss": 2.3387, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.12330456226880394, |
|
"grad_norm": 1.5653709173202515, |
|
"learning_rate": 3.082651079928739e-06, |
|
"loss": 2.3451, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.12490592022034686, |
|
"grad_norm": 3.124866247177124, |
|
"learning_rate": 3.122685509538203e-06, |
|
"loss": 2.322, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.12650727817188975, |
|
"grad_norm": 12.413910865783691, |
|
"learning_rate": 3.162719939147667e-06, |
|
"loss": 2.3182, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.12810863612343268, |
|
"grad_norm": 1.7550314664840698, |
|
"learning_rate": 3.2027543687571315e-06, |
|
"loss": 2.3099, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.12970999407497558, |
|
"grad_norm": 1.9001699686050415, |
|
"learning_rate": 3.2427887983665958e-06, |
|
"loss": 2.3299, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.13131135202651847, |
|
"grad_norm": 1.4118369817733765, |
|
"learning_rate": 3.2828232279760596e-06, |
|
"loss": 2.3003, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.1329127099780614, |
|
"grad_norm": 3.046459913253784, |
|
"learning_rate": 3.322857657585524e-06, |
|
"loss": 2.3053, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.1345140679296043, |
|
"grad_norm": 6.424179553985596, |
|
"learning_rate": 3.3628920871949876e-06, |
|
"loss": 2.2845, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.13611542588114722, |
|
"grad_norm": 3.9462482929229736, |
|
"learning_rate": 3.4029265168044523e-06, |
|
"loss": 2.2821, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.13771678383269012, |
|
"grad_norm": 2.464116096496582, |
|
"learning_rate": 3.4429609464139165e-06, |
|
"loss": 2.3144, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.13931814178423302, |
|
"grad_norm": 17.63976287841797, |
|
"learning_rate": 3.4829953760233803e-06, |
|
"loss": 2.2811, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.14091949973577594, |
|
"grad_norm": 3.135732650756836, |
|
"learning_rate": 3.5230298056328446e-06, |
|
"loss": 2.2953, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.14252085768731884, |
|
"grad_norm": 4.162137031555176, |
|
"learning_rate": 3.563064235242309e-06, |
|
"loss": 2.2692, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.14412221563886177, |
|
"grad_norm": 6.429003715515137, |
|
"learning_rate": 3.603098664851773e-06, |
|
"loss": 2.2819, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.14572357359040466, |
|
"grad_norm": 6.803035736083984, |
|
"learning_rate": 3.643133094461237e-06, |
|
"loss": 2.2672, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.14732493154194756, |
|
"grad_norm": 15.847606658935547, |
|
"learning_rate": 3.683167524070701e-06, |
|
"loss": 2.26, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1489262894934905, |
|
"grad_norm": 3.1911871433258057, |
|
"learning_rate": 3.723201953680165e-06, |
|
"loss": 2.2355, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.15052764744503339, |
|
"grad_norm": 1.6060032844543457, |
|
"learning_rate": 3.7632363832896296e-06, |
|
"loss": 2.2608, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.15212900539657628, |
|
"grad_norm": 1.5236974954605103, |
|
"learning_rate": 3.8032708128990938e-06, |
|
"loss": 2.2507, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1537303633481192, |
|
"grad_norm": 8.704015731811523, |
|
"learning_rate": 3.843305242508558e-06, |
|
"loss": 2.2457, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.1553317212996621, |
|
"grad_norm": 4.1284918785095215, |
|
"learning_rate": 3.883339672118022e-06, |
|
"loss": 2.2321, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.15693307925120503, |
|
"grad_norm": 8.519213676452637, |
|
"learning_rate": 3.9233741017274865e-06, |
|
"loss": 2.2356, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.15853443720274793, |
|
"grad_norm": 6.228696823120117, |
|
"learning_rate": 3.96340853133695e-06, |
|
"loss": 2.2243, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.16013579515429083, |
|
"grad_norm": 2.693775177001953, |
|
"learning_rate": 4.003442960946414e-06, |
|
"loss": 2.2288, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.16173715310583375, |
|
"grad_norm": 8.416048049926758, |
|
"learning_rate": 4.043477390555878e-06, |
|
"loss": 2.2311, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.16333851105737665, |
|
"grad_norm": 1.5264601707458496, |
|
"learning_rate": 4.083511820165343e-06, |
|
"loss": 2.2186, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.16493986900891958, |
|
"grad_norm": 1.7846661806106567, |
|
"learning_rate": 4.123546249774807e-06, |
|
"loss": 2.2132, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.16654122696046247, |
|
"grad_norm": 3.9117202758789062, |
|
"learning_rate": 4.163580679384271e-06, |
|
"loss": 2.228, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.16814258491200537, |
|
"grad_norm": 4.531779766082764, |
|
"learning_rate": 4.203615108993735e-06, |
|
"loss": 2.2066, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.1697439428635483, |
|
"grad_norm": 2.1657228469848633, |
|
"learning_rate": 4.243649538603199e-06, |
|
"loss": 2.1929, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1713453008150912, |
|
"grad_norm": 2.9067344665527344, |
|
"learning_rate": 4.283683968212663e-06, |
|
"loss": 2.2093, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1729466587666341, |
|
"grad_norm": 3.7661423683166504, |
|
"learning_rate": 4.323718397822127e-06, |
|
"loss": 2.1919, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.17454801671817702, |
|
"grad_norm": 2.9169373512268066, |
|
"learning_rate": 4.363752827431592e-06, |
|
"loss": 2.2099, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.17614937466971992, |
|
"grad_norm": 2.1810638904571533, |
|
"learning_rate": 4.403787257041056e-06, |
|
"loss": 2.1923, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.17775073262126284, |
|
"grad_norm": 8.174213409423828, |
|
"learning_rate": 4.443821686650519e-06, |
|
"loss": 2.1886, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.17935209057280574, |
|
"grad_norm": 2.431321382522583, |
|
"learning_rate": 4.483856116259984e-06, |
|
"loss": 2.1991, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.18095344852434864, |
|
"grad_norm": 3.7426862716674805, |
|
"learning_rate": 4.523890545869448e-06, |
|
"loss": 2.1763, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.18255480647589156, |
|
"grad_norm": 2.5155022144317627, |
|
"learning_rate": 4.5639249754789125e-06, |
|
"loss": 2.1906, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.18415616442743446, |
|
"grad_norm": 1.7059454917907715, |
|
"learning_rate": 4.603959405088376e-06, |
|
"loss": 2.1872, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.18575752237897739, |
|
"grad_norm": 5.253864765167236, |
|
"learning_rate": 4.64399383469784e-06, |
|
"loss": 2.1889, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.18735888033052028, |
|
"grad_norm": 1.5918197631835938, |
|
"learning_rate": 4.684028264307305e-06, |
|
"loss": 2.1746, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.18896023828206318, |
|
"grad_norm": 10.147111892700195, |
|
"learning_rate": 4.724062693916769e-06, |
|
"loss": 2.1712, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.1905615962336061, |
|
"grad_norm": 4.3356781005859375, |
|
"learning_rate": 4.764097123526233e-06, |
|
"loss": 2.1815, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.192162954185149, |
|
"grad_norm": 10.20026683807373, |
|
"learning_rate": 4.804131553135697e-06, |
|
"loss": 2.176, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.1937643121366919, |
|
"grad_norm": 1.9123090505599976, |
|
"learning_rate": 4.844165982745162e-06, |
|
"loss": 2.1807, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.19536567008823483, |
|
"grad_norm": 1.6245704889297485, |
|
"learning_rate": 4.8842004123546256e-06, |
|
"loss": 2.1637, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.19696702803977773, |
|
"grad_norm": 5.880768299102783, |
|
"learning_rate": 4.924234841964089e-06, |
|
"loss": 2.1735, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.19856838599132065, |
|
"grad_norm": 5.809731960296631, |
|
"learning_rate": 4.964269271573553e-06, |
|
"loss": 2.1523, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.20016974394286355, |
|
"grad_norm": 1.827416181564331, |
|
"learning_rate": 5.004303701183018e-06, |
|
"loss": 2.1485, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.20177110189440645, |
|
"grad_norm": 2.386488437652588, |
|
"learning_rate": 5.0443381307924825e-06, |
|
"loss": 2.1641, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.20337245984594937, |
|
"grad_norm": 5.080982208251953, |
|
"learning_rate": 5.0843725604019455e-06, |
|
"loss": 2.1706, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.20497381779749227, |
|
"grad_norm": 6.828605651855469, |
|
"learning_rate": 5.12440699001141e-06, |
|
"loss": 2.1736, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.2065751757490352, |
|
"grad_norm": 2.243302822113037, |
|
"learning_rate": 5.164441419620875e-06, |
|
"loss": 2.1238, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.2081765337005781, |
|
"grad_norm": 3.8954567909240723, |
|
"learning_rate": 5.204475849230339e-06, |
|
"loss": 2.1461, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.209777891652121, |
|
"grad_norm": 3.563438653945923, |
|
"learning_rate": 5.244510278839802e-06, |
|
"loss": 2.1492, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.21137924960366392, |
|
"grad_norm": 2.1851043701171875, |
|
"learning_rate": 5.284544708449266e-06, |
|
"loss": 2.1407, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.2129806075552068, |
|
"grad_norm": 4.8792524337768555, |
|
"learning_rate": 5.324579138058731e-06, |
|
"loss": 2.1403, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.2145819655067497, |
|
"grad_norm": 4.021134376525879, |
|
"learning_rate": 5.3646135676681955e-06, |
|
"loss": 2.1628, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.21618332345829264, |
|
"grad_norm": 3.8988146781921387, |
|
"learning_rate": 5.4046479972776585e-06, |
|
"loss": 2.1425, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.21778468140983553, |
|
"grad_norm": 6.337070941925049, |
|
"learning_rate": 5.444682426887123e-06, |
|
"loss": 2.1493, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.21938603936137846, |
|
"grad_norm": 2.077366828918457, |
|
"learning_rate": 5.484716856496588e-06, |
|
"loss": 2.1264, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.22098739731292136, |
|
"grad_norm": 1.3507400751113892, |
|
"learning_rate": 5.524751286106052e-06, |
|
"loss": 2.1306, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.22258875526446426, |
|
"grad_norm": 1.5656003952026367, |
|
"learning_rate": 5.564785715715516e-06, |
|
"loss": 2.135, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.22419011321600718, |
|
"grad_norm": 3.315119981765747, |
|
"learning_rate": 5.604820145324979e-06, |
|
"loss": 2.1449, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.22579147116755008, |
|
"grad_norm": 1.677067518234253, |
|
"learning_rate": 5.644854574934444e-06, |
|
"loss": 2.1126, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.227392829119093, |
|
"grad_norm": 1.3107109069824219, |
|
"learning_rate": 5.6848890045439086e-06, |
|
"loss": 2.1415, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.2289941870706359, |
|
"grad_norm": 1.887251853942871, |
|
"learning_rate": 5.724923434153372e-06, |
|
"loss": 2.1312, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.2305955450221788, |
|
"grad_norm": 4.706649303436279, |
|
"learning_rate": 5.764957863762837e-06, |
|
"loss": 2.1417, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.23219690297372172, |
|
"grad_norm": 4.202969074249268, |
|
"learning_rate": 5.8049922933723e-06, |
|
"loss": 2.1403, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.23379826092526462, |
|
"grad_norm": 2.2349281311035156, |
|
"learning_rate": 5.845026722981765e-06, |
|
"loss": 2.1164, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.23539961887680752, |
|
"grad_norm": 1.7390815019607544, |
|
"learning_rate": 5.885061152591229e-06, |
|
"loss": 2.1313, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.23700097682835045, |
|
"grad_norm": 1.9534856081008911, |
|
"learning_rate": 5.925095582200693e-06, |
|
"loss": 2.1252, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.23860233477989334, |
|
"grad_norm": 1.7701072692871094, |
|
"learning_rate": 5.965130011810158e-06, |
|
"loss": 2.1207, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.24020369273143627, |
|
"grad_norm": 6.166327953338623, |
|
"learning_rate": 6.005164441419621e-06, |
|
"loss": 2.1079, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.24180505068297917, |
|
"grad_norm": 2.4361186027526855, |
|
"learning_rate": 6.045198871029085e-06, |
|
"loss": 2.114, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.24340640863452206, |
|
"grad_norm": 2.536973714828491, |
|
"learning_rate": 6.08523330063855e-06, |
|
"loss": 2.109, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.245007766586065, |
|
"grad_norm": 3.394212484359741, |
|
"learning_rate": 6.125267730248014e-06, |
|
"loss": 2.1193, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.2466091245376079, |
|
"grad_norm": 1.725258708000183, |
|
"learning_rate": 6.165302159857478e-06, |
|
"loss": 2.1238, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.2482104824891508, |
|
"grad_norm": 2.9132273197174072, |
|
"learning_rate": 6.205336589466942e-06, |
|
"loss": 2.115, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.2498118404406937, |
|
"grad_norm": 1.6105629205703735, |
|
"learning_rate": 6.245371019076406e-06, |
|
"loss": 2.1103, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.25141319839223664, |
|
"grad_norm": 1.4759615659713745, |
|
"learning_rate": 6.285405448685871e-06, |
|
"loss": 2.1018, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.2530145563437795, |
|
"grad_norm": 6.175992488861084, |
|
"learning_rate": 6.325439878295334e-06, |
|
"loss": 2.1052, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.25461591429532243, |
|
"grad_norm": 1.496497631072998, |
|
"learning_rate": 6.3654743079047984e-06, |
|
"loss": 2.1098, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.25621727224686536, |
|
"grad_norm": 2.9353444576263428, |
|
"learning_rate": 6.405508737514263e-06, |
|
"loss": 2.1142, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2578186301984082, |
|
"grad_norm": 3.003761053085327, |
|
"learning_rate": 6.445543167123727e-06, |
|
"loss": 2.1096, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.25941998814995115, |
|
"grad_norm": 1.8897191286087036, |
|
"learning_rate": 6.4855775967331916e-06, |
|
"loss": 2.0977, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.2610213461014941, |
|
"grad_norm": 1.1225190162658691, |
|
"learning_rate": 6.5256120263426545e-06, |
|
"loss": 2.1022, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.26262270405303695, |
|
"grad_norm": 5.252044200897217, |
|
"learning_rate": 6.565646455952119e-06, |
|
"loss": 2.1068, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.2642240620045799, |
|
"grad_norm": 1.9852492809295654, |
|
"learning_rate": 6.605680885561584e-06, |
|
"loss": 2.0882, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.2658254199561228, |
|
"grad_norm": 1.1616008281707764, |
|
"learning_rate": 6.645715315171048e-06, |
|
"loss": 2.0944, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.2674267779076657, |
|
"grad_norm": 2.1226704120635986, |
|
"learning_rate": 6.685749744780512e-06, |
|
"loss": 2.0927, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.2690281358592086, |
|
"grad_norm": 1.4191474914550781, |
|
"learning_rate": 6.725784174389975e-06, |
|
"loss": 2.0998, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.2706294938107515, |
|
"grad_norm": 2.283435106277466, |
|
"learning_rate": 6.76581860399944e-06, |
|
"loss": 2.1157, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.27223085176229445, |
|
"grad_norm": 1.6899996995925903, |
|
"learning_rate": 6.805853033608905e-06, |
|
"loss": 2.0937, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.2738322097138373, |
|
"grad_norm": 1.3105698823928833, |
|
"learning_rate": 6.845887463218368e-06, |
|
"loss": 2.0545, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.27543356766538024, |
|
"grad_norm": 1.1776176691055298, |
|
"learning_rate": 6.885921892827833e-06, |
|
"loss": 2.0984, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.27703492561692317, |
|
"grad_norm": 1.651307225227356, |
|
"learning_rate": 6.925956322437296e-06, |
|
"loss": 2.0959, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.27863628356846604, |
|
"grad_norm": 1.7482041120529175, |
|
"learning_rate": 6.965990752046761e-06, |
|
"loss": 2.0636, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.28023764152000896, |
|
"grad_norm": 3.625835418701172, |
|
"learning_rate": 7.006025181656225e-06, |
|
"loss": 2.085, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.2818389994715519, |
|
"grad_norm": 1.6532440185546875, |
|
"learning_rate": 7.046059611265689e-06, |
|
"loss": 2.0883, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.28344035742309476, |
|
"grad_norm": 1.331597924232483, |
|
"learning_rate": 7.086094040875153e-06, |
|
"loss": 2.1034, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.2850417153746377, |
|
"grad_norm": 3.6023612022399902, |
|
"learning_rate": 7.126128470484618e-06, |
|
"loss": 2.0991, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.2866430733261806, |
|
"grad_norm": 1.4167087078094482, |
|
"learning_rate": 7.166162900094081e-06, |
|
"loss": 2.1057, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.28824443127772353, |
|
"grad_norm": 6.183845520019531, |
|
"learning_rate": 7.206197329703546e-06, |
|
"loss": 2.0951, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.2898457892292664, |
|
"grad_norm": 1.5191693305969238, |
|
"learning_rate": 7.246231759313009e-06, |
|
"loss": 2.062, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.29144714718080933, |
|
"grad_norm": 1.5019919872283936, |
|
"learning_rate": 7.286266188922474e-06, |
|
"loss": 2.073, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.29304850513235225, |
|
"grad_norm": 2.338139533996582, |
|
"learning_rate": 7.326300618531938e-06, |
|
"loss": 2.0935, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.2946498630838951, |
|
"grad_norm": 3.6389622688293457, |
|
"learning_rate": 7.366335048141402e-06, |
|
"loss": 2.0907, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.29625122103543805, |
|
"grad_norm": 1.3060230016708374, |
|
"learning_rate": 7.406369477750867e-06, |
|
"loss": 2.0691, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.297852578986981, |
|
"grad_norm": 2.181640863418579, |
|
"learning_rate": 7.44640390736033e-06, |
|
"loss": 2.0668, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.29945393693852385, |
|
"grad_norm": 2.1645591259002686, |
|
"learning_rate": 7.4864383369697944e-06, |
|
"loss": 2.0589, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.30105529489006677, |
|
"grad_norm": 2.522383451461792, |
|
"learning_rate": 7.526472766579259e-06, |
|
"loss": 2.0624, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.3026566528416097, |
|
"grad_norm": 2.0438318252563477, |
|
"learning_rate": 7.566507196188723e-06, |
|
"loss": 2.0756, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.30425801079315257, |
|
"grad_norm": 1.5602883100509644, |
|
"learning_rate": 7.6065416257981876e-06, |
|
"loss": 2.0539, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.3058593687446955, |
|
"grad_norm": 1.2384752035140991, |
|
"learning_rate": 7.64657605540765e-06, |
|
"loss": 2.0698, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.3074607266962384, |
|
"grad_norm": 3.290865659713745, |
|
"learning_rate": 7.686610485017116e-06, |
|
"loss": 2.0538, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.30906208464778134, |
|
"grad_norm": 1.9636443853378296, |
|
"learning_rate": 7.72664491462658e-06, |
|
"loss": 2.0679, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.3106634425993242, |
|
"grad_norm": 2.1679654121398926, |
|
"learning_rate": 7.766679344236044e-06, |
|
"loss": 2.0734, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.31226480055086714, |
|
"grad_norm": 2.441173553466797, |
|
"learning_rate": 7.806713773845507e-06, |
|
"loss": 2.0475, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.31386615850241006, |
|
"grad_norm": 1.2764122486114502, |
|
"learning_rate": 7.846748203454973e-06, |
|
"loss": 2.0773, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.31546751645395293, |
|
"grad_norm": 1.106123685836792, |
|
"learning_rate": 7.886782633064435e-06, |
|
"loss": 2.0645, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.31706887440549586, |
|
"grad_norm": 1.025707721710205, |
|
"learning_rate": 7.9268170626739e-06, |
|
"loss": 2.0643, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.3186702323570388, |
|
"grad_norm": 1.2565511465072632, |
|
"learning_rate": 7.966851492283364e-06, |
|
"loss": 2.0666, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.32027159030858166, |
|
"grad_norm": 1.2378392219543457, |
|
"learning_rate": 8.006885921892828e-06, |
|
"loss": 2.0601, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.3218729482601246, |
|
"grad_norm": 1.9206656217575073, |
|
"learning_rate": 8.046920351502294e-06, |
|
"loss": 2.0576, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.3234743062116675, |
|
"grad_norm": 1.6953002214431763, |
|
"learning_rate": 8.086954781111756e-06, |
|
"loss": 2.0502, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.3250756641632104, |
|
"grad_norm": 1.6600971221923828, |
|
"learning_rate": 8.126989210721221e-06, |
|
"loss": 2.0589, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.3266770221147533, |
|
"grad_norm": 2.360778331756592, |
|
"learning_rate": 8.167023640330685e-06, |
|
"loss": 2.0591, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.3282783800662962, |
|
"grad_norm": 1.5475653409957886, |
|
"learning_rate": 8.207058069940149e-06, |
|
"loss": 2.0703, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.32987973801783915, |
|
"grad_norm": 1.2999683618545532, |
|
"learning_rate": 8.247092499549614e-06, |
|
"loss": 2.0651, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.331481095969382, |
|
"grad_norm": 3.301884889602661, |
|
"learning_rate": 8.287126929159077e-06, |
|
"loss": 2.0485, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.33308245392092495, |
|
"grad_norm": 3.200942277908325, |
|
"learning_rate": 8.327161358768542e-06, |
|
"loss": 2.0444, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.3346838118724679, |
|
"grad_norm": 1.2649630308151245, |
|
"learning_rate": 8.367195788378006e-06, |
|
"loss": 2.0526, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.33628516982401074, |
|
"grad_norm": 1.187700867652893, |
|
"learning_rate": 8.40723021798747e-06, |
|
"loss": 2.0651, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.33788652777555367, |
|
"grad_norm": 1.5766338109970093, |
|
"learning_rate": 8.447264647596935e-06, |
|
"loss": 2.0575, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.3394878857270966, |
|
"grad_norm": 1.1678153276443481, |
|
"learning_rate": 8.487299077206397e-06, |
|
"loss": 2.0394, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.34108924367863946, |
|
"grad_norm": 1.978745698928833, |
|
"learning_rate": 8.527333506815863e-06, |
|
"loss": 2.0434, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.3426906016301824, |
|
"grad_norm": 1.311265230178833, |
|
"learning_rate": 8.567367936425327e-06, |
|
"loss": 2.0423, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.3442919595817253, |
|
"grad_norm": 1.4099359512329102, |
|
"learning_rate": 8.60740236603479e-06, |
|
"loss": 2.0375, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.3458933175332682, |
|
"grad_norm": 1.2521507740020752, |
|
"learning_rate": 8.647436795644254e-06, |
|
"loss": 2.0355, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.3474946754848111, |
|
"grad_norm": 2.544433832168579, |
|
"learning_rate": 8.687471225253718e-06, |
|
"loss": 2.0351, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.34909603343635404, |
|
"grad_norm": 1.6786710023880005, |
|
"learning_rate": 8.727505654863184e-06, |
|
"loss": 2.0544, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.35069739138789696, |
|
"grad_norm": 1.224026083946228, |
|
"learning_rate": 8.767540084472647e-06, |
|
"loss": 2.0406, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.35229874933943983, |
|
"grad_norm": 7.5012431144714355, |
|
"learning_rate": 8.807574514082111e-06, |
|
"loss": 2.0355, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.35390010729098276, |
|
"grad_norm": 1.4292916059494019, |
|
"learning_rate": 8.847608943691575e-06, |
|
"loss": 2.0445, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.3555014652425257, |
|
"grad_norm": 1.1762036085128784, |
|
"learning_rate": 8.887643373301039e-06, |
|
"loss": 2.0358, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.35710282319406855, |
|
"grad_norm": 1.1497453451156616, |
|
"learning_rate": 8.927677802910504e-06, |
|
"loss": 2.0411, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.3587041811456115, |
|
"grad_norm": 1.7819931507110596, |
|
"learning_rate": 8.967712232519968e-06, |
|
"loss": 2.0414, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.3603055390971544, |
|
"grad_norm": 4.624775409698486, |
|
"learning_rate": 9.007746662129432e-06, |
|
"loss": 2.0309, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.3619068970486973, |
|
"grad_norm": 1.5174845457077026, |
|
"learning_rate": 9.047781091738896e-06, |
|
"loss": 2.0494, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.3635082550002402, |
|
"grad_norm": 2.5349197387695312, |
|
"learning_rate": 9.08781552134836e-06, |
|
"loss": 2.0199, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.3651096129517831, |
|
"grad_norm": 1.4281384944915771, |
|
"learning_rate": 9.127849950957825e-06, |
|
"loss": 2.0461, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.366710970903326, |
|
"grad_norm": 1.4501956701278687, |
|
"learning_rate": 9.167884380567289e-06, |
|
"loss": 2.0275, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.3683123288548689, |
|
"grad_norm": 1.7848312854766846, |
|
"learning_rate": 9.207918810176753e-06, |
|
"loss": 2.0459, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.36991368680641185, |
|
"grad_norm": 1.2266578674316406, |
|
"learning_rate": 9.247953239786217e-06, |
|
"loss": 2.0382, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.37151504475795477, |
|
"grad_norm": 2.917593002319336, |
|
"learning_rate": 9.28798766939568e-06, |
|
"loss": 2.0338, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.37311640270949764, |
|
"grad_norm": 1.7669585943222046, |
|
"learning_rate": 9.328022099005146e-06, |
|
"loss": 2.0098, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.37471776066104057, |
|
"grad_norm": 1.3076069355010986, |
|
"learning_rate": 9.36805652861461e-06, |
|
"loss": 2.0259, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.3763191186125835, |
|
"grad_norm": 1.26585054397583, |
|
"learning_rate": 9.408090958224073e-06, |
|
"loss": 2.0096, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.37792047656412636, |
|
"grad_norm": 1.330881953239441, |
|
"learning_rate": 9.448125387833537e-06, |
|
"loss": 2.0141, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.3795218345156693, |
|
"grad_norm": 1.3129397630691528, |
|
"learning_rate": 9.488159817443003e-06, |
|
"loss": 2.0351, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.3811231924672122, |
|
"grad_norm": 2.2104837894439697, |
|
"learning_rate": 9.528194247052467e-06, |
|
"loss": 2.0458, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.3827245504187551, |
|
"grad_norm": 4.37896728515625, |
|
"learning_rate": 9.56822867666193e-06, |
|
"loss": 2.0432, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.384325908370298, |
|
"grad_norm": 1.4323294162750244, |
|
"learning_rate": 9.608263106271394e-06, |
|
"loss": 2.028, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.38592726632184093, |
|
"grad_norm": 2.277630567550659, |
|
"learning_rate": 9.648297535880858e-06, |
|
"loss": 2.012, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.3875286242733838, |
|
"grad_norm": 1.0068135261535645, |
|
"learning_rate": 9.688331965490324e-06, |
|
"loss": 2.0153, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.38912998222492673, |
|
"grad_norm": 1.464872121810913, |
|
"learning_rate": 9.728366395099786e-06, |
|
"loss": 2.0255, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.39073134017646965, |
|
"grad_norm": 1.6919342279434204, |
|
"learning_rate": 9.768400824709251e-06, |
|
"loss": 2.0146, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.3923326981280126, |
|
"grad_norm": 1.4236170053482056, |
|
"learning_rate": 9.808435254318715e-06, |
|
"loss": 2.0235, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.39393405607955545, |
|
"grad_norm": 1.2634207010269165, |
|
"learning_rate": 9.848469683928179e-06, |
|
"loss": 2.0067, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.3955354140310984, |
|
"grad_norm": 1.185770034790039, |
|
"learning_rate": 9.888504113537644e-06, |
|
"loss": 2.0249, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.3971367719826413, |
|
"grad_norm": 1.6554452180862427, |
|
"learning_rate": 9.928538543147106e-06, |
|
"loss": 2.0224, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.39873812993418417, |
|
"grad_norm": 1.7017241716384888, |
|
"learning_rate": 9.968572972756572e-06, |
|
"loss": 2.0138, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.4003394878857271, |
|
"grad_norm": 1.0250684022903442, |
|
"learning_rate": 1.0008607402366036e-05, |
|
"loss": 2.0082, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.40194084583727, |
|
"grad_norm": 1.3391590118408203, |
|
"learning_rate": 1.0048641831975501e-05, |
|
"loss": 2.008, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.4035422037888129, |
|
"grad_norm": 1.0555273294448853, |
|
"learning_rate": 1.0088676261584965e-05, |
|
"loss": 2.0199, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.4051435617403558, |
|
"grad_norm": 2.1245908737182617, |
|
"learning_rate": 1.0128710691194427e-05, |
|
"loss": 2.0141, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.40674491969189874, |
|
"grad_norm": 1.1639268398284912, |
|
"learning_rate": 1.0168745120803891e-05, |
|
"loss": 2.0274, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.4083462776434416, |
|
"grad_norm": 1.75816011428833, |
|
"learning_rate": 1.0208779550413356e-05, |
|
"loss": 2.0065, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.40994763559498454, |
|
"grad_norm": 3.2224700450897217, |
|
"learning_rate": 1.024881398002282e-05, |
|
"loss": 2.0036, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.41154899354652746, |
|
"grad_norm": 1.0586453676223755, |
|
"learning_rate": 1.0288848409632284e-05, |
|
"loss": 2.0216, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.4131503514980704, |
|
"grad_norm": 1.5636674165725708, |
|
"learning_rate": 1.032888283924175e-05, |
|
"loss": 2.0035, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.41475170944961326, |
|
"grad_norm": 1.287876009941101, |
|
"learning_rate": 1.0368917268851213e-05, |
|
"loss": 2.0033, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.4163530674011562, |
|
"grad_norm": 1.1676390171051025, |
|
"learning_rate": 1.0408951698460677e-05, |
|
"loss": 1.9948, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.4179544253526991, |
|
"grad_norm": 2.230921506881714, |
|
"learning_rate": 1.0448986128070143e-05, |
|
"loss": 1.9747, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.419555783304242, |
|
"grad_norm": 1.1102570295333862, |
|
"learning_rate": 1.0489020557679605e-05, |
|
"loss": 2.002, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.4211571412557849, |
|
"grad_norm": 12.577959060668945, |
|
"learning_rate": 1.0529054987289069e-05, |
|
"loss": 1.9873, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.42275849920732783, |
|
"grad_norm": 1.0285041332244873, |
|
"learning_rate": 1.0569089416898532e-05, |
|
"loss": 2.0182, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.4243598571588707, |
|
"grad_norm": 2.1250357627868652, |
|
"learning_rate": 1.0609123846507998e-05, |
|
"loss": 1.9949, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.4259612151104136, |
|
"grad_norm": 0.90369713306427, |
|
"learning_rate": 1.0649158276117462e-05, |
|
"loss": 2.0081, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.42756257306195655, |
|
"grad_norm": 1.0429993867874146, |
|
"learning_rate": 1.0689192705726926e-05, |
|
"loss": 2.0188, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.4291639310134994, |
|
"grad_norm": 1.2060284614562988, |
|
"learning_rate": 1.0729227135336391e-05, |
|
"loss": 1.9747, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.43076528896504235, |
|
"grad_norm": 1.7947618961334229, |
|
"learning_rate": 1.0769261564945855e-05, |
|
"loss": 1.9963, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.4323666469165853, |
|
"grad_norm": 0.970507025718689, |
|
"learning_rate": 1.0809295994555317e-05, |
|
"loss": 2.0089, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.4339680048681282, |
|
"grad_norm": 1.038913607597351, |
|
"learning_rate": 1.0849330424164784e-05, |
|
"loss": 1.9827, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.43556936281967107, |
|
"grad_norm": 2.165769100189209, |
|
"learning_rate": 1.0889364853774246e-05, |
|
"loss": 1.9961, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.437170720771214, |
|
"grad_norm": 1.196454644203186, |
|
"learning_rate": 1.092939928338371e-05, |
|
"loss": 2.0035, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.4387720787227569, |
|
"grad_norm": 0.956650972366333, |
|
"learning_rate": 1.0969433712993176e-05, |
|
"loss": 1.9841, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.4403734366742998, |
|
"grad_norm": 1.084486961364746, |
|
"learning_rate": 1.100946814260264e-05, |
|
"loss": 1.9789, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.4419747946258427, |
|
"grad_norm": 0.9682411551475525, |
|
"learning_rate": 1.1049502572212103e-05, |
|
"loss": 1.9999, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.44357615257738564, |
|
"grad_norm": 2.1347734928131104, |
|
"learning_rate": 1.1089537001821567e-05, |
|
"loss": 2.0091, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.4451775105289285, |
|
"grad_norm": 4.513906478881836, |
|
"learning_rate": 1.1129571431431033e-05, |
|
"loss": 1.9896, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.44677886848047144, |
|
"grad_norm": 1.6367132663726807, |
|
"learning_rate": 1.1169605861040496e-05, |
|
"loss": 1.9781, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.44838022643201436, |
|
"grad_norm": 1.0168904066085815, |
|
"learning_rate": 1.1209640290649958e-05, |
|
"loss": 1.9924, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.44998158438355723, |
|
"grad_norm": 1.7051305770874023, |
|
"learning_rate": 1.1249674720259424e-05, |
|
"loss": 1.9872, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.45158294233510016, |
|
"grad_norm": 0.9768884778022766, |
|
"learning_rate": 1.1289709149868888e-05, |
|
"loss": 1.9809, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.4531843002866431, |
|
"grad_norm": 1.0439552068710327, |
|
"learning_rate": 1.1329743579478352e-05, |
|
"loss": 1.999, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.454785658238186, |
|
"grad_norm": 0.9658423066139221, |
|
"learning_rate": 1.1369778009087817e-05, |
|
"loss": 2.0104, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.4563870161897289, |
|
"grad_norm": 0.9558666944503784, |
|
"learning_rate": 1.1409812438697281e-05, |
|
"loss": 2.0104, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.4579883741412718, |
|
"grad_norm": 1.591242790222168, |
|
"learning_rate": 1.1449846868306745e-05, |
|
"loss": 1.9888, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.45958973209281473, |
|
"grad_norm": 1.8828788995742798, |
|
"learning_rate": 1.148988129791621e-05, |
|
"loss": 1.9951, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.4611910900443576, |
|
"grad_norm": 1.1350332498550415, |
|
"learning_rate": 1.1529915727525674e-05, |
|
"loss": 1.9842, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.4627924479959005, |
|
"grad_norm": 1.6506210565567017, |
|
"learning_rate": 1.1569950157135136e-05, |
|
"loss": 1.9927, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.46439380594744345, |
|
"grad_norm": 1.0234204530715942, |
|
"learning_rate": 1.16099845867446e-05, |
|
"loss": 1.9981, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.4659951638989863, |
|
"grad_norm": 0.9220559597015381, |
|
"learning_rate": 1.1650019016354065e-05, |
|
"loss": 1.9772, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.46759652185052925, |
|
"grad_norm": 1.008548617362976, |
|
"learning_rate": 1.169005344596353e-05, |
|
"loss": 1.9885, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.46919787980207217, |
|
"grad_norm": 1.0374430418014526, |
|
"learning_rate": 1.1730087875572993e-05, |
|
"loss": 1.9901, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.47079923775361504, |
|
"grad_norm": 1.4683129787445068, |
|
"learning_rate": 1.1770122305182459e-05, |
|
"loss": 1.9905, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.47240059570515797, |
|
"grad_norm": 2.1045260429382324, |
|
"learning_rate": 1.1810156734791922e-05, |
|
"loss": 1.9764, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.4740019536567009, |
|
"grad_norm": 0.9143902063369751, |
|
"learning_rate": 1.1850191164401386e-05, |
|
"loss": 1.9914, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.4756033116082438, |
|
"grad_norm": 1.0126798152923584, |
|
"learning_rate": 1.1890225594010852e-05, |
|
"loss": 1.9559, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.4772046695597867, |
|
"grad_norm": 1.282818078994751, |
|
"learning_rate": 1.1930260023620316e-05, |
|
"loss": 1.9927, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.4788060275113296, |
|
"grad_norm": 1.2307484149932861, |
|
"learning_rate": 1.1970294453229778e-05, |
|
"loss": 1.9825, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.48040738546287254, |
|
"grad_norm": 1.429739236831665, |
|
"learning_rate": 1.2010328882839241e-05, |
|
"loss": 1.9616, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.4820087434144154, |
|
"grad_norm": 1.5777498483657837, |
|
"learning_rate": 1.2050363312448707e-05, |
|
"loss": 1.9821, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.48361010136595833, |
|
"grad_norm": 1.1172056198120117, |
|
"learning_rate": 1.209039774205817e-05, |
|
"loss": 1.9669, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.48521145931750126, |
|
"grad_norm": 1.8118427991867065, |
|
"learning_rate": 1.2130432171667635e-05, |
|
"loss": 1.9555, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.48681281726904413, |
|
"grad_norm": 5.031758785247803, |
|
"learning_rate": 1.21704666012771e-05, |
|
"loss": 1.958, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.48841417522058705, |
|
"grad_norm": 1.171064853668213, |
|
"learning_rate": 1.2210501030886564e-05, |
|
"loss": 1.9697, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.49001553317213, |
|
"grad_norm": 1.6317328214645386, |
|
"learning_rate": 1.2250535460496028e-05, |
|
"loss": 1.9722, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.49161689112367285, |
|
"grad_norm": 0.9671623110771179, |
|
"learning_rate": 1.2290569890105493e-05, |
|
"loss": 1.9659, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.4932182490752158, |
|
"grad_norm": 1.0588128566741943, |
|
"learning_rate": 1.2330604319714955e-05, |
|
"loss": 1.9534, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.4948196070267587, |
|
"grad_norm": 1.1236603260040283, |
|
"learning_rate": 1.237063874932442e-05, |
|
"loss": 1.9505, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.4964209649783016, |
|
"grad_norm": 1.175752878189087, |
|
"learning_rate": 1.2410673178933885e-05, |
|
"loss": 1.9712, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.4980223229298445, |
|
"grad_norm": 1.0395989418029785, |
|
"learning_rate": 1.2450707608543348e-05, |
|
"loss": 1.9493, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.4996236808813874, |
|
"grad_norm": 0.9693764448165894, |
|
"learning_rate": 1.2490742038152812e-05, |
|
"loss": 1.9581, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.5012250388329303, |
|
"grad_norm": 1.100197434425354, |
|
"learning_rate": 1.2530776467762276e-05, |
|
"loss": 1.955, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.5028263967844733, |
|
"grad_norm": 1.3823459148406982, |
|
"learning_rate": 1.2570810897371742e-05, |
|
"loss": 1.9734, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.5044277547360161, |
|
"grad_norm": 0.9062979221343994, |
|
"learning_rate": 1.2610845326981205e-05, |
|
"loss": 1.9612, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.506029112687559, |
|
"grad_norm": 1.0007665157318115, |
|
"learning_rate": 1.2650879756590668e-05, |
|
"loss": 1.9664, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.5076304706391019, |
|
"grad_norm": 0.9745628833770752, |
|
"learning_rate": 1.2690914186200135e-05, |
|
"loss": 1.9648, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.5092318285906449, |
|
"grad_norm": 1.407834768295288, |
|
"learning_rate": 1.2730948615809597e-05, |
|
"loss": 1.9562, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.5108331865421878, |
|
"grad_norm": 1.207322597503662, |
|
"learning_rate": 1.277098304541906e-05, |
|
"loss": 1.9696, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.5124345444937307, |
|
"grad_norm": 1.4670792818069458, |
|
"learning_rate": 1.2811017475028526e-05, |
|
"loss": 1.9524, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.5140359024452736, |
|
"grad_norm": 1.023777961730957, |
|
"learning_rate": 1.285105190463799e-05, |
|
"loss": 1.97, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.5156372603968165, |
|
"grad_norm": 0.9778289198875427, |
|
"learning_rate": 1.2891086334247454e-05, |
|
"loss": 1.9494, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.5172386183483594, |
|
"grad_norm": 0.8971097469329834, |
|
"learning_rate": 1.2931120763856918e-05, |
|
"loss": 1.9628, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.5188399762999023, |
|
"grad_norm": 1.8562573194503784, |
|
"learning_rate": 1.2971155193466383e-05, |
|
"loss": 1.9543, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.5204413342514452, |
|
"grad_norm": 1.7294055223464966, |
|
"learning_rate": 1.3011189623075847e-05, |
|
"loss": 1.9519, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.5220426922029882, |
|
"grad_norm": 1.2172763347625732, |
|
"learning_rate": 1.3051224052685309e-05, |
|
"loss": 1.9758, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.5236440501545311, |
|
"grad_norm": 1.144281268119812, |
|
"learning_rate": 1.3091258482294775e-05, |
|
"loss": 1.9589, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.5252454081060739, |
|
"grad_norm": 1.057813048362732, |
|
"learning_rate": 1.3131292911904238e-05, |
|
"loss": 1.9443, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.5268467660576168, |
|
"grad_norm": 1.297404170036316, |
|
"learning_rate": 1.3171327341513702e-05, |
|
"loss": 1.9614, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.5284481240091597, |
|
"grad_norm": 1.0840290784835815, |
|
"learning_rate": 1.3211361771123168e-05, |
|
"loss": 1.9633, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.5300494819607027, |
|
"grad_norm": 1.0041546821594238, |
|
"learning_rate": 1.3251396200732631e-05, |
|
"loss": 1.9484, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.5316508399122456, |
|
"grad_norm": 1.780435562133789, |
|
"learning_rate": 1.3291430630342095e-05, |
|
"loss": 1.9438, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.5332521978637885, |
|
"grad_norm": 0.9901188015937805, |
|
"learning_rate": 1.333146505995156e-05, |
|
"loss": 1.9384, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.5348535558153314, |
|
"grad_norm": 0.9118313789367676, |
|
"learning_rate": 1.3371499489561025e-05, |
|
"loss": 1.9507, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.5364549137668743, |
|
"grad_norm": 1.0270628929138184, |
|
"learning_rate": 1.3411533919170487e-05, |
|
"loss": 1.9685, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.5380562717184172, |
|
"grad_norm": 2.4503536224365234, |
|
"learning_rate": 1.345156834877995e-05, |
|
"loss": 1.9481, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.5396576296699601, |
|
"grad_norm": 1.1191452741622925, |
|
"learning_rate": 1.3491602778389416e-05, |
|
"loss": 1.9529, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.541258987621503, |
|
"grad_norm": 0.8804434537887573, |
|
"learning_rate": 1.353163720799888e-05, |
|
"loss": 1.9591, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.542860345573046, |
|
"grad_norm": 1.1734013557434082, |
|
"learning_rate": 1.3571671637608344e-05, |
|
"loss": 1.9643, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.5444617035245889, |
|
"grad_norm": 0.9487005472183228, |
|
"learning_rate": 1.361170606721781e-05, |
|
"loss": 1.9408, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.5460630614761317, |
|
"grad_norm": 1.025894045829773, |
|
"learning_rate": 1.3651740496827273e-05, |
|
"loss": 1.9682, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.5476644194276746, |
|
"grad_norm": 1.3745815753936768, |
|
"learning_rate": 1.3691774926436737e-05, |
|
"loss": 1.9441, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.5492657773792176, |
|
"grad_norm": 0.9772420525550842, |
|
"learning_rate": 1.3731809356046202e-05, |
|
"loss": 1.9593, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.5508671353307605, |
|
"grad_norm": 0.8825002908706665, |
|
"learning_rate": 1.3771843785655666e-05, |
|
"loss": 1.9413, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.5524684932823034, |
|
"grad_norm": 2.0654349327087402, |
|
"learning_rate": 1.3811878215265128e-05, |
|
"loss": 1.9478, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.5540698512338463, |
|
"grad_norm": 0.9932202696800232, |
|
"learning_rate": 1.3851912644874592e-05, |
|
"loss": 1.9529, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.5556712091853893, |
|
"grad_norm": 0.923985481262207, |
|
"learning_rate": 1.3891947074484058e-05, |
|
"loss": 1.9542, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.5572725671369321, |
|
"grad_norm": 1.2756383419036865, |
|
"learning_rate": 1.3931981504093521e-05, |
|
"loss": 1.9437, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.558873925088475, |
|
"grad_norm": 1.025530457496643, |
|
"learning_rate": 1.3972015933702985e-05, |
|
"loss": 1.9479, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.5604752830400179, |
|
"grad_norm": 0.9658239483833313, |
|
"learning_rate": 1.401205036331245e-05, |
|
"loss": 1.9392, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.5620766409915608, |
|
"grad_norm": 1.0094221830368042, |
|
"learning_rate": 1.4052084792921914e-05, |
|
"loss": 1.9311, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.5636779989431038, |
|
"grad_norm": 0.933716893196106, |
|
"learning_rate": 1.4092119222531378e-05, |
|
"loss": 1.9605, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.5652793568946467, |
|
"grad_norm": 1.0568841695785522, |
|
"learning_rate": 1.4132153652140844e-05, |
|
"loss": 1.9453, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.5668807148461895, |
|
"grad_norm": 0.9029392004013062, |
|
"learning_rate": 1.4172188081750306e-05, |
|
"loss": 1.9327, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.5684820727977324, |
|
"grad_norm": 0.9875580668449402, |
|
"learning_rate": 1.421222251135977e-05, |
|
"loss": 1.9405, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.5700834307492754, |
|
"grad_norm": 0.9351832270622253, |
|
"learning_rate": 1.4252256940969235e-05, |
|
"loss": 1.9527, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.5716847887008183, |
|
"grad_norm": 1.1400425434112549, |
|
"learning_rate": 1.4292291370578699e-05, |
|
"loss": 1.9451, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.5732861466523612, |
|
"grad_norm": 0.971022367477417, |
|
"learning_rate": 1.4332325800188163e-05, |
|
"loss": 1.9336, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.5748875046039041, |
|
"grad_norm": 0.8905283808708191, |
|
"learning_rate": 1.4372360229797627e-05, |
|
"loss": 1.9518, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.5764888625554471, |
|
"grad_norm": 1.2511688470840454, |
|
"learning_rate": 1.4412394659407092e-05, |
|
"loss": 1.9276, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.5780902205069899, |
|
"grad_norm": 1.2555015087127686, |
|
"learning_rate": 1.4452429089016556e-05, |
|
"loss": 1.9306, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.5796915784585328, |
|
"grad_norm": 2.5456793308258057, |
|
"learning_rate": 1.4492463518626018e-05, |
|
"loss": 1.9212, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.5812929364100757, |
|
"grad_norm": 5.189430236816406, |
|
"learning_rate": 1.4532497948235485e-05, |
|
"loss": 1.9298, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.5828942943616187, |
|
"grad_norm": 0.8082601428031921, |
|
"learning_rate": 1.4572532377844947e-05, |
|
"loss": 1.9289, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.5844956523131616, |
|
"grad_norm": 1.2962714433670044, |
|
"learning_rate": 1.4612566807454411e-05, |
|
"loss": 1.9303, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.5860970102647045, |
|
"grad_norm": 1.9360517263412476, |
|
"learning_rate": 1.4652601237063877e-05, |
|
"loss": 1.9155, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.5876983682162473, |
|
"grad_norm": 1.16732919216156, |
|
"learning_rate": 1.469263566667334e-05, |
|
"loss": 1.9132, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.5892997261677902, |
|
"grad_norm": 0.8907911777496338, |
|
"learning_rate": 1.4732670096282804e-05, |
|
"loss": 1.9312, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.5909010841193332, |
|
"grad_norm": 0.9275608062744141, |
|
"learning_rate": 1.477270452589227e-05, |
|
"loss": 1.9638, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.5925024420708761, |
|
"grad_norm": 1.2977879047393799, |
|
"learning_rate": 1.4812738955501734e-05, |
|
"loss": 1.9372, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.594103800022419, |
|
"grad_norm": 1.1967015266418457, |
|
"learning_rate": 1.4852773385111196e-05, |
|
"loss": 1.9319, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.595705157973962, |
|
"grad_norm": 1.0788534879684448, |
|
"learning_rate": 1.489280781472066e-05, |
|
"loss": 1.9326, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.5973065159255049, |
|
"grad_norm": 0.8467668890953064, |
|
"learning_rate": 1.4932842244330125e-05, |
|
"loss": 1.9238, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.5989078738770477, |
|
"grad_norm": 0.8952154517173767, |
|
"learning_rate": 1.4972876673939589e-05, |
|
"loss": 1.926, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.6005092318285906, |
|
"grad_norm": 0.8892629742622375, |
|
"learning_rate": 1.5012911103549053e-05, |
|
"loss": 1.943, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.6021105897801335, |
|
"grad_norm": 0.8832671642303467, |
|
"learning_rate": 1.5052945533158518e-05, |
|
"loss": 1.9035, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.6037119477316765, |
|
"grad_norm": 1.0101639032363892, |
|
"learning_rate": 1.5092979962767982e-05, |
|
"loss": 1.9282, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.6053133056832194, |
|
"grad_norm": 0.9980772733688354, |
|
"learning_rate": 1.5133014392377446e-05, |
|
"loss": 1.9334, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.6069146636347623, |
|
"grad_norm": 0.9352878332138062, |
|
"learning_rate": 1.5173048821986911e-05, |
|
"loss": 1.9286, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.6085160215863051, |
|
"grad_norm": 0.9329906105995178, |
|
"learning_rate": 1.5213083251596375e-05, |
|
"loss": 1.9133, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.6101173795378481, |
|
"grad_norm": 1.0744600296020508, |
|
"learning_rate": 1.5253117681205837e-05, |
|
"loss": 1.9431, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.611718737489391, |
|
"grad_norm": 1.1284574270248413, |
|
"learning_rate": 1.52931521108153e-05, |
|
"loss": 1.9236, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.6133200954409339, |
|
"grad_norm": 0.7931867241859436, |
|
"learning_rate": 1.5333186540424767e-05, |
|
"loss": 1.9239, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.6149214533924768, |
|
"grad_norm": 0.9535111784934998, |
|
"learning_rate": 1.5373220970034232e-05, |
|
"loss": 1.933, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.6165228113440198, |
|
"grad_norm": 1.1604766845703125, |
|
"learning_rate": 1.5413255399643694e-05, |
|
"loss": 1.9118, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.6181241692955627, |
|
"grad_norm": 0.9939236640930176, |
|
"learning_rate": 1.545328982925316e-05, |
|
"loss": 1.9004, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.6197255272471055, |
|
"grad_norm": 0.901757538318634, |
|
"learning_rate": 1.5493324258862622e-05, |
|
"loss": 1.9198, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.6213268851986484, |
|
"grad_norm": 1.034832239151001, |
|
"learning_rate": 1.5533358688472087e-05, |
|
"loss": 1.9175, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.6229282431501914, |
|
"grad_norm": 0.8186530470848083, |
|
"learning_rate": 1.5573393118081553e-05, |
|
"loss": 1.9098, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.6245296011017343, |
|
"grad_norm": 1.0724900960922241, |
|
"learning_rate": 1.5613427547691015e-05, |
|
"loss": 1.9143, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.6261309590532772, |
|
"grad_norm": 0.9440537691116333, |
|
"learning_rate": 1.565346197730048e-05, |
|
"loss": 1.9327, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.6277323170048201, |
|
"grad_norm": 0.9175347089767456, |
|
"learning_rate": 1.5693496406909946e-05, |
|
"loss": 1.9097, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.6293336749563629, |
|
"grad_norm": 1.075506567955017, |
|
"learning_rate": 1.5733530836519408e-05, |
|
"loss": 1.9148, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.6309350329079059, |
|
"grad_norm": 1.156162142753601, |
|
"learning_rate": 1.577356526612887e-05, |
|
"loss": 1.928, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.6325363908594488, |
|
"grad_norm": 1.2199561595916748, |
|
"learning_rate": 1.5813599695738336e-05, |
|
"loss": 1.9212, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.6341377488109917, |
|
"grad_norm": 1.088230848312378, |
|
"learning_rate": 1.58536341253478e-05, |
|
"loss": 1.9147, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.6357391067625346, |
|
"grad_norm": 0.911685049533844, |
|
"learning_rate": 1.5893668554957263e-05, |
|
"loss": 1.914, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.6373404647140776, |
|
"grad_norm": 0.8977714776992798, |
|
"learning_rate": 1.593370298456673e-05, |
|
"loss": 1.9212, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.6389418226656205, |
|
"grad_norm": 0.9816354513168335, |
|
"learning_rate": 1.5973737414176194e-05, |
|
"loss": 1.9046, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.6405431806171633, |
|
"grad_norm": 0.88201904296875, |
|
"learning_rate": 1.6013771843785656e-05, |
|
"loss": 1.9359, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.6421445385687062, |
|
"grad_norm": 0.9104109406471252, |
|
"learning_rate": 1.6053806273395122e-05, |
|
"loss": 1.9302, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.6437458965202492, |
|
"grad_norm": 1.5256859064102173, |
|
"learning_rate": 1.6093840703004587e-05, |
|
"loss": 1.9226, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.6453472544717921, |
|
"grad_norm": 0.8858827948570251, |
|
"learning_rate": 1.613387513261405e-05, |
|
"loss": 1.9143, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.646948612423335, |
|
"grad_norm": 1.480420470237732, |
|
"learning_rate": 1.617390956222351e-05, |
|
"loss": 1.9171, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.6485499703748779, |
|
"grad_norm": 0.9443252682685852, |
|
"learning_rate": 1.6213943991832977e-05, |
|
"loss": 1.9104, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.6501513283264208, |
|
"grad_norm": 1.4180731773376465, |
|
"learning_rate": 1.6253978421442443e-05, |
|
"loss": 1.9015, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.6517526862779637, |
|
"grad_norm": 1.0369699001312256, |
|
"learning_rate": 1.6294012851051905e-05, |
|
"loss": 1.9085, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.6533540442295066, |
|
"grad_norm": 1.0155749320983887, |
|
"learning_rate": 1.633404728066137e-05, |
|
"loss": 1.8968, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.6549554021810495, |
|
"grad_norm": 1.0214248895645142, |
|
"learning_rate": 1.6374081710270836e-05, |
|
"loss": 1.9109, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.6565567601325925, |
|
"grad_norm": 1.2233892679214478, |
|
"learning_rate": 1.6414116139880298e-05, |
|
"loss": 1.8968, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.6581581180841354, |
|
"grad_norm": 0.8677876591682434, |
|
"learning_rate": 1.6454150569489763e-05, |
|
"loss": 1.9121, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.6597594760356783, |
|
"grad_norm": 0.8257797956466675, |
|
"learning_rate": 1.649418499909923e-05, |
|
"loss": 1.9329, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.6613608339872211, |
|
"grad_norm": 0.904925525188446, |
|
"learning_rate": 1.653421942870869e-05, |
|
"loss": 1.8934, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.662962191938764, |
|
"grad_norm": 0.8754270672798157, |
|
"learning_rate": 1.6574253858318153e-05, |
|
"loss": 1.8885, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.664563549890307, |
|
"grad_norm": 0.9102962613105774, |
|
"learning_rate": 1.661428828792762e-05, |
|
"loss": 1.9046, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.6661649078418499, |
|
"grad_norm": 0.9199568033218384, |
|
"learning_rate": 1.6654322717537084e-05, |
|
"loss": 1.9122, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.6677662657933928, |
|
"grad_norm": 0.9582586288452148, |
|
"learning_rate": 1.6694357147146546e-05, |
|
"loss": 1.8959, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.6693676237449357, |
|
"grad_norm": 0.8151847124099731, |
|
"learning_rate": 1.6734391576756012e-05, |
|
"loss": 1.887, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.6709689816964786, |
|
"grad_norm": 0.9953237771987915, |
|
"learning_rate": 1.6774426006365477e-05, |
|
"loss": 1.9236, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.6725703396480215, |
|
"grad_norm": 1.465527057647705, |
|
"learning_rate": 1.681446043597494e-05, |
|
"loss": 1.9136, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.6741716975995644, |
|
"grad_norm": 0.9603108763694763, |
|
"learning_rate": 1.68544948655844e-05, |
|
"loss": 1.8941, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.6757730555511073, |
|
"grad_norm": 0.8624867796897888, |
|
"learning_rate": 1.689452929519387e-05, |
|
"loss": 1.905, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.6773744135026503, |
|
"grad_norm": 0.9774655699729919, |
|
"learning_rate": 1.6934563724803333e-05, |
|
"loss": 1.9156, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.6789757714541932, |
|
"grad_norm": 2.9199743270874023, |
|
"learning_rate": 1.6974598154412795e-05, |
|
"loss": 1.9126, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.6805771294057361, |
|
"grad_norm": 1.2201206684112549, |
|
"learning_rate": 1.701463258402226e-05, |
|
"loss": 1.8976, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.6821784873572789, |
|
"grad_norm": 1.0182702541351318, |
|
"learning_rate": 1.7054667013631726e-05, |
|
"loss": 1.8968, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.6837798453088219, |
|
"grad_norm": 1.134906530380249, |
|
"learning_rate": 1.7094701443241188e-05, |
|
"loss": 1.9361, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.6853812032603648, |
|
"grad_norm": 1.635399341583252, |
|
"learning_rate": 1.7134735872850653e-05, |
|
"loss": 1.919, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.6869825612119077, |
|
"grad_norm": 0.8835542798042297, |
|
"learning_rate": 1.717477030246012e-05, |
|
"loss": 1.8776, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.6885839191634506, |
|
"grad_norm": 0.9510149955749512, |
|
"learning_rate": 1.721480473206958e-05, |
|
"loss": 1.9036, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.6901852771149936, |
|
"grad_norm": 0.8410897850990295, |
|
"learning_rate": 1.7254839161679043e-05, |
|
"loss": 1.9091, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.6917866350665364, |
|
"grad_norm": 1.4297950267791748, |
|
"learning_rate": 1.729487359128851e-05, |
|
"loss": 1.8934, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.6933879930180793, |
|
"grad_norm": 0.9010776877403259, |
|
"learning_rate": 1.7334908020897974e-05, |
|
"loss": 1.8997, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.6949893509696222, |
|
"grad_norm": 0.8833039999008179, |
|
"learning_rate": 1.7374942450507436e-05, |
|
"loss": 1.892, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.6965907089211651, |
|
"grad_norm": 0.9560312032699585, |
|
"learning_rate": 1.74149768801169e-05, |
|
"loss": 1.8977, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.6981920668727081, |
|
"grad_norm": 0.8603575825691223, |
|
"learning_rate": 1.7455011309726367e-05, |
|
"loss": 1.8881, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.699793424824251, |
|
"grad_norm": 0.8545820116996765, |
|
"learning_rate": 1.749504573933583e-05, |
|
"loss": 1.8992, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.7013947827757939, |
|
"grad_norm": 1.6138850450515747, |
|
"learning_rate": 1.7535080168945295e-05, |
|
"loss": 1.8823, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.7029961407273367, |
|
"grad_norm": 1.339882731437683, |
|
"learning_rate": 1.757511459855476e-05, |
|
"loss": 1.8942, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.7045974986788797, |
|
"grad_norm": 0.8209664225578308, |
|
"learning_rate": 1.7615149028164222e-05, |
|
"loss": 1.885, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.7061988566304226, |
|
"grad_norm": 0.8096824884414673, |
|
"learning_rate": 1.7655183457773685e-05, |
|
"loss": 1.8738, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.7078002145819655, |
|
"grad_norm": 1.0560259819030762, |
|
"learning_rate": 1.769521788738315e-05, |
|
"loss": 1.8941, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.7094015725335084, |
|
"grad_norm": 1.1268258094787598, |
|
"learning_rate": 1.7735252316992616e-05, |
|
"loss": 1.8974, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.7110029304850514, |
|
"grad_norm": 0.9307839274406433, |
|
"learning_rate": 1.7775286746602078e-05, |
|
"loss": 1.9122, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.7126042884365942, |
|
"grad_norm": 1.0069445371627808, |
|
"learning_rate": 1.7815321176211543e-05, |
|
"loss": 1.8597, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.7142056463881371, |
|
"grad_norm": 1.2771753072738647, |
|
"learning_rate": 1.785535560582101e-05, |
|
"loss": 1.8819, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.71580700433968, |
|
"grad_norm": 0.7819973230361938, |
|
"learning_rate": 1.789539003543047e-05, |
|
"loss": 1.8722, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.717408362291223, |
|
"grad_norm": 0.8193828463554382, |
|
"learning_rate": 1.7935424465039936e-05, |
|
"loss": 1.8745, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.7190097202427659, |
|
"grad_norm": 0.7969743609428406, |
|
"learning_rate": 1.7975458894649402e-05, |
|
"loss": 1.911, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.7206110781943088, |
|
"grad_norm": 1.4411369562149048, |
|
"learning_rate": 1.8015493324258864e-05, |
|
"loss": 1.8763, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.7222124361458517, |
|
"grad_norm": 1.0016000270843506, |
|
"learning_rate": 1.805552775386833e-05, |
|
"loss": 1.8875, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.7238137940973945, |
|
"grad_norm": 0.8997382521629333, |
|
"learning_rate": 1.809556218347779e-05, |
|
"loss": 1.8766, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.7254151520489375, |
|
"grad_norm": 0.8878375291824341, |
|
"learning_rate": 1.8135596613087257e-05, |
|
"loss": 1.8803, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.7270165100004804, |
|
"grad_norm": 0.8563076853752136, |
|
"learning_rate": 1.817563104269672e-05, |
|
"loss": 1.8855, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.7286178679520233, |
|
"grad_norm": 1.020241618156433, |
|
"learning_rate": 1.8215665472306185e-05, |
|
"loss": 1.8583, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.7302192259035662, |
|
"grad_norm": 0.8296322822570801, |
|
"learning_rate": 1.825569990191565e-05, |
|
"loss": 1.8692, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.7318205838551092, |
|
"grad_norm": 1.9434692859649658, |
|
"learning_rate": 1.8295734331525112e-05, |
|
"loss": 1.8831, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.733421941806652, |
|
"grad_norm": 0.9088252782821655, |
|
"learning_rate": 1.8335768761134578e-05, |
|
"loss": 1.8868, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.7350232997581949, |
|
"grad_norm": 1.7532590627670288, |
|
"learning_rate": 1.837580319074404e-05, |
|
"loss": 1.8734, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.7366246577097378, |
|
"grad_norm": 0.9662244319915771, |
|
"learning_rate": 1.8415837620353505e-05, |
|
"loss": 1.888, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.7382260156612808, |
|
"grad_norm": 3.4851512908935547, |
|
"learning_rate": 1.845587204996297e-05, |
|
"loss": 1.8829, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.7398273736128237, |
|
"grad_norm": 0.9157941341400146, |
|
"learning_rate": 1.8495906479572433e-05, |
|
"loss": 1.9091, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.7414287315643666, |
|
"grad_norm": 0.8992369771003723, |
|
"learning_rate": 1.85359409091819e-05, |
|
"loss": 1.8932, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.7430300895159095, |
|
"grad_norm": 0.8611487150192261, |
|
"learning_rate": 1.857597533879136e-05, |
|
"loss": 1.8672, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.7446314474674524, |
|
"grad_norm": 1.0629839897155762, |
|
"learning_rate": 1.8616009768400826e-05, |
|
"loss": 1.8819, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.7462328054189953, |
|
"grad_norm": 0.8317407369613647, |
|
"learning_rate": 1.865604419801029e-05, |
|
"loss": 1.8684, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.7478341633705382, |
|
"grad_norm": 0.8102233409881592, |
|
"learning_rate": 1.8696078627619754e-05, |
|
"loss": 1.8796, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.7494355213220811, |
|
"grad_norm": 0.8077260255813599, |
|
"learning_rate": 1.873611305722922e-05, |
|
"loss": 1.8872, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.7510368792736241, |
|
"grad_norm": 0.9285743236541748, |
|
"learning_rate": 1.877614748683868e-05, |
|
"loss": 1.8828, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.752638237225167, |
|
"grad_norm": 0.835612416267395, |
|
"learning_rate": 1.8816181916448147e-05, |
|
"loss": 1.8685, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.7542395951767098, |
|
"grad_norm": 1.5960347652435303, |
|
"learning_rate": 1.8856216346057612e-05, |
|
"loss": 1.8705, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.7558409531282527, |
|
"grad_norm": 0.7755472660064697, |
|
"learning_rate": 1.8896250775667075e-05, |
|
"loss": 1.8851, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.7574423110797956, |
|
"grad_norm": 1.0042415857315063, |
|
"learning_rate": 1.893628520527654e-05, |
|
"loss": 1.855, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.7590436690313386, |
|
"grad_norm": 0.8991414904594421, |
|
"learning_rate": 1.8976319634886006e-05, |
|
"loss": 1.8569, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.7606450269828815, |
|
"grad_norm": 0.813565194606781, |
|
"learning_rate": 1.9016354064495468e-05, |
|
"loss": 1.8578, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.7622463849344244, |
|
"grad_norm": 0.7883344292640686, |
|
"learning_rate": 1.9056388494104933e-05, |
|
"loss": 1.8968, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.7638477428859674, |
|
"grad_norm": 1.0632473230361938, |
|
"learning_rate": 1.9096422923714395e-05, |
|
"loss": 1.879, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.7654491008375102, |
|
"grad_norm": 0.8479236364364624, |
|
"learning_rate": 1.913645735332386e-05, |
|
"loss": 1.8683, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.7670504587890531, |
|
"grad_norm": 0.871159553527832, |
|
"learning_rate": 1.9176491782933323e-05, |
|
"loss": 1.8653, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.768651816740596, |
|
"grad_norm": 0.8534667491912842, |
|
"learning_rate": 1.921652621254279e-05, |
|
"loss": 1.8507, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.7702531746921389, |
|
"grad_norm": 0.8931534290313721, |
|
"learning_rate": 1.9256560642152254e-05, |
|
"loss": 1.8625, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.7718545326436819, |
|
"grad_norm": 1.1518031358718872, |
|
"learning_rate": 1.9296595071761716e-05, |
|
"loss": 1.8786, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.7734558905952248, |
|
"grad_norm": 0.9310818910598755, |
|
"learning_rate": 1.933662950137118e-05, |
|
"loss": 1.8517, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.7750572485467676, |
|
"grad_norm": 1.314759612083435, |
|
"learning_rate": 1.9376663930980647e-05, |
|
"loss": 1.853, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.7766586064983105, |
|
"grad_norm": 0.8431141972541809, |
|
"learning_rate": 1.941669836059011e-05, |
|
"loss": 1.863, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.7782599644498535, |
|
"grad_norm": 0.90580815076828, |
|
"learning_rate": 1.945673279019957e-05, |
|
"loss": 1.8496, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.7798613224013964, |
|
"grad_norm": 1.0436537265777588, |
|
"learning_rate": 1.9496767219809037e-05, |
|
"loss": 1.8629, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.7814626803529393, |
|
"grad_norm": 0.8080843091011047, |
|
"learning_rate": 1.9536801649418502e-05, |
|
"loss": 1.857, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.7830640383044822, |
|
"grad_norm": 0.8750945925712585, |
|
"learning_rate": 1.9576836079027964e-05, |
|
"loss": 1.8725, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.7846653962560252, |
|
"grad_norm": 1.2619659900665283, |
|
"learning_rate": 1.961687050863743e-05, |
|
"loss": 1.8422, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.786266754207568, |
|
"grad_norm": 0.84897780418396, |
|
"learning_rate": 1.9656904938246895e-05, |
|
"loss": 1.8725, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.7878681121591109, |
|
"grad_norm": 0.7454677820205688, |
|
"learning_rate": 1.9696939367856358e-05, |
|
"loss": 1.8735, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.7894694701106538, |
|
"grad_norm": 0.8530156016349792, |
|
"learning_rate": 1.9736973797465823e-05, |
|
"loss": 1.8597, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.7910708280621968, |
|
"grad_norm": 0.9725930690765381, |
|
"learning_rate": 1.977700822707529e-05, |
|
"loss": 1.8515, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.7926721860137397, |
|
"grad_norm": 0.8235682249069214, |
|
"learning_rate": 1.981704265668475e-05, |
|
"loss": 1.8791, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.7942735439652826, |
|
"grad_norm": 0.9344043135643005, |
|
"learning_rate": 1.9857077086294213e-05, |
|
"loss": 1.8663, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.7958749019168254, |
|
"grad_norm": 0.9629167318344116, |
|
"learning_rate": 1.9897111515903678e-05, |
|
"loss": 1.8647, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.7974762598683683, |
|
"grad_norm": 0.7384589910507202, |
|
"learning_rate": 1.9937145945513144e-05, |
|
"loss": 1.8586, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.7990776178199113, |
|
"grad_norm": 1.069229245185852, |
|
"learning_rate": 1.9977180375122606e-05, |
|
"loss": 1.8622, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.8006789757714542, |
|
"grad_norm": 0.8724033236503601, |
|
"learning_rate": 1.9999415105482566e-05, |
|
"loss": 1.8602, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.8022803337229971, |
|
"grad_norm": 0.8449952602386475, |
|
"learning_rate": 1.9993531998299776e-05, |
|
"loss": 1.8321, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.80388169167454, |
|
"grad_norm": 0.8022226095199585, |
|
"learning_rate": 1.9981326651105962e-05, |
|
"loss": 1.8735, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.805483049626083, |
|
"grad_norm": 0.8840874433517456, |
|
"learning_rate": 1.9962806785408838e-05, |
|
"loss": 1.829, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.8070844075776258, |
|
"grad_norm": 1.0448670387268066, |
|
"learning_rate": 1.993798411749008e-05, |
|
"loss": 1.8595, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.8086857655291687, |
|
"grad_norm": 0.904961109161377, |
|
"learning_rate": 1.9906874350993245e-05, |
|
"loss": 1.8586, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.8102871234807116, |
|
"grad_norm": 2.3403968811035156, |
|
"learning_rate": 1.98694971669891e-05, |
|
"loss": 1.8492, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.8118884814322546, |
|
"grad_norm": 1.0114359855651855, |
|
"learning_rate": 1.9825876211524724e-05, |
|
"loss": 1.8609, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.8134898393837975, |
|
"grad_norm": 0.8057318329811096, |
|
"learning_rate": 1.977603908066426e-05, |
|
"loss": 1.829, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.8150911973353404, |
|
"grad_norm": 0.8821057677268982, |
|
"learning_rate": 1.9720017303030703e-05, |
|
"loss": 1.862, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.8166925552868832, |
|
"grad_norm": 2.767875909805298, |
|
"learning_rate": 1.9657846319859854e-05, |
|
"loss": 1.8678, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.8182939132384262, |
|
"grad_norm": 0.7666317820549011, |
|
"learning_rate": 1.9589565462579015e-05, |
|
"loss": 1.8621, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.8198952711899691, |
|
"grad_norm": 1.021729588508606, |
|
"learning_rate": 1.9515217927924633e-05, |
|
"loss": 1.8352, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.821496629141512, |
|
"grad_norm": 0.8282054662704468, |
|
"learning_rate": 1.943485075061461e-05, |
|
"loss": 1.8583, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.8230979870930549, |
|
"grad_norm": 3.0896732807159424, |
|
"learning_rate": 1.934851477359256e-05, |
|
"loss": 1.8394, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.8246993450445979, |
|
"grad_norm": 0.9474732279777527, |
|
"learning_rate": 1.9256264615862893e-05, |
|
"loss": 1.8682, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.8263007029961408, |
|
"grad_norm": 0.7810207009315491, |
|
"learning_rate": 1.9158158637937027e-05, |
|
"loss": 1.8337, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.8279020609476836, |
|
"grad_norm": 0.8208989500999451, |
|
"learning_rate": 1.9054258904912575e-05, |
|
"loss": 1.8367, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.8295034188992265, |
|
"grad_norm": 0.8764814138412476, |
|
"learning_rate": 1.89446311472089e-05, |
|
"loss": 1.8403, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.8311047768507694, |
|
"grad_norm": 1.1485708951950073, |
|
"learning_rate": 1.8829344718983903e-05, |
|
"loss": 1.8576, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.8327061348023124, |
|
"grad_norm": 1.06003737449646, |
|
"learning_rate": 1.8708472554258237e-05, |
|
"loss": 1.872, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.8343074927538553, |
|
"grad_norm": 0.8322979807853699, |
|
"learning_rate": 1.8582091120774855e-05, |
|
"loss": 1.859, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.8359088507053982, |
|
"grad_norm": 0.7536402940750122, |
|
"learning_rate": 1.845028037162298e-05, |
|
"loss": 1.8401, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.837510208656941, |
|
"grad_norm": 1.4201630353927612, |
|
"learning_rate": 1.83131236946571e-05, |
|
"loss": 1.8723, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.839111566608484, |
|
"grad_norm": 0.7676379680633545, |
|
"learning_rate": 1.8170707859743067e-05, |
|
"loss": 1.8572, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.8407129245600269, |
|
"grad_norm": 0.8063752055168152, |
|
"learning_rate": 1.8023122963864602e-05, |
|
"loss": 1.8469, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.8423142825115698, |
|
"grad_norm": 0.8385179042816162, |
|
"learning_rate": 1.787046237412493e-05, |
|
"loss": 1.8564, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.8439156404631127, |
|
"grad_norm": 0.8969714641571045, |
|
"learning_rate": 1.7712822668679682e-05, |
|
"loss": 1.8556, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.8455169984146557, |
|
"grad_norm": 1.184692621231079, |
|
"learning_rate": 1.7550303575638318e-05, |
|
"loss": 1.8423, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.8471183563661986, |
|
"grad_norm": 0.8388579487800598, |
|
"learning_rate": 1.7383007909972844e-05, |
|
"loss": 1.8157, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.8487197143177414, |
|
"grad_norm": 0.7864462733268738, |
|
"learning_rate": 1.721104150847362e-05, |
|
"loss": 1.8526, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.8503210722692843, |
|
"grad_norm": 0.86407071352005, |
|
"learning_rate": 1.703451316279353e-05, |
|
"loss": 1.8428, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.8519224302208273, |
|
"grad_norm": 0.8313634395599365, |
|
"learning_rate": 1.6853534550622722e-05, |
|
"loss": 1.8479, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.8535237881723702, |
|
"grad_norm": 1.4253445863723755, |
|
"learning_rate": 1.666822016503765e-05, |
|
"loss": 1.8275, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.8551251461239131, |
|
"grad_norm": 5.398781776428223, |
|
"learning_rate": 1.6478687242068904e-05, |
|
"loss": 1.854, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.856726504075456, |
|
"grad_norm": 1.7977509498596191, |
|
"learning_rate": 1.628505568653385e-05, |
|
"loss": 1.8339, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.8583278620269988, |
|
"grad_norm": 0.8206777572631836, |
|
"learning_rate": 1.6087447996180826e-05, |
|
"loss": 1.8511, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.8599292199785418, |
|
"grad_norm": 0.8535060286521912, |
|
"learning_rate": 1.5885989184193027e-05, |
|
"loss": 1.8586, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.8615305779300847, |
|
"grad_norm": 1.6550579071044922, |
|
"learning_rate": 1.5680806700101e-05, |
|
"loss": 1.8482, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.8631319358816276, |
|
"grad_norm": 0.8122648000717163, |
|
"learning_rate": 1.5472030349153854e-05, |
|
"loss": 1.8335, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.8647332938331705, |
|
"grad_norm": 0.7805556058883667, |
|
"learning_rate": 1.525979221020014e-05, |
|
"loss": 1.8252, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.8663346517847135, |
|
"grad_norm": 0.8546029329299927, |
|
"learning_rate": 1.5044226552130399e-05, |
|
"loss": 1.8353, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.8679360097362564, |
|
"grad_norm": 0.7961782217025757, |
|
"learning_rate": 1.4825469748934192e-05, |
|
"loss": 1.8348, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.8695373676877992, |
|
"grad_norm": 0.9392079710960388, |
|
"learning_rate": 1.4603660193425402e-05, |
|
"loss": 1.8205, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.8711387256393421, |
|
"grad_norm": 0.7852017283439636, |
|
"learning_rate": 1.4378938209690334e-05, |
|
"loss": 1.8327, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.8727400835908851, |
|
"grad_norm": 0.8385934829711914, |
|
"learning_rate": 1.4151445964314057e-05, |
|
"loss": 1.8383, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.874341441542428, |
|
"grad_norm": 0.7498407363891602, |
|
"learning_rate": 1.3921327376441087e-05, |
|
"loss": 1.8121, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.8759427994939709, |
|
"grad_norm": 0.8227770924568176, |
|
"learning_rate": 1.3688728026727369e-05, |
|
"loss": 1.8395, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.8775441574455138, |
|
"grad_norm": 0.911970317363739, |
|
"learning_rate": 1.3453795065241128e-05, |
|
"loss": 1.8262, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.8791455153970567, |
|
"grad_norm": 0.8143411874771118, |
|
"learning_rate": 1.3216677118370834e-05, |
|
"loss": 1.8571, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.8807468733485996, |
|
"grad_norm": 0.8301388621330261, |
|
"learning_rate": 1.2977524194799229e-05, |
|
"loss": 1.8435, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.8823482313001425, |
|
"grad_norm": 1.3477791547775269, |
|
"learning_rate": 1.2736487590602864e-05, |
|
"loss": 1.8372, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.8839495892516854, |
|
"grad_norm": 0.8804235458374023, |
|
"learning_rate": 1.2493719793537157e-05, |
|
"loss": 1.841, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.8855509472032284, |
|
"grad_norm": 0.7941620349884033, |
|
"learning_rate": 1.2249374386567598e-05, |
|
"loss": 1.8271, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.8871523051547713, |
|
"grad_norm": 0.8681734800338745, |
|
"learning_rate": 1.2003605950708059e-05, |
|
"loss": 1.8459, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.8887536631063142, |
|
"grad_norm": 0.7299553155899048, |
|
"learning_rate": 1.1756569967227716e-05, |
|
"loss": 1.8684, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.890355021057857, |
|
"grad_norm": 0.7805650234222412, |
|
"learning_rate": 1.1508422719288434e-05, |
|
"loss": 1.8113, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.8919563790094, |
|
"grad_norm": 0.7692527770996094, |
|
"learning_rate": 1.125932119307486e-05, |
|
"loss": 1.8252, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.8935577369609429, |
|
"grad_norm": 0.8291378021240234, |
|
"learning_rate": 1.1009422978479742e-05, |
|
"loss": 1.7992, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.8951590949124858, |
|
"grad_norm": 0.8779826164245605, |
|
"learning_rate": 1.0758886169407351e-05, |
|
"loss": 1.8336, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.8967604528640287, |
|
"grad_norm": 0.7980159521102905, |
|
"learning_rate": 1.050786926375801e-05, |
|
"loss": 1.8212, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.8983618108155716, |
|
"grad_norm": 3.2298014163970947, |
|
"learning_rate": 1.025653106315707e-05, |
|
"loss": 1.8188, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.8999631687671145, |
|
"grad_norm": 0.8914725184440613, |
|
"learning_rate": 1.0005030572491733e-05, |
|
"loss": 1.8387, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.9015645267186574, |
|
"grad_norm": 0.8599027395248413, |
|
"learning_rate": 9.753526899319275e-06, |
|
"loss": 1.8327, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.9031658846702003, |
|
"grad_norm": 0.9533581733703613, |
|
"learning_rate": 9.50217915321035e-06, |
|
"loss": 1.822, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.9047672426217432, |
|
"grad_norm": 0.8099405169487, |
|
"learning_rate": 9.251146345090958e-06, |
|
"loss": 1.8462, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.9063686005732862, |
|
"grad_norm": 0.8883758783340454, |
|
"learning_rate": 9.000587286646886e-06, |
|
"loss": 1.8184, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.9079699585248291, |
|
"grad_norm": 1.6830765008926392, |
|
"learning_rate": 8.750660489854142e-06, |
|
"loss": 1.82, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.909571316476372, |
|
"grad_norm": 1.2402883768081665, |
|
"learning_rate": 8.501524066699047e-06, |
|
"loss": 1.816, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.9111726744279148, |
|
"grad_norm": 0.8525800108909607, |
|
"learning_rate": 8.253335629151306e-06, |
|
"loss": 1.8248, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.9127740323794578, |
|
"grad_norm": 0.8562950491905212, |
|
"learning_rate": 8.006252189453485e-06, |
|
"loss": 1.8284, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.9143753903310007, |
|
"grad_norm": 0.7687914371490479, |
|
"learning_rate": 7.760430060789828e-06, |
|
"loss": 1.8198, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.9159767482825436, |
|
"grad_norm": 0.9463182091712952, |
|
"learning_rate": 7.51602475839736e-06, |
|
"loss": 1.8266, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.9175781062340865, |
|
"grad_norm": 1.0767518281936646, |
|
"learning_rate": 7.273190901181783e-06, |
|
"loss": 1.8054, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.9191794641856295, |
|
"grad_norm": 0.8242263197898865, |
|
"learning_rate": 7.032082113900434e-06, |
|
"loss": 1.8337, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.9207808221371723, |
|
"grad_norm": 0.7926039695739746, |
|
"learning_rate": 6.792850929974142e-06, |
|
"loss": 1.8144, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.9223821800887152, |
|
"grad_norm": 0.7732511162757874, |
|
"learning_rate": 6.55564869498956e-06, |
|
"loss": 1.804, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.9239835380402581, |
|
"grad_norm": 0.7959622144699097, |
|
"learning_rate": 6.32062547095288e-06, |
|
"loss": 1.8222, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.925584895991801, |
|
"grad_norm": 0.8663679957389832, |
|
"learning_rate": 6.087929941355671e-06, |
|
"loss": 1.8496, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.927186253943344, |
|
"grad_norm": 0.7793252468109131, |
|
"learning_rate": 5.857709317112736e-06, |
|
"loss": 1.8177, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.9287876118948869, |
|
"grad_norm": 0.9085448980331421, |
|
"learning_rate": 5.630109243431608e-06, |
|
"loss": 1.8193, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.9303889698464298, |
|
"grad_norm": 0.7569569945335388, |
|
"learning_rate": 5.4052737076725824e-06, |
|
"loss": 1.8196, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.9319903277979726, |
|
"grad_norm": 0.8424269556999207, |
|
"learning_rate": 5.1833449482574895e-06, |
|
"loss": 1.835, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.9335916857495156, |
|
"grad_norm": 0.8512621521949768, |
|
"learning_rate": 4.964463364685001e-06, |
|
"loss": 1.8145, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.9351930437010585, |
|
"grad_norm": 1.0519986152648926, |
|
"learning_rate": 4.748767428709187e-06, |
|
"loss": 1.8213, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.9367944016526014, |
|
"grad_norm": 0.7896735072135925, |
|
"learning_rate": 4.536393596737752e-06, |
|
"loss": 1.8243, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.9383957596041443, |
|
"grad_norm": 1.0739407539367676, |
|
"learning_rate": 4.327476223505136e-06, |
|
"loss": 1.832, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.9399971175556873, |
|
"grad_norm": 0.8374795913696289, |
|
"learning_rate": 4.12214747707527e-06, |
|
"loss": 1.8338, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.9415984755072301, |
|
"grad_norm": 1.0221420526504517, |
|
"learning_rate": 3.920537255227669e-06, |
|
"loss": 1.8101, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.943199833458773, |
|
"grad_norm": 0.8421764969825745, |
|
"learning_rate": 3.7227731032797853e-06, |
|
"loss": 1.8329, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.9448011914103159, |
|
"grad_norm": 0.7701355814933777, |
|
"learning_rate": 3.5289801333976102e-06, |
|
"loss": 1.8216, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.9464025493618589, |
|
"grad_norm": 0.7741368412971497, |
|
"learning_rate": 3.339280945445559e-06, |
|
"loss": 1.8272, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.9480039073134018, |
|
"grad_norm": 1.7360873222351074, |
|
"learning_rate": 3.1537955494257345e-06, |
|
"loss": 1.8372, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.9496052652649447, |
|
"grad_norm": 0.7760699987411499, |
|
"learning_rate": 2.972641289555616e-06, |
|
"loss": 1.8182, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.9512066232164876, |
|
"grad_norm": 0.7646809220314026, |
|
"learning_rate": 2.7959327700322036e-06, |
|
"loss": 1.8084, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.9528079811680304, |
|
"grad_norm": 0.9442381858825684, |
|
"learning_rate": 2.623781782529625e-06, |
|
"loss": 1.8239, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.9544093391195734, |
|
"grad_norm": 0.8009527325630188, |
|
"learning_rate": 2.4562972354759698e-06, |
|
"loss": 1.8272, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.9560106970711163, |
|
"grad_norm": 0.7591850757598877, |
|
"learning_rate": 2.293585085154252e-06, |
|
"loss": 1.8314, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.9576120550226592, |
|
"grad_norm": 0.7954255938529968, |
|
"learning_rate": 2.135748268670902e-06, |
|
"loss": 1.8341, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.9592134129742022, |
|
"grad_norm": 1.0002678632736206, |
|
"learning_rate": 1.9828866388343814e-06, |
|
"loss": 1.8075, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.9608147709257451, |
|
"grad_norm": 0.7856830954551697, |
|
"learning_rate": 1.8350969009849483e-06, |
|
"loss": 1.8005, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.9624161288772879, |
|
"grad_norm": 0.9126999378204346, |
|
"learning_rate": 1.6924725518156637e-06, |
|
"loss": 1.8277, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.9640174868288308, |
|
"grad_norm": 0.8106286525726318, |
|
"learning_rate": 1.5551038202232805e-06, |
|
"loss": 1.8108, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.9656188447803737, |
|
"grad_norm": 1.359531044960022, |
|
"learning_rate": 1.4230776102264454e-06, |
|
"loss": 1.8475, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.9672202027319167, |
|
"grad_norm": 0.7704586386680603, |
|
"learning_rate": 1.2964774459873364e-06, |
|
"loss": 1.8482, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.9688215606834596, |
|
"grad_norm": 0.7488996982574463, |
|
"learning_rate": 1.1753834189715019e-06, |
|
"loss": 1.8115, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.9704229186350025, |
|
"grad_norm": 1.662976861000061, |
|
"learning_rate": 1.059872137279342e-06, |
|
"loss": 1.8391, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.9720242765865454, |
|
"grad_norm": 1.0111815929412842, |
|
"learning_rate": 9.500166771812902e-07, |
|
"loss": 1.8161, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.9736256345380883, |
|
"grad_norm": 0.7973281145095825, |
|
"learning_rate": 8.458865368873204e-07, |
|
"loss": 1.8219, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.9752269924896312, |
|
"grad_norm": 0.8591629266738892, |
|
"learning_rate": 7.475475925800968e-07, |
|
"loss": 1.8399, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.9768283504411741, |
|
"grad_norm": 0.9209094047546387, |
|
"learning_rate": 6.550620567394883e-07, |
|
"loss": 1.8319, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.978429708392717, |
|
"grad_norm": 0.916976273059845, |
|
"learning_rate": 5.684884387849176e-07, |
|
"loss": 1.8189, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.98003106634426, |
|
"grad_norm": 0.950470507144928, |
|
"learning_rate": 4.878815080603372e-07, |
|
"loss": 1.8052, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.9816324242958029, |
|
"grad_norm": 0.7501734495162964, |
|
"learning_rate": 4.1329225918533277e-07, |
|
"loss": 1.8419, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.9832337822473457, |
|
"grad_norm": 0.8855769038200378, |
|
"learning_rate": 3.447678797942389e-07, |
|
"loss": 1.8168, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.9848351401988886, |
|
"grad_norm": 0.9513728618621826, |
|
"learning_rate": 2.823517206836701e-07, |
|
"loss": 1.8219, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.9864364981504316, |
|
"grad_norm": 0.9888412952423096, |
|
"learning_rate": 2.2608326838736817e-07, |
|
"loss": 1.8183, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.9880378561019745, |
|
"grad_norm": 0.8009938597679138, |
|
"learning_rate": 1.7599812019571395e-07, |
|
"loss": 1.8027, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.9896392140535174, |
|
"grad_norm": 0.9275427460670471, |
|
"learning_rate": 1.321279616356963e-07, |
|
"loss": 1.8145, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.9912405720050603, |
|
"grad_norm": 0.7663293480873108, |
|
"learning_rate": 9.450054642560102e-08, |
|
"loss": 1.8332, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.9928419299566033, |
|
"grad_norm": 0.7306997776031494, |
|
"learning_rate": 6.313967891707906e-08, |
|
"loss": 1.8059, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.9944432879081461, |
|
"grad_norm": 0.8004014492034912, |
|
"learning_rate": 3.806519903573502e-08, |
|
"loss": 1.8347, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.996044645859689, |
|
"grad_norm": 0.7328791618347168, |
|
"learning_rate": 1.9292969729719502e-08, |
|
"loss": 1.8156, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.9976460038112319, |
|
"grad_norm": 0.8255366086959839, |
|
"learning_rate": 6.834866934314344e-09, |
|
"loss": 1.8029, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.9992473617627748, |
|
"grad_norm": 0.8802406787872314, |
|
"learning_rate": 6.987720588080837e-10, |
|
"loss": 1.8173, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 62447, |
|
"total_flos": 7.631778497299481e+18, |
|
"train_loss": 2.1485319636500972, |
|
"train_runtime": 14119.5859, |
|
"train_samples_per_second": 35.382, |
|
"train_steps_per_second": 4.423 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 62447, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.631778497299481e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|