{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9999404631963325, "eval_steps": 500, "global_step": 37791, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007938240488995614, "grad_norm": 0.44355273246765137, "learning_rate": 4.998676933661454e-05, "loss": 1.5273, "step": 10 }, { "epoch": 0.0015876480977991228, "grad_norm": 0.27582135796546936, "learning_rate": 4.9973538673229073e-05, "loss": 1.3011, "step": 20 }, { "epoch": 0.0023814721466986844, "grad_norm": 0.3760926425457001, "learning_rate": 4.996030800984361e-05, "loss": 1.2377, "step": 30 }, { "epoch": 0.0031752961955982457, "grad_norm": 0.2921426594257355, "learning_rate": 4.994707734645815e-05, "loss": 1.2327, "step": 40 }, { "epoch": 0.003969120244497807, "grad_norm": 0.5147169828414917, "learning_rate": 4.99338466830727e-05, "loss": 1.1691, "step": 50 }, { "epoch": 0.004762944293397369, "grad_norm": 0.38984984159469604, "learning_rate": 4.992061601968723e-05, "loss": 1.1292, "step": 60 }, { "epoch": 0.00555676834229693, "grad_norm": 0.4045616090297699, "learning_rate": 4.990738535630177e-05, "loss": 1.0903, "step": 70 }, { "epoch": 0.006350592391196491, "grad_norm": 0.4870094656944275, "learning_rate": 4.989415469291631e-05, "loss": 1.1421, "step": 80 }, { "epoch": 0.007144416440096053, "grad_norm": 0.6016878485679626, "learning_rate": 4.9880924029530846e-05, "loss": 1.1459, "step": 90 }, { "epoch": 0.007938240488995614, "grad_norm": 0.5868497490882874, "learning_rate": 4.986769336614538e-05, "loss": 1.0909, "step": 100 }, { "epoch": 0.008732064537895176, "grad_norm": 0.6827491521835327, "learning_rate": 4.985446270275992e-05, "loss": 1.065, "step": 110 }, { "epoch": 0.009525888586794738, "grad_norm": 0.7056289315223694, "learning_rate": 4.9841232039374456e-05, "loss": 1.0764, "step": 120 }, { "epoch": 0.010319712635694298, "grad_norm": 0.5899627208709717, "learning_rate": 4.9828001375988995e-05, "loss": 1.0369, 
"step": 130 }, { "epoch": 0.01111353668459386, "grad_norm": 0.6351290345191956, "learning_rate": 4.9814770712603534e-05, "loss": 1.0332, "step": 140 }, { "epoch": 0.011907360733493421, "grad_norm": 0.7403615117073059, "learning_rate": 4.980154004921807e-05, "loss": 1.0837, "step": 150 }, { "epoch": 0.012701184782392983, "grad_norm": 0.7318956255912781, "learning_rate": 4.978830938583261e-05, "loss": 1.1196, "step": 160 }, { "epoch": 0.013495008831292545, "grad_norm": 0.8113905787467957, "learning_rate": 4.9775078722447144e-05, "loss": 1.0629, "step": 170 }, { "epoch": 0.014288832880192106, "grad_norm": 0.7525221109390259, "learning_rate": 4.976184805906168e-05, "loss": 1.0937, "step": 180 }, { "epoch": 0.015082656929091666, "grad_norm": 0.7905648350715637, "learning_rate": 4.974861739567622e-05, "loss": 1.0263, "step": 190 }, { "epoch": 0.015876480977991228, "grad_norm": 0.47737765312194824, "learning_rate": 4.973538673229076e-05, "loss": 1.1193, "step": 200 }, { "epoch": 0.016670305026890788, "grad_norm": 0.6782676577568054, "learning_rate": 4.972215606890529e-05, "loss": 1.0208, "step": 210 }, { "epoch": 0.01746412907579035, "grad_norm": 0.7456789016723633, "learning_rate": 4.970892540551983e-05, "loss": 1.039, "step": 220 }, { "epoch": 0.01825795312468991, "grad_norm": 0.8023837804794312, "learning_rate": 4.969569474213438e-05, "loss": 1.0291, "step": 230 }, { "epoch": 0.019051777173589475, "grad_norm": 0.6566641926765442, "learning_rate": 4.968246407874891e-05, "loss": 1.071, "step": 240 }, { "epoch": 0.019845601222489035, "grad_norm": 0.8695980906486511, "learning_rate": 4.966923341536345e-05, "loss": 0.9391, "step": 250 }, { "epoch": 0.020639425271388595, "grad_norm": 0.7018496990203857, "learning_rate": 4.965600275197799e-05, "loss": 1.0663, "step": 260 }, { "epoch": 0.02143324932028816, "grad_norm": 0.7785579562187195, "learning_rate": 4.9642772088592526e-05, "loss": 1.0047, "step": 270 }, { "epoch": 0.02222707336918772, "grad_norm": 0.6475448608398438, 
"learning_rate": 4.962954142520706e-05, "loss": 1.0443, "step": 280 }, { "epoch": 0.023020897418087282, "grad_norm": 0.8628908395767212, "learning_rate": 4.96163107618216e-05, "loss": 0.9987, "step": 290 }, { "epoch": 0.023814721466986842, "grad_norm": 0.8528658747673035, "learning_rate": 4.9603080098436136e-05, "loss": 1.0199, "step": 300 }, { "epoch": 0.024608545515886402, "grad_norm": 0.8548170328140259, "learning_rate": 4.9589849435050675e-05, "loss": 1.0916, "step": 310 }, { "epoch": 0.025402369564785966, "grad_norm": 0.8923755884170532, "learning_rate": 4.9576618771665214e-05, "loss": 0.996, "step": 320 }, { "epoch": 0.026196193613685526, "grad_norm": 0.8691526651382446, "learning_rate": 4.956338810827975e-05, "loss": 1.0162, "step": 330 }, { "epoch": 0.02699001766258509, "grad_norm": 0.8156205415725708, "learning_rate": 4.955015744489429e-05, "loss": 1.0359, "step": 340 }, { "epoch": 0.02778384171148465, "grad_norm": 0.8026768565177917, "learning_rate": 4.9536926781508824e-05, "loss": 1.0605, "step": 350 }, { "epoch": 0.028577665760384213, "grad_norm": 0.9509242177009583, "learning_rate": 4.952369611812336e-05, "loss": 1.004, "step": 360 }, { "epoch": 0.029371489809283773, "grad_norm": 0.8537798523902893, "learning_rate": 4.95104654547379e-05, "loss": 0.9256, "step": 370 }, { "epoch": 0.030165313858183333, "grad_norm": 0.7341266870498657, "learning_rate": 4.949723479135244e-05, "loss": 0.95, "step": 380 }, { "epoch": 0.030959137907082896, "grad_norm": 0.9486088752746582, "learning_rate": 4.948400412796697e-05, "loss": 0.9779, "step": 390 }, { "epoch": 0.031752961955982456, "grad_norm": 0.5967279672622681, "learning_rate": 4.947077346458152e-05, "loss": 1.0431, "step": 400 }, { "epoch": 0.03254678600488202, "grad_norm": 0.6559339165687561, "learning_rate": 4.945754280119606e-05, "loss": 1.0192, "step": 410 }, { "epoch": 0.033340610053781576, "grad_norm": 0.8226445317268372, "learning_rate": 4.94443121378106e-05, "loss": 1.0024, "step": 420 }, { "epoch": 
0.03413443410268114, "grad_norm": 1.0217857360839844, "learning_rate": 4.943108147442513e-05, "loss": 1.0036, "step": 430 }, { "epoch": 0.0349282581515807, "grad_norm": 0.9626277089118958, "learning_rate": 4.941785081103967e-05, "loss": 0.9955, "step": 440 }, { "epoch": 0.03572208220048027, "grad_norm": 0.6774649620056152, "learning_rate": 4.940462014765421e-05, "loss": 0.9775, "step": 450 }, { "epoch": 0.03651590624937982, "grad_norm": 0.8078123927116394, "learning_rate": 4.9391389484268746e-05, "loss": 0.9618, "step": 460 }, { "epoch": 0.03730973029827939, "grad_norm": 1.0022040605545044, "learning_rate": 4.937815882088328e-05, "loss": 0.9787, "step": 470 }, { "epoch": 0.03810355434717895, "grad_norm": 0.7025789618492126, "learning_rate": 4.936492815749782e-05, "loss": 0.9842, "step": 480 }, { "epoch": 0.03889737839607851, "grad_norm": 0.6538853049278259, "learning_rate": 4.9351697494112356e-05, "loss": 0.988, "step": 490 }, { "epoch": 0.03969120244497807, "grad_norm": 0.6774317026138306, "learning_rate": 4.9338466830726895e-05, "loss": 0.939, "step": 500 }, { "epoch": 0.040485026493877634, "grad_norm": 0.9341694116592407, "learning_rate": 4.9325236167341433e-05, "loss": 1.0455, "step": 510 }, { "epoch": 0.04127885054277719, "grad_norm": 0.8977088332176208, "learning_rate": 4.931200550395597e-05, "loss": 1.0065, "step": 520 }, { "epoch": 0.042072674591676754, "grad_norm": 0.6621760725975037, "learning_rate": 4.929877484057051e-05, "loss": 1.0243, "step": 530 }, { "epoch": 0.04286649864057632, "grad_norm": 0.8938838243484497, "learning_rate": 4.9285544177185043e-05, "loss": 1.0233, "step": 540 }, { "epoch": 0.04366032268947588, "grad_norm": 0.7396982908248901, "learning_rate": 4.927231351379958e-05, "loss": 0.9628, "step": 550 }, { "epoch": 0.04445414673837544, "grad_norm": 0.8066884279251099, "learning_rate": 4.925908285041412e-05, "loss": 1.0092, "step": 560 }, { "epoch": 0.045247970787275, "grad_norm": 0.8251396417617798, "learning_rate": 4.924585218702866e-05, 
"loss": 1.0094, "step": 570 }, { "epoch": 0.046041794836174564, "grad_norm": 0.8999793529510498, "learning_rate": 4.92326215236432e-05, "loss": 0.9971, "step": 580 }, { "epoch": 0.04683561888507412, "grad_norm": 0.7802185416221619, "learning_rate": 4.921939086025774e-05, "loss": 0.9441, "step": 590 }, { "epoch": 0.047629442933973684, "grad_norm": 0.9204962849617004, "learning_rate": 4.920616019687228e-05, "loss": 1.0266, "step": 600 }, { "epoch": 0.04842326698287325, "grad_norm": 0.8698873519897461, "learning_rate": 4.919292953348681e-05, "loss": 0.9666, "step": 610 }, { "epoch": 0.049217091031772804, "grad_norm": 0.7144138813018799, "learning_rate": 4.917969887010135e-05, "loss": 1.0201, "step": 620 }, { "epoch": 0.05001091508067237, "grad_norm": 0.9176733493804932, "learning_rate": 4.916646820671589e-05, "loss": 0.9937, "step": 630 }, { "epoch": 0.05080473912957193, "grad_norm": 0.8604631423950195, "learning_rate": 4.9153237543330426e-05, "loss": 0.9882, "step": 640 }, { "epoch": 0.051598563178471495, "grad_norm": 1.0573097467422485, "learning_rate": 4.914000687994496e-05, "loss": 0.9997, "step": 650 }, { "epoch": 0.05239238722737105, "grad_norm": 0.9698209166526794, "learning_rate": 4.91267762165595e-05, "loss": 0.9492, "step": 660 }, { "epoch": 0.053186211276270615, "grad_norm": 0.8090052604675293, "learning_rate": 4.911354555317404e-05, "loss": 0.9408, "step": 670 }, { "epoch": 0.05398003532517018, "grad_norm": 0.9131582975387573, "learning_rate": 4.910031488978858e-05, "loss": 0.92, "step": 680 }, { "epoch": 0.054773859374069735, "grad_norm": 0.8041892051696777, "learning_rate": 4.9087084226403114e-05, "loss": 1.021, "step": 690 }, { "epoch": 0.0555676834229693, "grad_norm": 1.0868831872940063, "learning_rate": 4.907385356301765e-05, "loss": 1.0078, "step": 700 }, { "epoch": 0.05636150747186886, "grad_norm": 0.7588531374931335, "learning_rate": 4.906062289963219e-05, "loss": 0.9926, "step": 710 }, { "epoch": 0.057155331520768425, "grad_norm": 
0.7499911189079285, "learning_rate": 4.9047392236246724e-05, "loss": 1.0117, "step": 720 }, { "epoch": 0.05794915556966798, "grad_norm": 0.8259711265563965, "learning_rate": 4.903416157286126e-05, "loss": 0.9394, "step": 730 }, { "epoch": 0.058742979618567545, "grad_norm": 0.7790316939353943, "learning_rate": 4.90209309094758e-05, "loss": 0.9598, "step": 740 }, { "epoch": 0.05953680366746711, "grad_norm": 0.8743465542793274, "learning_rate": 4.900770024609034e-05, "loss": 0.9337, "step": 750 }, { "epoch": 0.060330627716366665, "grad_norm": 0.6910851001739502, "learning_rate": 4.899446958270488e-05, "loss": 1.0239, "step": 760 }, { "epoch": 0.06112445176526623, "grad_norm": 0.8479220271110535, "learning_rate": 4.898123891931942e-05, "loss": 0.94, "step": 770 }, { "epoch": 0.06191827581416579, "grad_norm": 0.9144072532653809, "learning_rate": 4.896800825593396e-05, "loss": 0.9813, "step": 780 }, { "epoch": 0.06271209986306535, "grad_norm": 0.7847743034362793, "learning_rate": 4.8954777592548496e-05, "loss": 1.0148, "step": 790 }, { "epoch": 0.06350592391196491, "grad_norm": 0.6444619297981262, "learning_rate": 4.894154692916303e-05, "loss": 1.0174, "step": 800 }, { "epoch": 0.06429974796086448, "grad_norm": 0.8129200339317322, "learning_rate": 4.892831626577757e-05, "loss": 0.9764, "step": 810 }, { "epoch": 0.06509357200976404, "grad_norm": 0.8406952619552612, "learning_rate": 4.8915085602392106e-05, "loss": 0.9332, "step": 820 }, { "epoch": 0.0658873960586636, "grad_norm": 0.778441846370697, "learning_rate": 4.890185493900664e-05, "loss": 0.9439, "step": 830 }, { "epoch": 0.06668122010756315, "grad_norm": 0.9417208433151245, "learning_rate": 4.8888624275621184e-05, "loss": 0.9337, "step": 840 }, { "epoch": 0.06747504415646272, "grad_norm": 0.8156752586364746, "learning_rate": 4.887539361223572e-05, "loss": 0.9909, "step": 850 }, { "epoch": 0.06826886820536228, "grad_norm": 0.8329022526741028, "learning_rate": 4.886216294885026e-05, "loss": 0.9374, "step": 860 }, { 
"epoch": 0.06906269225426184, "grad_norm": 0.816589891910553, "learning_rate": 4.8848932285464794e-05, "loss": 1.0349, "step": 870 }, { "epoch": 0.0698565163031614, "grad_norm": 0.7630716562271118, "learning_rate": 4.883570162207933e-05, "loss": 0.9716, "step": 880 }, { "epoch": 0.07065034035206097, "grad_norm": 0.978765606880188, "learning_rate": 4.882247095869387e-05, "loss": 1.0316, "step": 890 }, { "epoch": 0.07144416440096053, "grad_norm": 0.7739888429641724, "learning_rate": 4.880924029530841e-05, "loss": 0.9596, "step": 900 }, { "epoch": 0.07223798844986008, "grad_norm": 0.9134480357170105, "learning_rate": 4.879600963192294e-05, "loss": 0.9758, "step": 910 }, { "epoch": 0.07303181249875965, "grad_norm": 0.8623645305633545, "learning_rate": 4.878277896853748e-05, "loss": 1.001, "step": 920 }, { "epoch": 0.07382563654765921, "grad_norm": 0.8770903944969177, "learning_rate": 4.876954830515202e-05, "loss": 0.9698, "step": 930 }, { "epoch": 0.07461946059655877, "grad_norm": 0.6688238382339478, "learning_rate": 4.875631764176656e-05, "loss": 1.0244, "step": 940 }, { "epoch": 0.07541328464545834, "grad_norm": 0.7384278178215027, "learning_rate": 4.87430869783811e-05, "loss": 0.9876, "step": 950 }, { "epoch": 0.0762071086943579, "grad_norm": 0.8874367475509644, "learning_rate": 4.872985631499564e-05, "loss": 0.9847, "step": 960 }, { "epoch": 0.07700093274325746, "grad_norm": 0.6085987687110901, "learning_rate": 4.871662565161018e-05, "loss": 0.9255, "step": 970 }, { "epoch": 0.07779475679215701, "grad_norm": 0.7393110394477844, "learning_rate": 4.870339498822471e-05, "loss": 0.9081, "step": 980 }, { "epoch": 0.07858858084105658, "grad_norm": 0.9497765302658081, "learning_rate": 4.869016432483925e-05, "loss": 0.9948, "step": 990 }, { "epoch": 0.07938240488995614, "grad_norm": 0.7365654110908508, "learning_rate": 4.867693366145379e-05, "loss": 1.0194, "step": 1000 }, { "epoch": 0.0801762289388557, "grad_norm": 0.9192026257514954, "learning_rate": 
4.8663702998068326e-05, "loss": 0.9777, "step": 1010 }, { "epoch": 0.08097005298775527, "grad_norm": 0.9804342985153198, "learning_rate": 4.8650472334682865e-05, "loss": 0.9756, "step": 1020 }, { "epoch": 0.08176387703665483, "grad_norm": 0.976823091506958, "learning_rate": 4.8637241671297403e-05, "loss": 0.9507, "step": 1030 }, { "epoch": 0.08255770108555438, "grad_norm": 0.6893647909164429, "learning_rate": 4.862401100791194e-05, "loss": 1.003, "step": 1040 }, { "epoch": 0.08335152513445394, "grad_norm": 0.8077487945556641, "learning_rate": 4.8610780344526475e-05, "loss": 0.944, "step": 1050 }, { "epoch": 0.08414534918335351, "grad_norm": 0.6170672178268433, "learning_rate": 4.8597549681141013e-05, "loss": 0.9685, "step": 1060 }, { "epoch": 0.08493917323225307, "grad_norm": 0.7847235798835754, "learning_rate": 4.858431901775555e-05, "loss": 0.9448, "step": 1070 }, { "epoch": 0.08573299728115263, "grad_norm": 0.7903896570205688, "learning_rate": 4.857108835437009e-05, "loss": 1.0231, "step": 1080 }, { "epoch": 0.0865268213300522, "grad_norm": 0.8650990724563599, "learning_rate": 4.8557857690984623e-05, "loss": 0.9676, "step": 1090 }, { "epoch": 0.08732064537895176, "grad_norm": 0.9188217520713806, "learning_rate": 4.854462702759916e-05, "loss": 0.9851, "step": 1100 }, { "epoch": 0.08811446942785131, "grad_norm": 0.9663834571838379, "learning_rate": 4.853139636421371e-05, "loss": 0.9442, "step": 1110 }, { "epoch": 0.08890829347675087, "grad_norm": 0.8045158386230469, "learning_rate": 4.851816570082825e-05, "loss": 0.9411, "step": 1120 }, { "epoch": 0.08970211752565044, "grad_norm": 0.9113909006118774, "learning_rate": 4.850493503744278e-05, "loss": 0.8866, "step": 1130 }, { "epoch": 0.09049594157455, "grad_norm": 0.8947747349739075, "learning_rate": 4.849170437405732e-05, "loss": 0.9206, "step": 1140 }, { "epoch": 0.09128976562344956, "grad_norm": 0.7741749882698059, "learning_rate": 4.847847371067186e-05, "loss": 1.0066, "step": 1150 }, { "epoch": 
0.09208358967234913, "grad_norm": 0.9636614918708801, "learning_rate": 4.846524304728639e-05, "loss": 0.9518, "step": 1160 }, { "epoch": 0.09287741372124869, "grad_norm": 0.7008812427520752, "learning_rate": 4.845201238390093e-05, "loss": 0.9543, "step": 1170 }, { "epoch": 0.09367123777014824, "grad_norm": 0.9602216482162476, "learning_rate": 4.843878172051547e-05, "loss": 0.915, "step": 1180 }, { "epoch": 0.0944650618190478, "grad_norm": 0.9824376702308655, "learning_rate": 4.8425551057130006e-05, "loss": 0.9073, "step": 1190 }, { "epoch": 0.09525888586794737, "grad_norm": 0.7355358600616455, "learning_rate": 4.8412320393744545e-05, "loss": 0.9487, "step": 1200 }, { "epoch": 0.09605270991684693, "grad_norm": 1.0072027444839478, "learning_rate": 4.8399089730359084e-05, "loss": 0.9659, "step": 1210 }, { "epoch": 0.0968465339657465, "grad_norm": 0.78026282787323, "learning_rate": 4.838585906697362e-05, "loss": 0.9716, "step": 1220 }, { "epoch": 0.09764035801464606, "grad_norm": 0.8772688508033752, "learning_rate": 4.837262840358816e-05, "loss": 0.9621, "step": 1230 }, { "epoch": 0.09843418206354561, "grad_norm": 0.8083699345588684, "learning_rate": 4.8359397740202694e-05, "loss": 0.8598, "step": 1240 }, { "epoch": 0.09922800611244517, "grad_norm": 0.8948017954826355, "learning_rate": 4.834616707681723e-05, "loss": 0.9425, "step": 1250 }, { "epoch": 0.10002183016134474, "grad_norm": 0.7335965633392334, "learning_rate": 4.833293641343177e-05, "loss": 0.9441, "step": 1260 }, { "epoch": 0.1008156542102443, "grad_norm": 0.7533779740333557, "learning_rate": 4.831970575004631e-05, "loss": 0.9918, "step": 1270 }, { "epoch": 0.10160947825914386, "grad_norm": 0.8201155066490173, "learning_rate": 4.830647508666085e-05, "loss": 1.014, "step": 1280 }, { "epoch": 0.10240330230804343, "grad_norm": 0.6296638250350952, "learning_rate": 4.829324442327539e-05, "loss": 0.962, "step": 1290 }, { "epoch": 0.10319712635694299, "grad_norm": 0.8968091011047363, "learning_rate": 
4.828001375988993e-05, "loss": 0.9825, "step": 1300 }, { "epoch": 0.10399095040584254, "grad_norm": 0.9169098138809204, "learning_rate": 4.826678309650446e-05, "loss": 0.8705, "step": 1310 }, { "epoch": 0.1047847744547421, "grad_norm": 0.7152407169342041, "learning_rate": 4.8253552433119e-05, "loss": 0.9902, "step": 1320 }, { "epoch": 0.10557859850364167, "grad_norm": 0.9684945940971375, "learning_rate": 4.824032176973354e-05, "loss": 1.0179, "step": 1330 }, { "epoch": 0.10637242255254123, "grad_norm": 1.0346221923828125, "learning_rate": 4.8227091106348076e-05, "loss": 0.9245, "step": 1340 }, { "epoch": 0.10716624660144079, "grad_norm": 0.9577662348747253, "learning_rate": 4.821386044296261e-05, "loss": 1.0263, "step": 1350 }, { "epoch": 0.10796007065034036, "grad_norm": 0.756993293762207, "learning_rate": 4.820062977957715e-05, "loss": 0.9375, "step": 1360 }, { "epoch": 0.10875389469923992, "grad_norm": 0.6969035267829895, "learning_rate": 4.8187399116191686e-05, "loss": 0.9871, "step": 1370 }, { "epoch": 0.10954771874813947, "grad_norm": 1.1062333583831787, "learning_rate": 4.817416845280623e-05, "loss": 0.9659, "step": 1380 }, { "epoch": 0.11034154279703903, "grad_norm": 0.592980146408081, "learning_rate": 4.8160937789420764e-05, "loss": 1.0116, "step": 1390 }, { "epoch": 0.1111353668459386, "grad_norm": 0.8485913276672363, "learning_rate": 4.81477071260353e-05, "loss": 0.963, "step": 1400 }, { "epoch": 0.11192919089483816, "grad_norm": 0.8063706159591675, "learning_rate": 4.813447646264984e-05, "loss": 0.9862, "step": 1410 }, { "epoch": 0.11272301494373772, "grad_norm": 0.705464243888855, "learning_rate": 4.8121245799264374e-05, "loss": 0.9472, "step": 1420 }, { "epoch": 0.11351683899263729, "grad_norm": 0.8876354098320007, "learning_rate": 4.810801513587891e-05, "loss": 0.924, "step": 1430 }, { "epoch": 0.11431066304153685, "grad_norm": 1.0832111835479736, "learning_rate": 4.809478447249345e-05, "loss": 0.9387, "step": 1440 }, { "epoch": 0.1151044870904364, 
"grad_norm": 0.8490377068519592, "learning_rate": 4.808155380910799e-05, "loss": 1.0323, "step": 1450 }, { "epoch": 0.11589831113933596, "grad_norm": 0.8763223886489868, "learning_rate": 4.806832314572253e-05, "loss": 0.9555, "step": 1460 }, { "epoch": 0.11669213518823553, "grad_norm": 0.9210900664329529, "learning_rate": 4.805509248233707e-05, "loss": 1.0096, "step": 1470 }, { "epoch": 0.11748595923713509, "grad_norm": 0.7176244258880615, "learning_rate": 4.804186181895161e-05, "loss": 0.9415, "step": 1480 }, { "epoch": 0.11827978328603465, "grad_norm": 0.9028609991073608, "learning_rate": 4.802863115556615e-05, "loss": 0.9449, "step": 1490 }, { "epoch": 0.11907360733493422, "grad_norm": 0.779123067855835, "learning_rate": 4.801540049218068e-05, "loss": 0.899, "step": 1500 }, { "epoch": 0.11986743138383377, "grad_norm": 0.7735058665275574, "learning_rate": 4.800216982879522e-05, "loss": 0.9138, "step": 1510 }, { "epoch": 0.12066125543273333, "grad_norm": 0.851739764213562, "learning_rate": 4.798893916540976e-05, "loss": 0.9476, "step": 1520 }, { "epoch": 0.1214550794816329, "grad_norm": 0.8166431188583374, "learning_rate": 4.797570850202429e-05, "loss": 0.9746, "step": 1530 }, { "epoch": 0.12224890353053246, "grad_norm": 0.8283756971359253, "learning_rate": 4.796247783863883e-05, "loss": 0.9072, "step": 1540 }, { "epoch": 0.12304272757943202, "grad_norm": 0.8357489705085754, "learning_rate": 4.794924717525337e-05, "loss": 0.9563, "step": 1550 }, { "epoch": 0.12383655162833158, "grad_norm": 0.9437370896339417, "learning_rate": 4.793601651186791e-05, "loss": 0.9709, "step": 1560 }, { "epoch": 0.12463037567723115, "grad_norm": 0.8020033836364746, "learning_rate": 4.7922785848482445e-05, "loss": 0.9925, "step": 1570 }, { "epoch": 0.1254241997261307, "grad_norm": 0.7915470600128174, "learning_rate": 4.7909555185096983e-05, "loss": 0.9991, "step": 1580 }, { "epoch": 0.12621802377503027, "grad_norm": 0.6978237628936768, "learning_rate": 4.789632452171152e-05, "loss": 
0.9994, "step": 1590 }, { "epoch": 0.12701184782392982, "grad_norm": 0.9151979088783264, "learning_rate": 4.788309385832606e-05, "loss": 0.9341, "step": 1600 }, { "epoch": 0.12780567187282937, "grad_norm": 0.8755201697349548, "learning_rate": 4.7869863194940593e-05, "loss": 0.9303, "step": 1610 }, { "epoch": 0.12859949592172895, "grad_norm": 0.9662395715713501, "learning_rate": 4.785663253155513e-05, "loss": 0.9088, "step": 1620 }, { "epoch": 0.1293933199706285, "grad_norm": 0.7612505555152893, "learning_rate": 4.784340186816967e-05, "loss": 1.0068, "step": 1630 }, { "epoch": 0.13018714401952808, "grad_norm": 0.8853713274002075, "learning_rate": 4.783017120478421e-05, "loss": 0.9877, "step": 1640 }, { "epoch": 0.13098096806842763, "grad_norm": 0.6972758173942566, "learning_rate": 4.781694054139875e-05, "loss": 0.9068, "step": 1650 }, { "epoch": 0.1317747921173272, "grad_norm": 0.8050327301025391, "learning_rate": 4.780370987801329e-05, "loss": 0.8972, "step": 1660 }, { "epoch": 0.13256861616622675, "grad_norm": 0.8144531846046448, "learning_rate": 4.779047921462783e-05, "loss": 0.889, "step": 1670 }, { "epoch": 0.1333624402151263, "grad_norm": 0.8536010980606079, "learning_rate": 4.777724855124236e-05, "loss": 0.9718, "step": 1680 }, { "epoch": 0.13415626426402588, "grad_norm": 0.7940757274627686, "learning_rate": 4.77640178878569e-05, "loss": 0.9628, "step": 1690 }, { "epoch": 0.13495008831292543, "grad_norm": 0.6885228157043457, "learning_rate": 4.775078722447144e-05, "loss": 0.9482, "step": 1700 }, { "epoch": 0.135743912361825, "grad_norm": 0.7801288366317749, "learning_rate": 4.7737556561085976e-05, "loss": 0.9084, "step": 1710 }, { "epoch": 0.13653773641072456, "grad_norm": 0.7502837777137756, "learning_rate": 4.772432589770051e-05, "loss": 0.869, "step": 1720 }, { "epoch": 0.13733156045962414, "grad_norm": 0.9921239614486694, "learning_rate": 4.7711095234315054e-05, "loss": 0.9065, "step": 1730 }, { "epoch": 0.13812538450852369, "grad_norm": 
0.8346391916275024, "learning_rate": 4.769786457092959e-05, "loss": 0.9831, "step": 1740 }, { "epoch": 0.13891920855742323, "grad_norm": 0.8118425607681274, "learning_rate": 4.7684633907544125e-05, "loss": 0.9673, "step": 1750 }, { "epoch": 0.1397130326063228, "grad_norm": 0.8836124539375305, "learning_rate": 4.7671403244158664e-05, "loss": 0.9506, "step": 1760 }, { "epoch": 0.14050685665522236, "grad_norm": 0.8998447060585022, "learning_rate": 4.76581725807732e-05, "loss": 0.9443, "step": 1770 }, { "epoch": 0.14130068070412194, "grad_norm": 0.9058572053909302, "learning_rate": 4.764494191738774e-05, "loss": 0.8735, "step": 1780 }, { "epoch": 0.1420945047530215, "grad_norm": 0.9656883478164673, "learning_rate": 4.7631711254002274e-05, "loss": 0.9379, "step": 1790 }, { "epoch": 0.14288832880192107, "grad_norm": 0.8430660367012024, "learning_rate": 4.761848059061681e-05, "loss": 0.9405, "step": 1800 }, { "epoch": 0.14368215285082062, "grad_norm": 0.963220477104187, "learning_rate": 4.760524992723135e-05, "loss": 0.9306, "step": 1810 }, { "epoch": 0.14447597689972017, "grad_norm": 0.806779146194458, "learning_rate": 4.759201926384589e-05, "loss": 0.964, "step": 1820 }, { "epoch": 0.14526980094861974, "grad_norm": 0.6742879152297974, "learning_rate": 4.757878860046043e-05, "loss": 0.9398, "step": 1830 }, { "epoch": 0.1460636249975193, "grad_norm": 0.8305578827857971, "learning_rate": 4.756555793707497e-05, "loss": 0.9661, "step": 1840 }, { "epoch": 0.14685744904641887, "grad_norm": 0.9429900646209717, "learning_rate": 4.755232727368951e-05, "loss": 0.9151, "step": 1850 }, { "epoch": 0.14765127309531842, "grad_norm": 0.7211086750030518, "learning_rate": 4.753909661030404e-05, "loss": 0.9367, "step": 1860 }, { "epoch": 0.148445097144218, "grad_norm": 0.932462751865387, "learning_rate": 4.752586594691858e-05, "loss": 0.9465, "step": 1870 }, { "epoch": 0.14923892119311755, "grad_norm": 0.8956672549247742, "learning_rate": 4.751263528353312e-05, "loss": 0.9948, "step": 1880 
}, { "epoch": 0.1500327452420171, "grad_norm": 0.78652423620224, "learning_rate": 4.7499404620147656e-05, "loss": 0.9323, "step": 1890 }, { "epoch": 0.15082656929091667, "grad_norm": 0.8287889361381531, "learning_rate": 4.7486173956762195e-05, "loss": 0.944, "step": 1900 }, { "epoch": 0.15162039333981622, "grad_norm": 1.0075963735580444, "learning_rate": 4.7472943293376734e-05, "loss": 0.9595, "step": 1910 }, { "epoch": 0.1524142173887158, "grad_norm": 1.075189471244812, "learning_rate": 4.745971262999127e-05, "loss": 0.9636, "step": 1920 }, { "epoch": 0.15320804143761535, "grad_norm": 0.9045613408088684, "learning_rate": 4.744648196660581e-05, "loss": 0.9544, "step": 1930 }, { "epoch": 0.15400186548651493, "grad_norm": 0.8848656415939331, "learning_rate": 4.7433251303220344e-05, "loss": 0.9707, "step": 1940 }, { "epoch": 0.15479568953541448, "grad_norm": 1.1052658557891846, "learning_rate": 4.742002063983488e-05, "loss": 0.9881, "step": 1950 }, { "epoch": 0.15558951358431403, "grad_norm": 0.8946327567100525, "learning_rate": 4.740678997644942e-05, "loss": 1.0025, "step": 1960 }, { "epoch": 0.1563833376332136, "grad_norm": 0.8792600035667419, "learning_rate": 4.7393559313063954e-05, "loss": 0.9993, "step": 1970 }, { "epoch": 0.15717716168211315, "grad_norm": 0.9298778176307678, "learning_rate": 4.738032864967849e-05, "loss": 0.9169, "step": 1980 }, { "epoch": 0.15797098573101273, "grad_norm": 0.7488551735877991, "learning_rate": 4.736709798629303e-05, "loss": 0.9047, "step": 1990 }, { "epoch": 0.15876480977991228, "grad_norm": 0.7787390351295471, "learning_rate": 4.735386732290758e-05, "loss": 0.936, "step": 2000 }, { "epoch": 0.15955863382881183, "grad_norm": 0.707550048828125, "learning_rate": 4.734063665952211e-05, "loss": 0.9948, "step": 2010 }, { "epoch": 0.1603524578777114, "grad_norm": 0.8335928916931152, "learning_rate": 4.732740599613665e-05, "loss": 0.9602, "step": 2020 }, { "epoch": 0.16114628192661096, "grad_norm": 1.0111210346221924, "learning_rate": 
4.731417533275119e-05, "loss": 0.9417, "step": 2030 }, { "epoch": 0.16194010597551053, "grad_norm": 0.6672728657722473, "learning_rate": 4.730094466936573e-05, "loss": 0.9389, "step": 2040 }, { "epoch": 0.16273393002441008, "grad_norm": 0.8862355947494507, "learning_rate": 4.728771400598026e-05, "loss": 0.9071, "step": 2050 }, { "epoch": 0.16352775407330966, "grad_norm": 0.8846628069877625, "learning_rate": 4.72744833425948e-05, "loss": 0.9351, "step": 2060 }, { "epoch": 0.1643215781222092, "grad_norm": 0.833533763885498, "learning_rate": 4.726125267920934e-05, "loss": 0.9713, "step": 2070 }, { "epoch": 0.16511540217110876, "grad_norm": 0.724327564239502, "learning_rate": 4.7248022015823876e-05, "loss": 0.9349, "step": 2080 }, { "epoch": 0.16590922622000834, "grad_norm": 0.9286043643951416, "learning_rate": 4.7234791352438415e-05, "loss": 0.9144, "step": 2090 }, { "epoch": 0.1667030502689079, "grad_norm": 0.8603926301002502, "learning_rate": 4.7221560689052953e-05, "loss": 0.9156, "step": 2100 }, { "epoch": 0.16749687431780746, "grad_norm": 0.9300916194915771, "learning_rate": 4.720833002566749e-05, "loss": 0.8894, "step": 2110 }, { "epoch": 0.16829069836670701, "grad_norm": 0.7799938917160034, "learning_rate": 4.7195099362282025e-05, "loss": 0.8955, "step": 2120 }, { "epoch": 0.1690845224156066, "grad_norm": 0.9887612462043762, "learning_rate": 4.7181868698896563e-05, "loss": 0.9712, "step": 2130 }, { "epoch": 0.16987834646450614, "grad_norm": 0.9083105325698853, "learning_rate": 4.71686380355111e-05, "loss": 0.9434, "step": 2140 }, { "epoch": 0.1706721705134057, "grad_norm": 0.868770182132721, "learning_rate": 4.715540737212564e-05, "loss": 0.9522, "step": 2150 }, { "epoch": 0.17146599456230527, "grad_norm": 0.819227933883667, "learning_rate": 4.7142176708740173e-05, "loss": 0.9812, "step": 2160 }, { "epoch": 0.17225981861120482, "grad_norm": 0.9821423292160034, "learning_rate": 4.712894604535472e-05, "loss": 0.9075, "step": 2170 }, { "epoch": 0.1730536426601044, 
"grad_norm": 0.6770303845405579, "learning_rate": 4.711571538196926e-05, "loss": 0.9006, "step": 2180 }, { "epoch": 0.17384746670900394, "grad_norm": 1.0133216381072998, "learning_rate": 4.71024847185838e-05, "loss": 0.9851, "step": 2190 }, { "epoch": 0.17464129075790352, "grad_norm": 0.6719434261322021, "learning_rate": 4.708925405519833e-05, "loss": 0.9876, "step": 2200 }, { "epoch": 0.17543511480680307, "grad_norm": 0.9074175357818604, "learning_rate": 4.707602339181287e-05, "loss": 0.8679, "step": 2210 }, { "epoch": 0.17622893885570262, "grad_norm": 0.7234490513801575, "learning_rate": 4.706279272842741e-05, "loss": 0.9469, "step": 2220 }, { "epoch": 0.1770227629046022, "grad_norm": 0.8408550024032593, "learning_rate": 4.704956206504194e-05, "loss": 0.8871, "step": 2230 }, { "epoch": 0.17781658695350175, "grad_norm": 0.9565715789794922, "learning_rate": 4.703633140165648e-05, "loss": 0.9597, "step": 2240 }, { "epoch": 0.17861041100240133, "grad_norm": 0.7657901644706726, "learning_rate": 4.702310073827102e-05, "loss": 0.912, "step": 2250 }, { "epoch": 0.17940423505130088, "grad_norm": 0.7222371101379395, "learning_rate": 4.7009870074885556e-05, "loss": 0.9369, "step": 2260 }, { "epoch": 0.18019805910020045, "grad_norm": 0.8453822731971741, "learning_rate": 4.6996639411500095e-05, "loss": 0.94, "step": 2270 }, { "epoch": 0.1809918831491, "grad_norm": 0.6532242298126221, "learning_rate": 4.6983408748114634e-05, "loss": 0.8897, "step": 2280 }, { "epoch": 0.18178570719799955, "grad_norm": 0.8275949358940125, "learning_rate": 4.697017808472917e-05, "loss": 0.9235, "step": 2290 }, { "epoch": 0.18257953124689913, "grad_norm": 0.7926145792007446, "learning_rate": 4.695694742134371e-05, "loss": 0.9327, "step": 2300 }, { "epoch": 0.18337335529579868, "grad_norm": 0.8629859089851379, "learning_rate": 4.6943716757958244e-05, "loss": 0.9714, "step": 2310 }, { "epoch": 0.18416717934469826, "grad_norm": 0.8121680021286011, "learning_rate": 4.693048609457278e-05, "loss": 
0.9545, "step": 2320 }, { "epoch": 0.1849610033935978, "grad_norm": 0.9470918774604797, "learning_rate": 4.691725543118732e-05, "loss": 0.8992, "step": 2330 }, { "epoch": 0.18575482744249738, "grad_norm": 0.8054450750350952, "learning_rate": 4.690402476780186e-05, "loss": 0.9984, "step": 2340 }, { "epoch": 0.18654865149139693, "grad_norm": 0.6857488751411438, "learning_rate": 4.68907941044164e-05, "loss": 0.9411, "step": 2350 }, { "epoch": 0.18734247554029648, "grad_norm": 0.8462006449699402, "learning_rate": 4.687756344103094e-05, "loss": 0.9527, "step": 2360 }, { "epoch": 0.18813629958919606, "grad_norm": 0.8726380467414856, "learning_rate": 4.686433277764548e-05, "loss": 0.8983, "step": 2370 }, { "epoch": 0.1889301236380956, "grad_norm": 0.883798360824585, "learning_rate": 4.685110211426001e-05, "loss": 0.9382, "step": 2380 }, { "epoch": 0.1897239476869952, "grad_norm": 0.8085561394691467, "learning_rate": 4.683787145087455e-05, "loss": 0.9371, "step": 2390 }, { "epoch": 0.19051777173589474, "grad_norm": 0.816972017288208, "learning_rate": 4.682464078748909e-05, "loss": 0.9322, "step": 2400 }, { "epoch": 0.1913115957847943, "grad_norm": 0.8701701164245605, "learning_rate": 4.6811410124103626e-05, "loss": 0.9277, "step": 2410 }, { "epoch": 0.19210541983369386, "grad_norm": 0.9203164577484131, "learning_rate": 4.679817946071816e-05, "loss": 0.9365, "step": 2420 }, { "epoch": 0.1928992438825934, "grad_norm": 0.9870917797088623, "learning_rate": 4.67849487973327e-05, "loss": 0.973, "step": 2430 }, { "epoch": 0.193693067931493, "grad_norm": 0.8381191492080688, "learning_rate": 4.677171813394724e-05, "loss": 0.9344, "step": 2440 }, { "epoch": 0.19448689198039254, "grad_norm": 0.9467155933380127, "learning_rate": 4.6758487470561775e-05, "loss": 0.9162, "step": 2450 }, { "epoch": 0.19528071602929212, "grad_norm": 0.8836456537246704, "learning_rate": 4.6745256807176314e-05, "loss": 0.9548, "step": 2460 }, { "epoch": 0.19607454007819167, "grad_norm": 0.806165337562561, 
"learning_rate": 4.673202614379085e-05, "loss": 0.9615, "step": 2470 }, { "epoch": 0.19686836412709122, "grad_norm": 0.8333777785301208, "learning_rate": 4.671879548040539e-05, "loss": 0.9331, "step": 2480 }, { "epoch": 0.1976621881759908, "grad_norm": 0.9667910933494568, "learning_rate": 4.6705564817019924e-05, "loss": 0.9031, "step": 2490 }, { "epoch": 0.19845601222489034, "grad_norm": 0.8661583065986633, "learning_rate": 4.669233415363446e-05, "loss": 0.9208, "step": 2500 }, { "epoch": 0.19924983627378992, "grad_norm": 1.0444538593292236, "learning_rate": 4.6679103490249e-05, "loss": 0.992, "step": 2510 }, { "epoch": 0.20004366032268947, "grad_norm": 0.6848312616348267, "learning_rate": 4.666587282686354e-05, "loss": 0.9215, "step": 2520 }, { "epoch": 0.20083748437158905, "grad_norm": 0.789969801902771, "learning_rate": 4.665264216347808e-05, "loss": 0.9371, "step": 2530 }, { "epoch": 0.2016313084204886, "grad_norm": 0.7210158109664917, "learning_rate": 4.663941150009262e-05, "loss": 0.9561, "step": 2540 }, { "epoch": 0.20242513246938815, "grad_norm": 0.9324051737785339, "learning_rate": 4.662618083670716e-05, "loss": 0.9632, "step": 2550 }, { "epoch": 0.20321895651828772, "grad_norm": 0.6545863151550293, "learning_rate": 4.661295017332169e-05, "loss": 0.9027, "step": 2560 }, { "epoch": 0.20401278056718727, "grad_norm": 0.9494184255599976, "learning_rate": 4.659971950993623e-05, "loss": 0.8897, "step": 2570 }, { "epoch": 0.20480660461608685, "grad_norm": 0.670927107334137, "learning_rate": 4.658648884655077e-05, "loss": 0.9166, "step": 2580 }, { "epoch": 0.2056004286649864, "grad_norm": 0.8145579695701599, "learning_rate": 4.657325818316531e-05, "loss": 0.9364, "step": 2590 }, { "epoch": 0.20639425271388598, "grad_norm": 0.9413288831710815, "learning_rate": 4.656002751977984e-05, "loss": 0.8997, "step": 2600 }, { "epoch": 0.20718807676278553, "grad_norm": 0.8547295331954956, "learning_rate": 4.6546796856394385e-05, "loss": 0.9594, "step": 2610 }, { "epoch": 
0.20798190081168508, "grad_norm": 0.711956799030304, "learning_rate": 4.6533566193008923e-05, "loss": 0.9728, "step": 2620 }, { "epoch": 0.20877572486058465, "grad_norm": 0.7029785513877869, "learning_rate": 4.652033552962346e-05, "loss": 0.9561, "step": 2630 }, { "epoch": 0.2095695489094842, "grad_norm": 0.7861992120742798, "learning_rate": 4.6507104866237995e-05, "loss": 0.9288, "step": 2640 }, { "epoch": 0.21036337295838378, "grad_norm": 0.9786797761917114, "learning_rate": 4.6493874202852533e-05, "loss": 0.9492, "step": 2650 }, { "epoch": 0.21115719700728333, "grad_norm": 0.8280521631240845, "learning_rate": 4.648064353946707e-05, "loss": 0.96, "step": 2660 }, { "epoch": 0.2119510210561829, "grad_norm": 0.5959718227386475, "learning_rate": 4.6467412876081605e-05, "loss": 0.989, "step": 2670 }, { "epoch": 0.21274484510508246, "grad_norm": 0.7416049838066101, "learning_rate": 4.6454182212696143e-05, "loss": 1.0148, "step": 2680 }, { "epoch": 0.213538669153982, "grad_norm": 0.915602445602417, "learning_rate": 4.644095154931068e-05, "loss": 0.9789, "step": 2690 }, { "epoch": 0.21433249320288159, "grad_norm": 0.616190493106842, "learning_rate": 4.642772088592522e-05, "loss": 0.9163, "step": 2700 }, { "epoch": 0.21512631725178114, "grad_norm": 0.7318758368492126, "learning_rate": 4.641449022253976e-05, "loss": 0.9084, "step": 2710 }, { "epoch": 0.2159201413006807, "grad_norm": 0.7663441896438599, "learning_rate": 4.64012595591543e-05, "loss": 0.921, "step": 2720 }, { "epoch": 0.21671396534958026, "grad_norm": 0.7115159034729004, "learning_rate": 4.638802889576884e-05, "loss": 0.9214, "step": 2730 }, { "epoch": 0.21750778939847984, "grad_norm": 0.569379985332489, "learning_rate": 4.637479823238338e-05, "loss": 0.9658, "step": 2740 }, { "epoch": 0.2183016134473794, "grad_norm": 0.9785438776016235, "learning_rate": 4.636156756899791e-05, "loss": 0.8818, "step": 2750 }, { "epoch": 0.21909543749627894, "grad_norm": 0.7893972396850586, "learning_rate": 
4.634833690561245e-05, "loss": 0.9254, "step": 2760 }, { "epoch": 0.21988926154517852, "grad_norm": 0.948264479637146, "learning_rate": 4.633510624222699e-05, "loss": 0.8619, "step": 2770 }, { "epoch": 0.22068308559407807, "grad_norm": 0.8142668008804321, "learning_rate": 4.6321875578841526e-05, "loss": 0.9289, "step": 2780 }, { "epoch": 0.22147690964297764, "grad_norm": 0.7517206072807312, "learning_rate": 4.6308644915456065e-05, "loss": 0.8759, "step": 2790 }, { "epoch": 0.2222707336918772, "grad_norm": 0.6064891219139099, "learning_rate": 4.6295414252070604e-05, "loss": 0.8558, "step": 2800 }, { "epoch": 0.22306455774077677, "grad_norm": 0.7425729036331177, "learning_rate": 4.628218358868514e-05, "loss": 0.9281, "step": 2810 }, { "epoch": 0.22385838178967632, "grad_norm": 0.7883281707763672, "learning_rate": 4.6268952925299675e-05, "loss": 0.8969, "step": 2820 }, { "epoch": 0.22465220583857587, "grad_norm": 0.8569539189338684, "learning_rate": 4.6255722261914214e-05, "loss": 0.9111, "step": 2830 }, { "epoch": 0.22544602988747545, "grad_norm": 0.8346198797225952, "learning_rate": 4.624249159852875e-05, "loss": 0.8691, "step": 2840 }, { "epoch": 0.226239853936375, "grad_norm": 0.8474957346916199, "learning_rate": 4.622926093514329e-05, "loss": 0.8502, "step": 2850 }, { "epoch": 0.22703367798527457, "grad_norm": 0.9480683207511902, "learning_rate": 4.6216030271757824e-05, "loss": 0.8832, "step": 2860 }, { "epoch": 0.22782750203417412, "grad_norm": 0.9572345018386841, "learning_rate": 4.620279960837236e-05, "loss": 0.9159, "step": 2870 }, { "epoch": 0.2286213260830737, "grad_norm": 0.9414117932319641, "learning_rate": 4.618956894498691e-05, "loss": 0.9427, "step": 2880 }, { "epoch": 0.22941515013197325, "grad_norm": 0.7920238375663757, "learning_rate": 4.617633828160145e-05, "loss": 0.8879, "step": 2890 }, { "epoch": 0.2302089741808728, "grad_norm": 0.736736536026001, "learning_rate": 4.616310761821598e-05, "loss": 0.9429, "step": 2900 }, { "epoch": 
0.23100279822977238, "grad_norm": 0.7859109044075012, "learning_rate": 4.614987695483052e-05, "loss": 0.8835, "step": 2910 }, { "epoch": 0.23179662227867193, "grad_norm": 0.9163410663604736, "learning_rate": 4.613664629144506e-05, "loss": 0.9017, "step": 2920 }, { "epoch": 0.2325904463275715, "grad_norm": 0.9528617262840271, "learning_rate": 4.612341562805959e-05, "loss": 0.9572, "step": 2930 }, { "epoch": 0.23338427037647105, "grad_norm": 0.9378358125686646, "learning_rate": 4.611018496467413e-05, "loss": 0.9002, "step": 2940 }, { "epoch": 0.2341780944253706, "grad_norm": 0.754435122013092, "learning_rate": 4.609695430128867e-05, "loss": 0.9026, "step": 2950 }, { "epoch": 0.23497191847427018, "grad_norm": 0.906570315361023, "learning_rate": 4.6083723637903206e-05, "loss": 0.9109, "step": 2960 }, { "epoch": 0.23576574252316973, "grad_norm": 0.9783695936203003, "learning_rate": 4.6070492974517745e-05, "loss": 0.9366, "step": 2970 }, { "epoch": 0.2365595665720693, "grad_norm": 0.8152298331260681, "learning_rate": 4.6057262311132284e-05, "loss": 0.9555, "step": 2980 }, { "epoch": 0.23735339062096886, "grad_norm": 0.8119134902954102, "learning_rate": 4.604403164774682e-05, "loss": 0.9418, "step": 2990 }, { "epoch": 0.23814721466986843, "grad_norm": 0.5931934714317322, "learning_rate": 4.603080098436136e-05, "loss": 0.8831, "step": 3000 }, { "epoch": 0.23894103871876798, "grad_norm": 1.0472118854522705, "learning_rate": 4.6017570320975894e-05, "loss": 0.9529, "step": 3010 }, { "epoch": 0.23973486276766753, "grad_norm": 0.8576637506484985, "learning_rate": 4.600433965759043e-05, "loss": 0.9101, "step": 3020 }, { "epoch": 0.2405286868165671, "grad_norm": 0.7458338141441345, "learning_rate": 4.599110899420497e-05, "loss": 0.8409, "step": 3030 }, { "epoch": 0.24132251086546666, "grad_norm": 0.8986132740974426, "learning_rate": 4.5977878330819504e-05, "loss": 0.9417, "step": 3040 }, { "epoch": 0.24211633491436624, "grad_norm": 0.9694287180900574, "learning_rate": 
4.596464766743404e-05, "loss": 0.9255, "step": 3050 }, { "epoch": 0.2429101589632658, "grad_norm": 0.9465477466583252, "learning_rate": 4.595141700404859e-05, "loss": 0.9307, "step": 3060 }, { "epoch": 0.24370398301216536, "grad_norm": 0.6529215574264526, "learning_rate": 4.593818634066313e-05, "loss": 0.8807, "step": 3070 }, { "epoch": 0.24449780706106491, "grad_norm": 0.9342437982559204, "learning_rate": 4.592495567727766e-05, "loss": 0.9448, "step": 3080 }, { "epoch": 0.24529163110996446, "grad_norm": 0.7587674260139465, "learning_rate": 4.59117250138922e-05, "loss": 0.9655, "step": 3090 }, { "epoch": 0.24608545515886404, "grad_norm": 0.7746047377586365, "learning_rate": 4.589849435050674e-05, "loss": 0.9213, "step": 3100 }, { "epoch": 0.2468792792077636, "grad_norm": 0.9817701578140259, "learning_rate": 4.588526368712128e-05, "loss": 0.9621, "step": 3110 }, { "epoch": 0.24767310325666317, "grad_norm": 0.9328246116638184, "learning_rate": 4.587203302373581e-05, "loss": 0.8562, "step": 3120 }, { "epoch": 0.24846692730556272, "grad_norm": 0.8459776043891907, "learning_rate": 4.585880236035035e-05, "loss": 0.9237, "step": 3130 }, { "epoch": 0.2492607513544623, "grad_norm": 0.8480767607688904, "learning_rate": 4.584557169696489e-05, "loss": 0.8931, "step": 3140 }, { "epoch": 0.25005457540336185, "grad_norm": 0.8643662333488464, "learning_rate": 4.5832341033579426e-05, "loss": 0.9657, "step": 3150 }, { "epoch": 0.2508483994522614, "grad_norm": 0.7610037326812744, "learning_rate": 4.5819110370193965e-05, "loss": 0.9678, "step": 3160 }, { "epoch": 0.25164222350116094, "grad_norm": 0.8325300812721252, "learning_rate": 4.5805879706808503e-05, "loss": 0.9242, "step": 3170 }, { "epoch": 0.25243604755006055, "grad_norm": 0.8497787117958069, "learning_rate": 4.579264904342304e-05, "loss": 0.9659, "step": 3180 }, { "epoch": 0.2532298715989601, "grad_norm": 0.9831972122192383, "learning_rate": 4.5779418380037575e-05, "loss": 0.927, "step": 3190 }, { "epoch": 
0.25402369564785965, "grad_norm": 0.829680323600769, "learning_rate": 4.5766187716652113e-05, "loss": 0.8841, "step": 3200 }, { "epoch": 0.2548175196967592, "grad_norm": 0.8839125633239746, "learning_rate": 4.575295705326665e-05, "loss": 0.8945, "step": 3210 }, { "epoch": 0.25561134374565875, "grad_norm": 1.0367532968521118, "learning_rate": 4.573972638988119e-05, "loss": 0.8785, "step": 3220 }, { "epoch": 0.25640516779455835, "grad_norm": 0.9415130615234375, "learning_rate": 4.572649572649573e-05, "loss": 0.9348, "step": 3230 }, { "epoch": 0.2571989918434579, "grad_norm": 0.7434957027435303, "learning_rate": 4.571326506311027e-05, "loss": 0.8587, "step": 3240 }, { "epoch": 0.25799281589235745, "grad_norm": 0.6429414749145508, "learning_rate": 4.570003439972481e-05, "loss": 0.9606, "step": 3250 }, { "epoch": 0.258786639941257, "grad_norm": 1.4009513854980469, "learning_rate": 4.568680373633934e-05, "loss": 0.8492, "step": 3260 }, { "epoch": 0.2595804639901566, "grad_norm": 0.6804202198982239, "learning_rate": 4.567357307295388e-05, "loss": 0.9661, "step": 3270 }, { "epoch": 0.26037428803905616, "grad_norm": 0.7768397331237793, "learning_rate": 4.566034240956842e-05, "loss": 0.8915, "step": 3280 }, { "epoch": 0.2611681120879557, "grad_norm": 0.8182624578475952, "learning_rate": 4.564711174618296e-05, "loss": 0.914, "step": 3290 }, { "epoch": 0.26196193613685526, "grad_norm": 0.8247461915016174, "learning_rate": 4.563388108279749e-05, "loss": 0.9191, "step": 3300 }, { "epoch": 0.2627557601857548, "grad_norm": 0.8486924171447754, "learning_rate": 4.562065041941203e-05, "loss": 0.9561, "step": 3310 }, { "epoch": 0.2635495842346544, "grad_norm": 0.7965856790542603, "learning_rate": 4.560741975602657e-05, "loss": 0.9409, "step": 3320 }, { "epoch": 0.26434340828355396, "grad_norm": 1.0266188383102417, "learning_rate": 4.559418909264111e-05, "loss": 0.8715, "step": 3330 }, { "epoch": 0.2651372323324535, "grad_norm": 0.795364499092102, "learning_rate": 
4.5580958429255645e-05, "loss": 0.8615, "step": 3340 }, { "epoch": 0.26593105638135306, "grad_norm": 0.7666260600090027, "learning_rate": 4.5567727765870184e-05, "loss": 0.8847, "step": 3350 }, { "epoch": 0.2667248804302526, "grad_norm": 0.741375207901001, "learning_rate": 4.555449710248472e-05, "loss": 0.977, "step": 3360 }, { "epoch": 0.2675187044791522, "grad_norm": 0.8026778697967529, "learning_rate": 4.5541266439099255e-05, "loss": 1.012, "step": 3370 }, { "epoch": 0.26831252852805176, "grad_norm": 0.828702986240387, "learning_rate": 4.5528035775713794e-05, "loss": 0.9397, "step": 3380 }, { "epoch": 0.2691063525769513, "grad_norm": 0.7557156085968018, "learning_rate": 4.551480511232833e-05, "loss": 0.9116, "step": 3390 }, { "epoch": 0.26990017662585086, "grad_norm": 0.8590690493583679, "learning_rate": 4.550157444894287e-05, "loss": 0.899, "step": 3400 }, { "epoch": 0.27069400067475047, "grad_norm": 1.0395570993423462, "learning_rate": 4.548834378555741e-05, "loss": 0.8633, "step": 3410 }, { "epoch": 0.27148782472365, "grad_norm": 0.756775438785553, "learning_rate": 4.547511312217195e-05, "loss": 0.9149, "step": 3420 }, { "epoch": 0.27228164877254957, "grad_norm": 1.0174977779388428, "learning_rate": 4.546188245878649e-05, "loss": 0.8964, "step": 3430 }, { "epoch": 0.2730754728214491, "grad_norm": 0.9375600218772888, "learning_rate": 4.544865179540103e-05, "loss": 1.0064, "step": 3440 }, { "epoch": 0.27386929687034867, "grad_norm": 0.7545995116233826, "learning_rate": 4.543542113201556e-05, "loss": 0.8792, "step": 3450 }, { "epoch": 0.27466312091924827, "grad_norm": 0.7980530261993408, "learning_rate": 4.54221904686301e-05, "loss": 0.9207, "step": 3460 }, { "epoch": 0.2754569449681478, "grad_norm": 0.8304005265235901, "learning_rate": 4.540895980524464e-05, "loss": 0.867, "step": 3470 }, { "epoch": 0.27625076901704737, "grad_norm": 0.9552587270736694, "learning_rate": 4.539572914185917e-05, "loss": 0.9498, "step": 3480 }, { "epoch": 0.2770445930659469, 
"grad_norm": 0.8936342000961304, "learning_rate": 4.538249847847371e-05, "loss": 0.9253, "step": 3490 }, { "epoch": 0.27783841711484647, "grad_norm": 0.8894282579421997, "learning_rate": 4.5369267815088254e-05, "loss": 0.8884, "step": 3500 }, { "epoch": 0.2786322411637461, "grad_norm": 0.7489833831787109, "learning_rate": 4.535603715170279e-05, "loss": 0.9472, "step": 3510 }, { "epoch": 0.2794260652126456, "grad_norm": 0.7078631520271301, "learning_rate": 4.5342806488317325e-05, "loss": 0.9027, "step": 3520 }, { "epoch": 0.2802198892615452, "grad_norm": 0.7065866589546204, "learning_rate": 4.5329575824931864e-05, "loss": 0.9334, "step": 3530 }, { "epoch": 0.2810137133104447, "grad_norm": 0.5849603414535522, "learning_rate": 4.53163451615464e-05, "loss": 0.9532, "step": 3540 }, { "epoch": 0.28180753735934433, "grad_norm": 0.7648496627807617, "learning_rate": 4.530311449816094e-05, "loss": 0.9062, "step": 3550 }, { "epoch": 0.2826013614082439, "grad_norm": 0.7615086436271667, "learning_rate": 4.5289883834775474e-05, "loss": 0.999, "step": 3560 }, { "epoch": 0.28339518545714343, "grad_norm": 0.7692292928695679, "learning_rate": 4.527665317139001e-05, "loss": 0.9794, "step": 3570 }, { "epoch": 0.284189009506043, "grad_norm": 0.6985743045806885, "learning_rate": 4.526342250800455e-05, "loss": 0.922, "step": 3580 }, { "epoch": 0.2849828335549425, "grad_norm": 0.8894872069358826, "learning_rate": 4.525019184461909e-05, "loss": 0.9348, "step": 3590 }, { "epoch": 0.28577665760384213, "grad_norm": 1.1146018505096436, "learning_rate": 4.523696118123363e-05, "loss": 0.9343, "step": 3600 }, { "epoch": 0.2865704816527417, "grad_norm": 0.7811164855957031, "learning_rate": 4.522373051784817e-05, "loss": 0.9495, "step": 3610 }, { "epoch": 0.28736430570164123, "grad_norm": 0.9659305810928345, "learning_rate": 4.521049985446271e-05, "loss": 0.9037, "step": 3620 }, { "epoch": 0.2881581297505408, "grad_norm": 0.8798300623893738, "learning_rate": 4.519726919107724e-05, "loss": 0.8077, 
"step": 3630 }, { "epoch": 0.28895195379944033, "grad_norm": 0.9190603494644165, "learning_rate": 4.518403852769178e-05, "loss": 0.9834, "step": 3640 }, { "epoch": 0.28974577784833994, "grad_norm": 1.0563730001449585, "learning_rate": 4.517080786430632e-05, "loss": 0.903, "step": 3650 }, { "epoch": 0.2905396018972395, "grad_norm": 0.9505723714828491, "learning_rate": 4.515757720092086e-05, "loss": 0.9106, "step": 3660 }, { "epoch": 0.29133342594613904, "grad_norm": 0.7892711758613586, "learning_rate": 4.5144346537535396e-05, "loss": 0.9156, "step": 3670 }, { "epoch": 0.2921272499950386, "grad_norm": 0.8885009288787842, "learning_rate": 4.5131115874149935e-05, "loss": 0.9297, "step": 3680 }, { "epoch": 0.29292107404393813, "grad_norm": 0.9264837503433228, "learning_rate": 4.5117885210764473e-05, "loss": 0.8791, "step": 3690 }, { "epoch": 0.29371489809283774, "grad_norm": 0.8464304208755493, "learning_rate": 4.5104654547379006e-05, "loss": 0.9119, "step": 3700 }, { "epoch": 0.2945087221417373, "grad_norm": 0.8379583358764648, "learning_rate": 4.5091423883993545e-05, "loss": 0.9339, "step": 3710 }, { "epoch": 0.29530254619063684, "grad_norm": 0.7604143619537354, "learning_rate": 4.5078193220608083e-05, "loss": 0.8827, "step": 3720 }, { "epoch": 0.2960963702395364, "grad_norm": 1.2368942499160767, "learning_rate": 4.506496255722262e-05, "loss": 0.8412, "step": 3730 }, { "epoch": 0.296890194288436, "grad_norm": 0.8182504773139954, "learning_rate": 4.5051731893837155e-05, "loss": 0.9742, "step": 3740 }, { "epoch": 0.29768401833733554, "grad_norm": 0.6445258259773254, "learning_rate": 4.5038501230451693e-05, "loss": 0.9485, "step": 3750 }, { "epoch": 0.2984778423862351, "grad_norm": 0.535454511642456, "learning_rate": 4.502527056706623e-05, "loss": 0.8326, "step": 3760 }, { "epoch": 0.29927166643513464, "grad_norm": 0.9186378121376038, "learning_rate": 4.501203990368078e-05, "loss": 0.8988, "step": 3770 }, { "epoch": 0.3000654904840342, "grad_norm": 0.8731489181518555, 
"learning_rate": 4.499880924029531e-05, "loss": 0.8857, "step": 3780 }, { "epoch": 0.3008593145329338, "grad_norm": 0.9101518392562866, "learning_rate": 4.498557857690985e-05, "loss": 0.8697, "step": 3790 }, { "epoch": 0.30165313858183335, "grad_norm": 1.0505417585372925, "learning_rate": 4.497234791352439e-05, "loss": 0.9239, "step": 3800 }, { "epoch": 0.3024469626307329, "grad_norm": 0.9743961095809937, "learning_rate": 4.495911725013893e-05, "loss": 0.9206, "step": 3810 }, { "epoch": 0.30324078667963245, "grad_norm": 0.7029954195022583, "learning_rate": 4.494588658675346e-05, "loss": 0.9278, "step": 3820 }, { "epoch": 0.304034610728532, "grad_norm": 1.1363028287887573, "learning_rate": 4.4932655923368e-05, "loss": 0.8596, "step": 3830 }, { "epoch": 0.3048284347774316, "grad_norm": 0.6260054707527161, "learning_rate": 4.491942525998254e-05, "loss": 0.8998, "step": 3840 }, { "epoch": 0.30562225882633115, "grad_norm": 0.9502087831497192, "learning_rate": 4.4906194596597076e-05, "loss": 0.9423, "step": 3850 }, { "epoch": 0.3064160828752307, "grad_norm": 0.6927551627159119, "learning_rate": 4.4892963933211615e-05, "loss": 0.9297, "step": 3860 }, { "epoch": 0.30720990692413025, "grad_norm": 0.9120237231254578, "learning_rate": 4.4879733269826154e-05, "loss": 0.916, "step": 3870 }, { "epoch": 0.30800373097302985, "grad_norm": 0.7541207075119019, "learning_rate": 4.486650260644069e-05, "loss": 0.8848, "step": 3880 }, { "epoch": 0.3087975550219294, "grad_norm": 0.8690943717956543, "learning_rate": 4.4853271943055225e-05, "loss": 0.8941, "step": 3890 }, { "epoch": 0.30959137907082895, "grad_norm": 0.9503733515739441, "learning_rate": 4.4840041279669764e-05, "loss": 0.9158, "step": 3900 }, { "epoch": 0.3103852031197285, "grad_norm": 0.8072488903999329, "learning_rate": 4.48268106162843e-05, "loss": 0.9325, "step": 3910 }, { "epoch": 0.31117902716862805, "grad_norm": 0.9710533618927002, "learning_rate": 4.481357995289884e-05, "loss": 0.9011, "step": 3920 }, { "epoch": 
0.31197285121752766, "grad_norm": 0.8638473749160767, "learning_rate": 4.4800349289513374e-05, "loss": 0.9435, "step": 3930 }, { "epoch": 0.3127666752664272, "grad_norm": 1.0046641826629639, "learning_rate": 4.478711862612792e-05, "loss": 0.9496, "step": 3940 }, { "epoch": 0.31356049931532676, "grad_norm": 0.7358148097991943, "learning_rate": 4.477388796274246e-05, "loss": 0.9531, "step": 3950 }, { "epoch": 0.3143543233642263, "grad_norm": 0.9239563941955566, "learning_rate": 4.476065729935699e-05, "loss": 0.9164, "step": 3960 }, { "epoch": 0.31514814741312586, "grad_norm": 0.919695258140564, "learning_rate": 4.474742663597153e-05, "loss": 0.891, "step": 3970 }, { "epoch": 0.31594197146202546, "grad_norm": 0.6320668458938599, "learning_rate": 4.473419597258607e-05, "loss": 0.9372, "step": 3980 }, { "epoch": 0.316735795510925, "grad_norm": 0.8896647691726685, "learning_rate": 4.472096530920061e-05, "loss": 0.9236, "step": 3990 }, { "epoch": 0.31752961955982456, "grad_norm": 0.8284244537353516, "learning_rate": 4.470773464581514e-05, "loss": 0.8625, "step": 4000 }, { "epoch": 0.3183234436087241, "grad_norm": 0.760328471660614, "learning_rate": 4.469450398242968e-05, "loss": 0.9433, "step": 4010 }, { "epoch": 0.31911726765762366, "grad_norm": 0.7320681810379028, "learning_rate": 4.468127331904422e-05, "loss": 0.8655, "step": 4020 }, { "epoch": 0.31991109170652327, "grad_norm": 0.9130124449729919, "learning_rate": 4.4668042655658756e-05, "loss": 0.8828, "step": 4030 }, { "epoch": 0.3207049157554228, "grad_norm": 0.8663350343704224, "learning_rate": 4.4654811992273295e-05, "loss": 0.9523, "step": 4040 }, { "epoch": 0.32149873980432236, "grad_norm": 0.8587914705276489, "learning_rate": 4.4641581328887834e-05, "loss": 0.8242, "step": 4050 }, { "epoch": 0.3222925638532219, "grad_norm": 0.7651414275169373, "learning_rate": 4.462835066550237e-05, "loss": 0.8989, "step": 4060 }, { "epoch": 0.3230863879021215, "grad_norm": 0.834144651889801, "learning_rate": 
4.4615120002116905e-05, "loss": 0.9105, "step": 4070 }, { "epoch": 0.32388021195102107, "grad_norm": 0.691362202167511, "learning_rate": 4.4601889338731444e-05, "loss": 0.8757, "step": 4080 }, { "epoch": 0.3246740359999206, "grad_norm": 0.9557787179946899, "learning_rate": 4.458865867534598e-05, "loss": 0.9276, "step": 4090 }, { "epoch": 0.32546786004882017, "grad_norm": 0.8461413383483887, "learning_rate": 4.457542801196052e-05, "loss": 0.9158, "step": 4100 }, { "epoch": 0.3262616840977197, "grad_norm": 0.8489325046539307, "learning_rate": 4.456219734857506e-05, "loss": 0.9529, "step": 4110 }, { "epoch": 0.3270555081466193, "grad_norm": 0.8827877044677734, "learning_rate": 4.45489666851896e-05, "loss": 0.9152, "step": 4120 }, { "epoch": 0.32784933219551887, "grad_norm": 0.69962078332901, "learning_rate": 4.453573602180414e-05, "loss": 0.8927, "step": 4130 }, { "epoch": 0.3286431562444184, "grad_norm": 0.6991488337516785, "learning_rate": 4.452250535841868e-05, "loss": 0.9344, "step": 4140 }, { "epoch": 0.32943698029331797, "grad_norm": 0.9323925375938416, "learning_rate": 4.450927469503321e-05, "loss": 0.945, "step": 4150 }, { "epoch": 0.3302308043422175, "grad_norm": 0.8266938328742981, "learning_rate": 4.449604403164775e-05, "loss": 0.9045, "step": 4160 }, { "epoch": 0.3310246283911171, "grad_norm": 0.8383837342262268, "learning_rate": 4.448281336826229e-05, "loss": 0.8516, "step": 4170 }, { "epoch": 0.3318184524400167, "grad_norm": 0.72791987657547, "learning_rate": 4.446958270487682e-05, "loss": 0.8718, "step": 4180 }, { "epoch": 0.3326122764889162, "grad_norm": 0.7107085585594177, "learning_rate": 4.445635204149136e-05, "loss": 0.9216, "step": 4190 }, { "epoch": 0.3334061005378158, "grad_norm": 0.7751030325889587, "learning_rate": 4.44431213781059e-05, "loss": 0.8512, "step": 4200 }, { "epoch": 0.3341999245867154, "grad_norm": 0.764103889465332, "learning_rate": 4.4429890714720443e-05, "loss": 0.9594, "step": 4210 }, { "epoch": 0.33499374863561493, 
"grad_norm": 0.7244476079940796, "learning_rate": 4.4416660051334976e-05, "loss": 0.9194, "step": 4220 }, { "epoch": 0.3357875726845145, "grad_norm": 0.8353381156921387, "learning_rate": 4.4403429387949515e-05, "loss": 0.8394, "step": 4230 }, { "epoch": 0.33658139673341403, "grad_norm": 0.6536258459091187, "learning_rate": 4.4390198724564053e-05, "loss": 0.9125, "step": 4240 }, { "epoch": 0.3373752207823136, "grad_norm": 0.8908824920654297, "learning_rate": 4.437696806117859e-05, "loss": 0.9419, "step": 4250 }, { "epoch": 0.3381690448312132, "grad_norm": 0.6466046571731567, "learning_rate": 4.4363737397793125e-05, "loss": 0.946, "step": 4260 }, { "epoch": 0.33896286888011273, "grad_norm": 0.8315856456756592, "learning_rate": 4.4350506734407663e-05, "loss": 0.9282, "step": 4270 }, { "epoch": 0.3397566929290123, "grad_norm": 0.7369117736816406, "learning_rate": 4.43372760710222e-05, "loss": 0.9398, "step": 4280 }, { "epoch": 0.34055051697791183, "grad_norm": 0.841690719127655, "learning_rate": 4.432404540763674e-05, "loss": 0.8411, "step": 4290 }, { "epoch": 0.3413443410268114, "grad_norm": 0.8048957586288452, "learning_rate": 4.431081474425128e-05, "loss": 0.9204, "step": 4300 }, { "epoch": 0.342138165075711, "grad_norm": 0.6272882223129272, "learning_rate": 4.429758408086582e-05, "loss": 0.8819, "step": 4310 }, { "epoch": 0.34293198912461054, "grad_norm": 0.8420210480690002, "learning_rate": 4.428435341748036e-05, "loss": 0.9187, "step": 4320 }, { "epoch": 0.3437258131735101, "grad_norm": 0.873335063457489, "learning_rate": 4.427112275409489e-05, "loss": 0.8807, "step": 4330 }, { "epoch": 0.34451963722240964, "grad_norm": 0.6721493005752563, "learning_rate": 4.425789209070943e-05, "loss": 0.9068, "step": 4340 }, { "epoch": 0.34531346127130924, "grad_norm": 0.8227477669715881, "learning_rate": 4.424466142732397e-05, "loss": 0.9372, "step": 4350 }, { "epoch": 0.3461072853202088, "grad_norm": 0.8997884392738342, "learning_rate": 4.423143076393851e-05, "loss": 0.8916, 
"step": 4360 }, { "epoch": 0.34690110936910834, "grad_norm": 0.9526328444480896, "learning_rate": 4.421820010055304e-05, "loss": 0.857, "step": 4370 }, { "epoch": 0.3476949334180079, "grad_norm": 0.8911690711975098, "learning_rate": 4.4204969437167585e-05, "loss": 0.9323, "step": 4380 }, { "epoch": 0.34848875746690744, "grad_norm": 0.9047393798828125, "learning_rate": 4.4191738773782124e-05, "loss": 0.8946, "step": 4390 }, { "epoch": 0.34928258151580704, "grad_norm": 0.8549637794494629, "learning_rate": 4.4178508110396656e-05, "loss": 0.8755, "step": 4400 }, { "epoch": 0.3500764055647066, "grad_norm": 0.9609139561653137, "learning_rate": 4.4165277447011195e-05, "loss": 0.8262, "step": 4410 }, { "epoch": 0.35087022961360614, "grad_norm": 0.5445454120635986, "learning_rate": 4.4152046783625734e-05, "loss": 0.8849, "step": 4420 }, { "epoch": 0.3516640536625057, "grad_norm": 0.9729016423225403, "learning_rate": 4.413881612024027e-05, "loss": 0.8396, "step": 4430 }, { "epoch": 0.35245787771140524, "grad_norm": 1.071895718574524, "learning_rate": 4.4125585456854805e-05, "loss": 0.9218, "step": 4440 }, { "epoch": 0.35325170176030485, "grad_norm": 0.6580398082733154, "learning_rate": 4.4112354793469344e-05, "loss": 1.0065, "step": 4450 }, { "epoch": 0.3540455258092044, "grad_norm": 0.6029400825500488, "learning_rate": 4.409912413008388e-05, "loss": 0.8869, "step": 4460 }, { "epoch": 0.35483934985810395, "grad_norm": 1.124426007270813, "learning_rate": 4.408589346669842e-05, "loss": 0.8604, "step": 4470 }, { "epoch": 0.3556331739070035, "grad_norm": 0.905108630657196, "learning_rate": 4.407266280331296e-05, "loss": 0.8818, "step": 4480 }, { "epoch": 0.35642699795590305, "grad_norm": 0.7963013052940369, "learning_rate": 4.40594321399275e-05, "loss": 0.9271, "step": 4490 }, { "epoch": 0.35722082200480265, "grad_norm": 0.8501315712928772, "learning_rate": 4.404620147654204e-05, "loss": 0.9084, "step": 4500 }, { "epoch": 0.3580146460537022, "grad_norm": 0.7018426656723022, 
"learning_rate": 4.403297081315657e-05, "loss": 0.9059, "step": 4510 }, { "epoch": 0.35880847010260175, "grad_norm": 0.9202073216438293, "learning_rate": 4.401974014977111e-05, "loss": 0.846, "step": 4520 }, { "epoch": 0.3596022941515013, "grad_norm": 1.0749320983886719, "learning_rate": 4.400650948638565e-05, "loss": 0.9123, "step": 4530 }, { "epoch": 0.3603961182004009, "grad_norm": 0.7524193525314331, "learning_rate": 4.399327882300019e-05, "loss": 0.9018, "step": 4540 }, { "epoch": 0.36118994224930046, "grad_norm": 0.7798032164573669, "learning_rate": 4.398004815961472e-05, "loss": 0.9221, "step": 4550 }, { "epoch": 0.3619837662982, "grad_norm": 0.8113389015197754, "learning_rate": 4.3966817496229265e-05, "loss": 0.9005, "step": 4560 }, { "epoch": 0.36277759034709955, "grad_norm": 0.7713738083839417, "learning_rate": 4.3953586832843804e-05, "loss": 0.9608, "step": 4570 }, { "epoch": 0.3635714143959991, "grad_norm": 0.7880844473838806, "learning_rate": 4.394035616945834e-05, "loss": 0.9077, "step": 4580 }, { "epoch": 0.3643652384448987, "grad_norm": 0.8188297748565674, "learning_rate": 4.3927125506072875e-05, "loss": 0.9276, "step": 4590 }, { "epoch": 0.36515906249379826, "grad_norm": 0.7484593987464905, "learning_rate": 4.3913894842687414e-05, "loss": 0.926, "step": 4600 }, { "epoch": 0.3659528865426978, "grad_norm": 0.7410342693328857, "learning_rate": 4.390066417930195e-05, "loss": 0.9059, "step": 4610 }, { "epoch": 0.36674671059159736, "grad_norm": 0.897441565990448, "learning_rate": 4.388743351591649e-05, "loss": 0.8394, "step": 4620 }, { "epoch": 0.3675405346404969, "grad_norm": 0.7051084637641907, "learning_rate": 4.3874202852531024e-05, "loss": 0.8888, "step": 4630 }, { "epoch": 0.3683343586893965, "grad_norm": 0.8222568035125732, "learning_rate": 4.386097218914556e-05, "loss": 0.8424, "step": 4640 }, { "epoch": 0.36912818273829606, "grad_norm": 0.9209256768226624, "learning_rate": 4.38477415257601e-05, "loss": 0.8892, "step": 4650 }, { "epoch": 
0.3699220067871956, "grad_norm": 0.9021577835083008, "learning_rate": 4.383451086237464e-05, "loss": 0.831, "step": 4660 }, { "epoch": 0.37071583083609516, "grad_norm": 0.7472784519195557, "learning_rate": 4.382128019898918e-05, "loss": 0.8574, "step": 4670 }, { "epoch": 0.37150965488499477, "grad_norm": 0.7072018384933472, "learning_rate": 4.380804953560372e-05, "loss": 0.9618, "step": 4680 }, { "epoch": 0.3723034789338943, "grad_norm": 0.9218171238899231, "learning_rate": 4.379481887221826e-05, "loss": 0.8916, "step": 4690 }, { "epoch": 0.37309730298279387, "grad_norm": 0.8794512152671814, "learning_rate": 4.378158820883279e-05, "loss": 0.9359, "step": 4700 }, { "epoch": 0.3738911270316934, "grad_norm": 0.8208445906639099, "learning_rate": 4.376835754544733e-05, "loss": 0.8878, "step": 4710 }, { "epoch": 0.37468495108059297, "grad_norm": 0.5891085863113403, "learning_rate": 4.375512688206187e-05, "loss": 0.9369, "step": 4720 }, { "epoch": 0.37547877512949257, "grad_norm": 0.8617219924926758, "learning_rate": 4.374189621867641e-05, "loss": 0.8791, "step": 4730 }, { "epoch": 0.3762725991783921, "grad_norm": 1.0510433912277222, "learning_rate": 4.3728665555290946e-05, "loss": 0.987, "step": 4740 }, { "epoch": 0.37706642322729167, "grad_norm": 0.9196320176124573, "learning_rate": 4.3715434891905485e-05, "loss": 0.8332, "step": 4750 }, { "epoch": 0.3778602472761912, "grad_norm": 0.9651428461074829, "learning_rate": 4.3702204228520023e-05, "loss": 0.9301, "step": 4760 }, { "epoch": 0.37865407132509077, "grad_norm": 0.7794479131698608, "learning_rate": 4.3688973565134556e-05, "loss": 0.8898, "step": 4770 }, { "epoch": 0.3794478953739904, "grad_norm": 0.708845853805542, "learning_rate": 4.3675742901749095e-05, "loss": 0.8781, "step": 4780 }, { "epoch": 0.3802417194228899, "grad_norm": 1.07254159450531, "learning_rate": 4.3662512238363633e-05, "loss": 0.8329, "step": 4790 }, { "epoch": 0.3810355434717895, "grad_norm": 0.6615691184997559, "learning_rate": 
4.364928157497817e-05, "loss": 0.9107, "step": 4800 }, { "epoch": 0.381829367520689, "grad_norm": 0.7933416962623596, "learning_rate": 4.3636050911592705e-05, "loss": 0.8986, "step": 4810 }, { "epoch": 0.3826231915695886, "grad_norm": 0.7650178074836731, "learning_rate": 4.3622820248207243e-05, "loss": 0.9212, "step": 4820 }, { "epoch": 0.3834170156184882, "grad_norm": 0.8654028177261353, "learning_rate": 4.360958958482179e-05, "loss": 0.8686, "step": 4830 }, { "epoch": 0.3842108396673877, "grad_norm": 0.8981553912162781, "learning_rate": 4.359635892143633e-05, "loss": 0.843, "step": 4840 }, { "epoch": 0.3850046637162873, "grad_norm": 0.8608548641204834, "learning_rate": 4.358312825805086e-05, "loss": 0.8958, "step": 4850 }, { "epoch": 0.3857984877651868, "grad_norm": 0.8663391470909119, "learning_rate": 4.35698975946654e-05, "loss": 0.8992, "step": 4860 }, { "epoch": 0.38659231181408643, "grad_norm": 0.7639971971511841, "learning_rate": 4.355666693127994e-05, "loss": 0.8592, "step": 4870 }, { "epoch": 0.387386135862986, "grad_norm": 0.5844544768333435, "learning_rate": 4.354343626789447e-05, "loss": 0.8638, "step": 4880 }, { "epoch": 0.38817995991188553, "grad_norm": 0.8091756701469421, "learning_rate": 4.353020560450901e-05, "loss": 0.934, "step": 4890 }, { "epoch": 0.3889737839607851, "grad_norm": 0.8555530905723572, "learning_rate": 4.351697494112355e-05, "loss": 0.8541, "step": 4900 }, { "epoch": 0.38976760800968463, "grad_norm": 0.7854059934616089, "learning_rate": 4.350374427773809e-05, "loss": 0.9043, "step": 4910 }, { "epoch": 0.39056143205858423, "grad_norm": 1.0440559387207031, "learning_rate": 4.3490513614352626e-05, "loss": 0.8941, "step": 4920 }, { "epoch": 0.3913552561074838, "grad_norm": 0.7478209733963013, "learning_rate": 4.3477282950967165e-05, "loss": 0.8565, "step": 4930 }, { "epoch": 0.39214908015638333, "grad_norm": 0.7952112555503845, "learning_rate": 4.3464052287581704e-05, "loss": 0.8953, "step": 4940 }, { "epoch": 0.3929429042052829, 
"grad_norm": 0.8431933522224426, "learning_rate": 4.345082162419624e-05, "loss": 0.8433, "step": 4950 }, { "epoch": 0.39373672825418243, "grad_norm": 0.7418366074562073, "learning_rate": 4.3437590960810775e-05, "loss": 0.8713, "step": 4960 }, { "epoch": 0.39453055230308204, "grad_norm": 0.8180972933769226, "learning_rate": 4.3424360297425314e-05, "loss": 0.9263, "step": 4970 }, { "epoch": 0.3953243763519816, "grad_norm": 0.681885302066803, "learning_rate": 4.341112963403985e-05, "loss": 0.8812, "step": 4980 }, { "epoch": 0.39611820040088114, "grad_norm": 0.6465054750442505, "learning_rate": 4.3397898970654385e-05, "loss": 0.8943, "step": 4990 }, { "epoch": 0.3969120244497807, "grad_norm": 0.8901807069778442, "learning_rate": 4.338466830726893e-05, "loss": 0.9004, "step": 5000 }, { "epoch": 0.3977058484986803, "grad_norm": 0.937714695930481, "learning_rate": 4.337143764388347e-05, "loss": 0.8341, "step": 5010 }, { "epoch": 0.39849967254757984, "grad_norm": 0.6198152303695679, "learning_rate": 4.335820698049801e-05, "loss": 0.9793, "step": 5020 }, { "epoch": 0.3992934965964794, "grad_norm": 1.0210473537445068, "learning_rate": 4.334497631711254e-05, "loss": 0.8939, "step": 5030 }, { "epoch": 0.40008732064537894, "grad_norm": 0.7850756645202637, "learning_rate": 4.333174565372708e-05, "loss": 0.8545, "step": 5040 }, { "epoch": 0.4008811446942785, "grad_norm": 0.9468925595283508, "learning_rate": 4.331851499034162e-05, "loss": 0.833, "step": 5050 }, { "epoch": 0.4016749687431781, "grad_norm": 0.6961553692817688, "learning_rate": 4.330528432695616e-05, "loss": 0.887, "step": 5060 }, { "epoch": 0.40246879279207765, "grad_norm": 0.7382469177246094, "learning_rate": 4.329205366357069e-05, "loss": 0.8148, "step": 5070 }, { "epoch": 0.4032626168409772, "grad_norm": 0.8620008230209351, "learning_rate": 4.327882300018523e-05, "loss": 0.8493, "step": 5080 }, { "epoch": 0.40405644088987674, "grad_norm": 0.6228137612342834, "learning_rate": 4.326559233679977e-05, "loss": 0.961, 
"step": 5090 }, { "epoch": 0.4048502649387763, "grad_norm": 0.8809449672698975, "learning_rate": 4.3252361673414306e-05, "loss": 0.8982, "step": 5100 }, { "epoch": 0.4056440889876759, "grad_norm": 0.7898284196853638, "learning_rate": 4.3239131010028845e-05, "loss": 0.8696, "step": 5110 }, { "epoch": 0.40643791303657545, "grad_norm": 0.5993707776069641, "learning_rate": 4.3225900346643384e-05, "loss": 0.9455, "step": 5120 }, { "epoch": 0.407231737085475, "grad_norm": 0.5548946261405945, "learning_rate": 4.321266968325792e-05, "loss": 0.9056, "step": 5130 }, { "epoch": 0.40802556113437455, "grad_norm": 0.5974055528640747, "learning_rate": 4.3199439019872455e-05, "loss": 0.933, "step": 5140 }, { "epoch": 0.40881938518327415, "grad_norm": 0.77692711353302, "learning_rate": 4.3186208356486994e-05, "loss": 0.9088, "step": 5150 }, { "epoch": 0.4096132092321737, "grad_norm": 0.6619086265563965, "learning_rate": 4.317297769310153e-05, "loss": 0.9095, "step": 5160 }, { "epoch": 0.41040703328107325, "grad_norm": 0.8819933533668518, "learning_rate": 4.315974702971607e-05, "loss": 0.8546, "step": 5170 }, { "epoch": 0.4112008573299728, "grad_norm": 0.8302019834518433, "learning_rate": 4.314651636633061e-05, "loss": 0.883, "step": 5180 }, { "epoch": 0.41199468137887235, "grad_norm": 0.9397719502449036, "learning_rate": 4.313328570294515e-05, "loss": 0.8768, "step": 5190 }, { "epoch": 0.41278850542777196, "grad_norm": 1.071415901184082, "learning_rate": 4.312005503955969e-05, "loss": 0.9198, "step": 5200 }, { "epoch": 0.4135823294766715, "grad_norm": 0.7499683499336243, "learning_rate": 4.310682437617422e-05, "loss": 0.875, "step": 5210 }, { "epoch": 0.41437615352557106, "grad_norm": 0.7693167328834534, "learning_rate": 4.309359371278876e-05, "loss": 0.898, "step": 5220 }, { "epoch": 0.4151699775744706, "grad_norm": 1.0537440776824951, "learning_rate": 4.30803630494033e-05, "loss": 0.9159, "step": 5230 }, { "epoch": 0.41596380162337016, "grad_norm": 0.7521883249282837, 
"learning_rate": 4.306713238601784e-05, "loss": 0.9153, "step": 5240 }, { "epoch": 0.41675762567226976, "grad_norm": 1.0054055452346802, "learning_rate": 4.305390172263237e-05, "loss": 0.8956, "step": 5250 }, { "epoch": 0.4175514497211693, "grad_norm": 0.7442034482955933, "learning_rate": 4.304067105924691e-05, "loss": 0.891, "step": 5260 }, { "epoch": 0.41834527377006886, "grad_norm": 1.084252953529358, "learning_rate": 4.3027440395861455e-05, "loss": 0.9027, "step": 5270 }, { "epoch": 0.4191390978189684, "grad_norm": 0.8177867531776428, "learning_rate": 4.3014209732475993e-05, "loss": 0.9018, "step": 5280 }, { "epoch": 0.419932921867868, "grad_norm": 0.48281505703926086, "learning_rate": 4.3000979069090526e-05, "loss": 0.9254, "step": 5290 }, { "epoch": 0.42072674591676756, "grad_norm": 0.7245819568634033, "learning_rate": 4.2987748405705065e-05, "loss": 0.9109, "step": 5300 }, { "epoch": 0.4215205699656671, "grad_norm": 0.7288056015968323, "learning_rate": 4.2974517742319603e-05, "loss": 0.8432, "step": 5310 }, { "epoch": 0.42231439401456666, "grad_norm": 0.7283946871757507, "learning_rate": 4.2961287078934136e-05, "loss": 0.8788, "step": 5320 }, { "epoch": 0.4231082180634662, "grad_norm": 0.7446802258491516, "learning_rate": 4.2948056415548675e-05, "loss": 0.9142, "step": 5330 }, { "epoch": 0.4239020421123658, "grad_norm": 1.0653091669082642, "learning_rate": 4.2934825752163213e-05, "loss": 0.8688, "step": 5340 }, { "epoch": 0.42469586616126537, "grad_norm": 0.702083170413971, "learning_rate": 4.292159508877775e-05, "loss": 0.9083, "step": 5350 }, { "epoch": 0.4254896902101649, "grad_norm": 0.8971485495567322, "learning_rate": 4.290836442539229e-05, "loss": 0.8935, "step": 5360 }, { "epoch": 0.42628351425906447, "grad_norm": 0.7836154103279114, "learning_rate": 4.289513376200683e-05, "loss": 0.814, "step": 5370 }, { "epoch": 0.427077338307964, "grad_norm": 0.964614987373352, "learning_rate": 4.288190309862137e-05, "loss": 0.8964, "step": 5380 }, { "epoch": 
0.4278711623568636, "grad_norm": 0.8366977572441101, "learning_rate": 4.286867243523591e-05, "loss": 0.9089, "step": 5390 }, { "epoch": 0.42866498640576317, "grad_norm": 0.720564603805542, "learning_rate": 4.285544177185044e-05, "loss": 0.905, "step": 5400 }, { "epoch": 0.4294588104546627, "grad_norm": 0.7407063841819763, "learning_rate": 4.284221110846498e-05, "loss": 0.9191, "step": 5410 }, { "epoch": 0.43025263450356227, "grad_norm": 0.7772669792175293, "learning_rate": 4.282898044507952e-05, "loss": 0.8633, "step": 5420 }, { "epoch": 0.4310464585524618, "grad_norm": 0.7156360149383545, "learning_rate": 4.281574978169406e-05, "loss": 0.9515, "step": 5430 }, { "epoch": 0.4318402826013614, "grad_norm": 0.7044875621795654, "learning_rate": 4.2802519118308596e-05, "loss": 0.9199, "step": 5440 }, { "epoch": 0.432634106650261, "grad_norm": 0.7540838122367859, "learning_rate": 4.2789288454923135e-05, "loss": 0.9229, "step": 5450 }, { "epoch": 0.4334279306991605, "grad_norm": 0.6620701551437378, "learning_rate": 4.2776057791537674e-05, "loss": 0.8556, "step": 5460 }, { "epoch": 0.4342217547480601, "grad_norm": 0.8868663907051086, "learning_rate": 4.2762827128152206e-05, "loss": 0.8666, "step": 5470 }, { "epoch": 0.4350155787969597, "grad_norm": 0.7081343531608582, "learning_rate": 4.2749596464766745e-05, "loss": 0.8888, "step": 5480 }, { "epoch": 0.43580940284585923, "grad_norm": 0.7983565926551819, "learning_rate": 4.2736365801381284e-05, "loss": 0.9041, "step": 5490 }, { "epoch": 0.4366032268947588, "grad_norm": 0.7562633156776428, "learning_rate": 4.272313513799582e-05, "loss": 0.908, "step": 5500 }, { "epoch": 0.4373970509436583, "grad_norm": 0.8235949873924255, "learning_rate": 4.2709904474610355e-05, "loss": 0.8907, "step": 5510 }, { "epoch": 0.4381908749925579, "grad_norm": 0.6528277397155762, "learning_rate": 4.2696673811224894e-05, "loss": 0.8403, "step": 5520 }, { "epoch": 0.4389846990414575, "grad_norm": 0.7162795662879944, "learning_rate": 
4.268344314783943e-05, "loss": 0.9001, "step": 5530 }, { "epoch": 0.43977852309035703, "grad_norm": 0.8113960027694702, "learning_rate": 4.267021248445398e-05, "loss": 0.8785, "step": 5540 }, { "epoch": 0.4405723471392566, "grad_norm": 0.6791692972183228, "learning_rate": 4.265698182106851e-05, "loss": 0.8744, "step": 5550 }, { "epoch": 0.44136617118815613, "grad_norm": 0.7448859214782715, "learning_rate": 4.264375115768305e-05, "loss": 0.9191, "step": 5560 }, { "epoch": 0.4421599952370557, "grad_norm": 0.8218660950660706, "learning_rate": 4.263052049429759e-05, "loss": 0.8693, "step": 5570 }, { "epoch": 0.4429538192859553, "grad_norm": 0.8838844299316406, "learning_rate": 4.261728983091212e-05, "loss": 0.8701, "step": 5580 }, { "epoch": 0.44374764333485484, "grad_norm": 0.9519463181495667, "learning_rate": 4.260405916752666e-05, "loss": 0.8621, "step": 5590 }, { "epoch": 0.4445414673837544, "grad_norm": 0.9154829382896423, "learning_rate": 4.25908285041412e-05, "loss": 0.8917, "step": 5600 }, { "epoch": 0.44533529143265393, "grad_norm": 1.03704833984375, "learning_rate": 4.257759784075574e-05, "loss": 0.9156, "step": 5610 }, { "epoch": 0.44612911548155354, "grad_norm": 0.7216574549674988, "learning_rate": 4.2564367177370276e-05, "loss": 0.8858, "step": 5620 }, { "epoch": 0.4469229395304531, "grad_norm": 0.6714047789573669, "learning_rate": 4.2551136513984815e-05, "loss": 0.8379, "step": 5630 }, { "epoch": 0.44771676357935264, "grad_norm": 0.9155183434486389, "learning_rate": 4.2537905850599354e-05, "loss": 0.871, "step": 5640 }, { "epoch": 0.4485105876282522, "grad_norm": 0.7604344487190247, "learning_rate": 4.252467518721389e-05, "loss": 0.9569, "step": 5650 }, { "epoch": 0.44930441167715174, "grad_norm": 0.6832132935523987, "learning_rate": 4.2511444523828425e-05, "loss": 0.9031, "step": 5660 }, { "epoch": 0.45009823572605134, "grad_norm": 0.5372611880302429, "learning_rate": 4.2498213860442964e-05, "loss": 0.9364, "step": 5670 }, { "epoch": 0.4508920597749509, 
"grad_norm": 0.8363433480262756, "learning_rate": 4.24849831970575e-05, "loss": 0.8708, "step": 5680 }, { "epoch": 0.45168588382385044, "grad_norm": 0.7453399896621704, "learning_rate": 4.2471752533672035e-05, "loss": 0.8366, "step": 5690 }, { "epoch": 0.45247970787275, "grad_norm": 0.9587568044662476, "learning_rate": 4.2458521870286574e-05, "loss": 0.884, "step": 5700 }, { "epoch": 0.45327353192164954, "grad_norm": 0.9268891215324402, "learning_rate": 4.244529120690112e-05, "loss": 0.8884, "step": 5710 }, { "epoch": 0.45406735597054915, "grad_norm": 0.7946332693099976, "learning_rate": 4.243206054351566e-05, "loss": 0.8686, "step": 5720 }, { "epoch": 0.4548611800194487, "grad_norm": 0.7091253995895386, "learning_rate": 4.241882988013019e-05, "loss": 0.8849, "step": 5730 }, { "epoch": 0.45565500406834825, "grad_norm": 0.7621028423309326, "learning_rate": 4.240559921674473e-05, "loss": 0.9082, "step": 5740 }, { "epoch": 0.4564488281172478, "grad_norm": 0.8027592301368713, "learning_rate": 4.239236855335927e-05, "loss": 0.8613, "step": 5750 }, { "epoch": 0.4572426521661474, "grad_norm": 0.7330886721611023, "learning_rate": 4.237913788997381e-05, "loss": 0.9076, "step": 5760 }, { "epoch": 0.45803647621504695, "grad_norm": 0.9339755177497864, "learning_rate": 4.236590722658834e-05, "loss": 0.8422, "step": 5770 }, { "epoch": 0.4588303002639465, "grad_norm": 0.810807466506958, "learning_rate": 4.235267656320288e-05, "loss": 1.0052, "step": 5780 }, { "epoch": 0.45962412431284605, "grad_norm": 0.6460042595863342, "learning_rate": 4.233944589981742e-05, "loss": 0.9634, "step": 5790 }, { "epoch": 0.4604179483617456, "grad_norm": 0.9872604012489319, "learning_rate": 4.232621523643196e-05, "loss": 0.8762, "step": 5800 }, { "epoch": 0.4612117724106452, "grad_norm": 0.6482599973678589, "learning_rate": 4.2312984573046496e-05, "loss": 0.8439, "step": 5810 }, { "epoch": 0.46200559645954475, "grad_norm": 0.7754231691360474, "learning_rate": 4.2299753909661035e-05, "loss": 0.9037, 
"step": 5820 }, { "epoch": 0.4627994205084443, "grad_norm": 0.7624156475067139, "learning_rate": 4.2286523246275573e-05, "loss": 0.8971, "step": 5830 }, { "epoch": 0.46359324455734385, "grad_norm": 0.8388280272483826, "learning_rate": 4.2273292582890106e-05, "loss": 0.8161, "step": 5840 }, { "epoch": 0.4643870686062434, "grad_norm": 0.9448870420455933, "learning_rate": 4.2260061919504645e-05, "loss": 0.8949, "step": 5850 }, { "epoch": 0.465180892655143, "grad_norm": 0.9757259488105774, "learning_rate": 4.2246831256119183e-05, "loss": 0.8765, "step": 5860 }, { "epoch": 0.46597471670404256, "grad_norm": 0.7201075553894043, "learning_rate": 4.223360059273372e-05, "loss": 0.9155, "step": 5870 }, { "epoch": 0.4667685407529421, "grad_norm": 0.9070501923561096, "learning_rate": 4.222036992934826e-05, "loss": 0.9414, "step": 5880 }, { "epoch": 0.46756236480184166, "grad_norm": 0.6421061754226685, "learning_rate": 4.22071392659628e-05, "loss": 0.8891, "step": 5890 }, { "epoch": 0.4683561888507412, "grad_norm": 0.957310140132904, "learning_rate": 4.219390860257734e-05, "loss": 0.9296, "step": 5900 }, { "epoch": 0.4691500128996408, "grad_norm": 0.9540489315986633, "learning_rate": 4.218067793919187e-05, "loss": 0.8695, "step": 5910 }, { "epoch": 0.46994383694854036, "grad_norm": 0.7519145011901855, "learning_rate": 4.216744727580641e-05, "loss": 0.874, "step": 5920 }, { "epoch": 0.4707376609974399, "grad_norm": 0.6650322675704956, "learning_rate": 4.215421661242095e-05, "loss": 0.9093, "step": 5930 }, { "epoch": 0.47153148504633946, "grad_norm": 0.6322752833366394, "learning_rate": 4.214098594903549e-05, "loss": 0.8887, "step": 5940 }, { "epoch": 0.47232530909523907, "grad_norm": 0.8493393063545227, "learning_rate": 4.212775528565002e-05, "loss": 0.858, "step": 5950 }, { "epoch": 0.4731191331441386, "grad_norm": 0.8725616931915283, "learning_rate": 4.211452462226456e-05, "loss": 0.8437, "step": 5960 }, { "epoch": 0.47391295719303816, "grad_norm": 1.01144540309906, 
"learning_rate": 4.21012939588791e-05, "loss": 0.8824, "step": 5970 }, { "epoch": 0.4747067812419377, "grad_norm": 0.8524407744407654, "learning_rate": 4.2088063295493644e-05, "loss": 0.8566, "step": 5980 }, { "epoch": 0.47550060529083726, "grad_norm": 1.0122267007827759, "learning_rate": 4.2074832632108176e-05, "loss": 0.8802, "step": 5990 }, { "epoch": 0.47629442933973687, "grad_norm": 0.930794894695282, "learning_rate": 4.2061601968722715e-05, "loss": 0.8751, "step": 6000 }, { "epoch": 0.4770882533886364, "grad_norm": 1.0592617988586426, "learning_rate": 4.2048371305337254e-05, "loss": 0.8464, "step": 6010 }, { "epoch": 0.47788207743753597, "grad_norm": 0.7701418995857239, "learning_rate": 4.2035140641951786e-05, "loss": 0.8491, "step": 6020 }, { "epoch": 0.4786759014864355, "grad_norm": 0.7711431384086609, "learning_rate": 4.2021909978566325e-05, "loss": 0.8827, "step": 6030 }, { "epoch": 0.47946972553533507, "grad_norm": 0.5670927166938782, "learning_rate": 4.2008679315180864e-05, "loss": 0.911, "step": 6040 }, { "epoch": 0.4802635495842347, "grad_norm": 0.8850795030593872, "learning_rate": 4.19954486517954e-05, "loss": 0.8703, "step": 6050 }, { "epoch": 0.4810573736331342, "grad_norm": 0.7688451409339905, "learning_rate": 4.198221798840994e-05, "loss": 0.8591, "step": 6060 }, { "epoch": 0.48185119768203377, "grad_norm": 0.7370510101318359, "learning_rate": 4.196898732502448e-05, "loss": 0.8814, "step": 6070 }, { "epoch": 0.4826450217309333, "grad_norm": 0.8961367607116699, "learning_rate": 4.195575666163902e-05, "loss": 0.9584, "step": 6080 }, { "epoch": 0.4834388457798329, "grad_norm": 0.547537624835968, "learning_rate": 4.194252599825356e-05, "loss": 0.8956, "step": 6090 }, { "epoch": 0.4842326698287325, "grad_norm": 0.7516084909439087, "learning_rate": 4.192929533486809e-05, "loss": 0.8988, "step": 6100 }, { "epoch": 0.485026493877632, "grad_norm": 0.856939971446991, "learning_rate": 4.191606467148263e-05, "loss": 0.9231, "step": 6110 }, { "epoch": 
0.4858203179265316, "grad_norm": 0.8620836734771729, "learning_rate": 4.190283400809717e-05, "loss": 0.8834, "step": 6120 }, { "epoch": 0.4866141419754311, "grad_norm": 0.7581316828727722, "learning_rate": 4.18896033447117e-05, "loss": 0.8127, "step": 6130 }, { "epoch": 0.48740796602433073, "grad_norm": 0.6257145404815674, "learning_rate": 4.187637268132624e-05, "loss": 0.8563, "step": 6140 }, { "epoch": 0.4882017900732303, "grad_norm": 0.8719417452812195, "learning_rate": 4.186314201794078e-05, "loss": 0.9042, "step": 6150 }, { "epoch": 0.48899561412212983, "grad_norm": 0.8335233926773071, "learning_rate": 4.1849911354555324e-05, "loss": 0.9324, "step": 6160 }, { "epoch": 0.4897894381710294, "grad_norm": 0.8433477878570557, "learning_rate": 4.1836680691169856e-05, "loss": 0.8667, "step": 6170 }, { "epoch": 0.49058326221992893, "grad_norm": 0.5534406900405884, "learning_rate": 4.1823450027784395e-05, "loss": 0.8776, "step": 6180 }, { "epoch": 0.49137708626882853, "grad_norm": 0.6916033029556274, "learning_rate": 4.1810219364398934e-05, "loss": 0.8879, "step": 6190 }, { "epoch": 0.4921709103177281, "grad_norm": 1.0233070850372314, "learning_rate": 4.179698870101347e-05, "loss": 0.915, "step": 6200 }, { "epoch": 0.49296473436662763, "grad_norm": 0.8879387974739075, "learning_rate": 4.1783758037628005e-05, "loss": 0.8883, "step": 6210 }, { "epoch": 0.4937585584155272, "grad_norm": 0.8336936235427856, "learning_rate": 4.1770527374242544e-05, "loss": 0.8417, "step": 6220 }, { "epoch": 0.4945523824644268, "grad_norm": 0.8911527395248413, "learning_rate": 4.175729671085708e-05, "loss": 0.7989, "step": 6230 }, { "epoch": 0.49534620651332634, "grad_norm": 0.5336394906044006, "learning_rate": 4.174406604747162e-05, "loss": 0.9055, "step": 6240 }, { "epoch": 0.4961400305622259, "grad_norm": 0.8444200158119202, "learning_rate": 4.173083538408616e-05, "loss": 0.8582, "step": 6250 }, { "epoch": 0.49693385461112544, "grad_norm": 0.9549778699874878, "learning_rate": 
4.17176047207007e-05, "loss": 0.9263, "step": 6260 }, { "epoch": 0.497727678660025, "grad_norm": 0.8041070699691772, "learning_rate": 4.170437405731524e-05, "loss": 0.8669, "step": 6270 }, { "epoch": 0.4985215027089246, "grad_norm": 0.7747896313667297, "learning_rate": 4.169114339392977e-05, "loss": 0.8525, "step": 6280 }, { "epoch": 0.49931532675782414, "grad_norm": 0.6687043309211731, "learning_rate": 4.167791273054431e-05, "loss": 0.9159, "step": 6290 }, { "epoch": 0.5001091508067237, "grad_norm": 0.725878119468689, "learning_rate": 4.166468206715885e-05, "loss": 0.9252, "step": 6300 }, { "epoch": 0.5009029748556233, "grad_norm": 0.9067284464836121, "learning_rate": 4.165145140377339e-05, "loss": 0.8905, "step": 6310 }, { "epoch": 0.5016967989045228, "grad_norm": 0.8240019083023071, "learning_rate": 4.163822074038792e-05, "loss": 0.8641, "step": 6320 }, { "epoch": 0.5024906229534224, "grad_norm": 0.7333963513374329, "learning_rate": 4.1624990077002466e-05, "loss": 0.8823, "step": 6330 }, { "epoch": 0.5032844470023219, "grad_norm": 1.1056770086288452, "learning_rate": 4.1611759413617005e-05, "loss": 0.8817, "step": 6340 }, { "epoch": 0.5040782710512215, "grad_norm": 0.8191234469413757, "learning_rate": 4.1598528750231543e-05, "loss": 0.8986, "step": 6350 }, { "epoch": 0.5048720951001211, "grad_norm": 0.6650758981704712, "learning_rate": 4.1585298086846076e-05, "loss": 0.885, "step": 6360 }, { "epoch": 0.5056659191490206, "grad_norm": 0.8753699064254761, "learning_rate": 4.1572067423460615e-05, "loss": 0.8402, "step": 6370 }, { "epoch": 0.5064597431979202, "grad_norm": 0.8924400806427002, "learning_rate": 4.1558836760075153e-05, "loss": 0.9016, "step": 6380 }, { "epoch": 0.5072535672468197, "grad_norm": 0.7312005758285522, "learning_rate": 4.1545606096689686e-05, "loss": 0.914, "step": 6390 }, { "epoch": 0.5080473912957193, "grad_norm": 0.771732747554779, "learning_rate": 4.1532375433304225e-05, "loss": 0.8976, "step": 6400 }, { "epoch": 0.5088412153446189, 
"grad_norm": 0.7958862781524658, "learning_rate": 4.1519144769918763e-05, "loss": 0.8749, "step": 6410 }, { "epoch": 0.5096350393935184, "grad_norm": 0.751964271068573, "learning_rate": 4.15059141065333e-05, "loss": 0.852, "step": 6420 }, { "epoch": 0.510428863442418, "grad_norm": 0.721443772315979, "learning_rate": 4.149268344314784e-05, "loss": 0.8887, "step": 6430 }, { "epoch": 0.5112226874913175, "grad_norm": 0.6288412809371948, "learning_rate": 4.147945277976238e-05, "loss": 0.8488, "step": 6440 }, { "epoch": 0.5120165115402171, "grad_norm": 0.7060637474060059, "learning_rate": 4.146622211637692e-05, "loss": 0.8506, "step": 6450 }, { "epoch": 0.5128103355891167, "grad_norm": 0.5948452949523926, "learning_rate": 4.145299145299146e-05, "loss": 0.8759, "step": 6460 }, { "epoch": 0.5136041596380162, "grad_norm": 0.7204840183258057, "learning_rate": 4.143976078960599e-05, "loss": 0.9107, "step": 6470 }, { "epoch": 0.5143979836869158, "grad_norm": 0.9579210877418518, "learning_rate": 4.142653012622053e-05, "loss": 0.7921, "step": 6480 }, { "epoch": 0.5151918077358154, "grad_norm": 0.8127973079681396, "learning_rate": 4.141329946283507e-05, "loss": 0.8468, "step": 6490 }, { "epoch": 0.5159856317847149, "grad_norm": 0.6830429434776306, "learning_rate": 4.140006879944961e-05, "loss": 0.9339, "step": 6500 }, { "epoch": 0.5167794558336145, "grad_norm": 0.8261887431144714, "learning_rate": 4.1386838136064146e-05, "loss": 0.9211, "step": 6510 }, { "epoch": 0.517573279882514, "grad_norm": 0.803862988948822, "learning_rate": 4.1373607472678685e-05, "loss": 0.883, "step": 6520 }, { "epoch": 0.5183671039314136, "grad_norm": 0.971078097820282, "learning_rate": 4.1360376809293224e-05, "loss": 0.8913, "step": 6530 }, { "epoch": 0.5191609279803132, "grad_norm": 0.8098673224449158, "learning_rate": 4.1347146145907756e-05, "loss": 0.9281, "step": 6540 }, { "epoch": 0.5199547520292127, "grad_norm": 0.9599949717521667, "learning_rate": 4.1333915482522295e-05, "loss": 0.8459, "step": 
6550 }, { "epoch": 0.5207485760781123, "grad_norm": 0.7749876379966736, "learning_rate": 4.1320684819136834e-05, "loss": 0.8717, "step": 6560 }, { "epoch": 0.5215424001270118, "grad_norm": 0.9038444757461548, "learning_rate": 4.130745415575137e-05, "loss": 0.8716, "step": 6570 }, { "epoch": 0.5223362241759114, "grad_norm": 0.9091252088546753, "learning_rate": 4.1294223492365905e-05, "loss": 0.8709, "step": 6580 }, { "epoch": 0.523130048224811, "grad_norm": 0.8427479863166809, "learning_rate": 4.1280992828980444e-05, "loss": 0.905, "step": 6590 }, { "epoch": 0.5239238722737105, "grad_norm": 0.6537529230117798, "learning_rate": 4.126776216559499e-05, "loss": 0.8252, "step": 6600 }, { "epoch": 0.5247176963226101, "grad_norm": 0.7358630895614624, "learning_rate": 4.125453150220952e-05, "loss": 0.9075, "step": 6610 }, { "epoch": 0.5255115203715096, "grad_norm": 0.9415682554244995, "learning_rate": 4.124130083882406e-05, "loss": 0.8233, "step": 6620 }, { "epoch": 0.5263053444204092, "grad_norm": 0.790571928024292, "learning_rate": 4.12280701754386e-05, "loss": 0.9097, "step": 6630 }, { "epoch": 0.5270991684693088, "grad_norm": 0.9315680861473083, "learning_rate": 4.121483951205314e-05, "loss": 0.8962, "step": 6640 }, { "epoch": 0.5278929925182083, "grad_norm": 0.8426125645637512, "learning_rate": 4.120160884866767e-05, "loss": 0.8931, "step": 6650 }, { "epoch": 0.5286868165671079, "grad_norm": 0.7593029141426086, "learning_rate": 4.118837818528221e-05, "loss": 0.8584, "step": 6660 }, { "epoch": 0.5294806406160074, "grad_norm": 0.719986617565155, "learning_rate": 4.117514752189675e-05, "loss": 0.9337, "step": 6670 }, { "epoch": 0.530274464664907, "grad_norm": 0.8458060026168823, "learning_rate": 4.116191685851129e-05, "loss": 0.8355, "step": 6680 }, { "epoch": 0.5310682887138066, "grad_norm": 0.7757362127304077, "learning_rate": 4.1148686195125826e-05, "loss": 0.897, "step": 6690 }, { "epoch": 0.5318621127627061, "grad_norm": 0.7549418807029724, "learning_rate": 
4.1135455531740365e-05, "loss": 0.8864, "step": 6700 }, { "epoch": 0.5326559368116057, "grad_norm": 0.8752275705337524, "learning_rate": 4.1122224868354904e-05, "loss": 0.902, "step": 6710 }, { "epoch": 0.5334497608605052, "grad_norm": 0.901665985584259, "learning_rate": 4.1108994204969436e-05, "loss": 0.8892, "step": 6720 }, { "epoch": 0.5342435849094048, "grad_norm": 0.8543225526809692, "learning_rate": 4.1095763541583975e-05, "loss": 0.8923, "step": 6730 }, { "epoch": 0.5350374089583044, "grad_norm": 0.8486155867576599, "learning_rate": 4.1082532878198514e-05, "loss": 0.9043, "step": 6740 }, { "epoch": 0.5358312330072039, "grad_norm": 0.869857668876648, "learning_rate": 4.106930221481305e-05, "loss": 0.8406, "step": 6750 }, { "epoch": 0.5366250570561035, "grad_norm": 0.9113563299179077, "learning_rate": 4.1056071551427585e-05, "loss": 0.9667, "step": 6760 }, { "epoch": 0.537418881105003, "grad_norm": 0.9034312963485718, "learning_rate": 4.104284088804213e-05, "loss": 0.8792, "step": 6770 }, { "epoch": 0.5382127051539026, "grad_norm": 0.8775055408477783, "learning_rate": 4.102961022465667e-05, "loss": 0.8938, "step": 6780 }, { "epoch": 0.5390065292028022, "grad_norm": 0.8911844491958618, "learning_rate": 4.101637956127121e-05, "loss": 0.8922, "step": 6790 }, { "epoch": 0.5398003532517017, "grad_norm": 0.9125531315803528, "learning_rate": 4.100314889788574e-05, "loss": 0.9208, "step": 6800 }, { "epoch": 0.5405941773006013, "grad_norm": 0.8507636189460754, "learning_rate": 4.098991823450028e-05, "loss": 0.8493, "step": 6810 }, { "epoch": 0.5413880013495009, "grad_norm": 0.9445191621780396, "learning_rate": 4.097668757111482e-05, "loss": 0.8364, "step": 6820 }, { "epoch": 0.5421818253984004, "grad_norm": 0.6830033659934998, "learning_rate": 4.096345690772935e-05, "loss": 0.8302, "step": 6830 }, { "epoch": 0.5429756494473, "grad_norm": 0.9439083337783813, "learning_rate": 4.095022624434389e-05, "loss": 0.8545, "step": 6840 }, { "epoch": 0.5437694734961995, 
"grad_norm": 0.8083987236022949, "learning_rate": 4.093699558095843e-05, "loss": 0.9273, "step": 6850 }, { "epoch": 0.5445632975450991, "grad_norm": 0.7800227403640747, "learning_rate": 4.092376491757297e-05, "loss": 0.8564, "step": 6860 }, { "epoch": 0.5453571215939987, "grad_norm": 0.7036982178688049, "learning_rate": 4.091053425418751e-05, "loss": 0.8236, "step": 6870 }, { "epoch": 0.5461509456428982, "grad_norm": 0.6380200982093811, "learning_rate": 4.0897303590802046e-05, "loss": 0.8749, "step": 6880 }, { "epoch": 0.5469447696917978, "grad_norm": 0.9935095906257629, "learning_rate": 4.0884072927416585e-05, "loss": 0.8536, "step": 6890 }, { "epoch": 0.5477385937406973, "grad_norm": 0.9212964773178101, "learning_rate": 4.0870842264031123e-05, "loss": 0.88, "step": 6900 }, { "epoch": 0.5485324177895969, "grad_norm": 0.7856978178024292, "learning_rate": 4.0857611600645656e-05, "loss": 0.9521, "step": 6910 }, { "epoch": 0.5493262418384965, "grad_norm": 0.8367793560028076, "learning_rate": 4.0844380937260195e-05, "loss": 0.8705, "step": 6920 }, { "epoch": 0.550120065887396, "grad_norm": 0.735126256942749, "learning_rate": 4.0831150273874733e-05, "loss": 0.9276, "step": 6930 }, { "epoch": 0.5509138899362956, "grad_norm": 0.7849065065383911, "learning_rate": 4.081791961048927e-05, "loss": 0.8424, "step": 6940 }, { "epoch": 0.5517077139851951, "grad_norm": 0.9133653044700623, "learning_rate": 4.080468894710381e-05, "loss": 0.8708, "step": 6950 }, { "epoch": 0.5525015380340947, "grad_norm": 0.6829420924186707, "learning_rate": 4.079145828371835e-05, "loss": 0.8696, "step": 6960 }, { "epoch": 0.5532953620829943, "grad_norm": 0.8633001446723938, "learning_rate": 4.077822762033289e-05, "loss": 0.9033, "step": 6970 }, { "epoch": 0.5540891861318938, "grad_norm": 0.6927655935287476, "learning_rate": 4.076499695694742e-05, "loss": 0.8687, "step": 6980 }, { "epoch": 0.5548830101807934, "grad_norm": 0.9020519852638245, "learning_rate": 4.075176629356196e-05, "loss": 0.8289, 
"step": 6990 }, { "epoch": 0.5556768342296929, "grad_norm": 0.8436912894248962, "learning_rate": 4.07385356301765e-05, "loss": 0.8881, "step": 7000 }, { "epoch": 0.5564706582785925, "grad_norm": 0.7371892333030701, "learning_rate": 4.072530496679104e-05, "loss": 0.8995, "step": 7010 }, { "epoch": 0.5572644823274921, "grad_norm": 0.8149321675300598, "learning_rate": 4.071207430340557e-05, "loss": 0.9026, "step": 7020 }, { "epoch": 0.5580583063763916, "grad_norm": 0.6326161026954651, "learning_rate": 4.069884364002011e-05, "loss": 0.8795, "step": 7030 }, { "epoch": 0.5588521304252912, "grad_norm": 0.8339284062385559, "learning_rate": 4.0685612976634655e-05, "loss": 0.8199, "step": 7040 }, { "epoch": 0.5596459544741907, "grad_norm": 0.7637563347816467, "learning_rate": 4.0672382313249194e-05, "loss": 0.8635, "step": 7050 }, { "epoch": 0.5604397785230903, "grad_norm": 0.8212976455688477, "learning_rate": 4.0659151649863726e-05, "loss": 0.8681, "step": 7060 }, { "epoch": 0.56123360257199, "grad_norm": 0.7294253706932068, "learning_rate": 4.0645920986478265e-05, "loss": 0.8807, "step": 7070 }, { "epoch": 0.5620274266208894, "grad_norm": 0.7334606051445007, "learning_rate": 4.0632690323092804e-05, "loss": 0.8732, "step": 7080 }, { "epoch": 0.562821250669789, "grad_norm": 0.8146048188209534, "learning_rate": 4.0619459659707336e-05, "loss": 0.8941, "step": 7090 }, { "epoch": 0.5636150747186887, "grad_norm": 0.8506765365600586, "learning_rate": 4.0606228996321875e-05, "loss": 0.8885, "step": 7100 }, { "epoch": 0.5644088987675882, "grad_norm": 1.0465277433395386, "learning_rate": 4.0592998332936414e-05, "loss": 0.877, "step": 7110 }, { "epoch": 0.5652027228164878, "grad_norm": 1.0059021711349487, "learning_rate": 4.057976766955095e-05, "loss": 0.9511, "step": 7120 }, { "epoch": 0.5659965468653873, "grad_norm": 0.7272640466690063, "learning_rate": 4.056653700616549e-05, "loss": 0.8562, "step": 7130 }, { "epoch": 0.5667903709142869, "grad_norm": 0.6533452868461609, 
"learning_rate": 4.055330634278003e-05, "loss": 0.8402, "step": 7140 }, { "epoch": 0.5675841949631865, "grad_norm": 0.8188508152961731, "learning_rate": 4.054007567939457e-05, "loss": 0.8589, "step": 7150 }, { "epoch": 0.568378019012086, "grad_norm": 0.8505097031593323, "learning_rate": 4.052684501600911e-05, "loss": 0.8614, "step": 7160 }, { "epoch": 0.5691718430609856, "grad_norm": 0.7959850430488586, "learning_rate": 4.051361435262364e-05, "loss": 0.8628, "step": 7170 }, { "epoch": 0.569965667109885, "grad_norm": 0.9886569976806641, "learning_rate": 4.050038368923818e-05, "loss": 0.8947, "step": 7180 }, { "epoch": 0.5707594911587847, "grad_norm": 0.8581879138946533, "learning_rate": 4.048715302585272e-05, "loss": 0.8619, "step": 7190 }, { "epoch": 0.5715533152076843, "grad_norm": 0.9623154401779175, "learning_rate": 4.047392236246725e-05, "loss": 0.8252, "step": 7200 }, { "epoch": 0.5723471392565838, "grad_norm": 0.8627947568893433, "learning_rate": 4.0460691699081796e-05, "loss": 0.8785, "step": 7210 }, { "epoch": 0.5731409633054834, "grad_norm": 0.7813371419906616, "learning_rate": 4.0447461035696335e-05, "loss": 0.9171, "step": 7220 }, { "epoch": 0.5739347873543829, "grad_norm": 1.0067614316940308, "learning_rate": 4.0434230372310874e-05, "loss": 0.836, "step": 7230 }, { "epoch": 0.5747286114032825, "grad_norm": 0.6856113076210022, "learning_rate": 4.0420999708925406e-05, "loss": 0.9001, "step": 7240 }, { "epoch": 0.5755224354521821, "grad_norm": 1.0112297534942627, "learning_rate": 4.0407769045539945e-05, "loss": 0.8726, "step": 7250 }, { "epoch": 0.5763162595010816, "grad_norm": 0.6633104085922241, "learning_rate": 4.0394538382154484e-05, "loss": 0.8487, "step": 7260 }, { "epoch": 0.5771100835499812, "grad_norm": 0.7768102884292603, "learning_rate": 4.038130771876902e-05, "loss": 0.8202, "step": 7270 }, { "epoch": 0.5779039075988807, "grad_norm": 0.7367390394210815, "learning_rate": 4.0368077055383555e-05, "loss": 0.9207, "step": 7280 }, { "epoch": 
0.5786977316477803, "grad_norm": 0.9495264887809753, "learning_rate": 4.0354846391998094e-05, "loss": 0.9026, "step": 7290 }, { "epoch": 0.5794915556966799, "grad_norm": 0.880790650844574, "learning_rate": 4.034161572861263e-05, "loss": 0.8974, "step": 7300 }, { "epoch": 0.5802853797455794, "grad_norm": 0.8407362103462219, "learning_rate": 4.032838506522717e-05, "loss": 0.8189, "step": 7310 }, { "epoch": 0.581079203794479, "grad_norm": 0.6734247207641602, "learning_rate": 4.031515440184171e-05, "loss": 0.9121, "step": 7320 }, { "epoch": 0.5818730278433785, "grad_norm": 0.965093195438385, "learning_rate": 4.030192373845625e-05, "loss": 0.8464, "step": 7330 }, { "epoch": 0.5826668518922781, "grad_norm": 0.7038446068763733, "learning_rate": 4.028869307507079e-05, "loss": 0.9442, "step": 7340 }, { "epoch": 0.5834606759411777, "grad_norm": 0.6789405345916748, "learning_rate": 4.027546241168532e-05, "loss": 0.9112, "step": 7350 }, { "epoch": 0.5842544999900772, "grad_norm": 0.8215457797050476, "learning_rate": 4.026223174829986e-05, "loss": 0.891, "step": 7360 }, { "epoch": 0.5850483240389768, "grad_norm": 0.8473328948020935, "learning_rate": 4.02490010849144e-05, "loss": 0.8637, "step": 7370 }, { "epoch": 0.5858421480878763, "grad_norm": 0.7100654244422913, "learning_rate": 4.023577042152894e-05, "loss": 0.9027, "step": 7380 }, { "epoch": 0.5866359721367759, "grad_norm": 0.8264563083648682, "learning_rate": 4.022253975814348e-05, "loss": 0.8289, "step": 7390 }, { "epoch": 0.5874297961856755, "grad_norm": 0.595227062702179, "learning_rate": 4.0209309094758016e-05, "loss": 0.8588, "step": 7400 }, { "epoch": 0.588223620234575, "grad_norm": 0.8084173202514648, "learning_rate": 4.0196078431372555e-05, "loss": 0.9038, "step": 7410 }, { "epoch": 0.5890174442834746, "grad_norm": 0.8075309991836548, "learning_rate": 4.018284776798709e-05, "loss": 0.9102, "step": 7420 }, { "epoch": 0.5898112683323742, "grad_norm": 0.5747039318084717, "learning_rate": 4.0169617104601626e-05, 
"loss": 0.8329, "step": 7430 }, { "epoch": 0.5906050923812737, "grad_norm": 0.943231463432312, "learning_rate": 4.0156386441216165e-05, "loss": 0.8205, "step": 7440 }, { "epoch": 0.5913989164301733, "grad_norm": 0.8682481050491333, "learning_rate": 4.0143155777830703e-05, "loss": 0.862, "step": 7450 }, { "epoch": 0.5921927404790728, "grad_norm": 0.6591200232505798, "learning_rate": 4.0129925114445236e-05, "loss": 0.8736, "step": 7460 }, { "epoch": 0.5929865645279724, "grad_norm": 0.7325188517570496, "learning_rate": 4.0116694451059775e-05, "loss": 0.8911, "step": 7470 }, { "epoch": 0.593780388576872, "grad_norm": 0.5145597457885742, "learning_rate": 4.010346378767432e-05, "loss": 0.8992, "step": 7480 }, { "epoch": 0.5945742126257715, "grad_norm": 0.6899420022964478, "learning_rate": 4.009023312428886e-05, "loss": 0.8651, "step": 7490 }, { "epoch": 0.5953680366746711, "grad_norm": 0.8163533806800842, "learning_rate": 4.007700246090339e-05, "loss": 0.8571, "step": 7500 }, { "epoch": 0.5961618607235706, "grad_norm": 0.8414540886878967, "learning_rate": 4.006377179751793e-05, "loss": 0.8866, "step": 7510 }, { "epoch": 0.5969556847724702, "grad_norm": 0.9524148106575012, "learning_rate": 4.005054113413247e-05, "loss": 0.871, "step": 7520 }, { "epoch": 0.5977495088213698, "grad_norm": 0.8480884432792664, "learning_rate": 4.0037310470747e-05, "loss": 0.8807, "step": 7530 }, { "epoch": 0.5985433328702693, "grad_norm": 0.9701437950134277, "learning_rate": 4.002407980736154e-05, "loss": 0.8092, "step": 7540 }, { "epoch": 0.5993371569191689, "grad_norm": 0.9509230256080627, "learning_rate": 4.001084914397608e-05, "loss": 0.9041, "step": 7550 }, { "epoch": 0.6001309809680684, "grad_norm": 0.6511445045471191, "learning_rate": 3.999761848059062e-05, "loss": 0.8408, "step": 7560 }, { "epoch": 0.600924805016968, "grad_norm": 0.7083035111427307, "learning_rate": 3.998438781720516e-05, "loss": 0.8712, "step": 7570 }, { "epoch": 0.6017186290658676, "grad_norm": 0.8894531726837158, 
"learning_rate": 3.9971157153819696e-05, "loss": 0.8662, "step": 7580 }, { "epoch": 0.6025124531147671, "grad_norm": 0.7354892492294312, "learning_rate": 3.9957926490434235e-05, "loss": 0.7996, "step": 7590 }, { "epoch": 0.6033062771636667, "grad_norm": 0.8339380025863647, "learning_rate": 3.9944695827048774e-05, "loss": 0.8971, "step": 7600 }, { "epoch": 0.6041001012125662, "grad_norm": 0.7149907350540161, "learning_rate": 3.9931465163663306e-05, "loss": 0.8674, "step": 7610 }, { "epoch": 0.6048939252614658, "grad_norm": 0.6684656739234924, "learning_rate": 3.9918234500277845e-05, "loss": 0.8449, "step": 7620 }, { "epoch": 0.6056877493103654, "grad_norm": 0.803089439868927, "learning_rate": 3.9905003836892384e-05, "loss": 0.8952, "step": 7630 }, { "epoch": 0.6064815733592649, "grad_norm": 0.7551273107528687, "learning_rate": 3.9891773173506916e-05, "loss": 0.8461, "step": 7640 }, { "epoch": 0.6072753974081645, "grad_norm": 0.6433910131454468, "learning_rate": 3.9878542510121455e-05, "loss": 0.8795, "step": 7650 }, { "epoch": 0.608069221457064, "grad_norm": 0.73484867811203, "learning_rate": 3.9865311846736e-05, "loss": 0.8916, "step": 7660 }, { "epoch": 0.6088630455059636, "grad_norm": 0.8747826218605042, "learning_rate": 3.985208118335054e-05, "loss": 0.8803, "step": 7670 }, { "epoch": 0.6096568695548632, "grad_norm": 0.9112239480018616, "learning_rate": 3.983885051996507e-05, "loss": 0.9397, "step": 7680 }, { "epoch": 0.6104506936037627, "grad_norm": 0.8457674384117126, "learning_rate": 3.982561985657961e-05, "loss": 0.8451, "step": 7690 }, { "epoch": 0.6112445176526623, "grad_norm": 0.7593239545822144, "learning_rate": 3.981238919319415e-05, "loss": 0.9272, "step": 7700 }, { "epoch": 0.6120383417015618, "grad_norm": 0.7265938520431519, "learning_rate": 3.979915852980869e-05, "loss": 0.9268, "step": 7710 }, { "epoch": 0.6128321657504614, "grad_norm": 0.7465494871139526, "learning_rate": 3.978592786642322e-05, "loss": 0.872, "step": 7720 }, { "epoch": 
0.613625989799361, "grad_norm": 0.7977067828178406, "learning_rate": 3.977269720303776e-05, "loss": 0.8586, "step": 7730 }, { "epoch": 0.6144198138482605, "grad_norm": 0.76861572265625, "learning_rate": 3.97594665396523e-05, "loss": 0.9187, "step": 7740 }, { "epoch": 0.6152136378971601, "grad_norm": 0.9571815133094788, "learning_rate": 3.974623587626684e-05, "loss": 0.835, "step": 7750 }, { "epoch": 0.6160074619460597, "grad_norm": 0.7097697257995605, "learning_rate": 3.9733005212881376e-05, "loss": 0.8519, "step": 7760 }, { "epoch": 0.6168012859949592, "grad_norm": 0.6047300696372986, "learning_rate": 3.9719774549495915e-05, "loss": 0.8864, "step": 7770 }, { "epoch": 0.6175951100438588, "grad_norm": 0.7774935364723206, "learning_rate": 3.9706543886110454e-05, "loss": 0.8528, "step": 7780 }, { "epoch": 0.6183889340927583, "grad_norm": 0.7351526021957397, "learning_rate": 3.9693313222724986e-05, "loss": 0.8548, "step": 7790 }, { "epoch": 0.6191827581416579, "grad_norm": 0.8887933492660522, "learning_rate": 3.9680082559339525e-05, "loss": 0.9191, "step": 7800 }, { "epoch": 0.6199765821905575, "grad_norm": 0.6193240284919739, "learning_rate": 3.9666851895954064e-05, "loss": 0.8906, "step": 7810 }, { "epoch": 0.620770406239457, "grad_norm": 1.0305780172348022, "learning_rate": 3.96536212325686e-05, "loss": 0.9161, "step": 7820 }, { "epoch": 0.6215642302883566, "grad_norm": 0.9303094744682312, "learning_rate": 3.964039056918314e-05, "loss": 0.923, "step": 7830 }, { "epoch": 0.6223580543372561, "grad_norm": 0.7105975151062012, "learning_rate": 3.962715990579768e-05, "loss": 0.8918, "step": 7840 }, { "epoch": 0.6231518783861557, "grad_norm": 0.7743216753005981, "learning_rate": 3.961392924241222e-05, "loss": 0.914, "step": 7850 }, { "epoch": 0.6239457024350553, "grad_norm": 1.069062352180481, "learning_rate": 3.960069857902676e-05, "loss": 0.9209, "step": 7860 }, { "epoch": 0.6247395264839548, "grad_norm": 0.8283140659332275, "learning_rate": 3.958746791564129e-05, 
"loss": 0.8587, "step": 7870 }, { "epoch": 0.6255333505328544, "grad_norm": 0.6875911355018616, "learning_rate": 3.957423725225583e-05, "loss": 0.8845, "step": 7880 }, { "epoch": 0.6263271745817539, "grad_norm": 0.8538176417350769, "learning_rate": 3.956100658887037e-05, "loss": 0.8435, "step": 7890 }, { "epoch": 0.6271209986306535, "grad_norm": 0.6891659498214722, "learning_rate": 3.95477759254849e-05, "loss": 0.8122, "step": 7900 }, { "epoch": 0.6279148226795531, "grad_norm": 0.6774616837501526, "learning_rate": 3.953454526209944e-05, "loss": 0.9223, "step": 7910 }, { "epoch": 0.6287086467284526, "grad_norm": 0.6957716345787048, "learning_rate": 3.952131459871398e-05, "loss": 0.9055, "step": 7920 }, { "epoch": 0.6295024707773522, "grad_norm": 0.7864802479743958, "learning_rate": 3.9508083935328525e-05, "loss": 0.8758, "step": 7930 }, { "epoch": 0.6302962948262517, "grad_norm": 0.6542963981628418, "learning_rate": 3.949485327194306e-05, "loss": 0.8862, "step": 7940 }, { "epoch": 0.6310901188751513, "grad_norm": 0.8261880278587341, "learning_rate": 3.9481622608557596e-05, "loss": 0.8559, "step": 7950 }, { "epoch": 0.6318839429240509, "grad_norm": 0.7244775295257568, "learning_rate": 3.9468391945172135e-05, "loss": 0.896, "step": 7960 }, { "epoch": 0.6326777669729504, "grad_norm": 0.8435840010643005, "learning_rate": 3.9455161281786673e-05, "loss": 0.8781, "step": 7970 }, { "epoch": 0.63347159102185, "grad_norm": 0.8194109797477722, "learning_rate": 3.9441930618401206e-05, "loss": 0.9129, "step": 7980 }, { "epoch": 0.6342654150707495, "grad_norm": 0.746361494064331, "learning_rate": 3.9428699955015745e-05, "loss": 0.9033, "step": 7990 }, { "epoch": 0.6350592391196491, "grad_norm": 0.7431300282478333, "learning_rate": 3.9415469291630283e-05, "loss": 0.9065, "step": 8000 }, { "epoch": 0.6358530631685487, "grad_norm": 0.7559617757797241, "learning_rate": 3.940223862824482e-05, "loss": 0.8851, "step": 8010 }, { "epoch": 0.6366468872174482, "grad_norm": 
0.7204374074935913, "learning_rate": 3.938900796485936e-05, "loss": 0.9143, "step": 8020 }, { "epoch": 0.6374407112663478, "grad_norm": 0.9044206142425537, "learning_rate": 3.93757773014739e-05, "loss": 0.9308, "step": 8030 }, { "epoch": 0.6382345353152473, "grad_norm": 0.6706372499465942, "learning_rate": 3.936254663808844e-05, "loss": 0.8871, "step": 8040 }, { "epoch": 0.6390283593641469, "grad_norm": 0.8371633291244507, "learning_rate": 3.934931597470297e-05, "loss": 0.8375, "step": 8050 }, { "epoch": 0.6398221834130465, "grad_norm": 0.823395311832428, "learning_rate": 3.933608531131751e-05, "loss": 0.8351, "step": 8060 }, { "epoch": 0.640616007461946, "grad_norm": 0.7647190690040588, "learning_rate": 3.932417771427059e-05, "loss": 0.8564, "step": 8070 }, { "epoch": 0.6414098315108456, "grad_norm": 0.7140836119651794, "learning_rate": 3.931094705088513e-05, "loss": 0.8248, "step": 8080 }, { "epoch": 0.6422036555597452, "grad_norm": 0.7830526232719421, "learning_rate": 3.929771638749967e-05, "loss": 0.8325, "step": 8090 }, { "epoch": 0.6429974796086447, "grad_norm": 0.8376036286354065, "learning_rate": 3.9284485724114215e-05, "loss": 0.8787, "step": 8100 }, { "epoch": 0.6437913036575443, "grad_norm": 0.8224188685417175, "learning_rate": 3.927125506072875e-05, "loss": 0.8556, "step": 8110 }, { "epoch": 0.6445851277064438, "grad_norm": 0.7094652056694031, "learning_rate": 3.9258024397343286e-05, "loss": 0.9008, "step": 8120 }, { "epoch": 0.6453789517553434, "grad_norm": 0.9661831855773926, "learning_rate": 3.9244793733957825e-05, "loss": 0.9017, "step": 8130 }, { "epoch": 0.646172775804243, "grad_norm": 0.5937822461128235, "learning_rate": 3.923156307057236e-05, "loss": 0.93, "step": 8140 }, { "epoch": 0.6469665998531425, "grad_norm": 0.8832845091819763, "learning_rate": 3.9218332407186896e-05, "loss": 0.7535, "step": 8150 }, { "epoch": 0.6477604239020421, "grad_norm": 0.8030399084091187, "learning_rate": 3.9205101743801435e-05, "loss": 0.8628, "step": 8160 }, { 
"epoch": 0.6485542479509416, "grad_norm": 0.6828364729881287, "learning_rate": 3.9191871080415974e-05, "loss": 0.8172, "step": 8170 }, { "epoch": 0.6493480719998412, "grad_norm": 0.7362493872642517, "learning_rate": 3.917864041703051e-05, "loss": 0.8649, "step": 8180 }, { "epoch": 0.6501418960487408, "grad_norm": 0.711621105670929, "learning_rate": 3.916540975364505e-05, "loss": 0.8908, "step": 8190 }, { "epoch": 0.6509357200976403, "grad_norm": 0.7424710392951965, "learning_rate": 3.915217909025959e-05, "loss": 0.8684, "step": 8200 }, { "epoch": 0.6517295441465399, "grad_norm": 0.8908485770225525, "learning_rate": 3.913894842687413e-05, "loss": 0.867, "step": 8210 }, { "epoch": 0.6525233681954394, "grad_norm": 0.8501769304275513, "learning_rate": 3.912571776348866e-05, "loss": 0.8688, "step": 8220 }, { "epoch": 0.653317192244339, "grad_norm": 0.8504555225372314, "learning_rate": 3.91124871001032e-05, "loss": 0.8451, "step": 8230 }, { "epoch": 0.6541110162932386, "grad_norm": 1.316743016242981, "learning_rate": 3.909925643671774e-05, "loss": 0.8375, "step": 8240 }, { "epoch": 0.6549048403421381, "grad_norm": 0.7561972141265869, "learning_rate": 3.908602577333227e-05, "loss": 0.8597, "step": 8250 }, { "epoch": 0.6556986643910377, "grad_norm": 0.7232686877250671, "learning_rate": 3.907279510994681e-05, "loss": 0.8808, "step": 8260 }, { "epoch": 0.6564924884399372, "grad_norm": 0.6502814888954163, "learning_rate": 3.9059564446561356e-05, "loss": 0.939, "step": 8270 }, { "epoch": 0.6572863124888368, "grad_norm": 0.9864717721939087, "learning_rate": 3.9046333783175895e-05, "loss": 0.8091, "step": 8280 }, { "epoch": 0.6580801365377364, "grad_norm": 0.8748832941055298, "learning_rate": 3.903310311979043e-05, "loss": 0.8676, "step": 8290 }, { "epoch": 0.6588739605866359, "grad_norm": 0.8619644641876221, "learning_rate": 3.9019872456404966e-05, "loss": 0.8134, "step": 8300 }, { "epoch": 0.6596677846355355, "grad_norm": 0.7893159985542297, "learning_rate": 
3.9006641793019505e-05, "loss": 0.8656, "step": 8310 }, { "epoch": 0.660461608684435, "grad_norm": 0.8085225820541382, "learning_rate": 3.8993411129634044e-05, "loss": 0.9401, "step": 8320 }, { "epoch": 0.6612554327333346, "grad_norm": 0.8929345011711121, "learning_rate": 3.8980180466248576e-05, "loss": 0.8665, "step": 8330 }, { "epoch": 0.6620492567822343, "grad_norm": 0.9615473747253418, "learning_rate": 3.8966949802863115e-05, "loss": 0.8331, "step": 8340 }, { "epoch": 0.6628430808311337, "grad_norm": 0.582528293132782, "learning_rate": 3.8953719139477654e-05, "loss": 0.8626, "step": 8350 }, { "epoch": 0.6636369048800334, "grad_norm": 0.7773953676223755, "learning_rate": 3.894048847609219e-05, "loss": 0.8119, "step": 8360 }, { "epoch": 0.664430728928933, "grad_norm": 0.8851808309555054, "learning_rate": 3.892725781270673e-05, "loss": 0.8806, "step": 8370 }, { "epoch": 0.6652245529778325, "grad_norm": 0.8507623076438904, "learning_rate": 3.891402714932127e-05, "loss": 0.8734, "step": 8380 }, { "epoch": 0.6660183770267321, "grad_norm": 0.8153007626533508, "learning_rate": 3.890079648593581e-05, "loss": 0.8501, "step": 8390 }, { "epoch": 0.6668122010756315, "grad_norm": 0.8047279715538025, "learning_rate": 3.888756582255034e-05, "loss": 0.8574, "step": 8400 }, { "epoch": 0.6676060251245312, "grad_norm": 0.933725118637085, "learning_rate": 3.887433515916488e-05, "loss": 0.8638, "step": 8410 }, { "epoch": 0.6683998491734308, "grad_norm": 0.7633134126663208, "learning_rate": 3.886110449577942e-05, "loss": 0.9276, "step": 8420 }, { "epoch": 0.6691936732223303, "grad_norm": 0.8847464323043823, "learning_rate": 3.884787383239396e-05, "loss": 0.9414, "step": 8430 }, { "epoch": 0.6699874972712299, "grad_norm": 0.9716514945030212, "learning_rate": 3.88346431690085e-05, "loss": 0.8823, "step": 8440 }, { "epoch": 0.6707813213201294, "grad_norm": 0.8320568203926086, "learning_rate": 3.8821412505623037e-05, "loss": 0.812, "step": 8450 }, { "epoch": 0.671575145369029, 
"grad_norm": 0.7961634397506714, "learning_rate": 3.8808181842237575e-05, "loss": 0.816, "step": 8460 }, { "epoch": 0.6723689694179286, "grad_norm": 0.7325245141983032, "learning_rate": 3.879495117885211e-05, "loss": 0.8224, "step": 8470 }, { "epoch": 0.6731627934668281, "grad_norm": 0.7438898086547852, "learning_rate": 3.8781720515466647e-05, "loss": 0.8102, "step": 8480 }, { "epoch": 0.6739566175157277, "grad_norm": 0.7299096584320068, "learning_rate": 3.8768489852081186e-05, "loss": 0.9632, "step": 8490 }, { "epoch": 0.6747504415646272, "grad_norm": 0.895844578742981, "learning_rate": 3.8755259188695724e-05, "loss": 0.9497, "step": 8500 }, { "epoch": 0.6755442656135268, "grad_norm": 0.8177179098129272, "learning_rate": 3.8742028525310257e-05, "loss": 0.853, "step": 8510 }, { "epoch": 0.6763380896624264, "grad_norm": 0.6874784827232361, "learning_rate": 3.8728797861924796e-05, "loss": 0.8464, "step": 8520 }, { "epoch": 0.6771319137113259, "grad_norm": 0.8291011452674866, "learning_rate": 3.8715567198539334e-05, "loss": 0.9136, "step": 8530 }, { "epoch": 0.6779257377602255, "grad_norm": 0.6445680260658264, "learning_rate": 3.870233653515388e-05, "loss": 0.8429, "step": 8540 }, { "epoch": 0.678719561809125, "grad_norm": 0.7403009533882141, "learning_rate": 3.868910587176841e-05, "loss": 0.8473, "step": 8550 }, { "epoch": 0.6795133858580246, "grad_norm": 0.5939794778823853, "learning_rate": 3.867587520838295e-05, "loss": 0.9001, "step": 8560 }, { "epoch": 0.6803072099069242, "grad_norm": 0.6442236304283142, "learning_rate": 3.866264454499749e-05, "loss": 0.8819, "step": 8570 }, { "epoch": 0.6811010339558237, "grad_norm": 0.7586227655410767, "learning_rate": 3.864941388161202e-05, "loss": 0.8705, "step": 8580 }, { "epoch": 0.6818948580047233, "grad_norm": 0.7336118817329407, "learning_rate": 3.863618321822656e-05, "loss": 0.9112, "step": 8590 }, { "epoch": 0.6826886820536228, "grad_norm": 0.7981590032577515, "learning_rate": 3.86229525548411e-05, "loss": 0.9474, 
"step": 8600 }, { "epoch": 0.6834825061025224, "grad_norm": 0.5665314197540283, "learning_rate": 3.860972189145564e-05, "loss": 0.8787, "step": 8610 }, { "epoch": 0.684276330151422, "grad_norm": 0.6617142558097839, "learning_rate": 3.859649122807018e-05, "loss": 0.9087, "step": 8620 }, { "epoch": 0.6850701542003215, "grad_norm": 0.7118547558784485, "learning_rate": 3.858326056468472e-05, "loss": 0.8094, "step": 8630 }, { "epoch": 0.6858639782492211, "grad_norm": 0.6482293605804443, "learning_rate": 3.8570029901299256e-05, "loss": 0.861, "step": 8640 }, { "epoch": 0.6866578022981206, "grad_norm": 0.8651313781738281, "learning_rate": 3.8556799237913795e-05, "loss": 0.7867, "step": 8650 }, { "epoch": 0.6874516263470202, "grad_norm": 0.5534746646881104, "learning_rate": 3.854356857452833e-05, "loss": 0.9116, "step": 8660 }, { "epoch": 0.6882454503959198, "grad_norm": 0.6434946060180664, "learning_rate": 3.8530337911142866e-05, "loss": 0.8464, "step": 8670 }, { "epoch": 0.6890392744448193, "grad_norm": 0.7514382600784302, "learning_rate": 3.8517107247757405e-05, "loss": 0.8618, "step": 8680 }, { "epoch": 0.6898330984937189, "grad_norm": 0.7134977579116821, "learning_rate": 3.850387658437194e-05, "loss": 0.8792, "step": 8690 }, { "epoch": 0.6906269225426185, "grad_norm": 0.8967533707618713, "learning_rate": 3.8490645920986476e-05, "loss": 0.8489, "step": 8700 }, { "epoch": 0.691420746591518, "grad_norm": 0.7297849655151367, "learning_rate": 3.847741525760102e-05, "loss": 0.8498, "step": 8710 }, { "epoch": 0.6922145706404176, "grad_norm": 0.8720963001251221, "learning_rate": 3.846418459421556e-05, "loss": 0.8486, "step": 8720 }, { "epoch": 0.6930083946893171, "grad_norm": 0.9449414610862732, "learning_rate": 3.845095393083009e-05, "loss": 0.8597, "step": 8730 }, { "epoch": 0.6938022187382167, "grad_norm": 0.7103795409202576, "learning_rate": 3.843772326744463e-05, "loss": 0.8633, "step": 8740 }, { "epoch": 0.6945960427871163, "grad_norm": 1.0056965351104736, 
"learning_rate": 3.842449260405917e-05, "loss": 0.8723, "step": 8750 }, { "epoch": 0.6953898668360158, "grad_norm": 0.6581205725669861, "learning_rate": 3.841126194067371e-05, "loss": 0.8956, "step": 8760 }, { "epoch": 0.6961836908849154, "grad_norm": 0.8664624691009521, "learning_rate": 3.839803127728824e-05, "loss": 0.8979, "step": 8770 }, { "epoch": 0.6969775149338149, "grad_norm": 0.6672316789627075, "learning_rate": 3.838480061390278e-05, "loss": 0.896, "step": 8780 }, { "epoch": 0.6977713389827145, "grad_norm": 0.7690501809120178, "learning_rate": 3.837156995051732e-05, "loss": 0.856, "step": 8790 }, { "epoch": 0.6985651630316141, "grad_norm": 0.7820170521736145, "learning_rate": 3.835833928713186e-05, "loss": 0.8775, "step": 8800 }, { "epoch": 0.6993589870805136, "grad_norm": 1.0023752450942993, "learning_rate": 3.83451086237464e-05, "loss": 0.8321, "step": 8810 }, { "epoch": 0.7001528111294132, "grad_norm": 0.7955570816993713, "learning_rate": 3.8331877960360936e-05, "loss": 0.8198, "step": 8820 }, { "epoch": 0.7009466351783127, "grad_norm": 0.7987425327301025, "learning_rate": 3.8318647296975475e-05, "loss": 0.8072, "step": 8830 }, { "epoch": 0.7017404592272123, "grad_norm": 0.6776150465011597, "learning_rate": 3.830541663359001e-05, "loss": 0.8903, "step": 8840 }, { "epoch": 0.7025342832761119, "grad_norm": 0.8060004711151123, "learning_rate": 3.8292185970204546e-05, "loss": 0.8328, "step": 8850 }, { "epoch": 0.7033281073250114, "grad_norm": 0.6647984981536865, "learning_rate": 3.8278955306819085e-05, "loss": 0.9151, "step": 8860 }, { "epoch": 0.704121931373911, "grad_norm": 0.7198352217674255, "learning_rate": 3.8265724643433624e-05, "loss": 0.8502, "step": 8870 }, { "epoch": 0.7049157554228105, "grad_norm": 0.8274781107902527, "learning_rate": 3.825249398004816e-05, "loss": 0.8856, "step": 8880 }, { "epoch": 0.7057095794717101, "grad_norm": 0.868508517742157, "learning_rate": 3.82392633166627e-05, "loss": 0.806, "step": 8890 }, { "epoch": 
0.7065034035206097, "grad_norm": 0.7774865627288818, "learning_rate": 3.822603265327724e-05, "loss": 0.844, "step": 8900 }, { "epoch": 0.7072972275695092, "grad_norm": 0.8134011030197144, "learning_rate": 3.821280198989178e-05, "loss": 0.946, "step": 8910 }, { "epoch": 0.7080910516184088, "grad_norm": 0.7954188585281372, "learning_rate": 3.819957132650631e-05, "loss": 0.8422, "step": 8920 }, { "epoch": 0.7088848756673083, "grad_norm": 0.7246106863021851, "learning_rate": 3.818634066312085e-05, "loss": 0.8401, "step": 8930 }, { "epoch": 0.7096786997162079, "grad_norm": 0.7810704112052917, "learning_rate": 3.817310999973539e-05, "loss": 0.8684, "step": 8940 }, { "epoch": 0.7104725237651075, "grad_norm": 0.74953293800354, "learning_rate": 3.815987933634992e-05, "loss": 0.8565, "step": 8950 }, { "epoch": 0.711266347814007, "grad_norm": 0.9855328798294067, "learning_rate": 3.814664867296446e-05, "loss": 0.9263, "step": 8960 }, { "epoch": 0.7120601718629066, "grad_norm": 0.5810590386390686, "learning_rate": 3.8133418009579e-05, "loss": 0.9298, "step": 8970 }, { "epoch": 0.7128539959118061, "grad_norm": 0.817237377166748, "learning_rate": 3.8120187346193545e-05, "loss": 0.8469, "step": 8980 }, { "epoch": 0.7136478199607057, "grad_norm": 0.7904085516929626, "learning_rate": 3.810695668280808e-05, "loss": 0.8513, "step": 8990 }, { "epoch": 0.7144416440096053, "grad_norm": 0.6733309626579285, "learning_rate": 3.8093726019422617e-05, "loss": 0.8477, "step": 9000 }, { "epoch": 0.7152354680585048, "grad_norm": 0.7731225490570068, "learning_rate": 3.8080495356037155e-05, "loss": 0.9097, "step": 9010 }, { "epoch": 0.7160292921074044, "grad_norm": 0.8163710236549377, "learning_rate": 3.8067264692651694e-05, "loss": 0.8878, "step": 9020 }, { "epoch": 0.716823116156304, "grad_norm": 0.8576553463935852, "learning_rate": 3.8054034029266227e-05, "loss": 0.8706, "step": 9030 }, { "epoch": 0.7176169402052035, "grad_norm": 0.7297602295875549, "learning_rate": 3.8040803365880766e-05, 
"loss": 0.9051, "step": 9040 }, { "epoch": 0.7184107642541031, "grad_norm": 0.6882147192955017, "learning_rate": 3.8027572702495304e-05, "loss": 0.8919, "step": 9050 }, { "epoch": 0.7192045883030026, "grad_norm": 0.8439272046089172, "learning_rate": 3.801434203910984e-05, "loss": 0.898, "step": 9060 }, { "epoch": 0.7199984123519022, "grad_norm": 0.7839822173118591, "learning_rate": 3.800111137572438e-05, "loss": 0.9044, "step": 9070 }, { "epoch": 0.7207922364008018, "grad_norm": 0.6823743581771851, "learning_rate": 3.798788071233892e-05, "loss": 0.8372, "step": 9080 }, { "epoch": 0.7215860604497013, "grad_norm": 0.7644033432006836, "learning_rate": 3.797465004895346e-05, "loss": 0.9316, "step": 9090 }, { "epoch": 0.7223798844986009, "grad_norm": 0.826184093952179, "learning_rate": 3.796141938556799e-05, "loss": 0.8519, "step": 9100 }, { "epoch": 0.7231737085475004, "grad_norm": 0.8634784817695618, "learning_rate": 3.794818872218253e-05, "loss": 0.905, "step": 9110 }, { "epoch": 0.7239675325964, "grad_norm": 0.6479964852333069, "learning_rate": 3.793495805879707e-05, "loss": 0.7854, "step": 9120 }, { "epoch": 0.7247613566452996, "grad_norm": 0.7302672863006592, "learning_rate": 3.792172739541161e-05, "loss": 0.877, "step": 9130 }, { "epoch": 0.7255551806941991, "grad_norm": 0.7126200199127197, "learning_rate": 3.790849673202614e-05, "loss": 0.9136, "step": 9140 }, { "epoch": 0.7263490047430987, "grad_norm": 0.8808421492576599, "learning_rate": 3.789526606864068e-05, "loss": 0.8717, "step": 9150 }, { "epoch": 0.7271428287919982, "grad_norm": 0.7039631605148315, "learning_rate": 3.7882035405255226e-05, "loss": 0.835, "step": 9160 }, { "epoch": 0.7279366528408978, "grad_norm": 0.7323824167251587, "learning_rate": 3.786880474186976e-05, "loss": 0.8713, "step": 9170 }, { "epoch": 0.7287304768897974, "grad_norm": 0.7466415762901306, "learning_rate": 3.78555740784843e-05, "loss": 0.874, "step": 9180 }, { "epoch": 0.7295243009386969, "grad_norm": 0.6216104626655579, 
"learning_rate": 3.7842343415098836e-05, "loss": 0.8985, "step": 9190 }, { "epoch": 0.7303181249875965, "grad_norm": 0.8570176362991333, "learning_rate": 3.7829112751713375e-05, "loss": 0.8755, "step": 9200 }, { "epoch": 0.731111949036496, "grad_norm": 0.7818983197212219, "learning_rate": 3.781588208832791e-05, "loss": 0.8462, "step": 9210 }, { "epoch": 0.7319057730853956, "grad_norm": 0.6952176094055176, "learning_rate": 3.7802651424942446e-05, "loss": 0.8286, "step": 9220 }, { "epoch": 0.7326995971342952, "grad_norm": 0.6671289801597595, "learning_rate": 3.7789420761556985e-05, "loss": 0.8437, "step": 9230 }, { "epoch": 0.7334934211831947, "grad_norm": 0.6920092701911926, "learning_rate": 3.7776190098171524e-05, "loss": 0.8691, "step": 9240 }, { "epoch": 0.7342872452320943, "grad_norm": 0.5979323387145996, "learning_rate": 3.776295943478606e-05, "loss": 0.8728, "step": 9250 }, { "epoch": 0.7350810692809938, "grad_norm": 0.8079019784927368, "learning_rate": 3.77497287714006e-05, "loss": 0.8504, "step": 9260 }, { "epoch": 0.7358748933298934, "grad_norm": 0.6762669086456299, "learning_rate": 3.773649810801514e-05, "loss": 0.8608, "step": 9270 }, { "epoch": 0.736668717378793, "grad_norm": 0.6366623044013977, "learning_rate": 3.772326744462967e-05, "loss": 0.873, "step": 9280 }, { "epoch": 0.7374625414276925, "grad_norm": 0.8903300166130066, "learning_rate": 3.771003678124421e-05, "loss": 0.8963, "step": 9290 }, { "epoch": 0.7382563654765921, "grad_norm": 0.8363783955574036, "learning_rate": 3.769680611785875e-05, "loss": 0.8462, "step": 9300 }, { "epoch": 0.7390501895254917, "grad_norm": 0.716432511806488, "learning_rate": 3.768357545447329e-05, "loss": 0.8599, "step": 9310 }, { "epoch": 0.7398440135743912, "grad_norm": 0.8705762624740601, "learning_rate": 3.767034479108782e-05, "loss": 0.8659, "step": 9320 }, { "epoch": 0.7406378376232908, "grad_norm": 0.7024847865104675, "learning_rate": 3.765711412770237e-05, "loss": 0.8697, "step": 9330 }, { "epoch": 
0.7414316616721903, "grad_norm": 0.7398332357406616, "learning_rate": 3.7643883464316906e-05, "loss": 0.8322, "step": 9340 }, { "epoch": 0.7422254857210899, "grad_norm": 0.7438483834266663, "learning_rate": 3.7630652800931445e-05, "loss": 0.8447, "step": 9350 }, { "epoch": 0.7430193097699895, "grad_norm": 0.7145562767982483, "learning_rate": 3.761742213754598e-05, "loss": 0.8426, "step": 9360 }, { "epoch": 0.743813133818889, "grad_norm": 0.7920240759849548, "learning_rate": 3.7604191474160516e-05, "loss": 0.8861, "step": 9370 }, { "epoch": 0.7446069578677886, "grad_norm": 0.7747379541397095, "learning_rate": 3.7590960810775055e-05, "loss": 0.864, "step": 9380 }, { "epoch": 0.7454007819166881, "grad_norm": 0.8978133201599121, "learning_rate": 3.757773014738959e-05, "loss": 0.7788, "step": 9390 }, { "epoch": 0.7461946059655877, "grad_norm": 0.6355377435684204, "learning_rate": 3.7564499484004126e-05, "loss": 0.8701, "step": 9400 }, { "epoch": 0.7469884300144873, "grad_norm": 0.6304174065589905, "learning_rate": 3.7551268820618665e-05, "loss": 0.8598, "step": 9410 }, { "epoch": 0.7477822540633868, "grad_norm": 0.8747965693473816, "learning_rate": 3.7538038157233204e-05, "loss": 0.8457, "step": 9420 }, { "epoch": 0.7485760781122864, "grad_norm": 0.7455962300300598, "learning_rate": 3.752480749384774e-05, "loss": 0.838, "step": 9430 }, { "epoch": 0.7493699021611859, "grad_norm": 0.6928434371948242, "learning_rate": 3.751289989680083e-05, "loss": 0.8531, "step": 9440 }, { "epoch": 0.7501637262100855, "grad_norm": 0.6415355801582336, "learning_rate": 3.749966923341536e-05, "loss": 0.836, "step": 9450 }, { "epoch": 0.7509575502589851, "grad_norm": 0.7036964893341064, "learning_rate": 3.74864385700299e-05, "loss": 0.8683, "step": 9460 }, { "epoch": 0.7517513743078846, "grad_norm": 0.546319305896759, "learning_rate": 3.747320790664444e-05, "loss": 0.864, "step": 9470 }, { "epoch": 0.7525451983567842, "grad_norm": 0.7785167098045349, "learning_rate": 3.745997724325898e-05, 
"loss": 0.9178, "step": 9480 }, { "epoch": 0.7533390224056837, "grad_norm": 0.6985113024711609, "learning_rate": 3.744674657987351e-05, "loss": 0.8726, "step": 9490 }, { "epoch": 0.7541328464545833, "grad_norm": 0.7310826182365417, "learning_rate": 3.743351591648806e-05, "loss": 0.8742, "step": 9500 }, { "epoch": 0.7549266705034829, "grad_norm": 0.8766258358955383, "learning_rate": 3.7420285253102596e-05, "loss": 0.8659, "step": 9510 }, { "epoch": 0.7557204945523824, "grad_norm": 0.6016245484352112, "learning_rate": 3.740705458971713e-05, "loss": 0.8517, "step": 9520 }, { "epoch": 0.756514318601282, "grad_norm": 0.6588014960289001, "learning_rate": 3.739382392633167e-05, "loss": 0.8621, "step": 9530 }, { "epoch": 0.7573081426501815, "grad_norm": 0.8283513784408569, "learning_rate": 3.7380593262946206e-05, "loss": 0.8527, "step": 9540 }, { "epoch": 0.7581019666990811, "grad_norm": 0.6860958337783813, "learning_rate": 3.7367362599560745e-05, "loss": 0.8549, "step": 9550 }, { "epoch": 0.7588957907479807, "grad_norm": 0.8800874948501587, "learning_rate": 3.735413193617528e-05, "loss": 0.8435, "step": 9560 }, { "epoch": 0.7596896147968802, "grad_norm": 0.6277894973754883, "learning_rate": 3.7340901272789816e-05, "loss": 0.845, "step": 9570 }, { "epoch": 0.7604834388457798, "grad_norm": 0.8731912970542908, "learning_rate": 3.7327670609404355e-05, "loss": 0.8302, "step": 9580 }, { "epoch": 0.7612772628946793, "grad_norm": 0.9646309018135071, "learning_rate": 3.7314439946018894e-05, "loss": 0.7982, "step": 9590 }, { "epoch": 0.762071086943579, "grad_norm": 0.7538288831710815, "learning_rate": 3.730120928263343e-05, "loss": 0.8195, "step": 9600 }, { "epoch": 0.7628649109924786, "grad_norm": 0.8189886212348938, "learning_rate": 3.728797861924797e-05, "loss": 0.8968, "step": 9610 }, { "epoch": 0.763658735041378, "grad_norm": 0.9274044036865234, "learning_rate": 3.727474795586251e-05, "loss": 0.8673, "step": 9620 }, { "epoch": 0.7644525590902777, "grad_norm": 
0.7127663493156433, "learning_rate": 3.726151729247704e-05, "loss": 0.8359, "step": 9630 }, { "epoch": 0.7652463831391773, "grad_norm": 0.8801278471946716, "learning_rate": 3.724828662909158e-05, "loss": 0.8546, "step": 9640 }, { "epoch": 0.7660402071880767, "grad_norm": 0.8994466066360474, "learning_rate": 3.723505596570612e-05, "loss": 0.8434, "step": 9650 }, { "epoch": 0.7668340312369764, "grad_norm": 0.7613593339920044, "learning_rate": 3.722182530232066e-05, "loss": 0.8683, "step": 9660 }, { "epoch": 0.7676278552858758, "grad_norm": 0.6378811597824097, "learning_rate": 3.72085946389352e-05, "loss": 0.8729, "step": 9670 }, { "epoch": 0.7684216793347755, "grad_norm": 0.6833720207214355, "learning_rate": 3.719536397554974e-05, "loss": 0.8427, "step": 9680 }, { "epoch": 0.7692155033836751, "grad_norm": 0.6543610692024231, "learning_rate": 3.718213331216428e-05, "loss": 0.8309, "step": 9690 }, { "epoch": 0.7700093274325746, "grad_norm": 0.7500269412994385, "learning_rate": 3.7168902648778816e-05, "loss": 0.8621, "step": 9700 }, { "epoch": 0.7708031514814742, "grad_norm": 0.7766570448875427, "learning_rate": 3.715567198539335e-05, "loss": 0.8549, "step": 9710 }, { "epoch": 0.7715969755303737, "grad_norm": 0.8536286950111389, "learning_rate": 3.714244132200789e-05, "loss": 0.8375, "step": 9720 }, { "epoch": 0.7723907995792733, "grad_norm": 0.8572762608528137, "learning_rate": 3.7129210658622426e-05, "loss": 0.8188, "step": 9730 }, { "epoch": 0.7731846236281729, "grad_norm": 1.0639240741729736, "learning_rate": 3.711597999523696e-05, "loss": 0.8433, "step": 9740 }, { "epoch": 0.7739784476770724, "grad_norm": 0.7401818633079529, "learning_rate": 3.71027493318515e-05, "loss": 0.8178, "step": 9750 }, { "epoch": 0.774772271725972, "grad_norm": 0.9968650341033936, "learning_rate": 3.7089518668466036e-05, "loss": 0.8486, "step": 9760 }, { "epoch": 0.7755660957748715, "grad_norm": 0.9690655469894409, "learning_rate": 3.707628800508058e-05, "loss": 0.8331, "step": 9770 }, { 
"epoch": 0.7763599198237711, "grad_norm": 1.0017821788787842, "learning_rate": 3.7063057341695114e-05, "loss": 0.8756, "step": 9780 }, { "epoch": 0.7771537438726707, "grad_norm": 0.7528254389762878, "learning_rate": 3.704982667830965e-05, "loss": 0.9261, "step": 9790 }, { "epoch": 0.7779475679215702, "grad_norm": 0.7826637625694275, "learning_rate": 3.703659601492419e-05, "loss": 0.8768, "step": 9800 }, { "epoch": 0.7787413919704698, "grad_norm": 0.6399036049842834, "learning_rate": 3.702336535153873e-05, "loss": 0.8645, "step": 9810 }, { "epoch": 0.7795352160193693, "grad_norm": 0.649523138999939, "learning_rate": 3.701013468815326e-05, "loss": 0.8536, "step": 9820 }, { "epoch": 0.7803290400682689, "grad_norm": 0.955564558506012, "learning_rate": 3.69969040247678e-05, "loss": 0.8279, "step": 9830 }, { "epoch": 0.7811228641171685, "grad_norm": 0.733024537563324, "learning_rate": 3.698367336138234e-05, "loss": 0.9158, "step": 9840 }, { "epoch": 0.781916688166068, "grad_norm": 0.6793590784072876, "learning_rate": 3.697044269799688e-05, "loss": 0.8817, "step": 9850 }, { "epoch": 0.7827105122149676, "grad_norm": 0.7847456932067871, "learning_rate": 3.695721203461142e-05, "loss": 0.8568, "step": 9860 }, { "epoch": 0.7835043362638671, "grad_norm": 0.8460919260978699, "learning_rate": 3.694398137122596e-05, "loss": 0.8767, "step": 9870 }, { "epoch": 0.7842981603127667, "grad_norm": 0.6977751851081848, "learning_rate": 3.6930750707840496e-05, "loss": 0.8671, "step": 9880 }, { "epoch": 0.7850919843616663, "grad_norm": 0.7356807589530945, "learning_rate": 3.691752004445503e-05, "loss": 0.9341, "step": 9890 }, { "epoch": 0.7858858084105658, "grad_norm": 0.7931004762649536, "learning_rate": 3.690428938106957e-05, "loss": 0.868, "step": 9900 }, { "epoch": 0.7866796324594654, "grad_norm": 0.8790589570999146, "learning_rate": 3.6891058717684106e-05, "loss": 0.8516, "step": 9910 }, { "epoch": 0.7874734565083649, "grad_norm": 0.7858555912971497, "learning_rate": 
3.6877828054298645e-05, "loss": 0.8336, "step": 9920 }, { "epoch": 0.7882672805572645, "grad_norm": 0.6692208647727966, "learning_rate": 3.686459739091318e-05, "loss": 0.8679, "step": 9930 }, { "epoch": 0.7890611046061641, "grad_norm": 0.9050410389900208, "learning_rate": 3.685136672752772e-05, "loss": 0.7942, "step": 9940 }, { "epoch": 0.7898549286550636, "grad_norm": 0.8038280010223389, "learning_rate": 3.683813606414226e-05, "loss": 0.8699, "step": 9950 }, { "epoch": 0.7906487527039632, "grad_norm": 0.7280693054199219, "learning_rate": 3.68249054007568e-05, "loss": 0.8876, "step": 9960 }, { "epoch": 0.7914425767528628, "grad_norm": 0.6296914219856262, "learning_rate": 3.681167473737133e-05, "loss": 0.8535, "step": 9970 }, { "epoch": 0.7922364008017623, "grad_norm": 0.8135585784912109, "learning_rate": 3.679844407398587e-05, "loss": 0.8563, "step": 9980 }, { "epoch": 0.7930302248506619, "grad_norm": 0.6743822693824768, "learning_rate": 3.678521341060041e-05, "loss": 0.9013, "step": 9990 }, { "epoch": 0.7938240488995614, "grad_norm": 0.6657276153564453, "learning_rate": 3.677198274721494e-05, "loss": 0.8898, "step": 10000 }, { "epoch": 0.794617872948461, "grad_norm": 0.7389101386070251, "learning_rate": 3.675875208382948e-05, "loss": 0.8925, "step": 10010 }, { "epoch": 0.7954116969973606, "grad_norm": 0.802793562412262, "learning_rate": 3.674552142044402e-05, "loss": 0.8091, "step": 10020 }, { "epoch": 0.7962055210462601, "grad_norm": 0.9091024398803711, "learning_rate": 3.673229075705856e-05, "loss": 0.8559, "step": 10030 }, { "epoch": 0.7969993450951597, "grad_norm": 0.8592483997344971, "learning_rate": 3.67190600936731e-05, "loss": 0.8379, "step": 10040 }, { "epoch": 0.7977931691440592, "grad_norm": 0.8377381563186646, "learning_rate": 3.670582943028764e-05, "loss": 0.9248, "step": 10050 }, { "epoch": 0.7985869931929588, "grad_norm": 0.7962272763252258, "learning_rate": 3.6692598766902176e-05, "loss": 0.8846, "step": 10060 }, { "epoch": 0.7993808172418584, 
"grad_norm": 0.5838531255722046, "learning_rate": 3.6679368103516715e-05, "loss": 0.8658, "step": 10070 }, { "epoch": 0.8001746412907579, "grad_norm": 0.6025267839431763, "learning_rate": 3.666613744013125e-05, "loss": 0.8649, "step": 10080 }, { "epoch": 0.8009684653396575, "grad_norm": 0.6443192362785339, "learning_rate": 3.6652906776745786e-05, "loss": 0.8555, "step": 10090 }, { "epoch": 0.801762289388557, "grad_norm": 0.7511973977088928, "learning_rate": 3.6639676113360325e-05, "loss": 0.945, "step": 10100 }, { "epoch": 0.8025561134374566, "grad_norm": 0.7938660979270935, "learning_rate": 3.6626445449974864e-05, "loss": 0.796, "step": 10110 }, { "epoch": 0.8033499374863562, "grad_norm": 0.7330296039581299, "learning_rate": 3.66132147865894e-05, "loss": 0.7629, "step": 10120 }, { "epoch": 0.8041437615352557, "grad_norm": 0.8548980355262756, "learning_rate": 3.659998412320394e-05, "loss": 0.8621, "step": 10130 }, { "epoch": 0.8049375855841553, "grad_norm": 0.7704104781150818, "learning_rate": 3.658675345981848e-05, "loss": 0.8502, "step": 10140 }, { "epoch": 0.8057314096330548, "grad_norm": 0.672539472579956, "learning_rate": 3.657352279643301e-05, "loss": 0.8981, "step": 10150 }, { "epoch": 0.8065252336819544, "grad_norm": 0.8186033964157104, "learning_rate": 3.656029213304755e-05, "loss": 0.9071, "step": 10160 }, { "epoch": 0.807319057730854, "grad_norm": 0.5315646529197693, "learning_rate": 3.654706146966209e-05, "loss": 0.8992, "step": 10170 }, { "epoch": 0.8081128817797535, "grad_norm": 0.6407436728477478, "learning_rate": 3.653383080627663e-05, "loss": 0.8987, "step": 10180 }, { "epoch": 0.8089067058286531, "grad_norm": 0.6754816770553589, "learning_rate": 3.652060014289116e-05, "loss": 0.8625, "step": 10190 }, { "epoch": 0.8097005298775526, "grad_norm": 0.7109003663063049, "learning_rate": 3.65073694795057e-05, "loss": 0.8156, "step": 10200 }, { "epoch": 0.8104943539264522, "grad_norm": 0.7097606658935547, "learning_rate": 3.649413881612025e-05, "loss": 
0.8754, "step": 10210 }, { "epoch": 0.8112881779753518, "grad_norm": 0.7361970543861389, "learning_rate": 3.648090815273478e-05, "loss": 0.8663, "step": 10220 }, { "epoch": 0.8120820020242513, "grad_norm": 0.5668066740036011, "learning_rate": 3.646767748934932e-05, "loss": 0.8903, "step": 10230 }, { "epoch": 0.8128758260731509, "grad_norm": 0.9411951899528503, "learning_rate": 3.645444682596386e-05, "loss": 0.8686, "step": 10240 }, { "epoch": 0.8136696501220505, "grad_norm": 0.6737961769104004, "learning_rate": 3.6441216162578396e-05, "loss": 0.8497, "step": 10250 }, { "epoch": 0.81446347417095, "grad_norm": 0.8757466673851013, "learning_rate": 3.642798549919293e-05, "loss": 0.8725, "step": 10260 }, { "epoch": 0.8152572982198496, "grad_norm": 0.670192539691925, "learning_rate": 3.641475483580747e-05, "loss": 0.892, "step": 10270 }, { "epoch": 0.8160511222687491, "grad_norm": 0.7439650893211365, "learning_rate": 3.6401524172422006e-05, "loss": 0.8589, "step": 10280 }, { "epoch": 0.8168449463176487, "grad_norm": 0.795250654220581, "learning_rate": 3.6388293509036545e-05, "loss": 0.8543, "step": 10290 }, { "epoch": 0.8176387703665483, "grad_norm": 0.7579794526100159, "learning_rate": 3.6375062845651084e-05, "loss": 0.8498, "step": 10300 }, { "epoch": 0.8184325944154478, "grad_norm": 0.5933066606521606, "learning_rate": 3.636183218226562e-05, "loss": 0.8498, "step": 10310 }, { "epoch": 0.8192264184643474, "grad_norm": 0.8285762667655945, "learning_rate": 3.634860151888016e-05, "loss": 0.9121, "step": 10320 }, { "epoch": 0.8200202425132469, "grad_norm": 0.7152075171470642, "learning_rate": 3.6335370855494694e-05, "loss": 0.8678, "step": 10330 }, { "epoch": 0.8208140665621465, "grad_norm": 0.933392345905304, "learning_rate": 3.632214019210923e-05, "loss": 0.8849, "step": 10340 }, { "epoch": 0.8216078906110461, "grad_norm": 0.698727548122406, "learning_rate": 3.630890952872377e-05, "loss": 0.8407, "step": 10350 }, { "epoch": 0.8224017146599456, "grad_norm": 
0.6476231217384338, "learning_rate": 3.629567886533831e-05, "loss": 0.9283, "step": 10360 }, { "epoch": 0.8231955387088452, "grad_norm": 0.6746705770492554, "learning_rate": 3.628244820195284e-05, "loss": 0.8968, "step": 10370 }, { "epoch": 0.8239893627577447, "grad_norm": 0.654994547367096, "learning_rate": 3.626921753856739e-05, "loss": 0.871, "step": 10380 }, { "epoch": 0.8247831868066443, "grad_norm": 0.9437777996063232, "learning_rate": 3.625598687518193e-05, "loss": 0.8428, "step": 10390 }, { "epoch": 0.8255770108555439, "grad_norm": 0.6646280884742737, "learning_rate": 3.6242756211796466e-05, "loss": 0.9034, "step": 10400 }, { "epoch": 0.8263708349044434, "grad_norm": 0.8529212474822998, "learning_rate": 3.6229525548411e-05, "loss": 0.8507, "step": 10410 }, { "epoch": 0.827164658953343, "grad_norm": 0.7901684045791626, "learning_rate": 3.621629488502554e-05, "loss": 0.8191, "step": 10420 }, { "epoch": 0.8279584830022425, "grad_norm": 0.6879044771194458, "learning_rate": 3.6203064221640076e-05, "loss": 0.8222, "step": 10430 }, { "epoch": 0.8287523070511421, "grad_norm": 0.8270571827888489, "learning_rate": 3.618983355825461e-05, "loss": 0.8483, "step": 10440 }, { "epoch": 0.8295461311000417, "grad_norm": 0.8302059769630432, "learning_rate": 3.617660289486915e-05, "loss": 0.8513, "step": 10450 }, { "epoch": 0.8303399551489412, "grad_norm": 0.669792890548706, "learning_rate": 3.6163372231483686e-05, "loss": 0.8854, "step": 10460 }, { "epoch": 0.8311337791978408, "grad_norm": 0.858910322189331, "learning_rate": 3.6150141568098225e-05, "loss": 0.8241, "step": 10470 }, { "epoch": 0.8319276032467403, "grad_norm": 0.8056598901748657, "learning_rate": 3.6136910904712764e-05, "loss": 0.8923, "step": 10480 }, { "epoch": 0.8327214272956399, "grad_norm": 0.733371913433075, "learning_rate": 3.61236802413273e-05, "loss": 0.8247, "step": 10490 }, { "epoch": 0.8335152513445395, "grad_norm": 0.5836498141288757, "learning_rate": 3.611044957794184e-05, "loss": 0.8817, "step": 
10500 }, { "epoch": 0.834309075393439, "grad_norm": 0.8154313564300537, "learning_rate": 3.609721891455638e-05, "loss": 0.8183, "step": 10510 }, { "epoch": 0.8351028994423386, "grad_norm": 0.7569496631622314, "learning_rate": 3.608398825117091e-05, "loss": 0.9073, "step": 10520 }, { "epoch": 0.8358967234912381, "grad_norm": 0.6307011246681213, "learning_rate": 3.607075758778545e-05, "loss": 0.9599, "step": 10530 }, { "epoch": 0.8366905475401377, "grad_norm": 0.7529721856117249, "learning_rate": 3.605752692439999e-05, "loss": 0.8456, "step": 10540 }, { "epoch": 0.8374843715890373, "grad_norm": 0.8897116184234619, "learning_rate": 3.604429626101452e-05, "loss": 0.8309, "step": 10550 }, { "epoch": 0.8382781956379368, "grad_norm": 0.8665919303894043, "learning_rate": 3.603106559762907e-05, "loss": 0.8252, "step": 10560 }, { "epoch": 0.8390720196868364, "grad_norm": 0.7858225703239441, "learning_rate": 3.601783493424361e-05, "loss": 0.8592, "step": 10570 }, { "epoch": 0.839865843735736, "grad_norm": 0.719430685043335, "learning_rate": 3.6004604270858146e-05, "loss": 0.8424, "step": 10580 }, { "epoch": 0.8406596677846355, "grad_norm": 0.8680655360221863, "learning_rate": 3.599137360747268e-05, "loss": 0.8125, "step": 10590 }, { "epoch": 0.8414534918335351, "grad_norm": 0.7084610462188721, "learning_rate": 3.597814294408722e-05, "loss": 0.878, "step": 10600 }, { "epoch": 0.8422473158824346, "grad_norm": 0.9070943593978882, "learning_rate": 3.5964912280701756e-05, "loss": 0.8752, "step": 10610 }, { "epoch": 0.8430411399313342, "grad_norm": 0.8004103302955627, "learning_rate": 3.5951681617316295e-05, "loss": 0.8462, "step": 10620 }, { "epoch": 0.8438349639802338, "grad_norm": 0.7617958784103394, "learning_rate": 3.593845095393083e-05, "loss": 0.832, "step": 10630 }, { "epoch": 0.8446287880291333, "grad_norm": 0.8618339896202087, "learning_rate": 3.5925220290545366e-05, "loss": 0.8751, "step": 10640 }, { "epoch": 0.8454226120780329, "grad_norm": 0.6573039889335632, 
"learning_rate": 3.5911989627159905e-05, "loss": 0.8706, "step": 10650 }, { "epoch": 0.8462164361269324, "grad_norm": 0.6365640163421631, "learning_rate": 3.589875896377445e-05, "loss": 0.8706, "step": 10660 }, { "epoch": 0.847010260175832, "grad_norm": 0.712862491607666, "learning_rate": 3.588552830038898e-05, "loss": 0.7904, "step": 10670 }, { "epoch": 0.8478040842247316, "grad_norm": 0.7353672981262207, "learning_rate": 3.587229763700352e-05, "loss": 0.8465, "step": 10680 }, { "epoch": 0.8485979082736311, "grad_norm": 0.7351557612419128, "learning_rate": 3.585906697361806e-05, "loss": 0.8947, "step": 10690 }, { "epoch": 0.8493917323225307, "grad_norm": 0.7744636535644531, "learning_rate": 3.584583631023259e-05, "loss": 0.8291, "step": 10700 }, { "epoch": 0.8501855563714302, "grad_norm": 0.5523749589920044, "learning_rate": 3.583260564684713e-05, "loss": 0.9476, "step": 10710 }, { "epoch": 0.8509793804203298, "grad_norm": 0.7999189496040344, "learning_rate": 3.581937498346167e-05, "loss": 0.7629, "step": 10720 }, { "epoch": 0.8517732044692294, "grad_norm": 0.7351500988006592, "learning_rate": 3.580614432007621e-05, "loss": 0.8504, "step": 10730 }, { "epoch": 0.8525670285181289, "grad_norm": 0.7090862393379211, "learning_rate": 3.579291365669075e-05, "loss": 0.9068, "step": 10740 }, { "epoch": 0.8533608525670285, "grad_norm": 0.6834081411361694, "learning_rate": 3.577968299330529e-05, "loss": 0.8594, "step": 10750 }, { "epoch": 0.854154676615928, "grad_norm": 0.7891108989715576, "learning_rate": 3.576645232991983e-05, "loss": 0.8918, "step": 10760 }, { "epoch": 0.8549485006648276, "grad_norm": 0.7295839786529541, "learning_rate": 3.5753221666534366e-05, "loss": 0.8802, "step": 10770 }, { "epoch": 0.8557423247137272, "grad_norm": 0.8786309361457825, "learning_rate": 3.57399910031489e-05, "loss": 0.8576, "step": 10780 }, { "epoch": 0.8565361487626267, "grad_norm": 0.7351185083389282, "learning_rate": 3.572676033976344e-05, "loss": 0.8407, "step": 10790 }, { "epoch": 
0.8573299728115263, "grad_norm": 0.6069027185440063, "learning_rate": 3.5713529676377976e-05, "loss": 0.8566, "step": 10800 }, { "epoch": 0.8581237968604258, "grad_norm": 0.8635075092315674, "learning_rate": 3.570029901299251e-05, "loss": 0.885, "step": 10810 }, { "epoch": 0.8589176209093254, "grad_norm": 0.7878072261810303, "learning_rate": 3.568706834960705e-05, "loss": 0.8718, "step": 10820 }, { "epoch": 0.859711444958225, "grad_norm": 0.7929555177688599, "learning_rate": 3.567383768622159e-05, "loss": 0.8686, "step": 10830 }, { "epoch": 0.8605052690071245, "grad_norm": 0.8081250786781311, "learning_rate": 3.566060702283613e-05, "loss": 0.893, "step": 10840 }, { "epoch": 0.8612990930560241, "grad_norm": 0.7977842688560486, "learning_rate": 3.5647376359450664e-05, "loss": 0.8753, "step": 10850 }, { "epoch": 0.8620929171049236, "grad_norm": 0.7646862268447876, "learning_rate": 3.56341456960652e-05, "loss": 0.8658, "step": 10860 }, { "epoch": 0.8628867411538232, "grad_norm": 0.7526537775993347, "learning_rate": 3.562091503267974e-05, "loss": 0.8557, "step": 10870 }, { "epoch": 0.8636805652027228, "grad_norm": 0.7993239760398865, "learning_rate": 3.560768436929428e-05, "loss": 0.8521, "step": 10880 }, { "epoch": 0.8644743892516223, "grad_norm": 0.9761634469032288, "learning_rate": 3.559445370590881e-05, "loss": 0.926, "step": 10890 }, { "epoch": 0.865268213300522, "grad_norm": 0.7890929579734802, "learning_rate": 3.558122304252335e-05, "loss": 0.8633, "step": 10900 }, { "epoch": 0.8660620373494216, "grad_norm": 0.7080404758453369, "learning_rate": 3.556799237913789e-05, "loss": 0.9538, "step": 10910 }, { "epoch": 0.866855861398321, "grad_norm": 0.8383463621139526, "learning_rate": 3.555476171575243e-05, "loss": 0.8548, "step": 10920 }, { "epoch": 0.8676496854472207, "grad_norm": 0.8602036237716675, "learning_rate": 3.554153105236697e-05, "loss": 0.8676, "step": 10930 }, { "epoch": 0.8684435094961201, "grad_norm": 0.7755318880081177, "learning_rate": 
3.552830038898151e-05, "loss": 0.8335, "step": 10940 }, { "epoch": 0.8692373335450198, "grad_norm": 0.7934598922729492, "learning_rate": 3.5515069725596046e-05, "loss": 0.7969, "step": 10950 }, { "epoch": 0.8700311575939194, "grad_norm": 0.9328092932701111, "learning_rate": 3.550183906221058e-05, "loss": 0.7799, "step": 10960 }, { "epoch": 0.8708249816428189, "grad_norm": 0.665879487991333, "learning_rate": 3.548860839882512e-05, "loss": 0.8617, "step": 10970 }, { "epoch": 0.8716188056917185, "grad_norm": 0.775994598865509, "learning_rate": 3.5475377735439656e-05, "loss": 0.8282, "step": 10980 }, { "epoch": 0.872412629740618, "grad_norm": 0.8753870129585266, "learning_rate": 3.5462147072054195e-05, "loss": 0.786, "step": 10990 }, { "epoch": 0.8732064537895176, "grad_norm": 0.8270352482795715, "learning_rate": 3.5448916408668734e-05, "loss": 0.8371, "step": 11000 }, { "epoch": 0.8740002778384172, "grad_norm": 0.7965131402015686, "learning_rate": 3.543568574528327e-05, "loss": 0.885, "step": 11010 }, { "epoch": 0.8747941018873167, "grad_norm": 0.7161972522735596, "learning_rate": 3.542245508189781e-05, "loss": 0.7859, "step": 11020 }, { "epoch": 0.8755879259362163, "grad_norm": 0.7473718523979187, "learning_rate": 3.5409224418512344e-05, "loss": 0.8075, "step": 11030 }, { "epoch": 0.8763817499851158, "grad_norm": 0.8067750334739685, "learning_rate": 3.539599375512688e-05, "loss": 0.7951, "step": 11040 }, { "epoch": 0.8771755740340154, "grad_norm": 0.8966103196144104, "learning_rate": 3.538276309174142e-05, "loss": 0.8467, "step": 11050 }, { "epoch": 0.877969398082915, "grad_norm": 0.7432875037193298, "learning_rate": 3.536953242835596e-05, "loss": 0.8957, "step": 11060 }, { "epoch": 0.8787632221318145, "grad_norm": 0.7818832993507385, "learning_rate": 3.535630176497049e-05, "loss": 0.9194, "step": 11070 }, { "epoch": 0.8795570461807141, "grad_norm": 0.8966363668441772, "learning_rate": 3.534307110158503e-05, "loss": 0.8665, "step": 11080 }, { "epoch": 
0.8803508702296136, "grad_norm": 0.5509730577468872, "learning_rate": 3.532984043819957e-05, "loss": 0.9304, "step": 11090 }, { "epoch": 0.8811446942785132, "grad_norm": 0.7345530986785889, "learning_rate": 3.5316609774814116e-05, "loss": 0.8524, "step": 11100 }, { "epoch": 0.8819385183274128, "grad_norm": 0.9241631031036377, "learning_rate": 3.530337911142865e-05, "loss": 0.8748, "step": 11110 }, { "epoch": 0.8827323423763123, "grad_norm": 0.7990071177482605, "learning_rate": 3.529014844804319e-05, "loss": 0.8633, "step": 11120 }, { "epoch": 0.8835261664252119, "grad_norm": 0.8361101746559143, "learning_rate": 3.5276917784657726e-05, "loss": 0.8708, "step": 11130 }, { "epoch": 0.8843199904741114, "grad_norm": 0.6737117767333984, "learning_rate": 3.526368712127226e-05, "loss": 0.8489, "step": 11140 }, { "epoch": 0.885113814523011, "grad_norm": 0.8274135589599609, "learning_rate": 3.52504564578868e-05, "loss": 0.9062, "step": 11150 }, { "epoch": 0.8859076385719106, "grad_norm": 0.5696777701377869, "learning_rate": 3.5237225794501336e-05, "loss": 0.8464, "step": 11160 }, { "epoch": 0.8867014626208101, "grad_norm": 0.8153122067451477, "learning_rate": 3.5223995131115875e-05, "loss": 0.8514, "step": 11170 }, { "epoch": 0.8874952866697097, "grad_norm": 0.7769964933395386, "learning_rate": 3.5210764467730414e-05, "loss": 0.8245, "step": 11180 }, { "epoch": 0.8882891107186092, "grad_norm": 0.6569917798042297, "learning_rate": 3.519753380434495e-05, "loss": 0.799, "step": 11190 }, { "epoch": 0.8890829347675088, "grad_norm": 0.5966030359268188, "learning_rate": 3.518430314095949e-05, "loss": 0.8563, "step": 11200 }, { "epoch": 0.8898767588164084, "grad_norm": 0.6492002010345459, "learning_rate": 3.517107247757403e-05, "loss": 0.861, "step": 11210 }, { "epoch": 0.8906705828653079, "grad_norm": 0.7151293158531189, "learning_rate": 3.515784181418856e-05, "loss": 0.8536, "step": 11220 }, { "epoch": 0.8914644069142075, "grad_norm": 0.6742676496505737, "learning_rate": 
3.51446111508031e-05, "loss": 0.8548, "step": 11230 }, { "epoch": 0.8922582309631071, "grad_norm": 0.8235064148902893, "learning_rate": 3.513138048741764e-05, "loss": 0.8937, "step": 11240 }, { "epoch": 0.8930520550120066, "grad_norm": 0.6672957539558411, "learning_rate": 3.511814982403217e-05, "loss": 0.8116, "step": 11250 }, { "epoch": 0.8938458790609062, "grad_norm": 0.745836079120636, "learning_rate": 3.510491916064671e-05, "loss": 0.8183, "step": 11260 }, { "epoch": 0.8946397031098057, "grad_norm": 0.93377286195755, "learning_rate": 3.509168849726126e-05, "loss": 0.87, "step": 11270 }, { "epoch": 0.8954335271587053, "grad_norm": 0.7982069253921509, "learning_rate": 3.50784578338758e-05, "loss": 0.8557, "step": 11280 }, { "epoch": 0.8962273512076049, "grad_norm": 0.7779275178909302, "learning_rate": 3.506522717049033e-05, "loss": 0.7948, "step": 11290 }, { "epoch": 0.8970211752565044, "grad_norm": 0.7200841307640076, "learning_rate": 3.505199650710487e-05, "loss": 0.8031, "step": 11300 }, { "epoch": 0.897814999305404, "grad_norm": 0.7799506783485413, "learning_rate": 3.503876584371941e-05, "loss": 0.8692, "step": 11310 }, { "epoch": 0.8986088233543035, "grad_norm": 0.6927218437194824, "learning_rate": 3.5025535180333946e-05, "loss": 0.8854, "step": 11320 }, { "epoch": 0.8994026474032031, "grad_norm": 0.8087165951728821, "learning_rate": 3.501230451694848e-05, "loss": 0.8593, "step": 11330 }, { "epoch": 0.9001964714521027, "grad_norm": 0.817240834236145, "learning_rate": 3.499907385356302e-05, "loss": 0.8312, "step": 11340 }, { "epoch": 0.9009902955010022, "grad_norm": 0.6710502505302429, "learning_rate": 3.4985843190177556e-05, "loss": 0.883, "step": 11350 }, { "epoch": 0.9017841195499018, "grad_norm": 0.6229230761528015, "learning_rate": 3.4972612526792095e-05, "loss": 0.885, "step": 11360 }, { "epoch": 0.9025779435988013, "grad_norm": 0.7668790221214294, "learning_rate": 3.4959381863406634e-05, "loss": 0.8206, "step": 11370 }, { "epoch": 0.9033717676477009, 
"grad_norm": 0.668046236038208, "learning_rate": 3.494615120002117e-05, "loss": 0.8866, "step": 11380 }, { "epoch": 0.9041655916966005, "grad_norm": 0.7366786003112793, "learning_rate": 3.493292053663571e-05, "loss": 0.8091, "step": 11390 }, { "epoch": 0.9049594157455, "grad_norm": 0.9512502551078796, "learning_rate": 3.4919689873250244e-05, "loss": 0.7877, "step": 11400 }, { "epoch": 0.9057532397943996, "grad_norm": 0.8555247783660889, "learning_rate": 3.490645920986478e-05, "loss": 0.901, "step": 11410 }, { "epoch": 0.9065470638432991, "grad_norm": 0.6285889148712158, "learning_rate": 3.489322854647932e-05, "loss": 0.8963, "step": 11420 }, { "epoch": 0.9073408878921987, "grad_norm": 0.7358067631721497, "learning_rate": 3.487999788309386e-05, "loss": 0.8679, "step": 11430 }, { "epoch": 0.9081347119410983, "grad_norm": 0.8041672110557556, "learning_rate": 3.48667672197084e-05, "loss": 0.8222, "step": 11440 }, { "epoch": 0.9089285359899978, "grad_norm": 0.7826716899871826, "learning_rate": 3.485353655632294e-05, "loss": 0.8347, "step": 11450 }, { "epoch": 0.9097223600388974, "grad_norm": 0.8296499252319336, "learning_rate": 3.484030589293748e-05, "loss": 0.8105, "step": 11460 }, { "epoch": 0.9105161840877969, "grad_norm": 0.6100326776504517, "learning_rate": 3.482707522955201e-05, "loss": 0.8618, "step": 11470 }, { "epoch": 0.9113100081366965, "grad_norm": 0.9201139807701111, "learning_rate": 3.481384456616655e-05, "loss": 0.905, "step": 11480 }, { "epoch": 0.9121038321855961, "grad_norm": 0.8482776880264282, "learning_rate": 3.480061390278109e-05, "loss": 0.8644, "step": 11490 }, { "epoch": 0.9128976562344956, "grad_norm": 1.077039122581482, "learning_rate": 3.4787383239395626e-05, "loss": 0.848, "step": 11500 }, { "epoch": 0.9136914802833952, "grad_norm": 0.6531374454498291, "learning_rate": 3.477415257601016e-05, "loss": 0.8716, "step": 11510 }, { "epoch": 0.9144853043322948, "grad_norm": 0.7523869276046753, "learning_rate": 3.47609219126247e-05, "loss": 0.9086, 
"step": 11520 }, { "epoch": 0.9152791283811943, "grad_norm": 0.7146991491317749, "learning_rate": 3.4747691249239236e-05, "loss": 0.8156, "step": 11530 }, { "epoch": 0.9160729524300939, "grad_norm": 0.916566789150238, "learning_rate": 3.473446058585378e-05, "loss": 0.9163, "step": 11540 }, { "epoch": 0.9168667764789934, "grad_norm": 0.8944635987281799, "learning_rate": 3.4721229922468314e-05, "loss": 0.8811, "step": 11550 }, { "epoch": 0.917660600527893, "grad_norm": 0.7929695248603821, "learning_rate": 3.470799925908285e-05, "loss": 0.8048, "step": 11560 }, { "epoch": 0.9184544245767926, "grad_norm": 0.6163130402565002, "learning_rate": 3.469476859569739e-05, "loss": 0.904, "step": 11570 }, { "epoch": 0.9192482486256921, "grad_norm": 0.7213409543037415, "learning_rate": 3.468153793231193e-05, "loss": 0.9098, "step": 11580 }, { "epoch": 0.9200420726745917, "grad_norm": 0.8145052790641785, "learning_rate": 3.466830726892646e-05, "loss": 0.8596, "step": 11590 }, { "epoch": 0.9208358967234912, "grad_norm": 0.7872694730758667, "learning_rate": 3.4655076605541e-05, "loss": 0.8214, "step": 11600 }, { "epoch": 0.9216297207723908, "grad_norm": 0.745549201965332, "learning_rate": 3.464184594215554e-05, "loss": 0.885, "step": 11610 }, { "epoch": 0.9224235448212904, "grad_norm": 0.8459563255310059, "learning_rate": 3.462861527877008e-05, "loss": 0.9197, "step": 11620 }, { "epoch": 0.9232173688701899, "grad_norm": 0.9419437646865845, "learning_rate": 3.461538461538462e-05, "loss": 0.7556, "step": 11630 }, { "epoch": 0.9240111929190895, "grad_norm": 0.7619801759719849, "learning_rate": 3.460215395199916e-05, "loss": 0.87, "step": 11640 }, { "epoch": 0.924805016967989, "grad_norm": 0.5955981612205505, "learning_rate": 3.4588923288613696e-05, "loss": 0.922, "step": 11650 }, { "epoch": 0.9255988410168886, "grad_norm": 0.7901360392570496, "learning_rate": 3.457569262522823e-05, "loss": 0.8939, "step": 11660 }, { "epoch": 0.9263926650657882, "grad_norm": 0.6033757328987122, 
"learning_rate": 3.456246196184277e-05, "loss": 0.7984, "step": 11670 }, { "epoch": 0.9271864891146877, "grad_norm": 0.7921754121780396, "learning_rate": 3.4549231298457306e-05, "loss": 0.8477, "step": 11680 }, { "epoch": 0.9279803131635873, "grad_norm": 0.6191670298576355, "learning_rate": 3.4536000635071845e-05, "loss": 0.8468, "step": 11690 }, { "epoch": 0.9287741372124868, "grad_norm": 0.6756204962730408, "learning_rate": 3.452276997168638e-05, "loss": 0.8113, "step": 11700 }, { "epoch": 0.9295679612613864, "grad_norm": 0.8277655839920044, "learning_rate": 3.450953930830092e-05, "loss": 0.889, "step": 11710 }, { "epoch": 0.930361785310286, "grad_norm": 0.6921846270561218, "learning_rate": 3.449630864491546e-05, "loss": 0.9316, "step": 11720 }, { "epoch": 0.9311556093591855, "grad_norm": 0.6792823672294617, "learning_rate": 3.4483077981529994e-05, "loss": 0.8069, "step": 11730 }, { "epoch": 0.9319494334080851, "grad_norm": 0.9177089929580688, "learning_rate": 3.446984731814453e-05, "loss": 0.8124, "step": 11740 }, { "epoch": 0.9327432574569846, "grad_norm": 0.8078745603561401, "learning_rate": 3.445661665475907e-05, "loss": 0.8762, "step": 11750 }, { "epoch": 0.9335370815058842, "grad_norm": 0.9108356833457947, "learning_rate": 3.444338599137361e-05, "loss": 0.7767, "step": 11760 }, { "epoch": 0.9343309055547838, "grad_norm": 0.6663825511932373, "learning_rate": 3.443015532798814e-05, "loss": 0.8749, "step": 11770 }, { "epoch": 0.9351247296036833, "grad_norm": 0.8398308753967285, "learning_rate": 3.441692466460268e-05, "loss": 0.8793, "step": 11780 }, { "epoch": 0.9359185536525829, "grad_norm": 0.8784694075584412, "learning_rate": 3.440369400121722e-05, "loss": 0.8916, "step": 11790 }, { "epoch": 0.9367123777014824, "grad_norm": 0.7957909107208252, "learning_rate": 3.439046333783176e-05, "loss": 0.8855, "step": 11800 }, { "epoch": 0.937506201750382, "grad_norm": 0.6235615015029907, "learning_rate": 3.43772326744463e-05, "loss": 0.9407, "step": 11810 }, { 
"epoch": 0.9383000257992816, "grad_norm": 0.912480354309082, "learning_rate": 3.436400201106084e-05, "loss": 0.8935, "step": 11820 }, { "epoch": 0.9390938498481811, "grad_norm": 0.8489815592765808, "learning_rate": 3.435077134767538e-05, "loss": 0.8482, "step": 11830 }, { "epoch": 0.9398876738970807, "grad_norm": 0.6857966780662537, "learning_rate": 3.433754068428991e-05, "loss": 0.8982, "step": 11840 }, { "epoch": 0.9406814979459803, "grad_norm": 0.6412976384162903, "learning_rate": 3.432431002090445e-05, "loss": 0.8496, "step": 11850 }, { "epoch": 0.9414753219948798, "grad_norm": 0.7496560215950012, "learning_rate": 3.431107935751899e-05, "loss": 0.8434, "step": 11860 }, { "epoch": 0.9422691460437794, "grad_norm": 0.7269144058227539, "learning_rate": 3.4297848694133526e-05, "loss": 0.8561, "step": 11870 }, { "epoch": 0.9430629700926789, "grad_norm": 0.8505829572677612, "learning_rate": 3.4284618030748065e-05, "loss": 0.8006, "step": 11880 }, { "epoch": 0.9438567941415785, "grad_norm": 0.7966324090957642, "learning_rate": 3.4271387367362604e-05, "loss": 0.8968, "step": 11890 }, { "epoch": 0.9446506181904781, "grad_norm": 0.7437507510185242, "learning_rate": 3.425815670397714e-05, "loss": 0.8193, "step": 11900 }, { "epoch": 0.9454444422393776, "grad_norm": 0.8566707968711853, "learning_rate": 3.424492604059168e-05, "loss": 0.8084, "step": 11910 }, { "epoch": 0.9462382662882772, "grad_norm": 0.6849654316902161, "learning_rate": 3.4231695377206214e-05, "loss": 0.8792, "step": 11920 }, { "epoch": 0.9470320903371767, "grad_norm": 0.7506431341171265, "learning_rate": 3.421846471382075e-05, "loss": 0.8374, "step": 11930 }, { "epoch": 0.9478259143860763, "grad_norm": 0.6591388583183289, "learning_rate": 3.420523405043529e-05, "loss": 0.9128, "step": 11940 }, { "epoch": 0.9486197384349759, "grad_norm": 0.6423110365867615, "learning_rate": 3.4192003387049824e-05, "loss": 0.7918, "step": 11950 }, { "epoch": 0.9494135624838754, "grad_norm": 0.6780298948287964, 
"learning_rate": 3.417877272366436e-05, "loss": 0.8715, "step": 11960 }, { "epoch": 0.950207386532775, "grad_norm": 0.6950411200523376, "learning_rate": 3.41655420602789e-05, "loss": 0.8555, "step": 11970 }, { "epoch": 0.9510012105816745, "grad_norm": 0.8524368405342102, "learning_rate": 3.415231139689345e-05, "loss": 0.8706, "step": 11980 }, { "epoch": 0.9517950346305741, "grad_norm": 0.8601171374320984, "learning_rate": 3.413908073350798e-05, "loss": 0.8835, "step": 11990 }, { "epoch": 0.9525888586794737, "grad_norm": 0.7460906505584717, "learning_rate": 3.412585007012252e-05, "loss": 0.8911, "step": 12000 }, { "epoch": 0.9533826827283732, "grad_norm": 0.6877840757369995, "learning_rate": 3.411261940673706e-05, "loss": 0.8733, "step": 12010 }, { "epoch": 0.9541765067772728, "grad_norm": 0.8841610550880432, "learning_rate": 3.4099388743351596e-05, "loss": 0.8988, "step": 12020 }, { "epoch": 0.9549703308261723, "grad_norm": 0.7192667126655579, "learning_rate": 3.408615807996613e-05, "loss": 0.876, "step": 12030 }, { "epoch": 0.9557641548750719, "grad_norm": 0.739669144153595, "learning_rate": 3.407292741658067e-05, "loss": 0.9147, "step": 12040 }, { "epoch": 0.9565579789239715, "grad_norm": 0.9102823138237, "learning_rate": 3.4059696753195206e-05, "loss": 0.8335, "step": 12050 }, { "epoch": 0.957351802972871, "grad_norm": 0.8623813986778259, "learning_rate": 3.4046466089809745e-05, "loss": 0.9067, "step": 12060 }, { "epoch": 0.9581456270217706, "grad_norm": 0.6548823714256287, "learning_rate": 3.4033235426424284e-05, "loss": 0.8553, "step": 12070 }, { "epoch": 0.9589394510706701, "grad_norm": 0.6950840950012207, "learning_rate": 3.402000476303882e-05, "loss": 0.8346, "step": 12080 }, { "epoch": 0.9597332751195697, "grad_norm": 0.8395444750785828, "learning_rate": 3.400677409965336e-05, "loss": 0.8331, "step": 12090 }, { "epoch": 0.9605270991684693, "grad_norm": 0.7684609889984131, "learning_rate": 3.3993543436267894e-05, "loss": 0.8309, "step": 12100 }, { "epoch": 
0.9613209232173688, "grad_norm": 0.6737335324287415, "learning_rate": 3.398031277288243e-05, "loss": 0.8495, "step": 12110 }, { "epoch": 0.9621147472662684, "grad_norm": 0.8039416670799255, "learning_rate": 3.396708210949697e-05, "loss": 0.8201, "step": 12120 }, { "epoch": 0.9629085713151679, "grad_norm": 0.7795635461807251, "learning_rate": 3.395385144611151e-05, "loss": 0.8921, "step": 12130 }, { "epoch": 0.9637023953640675, "grad_norm": 0.8788042068481445, "learning_rate": 3.394062078272604e-05, "loss": 0.849, "step": 12140 }, { "epoch": 0.9644962194129671, "grad_norm": 0.8527970910072327, "learning_rate": 3.392739011934058e-05, "loss": 0.8119, "step": 12150 }, { "epoch": 0.9652900434618666, "grad_norm": 0.7557294964790344, "learning_rate": 3.391415945595513e-05, "loss": 0.8121, "step": 12160 }, { "epoch": 0.9660838675107662, "grad_norm": 0.871924877166748, "learning_rate": 3.390092879256966e-05, "loss": 0.9318, "step": 12170 }, { "epoch": 0.9668776915596659, "grad_norm": 0.6994668245315552, "learning_rate": 3.38876981291842e-05, "loss": 0.8465, "step": 12180 }, { "epoch": 0.9676715156085653, "grad_norm": 0.7652361989021301, "learning_rate": 3.387446746579874e-05, "loss": 0.8967, "step": 12190 }, { "epoch": 0.968465339657465, "grad_norm": 0.7152222990989685, "learning_rate": 3.3861236802413276e-05, "loss": 0.9297, "step": 12200 }, { "epoch": 0.9692591637063644, "grad_norm": 0.8487961292266846, "learning_rate": 3.384800613902781e-05, "loss": 0.8412, "step": 12210 }, { "epoch": 0.970052987755264, "grad_norm": 0.7079365849494934, "learning_rate": 3.383477547564235e-05, "loss": 0.8746, "step": 12220 }, { "epoch": 0.9708468118041637, "grad_norm": 0.830172598361969, "learning_rate": 3.3821544812256886e-05, "loss": 0.864, "step": 12230 }, { "epoch": 0.9716406358530632, "grad_norm": 0.9805923104286194, "learning_rate": 3.3808314148871425e-05, "loss": 0.9199, "step": 12240 }, { "epoch": 0.9724344599019628, "grad_norm": 0.7402176856994629, "learning_rate": 
3.3795083485485964e-05, "loss": 0.8274, "step": 12250 }, { "epoch": 0.9732282839508622, "grad_norm": 0.7366051077842712, "learning_rate": 3.37818528221005e-05, "loss": 0.8966, "step": 12260 }, { "epoch": 0.9740221079997619, "grad_norm": 0.8370699286460876, "learning_rate": 3.376862215871504e-05, "loss": 0.7642, "step": 12270 }, { "epoch": 0.9748159320486615, "grad_norm": 0.8621345162391663, "learning_rate": 3.3755391495329574e-05, "loss": 0.8868, "step": 12280 }, { "epoch": 0.975609756097561, "grad_norm": 0.6125873327255249, "learning_rate": 3.374216083194411e-05, "loss": 0.8572, "step": 12290 }, { "epoch": 0.9764035801464606, "grad_norm": 0.708325207233429, "learning_rate": 3.372893016855865e-05, "loss": 0.811, "step": 12300 }, { "epoch": 0.97719740419536, "grad_norm": 0.9190065264701843, "learning_rate": 3.371569950517319e-05, "loss": 0.7922, "step": 12310 }, { "epoch": 0.9779912282442597, "grad_norm": 0.6722436547279358, "learning_rate": 3.370246884178772e-05, "loss": 0.8409, "step": 12320 }, { "epoch": 0.9787850522931593, "grad_norm": 0.6370068788528442, "learning_rate": 3.368923817840227e-05, "loss": 0.8367, "step": 12330 }, { "epoch": 0.9795788763420588, "grad_norm": 0.7187243103981018, "learning_rate": 3.367600751501681e-05, "loss": 0.8673, "step": 12340 }, { "epoch": 0.9803727003909584, "grad_norm": 0.6579310297966003, "learning_rate": 3.366277685163135e-05, "loss": 0.8582, "step": 12350 }, { "epoch": 0.9811665244398579, "grad_norm": 0.9233648777008057, "learning_rate": 3.364954618824588e-05, "loss": 0.8838, "step": 12360 }, { "epoch": 0.9819603484887575, "grad_norm": 0.8820499181747437, "learning_rate": 3.363631552486042e-05, "loss": 0.8572, "step": 12370 }, { "epoch": 0.9827541725376571, "grad_norm": 0.7715148329734802, "learning_rate": 3.362308486147496e-05, "loss": 0.8855, "step": 12380 }, { "epoch": 0.9835479965865566, "grad_norm": 0.85701984167099, "learning_rate": 3.3609854198089496e-05, "loss": 0.8448, "step": 12390 }, { "epoch": 0.9843418206354562, 
"grad_norm": 0.8934018015861511, "learning_rate": 3.359662353470403e-05, "loss": 0.7882, "step": 12400 }, { "epoch": 0.9851356446843557, "grad_norm": 0.7508744597434998, "learning_rate": 3.358339287131857e-05, "loss": 0.8488, "step": 12410 }, { "epoch": 0.9859294687332553, "grad_norm": 0.7345721125602722, "learning_rate": 3.3570162207933106e-05, "loss": 0.8662, "step": 12420 }, { "epoch": 0.9867232927821549, "grad_norm": 0.7805215120315552, "learning_rate": 3.3556931544547645e-05, "loss": 0.7834, "step": 12430 }, { "epoch": 0.9875171168310544, "grad_norm": 0.8609070181846619, "learning_rate": 3.3543700881162184e-05, "loss": 0.7946, "step": 12440 }, { "epoch": 0.988310940879954, "grad_norm": 0.8504823446273804, "learning_rate": 3.353047021777672e-05, "loss": 0.8471, "step": 12450 }, { "epoch": 0.9891047649288536, "grad_norm": 0.800946831703186, "learning_rate": 3.351723955439126e-05, "loss": 0.8686, "step": 12460 }, { "epoch": 0.9898985889777531, "grad_norm": 0.770326554775238, "learning_rate": 3.3504008891005794e-05, "loss": 0.7954, "step": 12470 }, { "epoch": 0.9906924130266527, "grad_norm": 0.7792863845825195, "learning_rate": 3.349077822762033e-05, "loss": 0.8636, "step": 12480 }, { "epoch": 0.9914862370755522, "grad_norm": 0.7109197974205017, "learning_rate": 3.347754756423487e-05, "loss": 0.9198, "step": 12490 }, { "epoch": 0.9922800611244518, "grad_norm": 0.6527384519577026, "learning_rate": 3.346431690084941e-05, "loss": 0.8457, "step": 12500 }, { "epoch": 0.9930738851733514, "grad_norm": 0.9128293395042419, "learning_rate": 3.345108623746395e-05, "loss": 0.9064, "step": 12510 }, { "epoch": 0.9938677092222509, "grad_norm": 0.7357638478279114, "learning_rate": 3.343785557407849e-05, "loss": 0.8497, "step": 12520 }, { "epoch": 0.9946615332711505, "grad_norm": 0.8037011027336121, "learning_rate": 3.342462491069303e-05, "loss": 0.8659, "step": 12530 }, { "epoch": 0.99545535732005, "grad_norm": 0.8320547938346863, "learning_rate": 3.341139424730756e-05, "loss": 
0.8856, "step": 12540 }, { "epoch": 0.9962491813689496, "grad_norm": 0.7249430418014526, "learning_rate": 3.33981635839221e-05, "loss": 0.8472, "step": 12550 }, { "epoch": 0.9970430054178492, "grad_norm": 0.7746173143386841, "learning_rate": 3.338493292053664e-05, "loss": 0.8397, "step": 12560 }, { "epoch": 0.9978368294667487, "grad_norm": 0.7750112414360046, "learning_rate": 3.3371702257151176e-05, "loss": 0.8164, "step": 12570 }, { "epoch": 0.9986306535156483, "grad_norm": 0.9370802044868469, "learning_rate": 3.335847159376571e-05, "loss": 0.9081, "step": 12580 }, { "epoch": 0.9994244775645478, "grad_norm": 0.7398301362991333, "learning_rate": 3.334524093038025e-05, "loss": 0.8786, "step": 12590 }, { "epoch": 1.0002183016134474, "grad_norm": 0.811583399772644, "learning_rate": 3.333201026699479e-05, "loss": 0.7198, "step": 12600 }, { "epoch": 1.001012125662347, "grad_norm": 0.8118659853935242, "learning_rate": 3.331877960360933e-05, "loss": 0.8307, "step": 12610 }, { "epoch": 1.0018059497112466, "grad_norm": 0.6908761858940125, "learning_rate": 3.3305548940223864e-05, "loss": 0.8154, "step": 12620 }, { "epoch": 1.002599773760146, "grad_norm": 0.5952050685882568, "learning_rate": 3.32923182768384e-05, "loss": 0.8398, "step": 12630 }, { "epoch": 1.0033935978090456, "grad_norm": 0.7398274540901184, "learning_rate": 3.327908761345294e-05, "loss": 0.8172, "step": 12640 }, { "epoch": 1.0041874218579452, "grad_norm": 0.6245648860931396, "learning_rate": 3.3265856950067474e-05, "loss": 0.8967, "step": 12650 }, { "epoch": 1.0049812459068448, "grad_norm": 0.7542557716369629, "learning_rate": 3.325262628668201e-05, "loss": 0.763, "step": 12660 }, { "epoch": 1.0057750699557444, "grad_norm": 0.6645131707191467, "learning_rate": 3.323939562329655e-05, "loss": 0.8822, "step": 12670 }, { "epoch": 1.0065688940046438, "grad_norm": 0.8816360235214233, "learning_rate": 3.322616495991109e-05, "loss": 0.879, "step": 12680 }, { "epoch": 1.0073627180535434, "grad_norm": 
0.8549253344535828, "learning_rate": 3.321293429652563e-05, "loss": 0.836, "step": 12690 }, { "epoch": 1.008156542102443, "grad_norm": 0.95768141746521, "learning_rate": 3.319970363314017e-05, "loss": 0.842, "step": 12700 }, { "epoch": 1.0089503661513426, "grad_norm": 0.8039479851722717, "learning_rate": 3.318647296975471e-05, "loss": 0.7732, "step": 12710 }, { "epoch": 1.0097441902002422, "grad_norm": 0.724469006061554, "learning_rate": 3.3173242306369246e-05, "loss": 0.797, "step": 12720 }, { "epoch": 1.0105380142491416, "grad_norm": 0.7534793019294739, "learning_rate": 3.316001164298378e-05, "loss": 0.8058, "step": 12730 }, { "epoch": 1.0113318382980412, "grad_norm": 0.9147580862045288, "learning_rate": 3.314678097959832e-05, "loss": 0.819, "step": 12740 }, { "epoch": 1.0121256623469408, "grad_norm": 0.8486768007278442, "learning_rate": 3.3133550316212856e-05, "loss": 0.7997, "step": 12750 }, { "epoch": 1.0129194863958404, "grad_norm": 0.7328081130981445, "learning_rate": 3.312031965282739e-05, "loss": 0.8433, "step": 12760 }, { "epoch": 1.01371331044474, "grad_norm": 0.9267632961273193, "learning_rate": 3.3107088989441934e-05, "loss": 0.8013, "step": 12770 }, { "epoch": 1.0145071344936394, "grad_norm": 0.7737709879875183, "learning_rate": 3.309385832605647e-05, "loss": 0.8186, "step": 12780 }, { "epoch": 1.015300958542539, "grad_norm": 0.6647375822067261, "learning_rate": 3.308062766267101e-05, "loss": 0.8904, "step": 12790 }, { "epoch": 1.0160947825914386, "grad_norm": 0.7600811123847961, "learning_rate": 3.3067396999285544e-05, "loss": 0.8189, "step": 12800 }, { "epoch": 1.0168886066403382, "grad_norm": 0.7824926972389221, "learning_rate": 3.305416633590008e-05, "loss": 0.8189, "step": 12810 }, { "epoch": 1.0176824306892378, "grad_norm": 0.5769202709197998, "learning_rate": 3.304093567251462e-05, "loss": 0.8361, "step": 12820 }, { "epoch": 1.0184762547381372, "grad_norm": 0.8311686515808105, "learning_rate": 3.302770500912916e-05, "loss": 0.8081, "step": 
12830 }, { "epoch": 1.0192700787870368, "grad_norm": 0.7909318804740906, "learning_rate": 3.301447434574369e-05, "loss": 0.7855, "step": 12840 }, { "epoch": 1.0200639028359364, "grad_norm": 0.8774951100349426, "learning_rate": 3.300124368235823e-05, "loss": 0.8729, "step": 12850 }, { "epoch": 1.020857726884836, "grad_norm": 0.747606635093689, "learning_rate": 3.298801301897277e-05, "loss": 0.8321, "step": 12860 }, { "epoch": 1.0216515509337356, "grad_norm": 0.8213605880737305, "learning_rate": 3.297478235558731e-05, "loss": 0.8039, "step": 12870 }, { "epoch": 1.022445374982635, "grad_norm": 0.7250701189041138, "learning_rate": 3.296155169220185e-05, "loss": 0.7972, "step": 12880 }, { "epoch": 1.0232391990315346, "grad_norm": 0.7011380195617676, "learning_rate": 3.294832102881639e-05, "loss": 0.8402, "step": 12890 }, { "epoch": 1.0240330230804342, "grad_norm": 0.9204203486442566, "learning_rate": 3.293509036543093e-05, "loss": 0.8623, "step": 12900 }, { "epoch": 1.0248268471293338, "grad_norm": 0.7364963293075562, "learning_rate": 3.292185970204546e-05, "loss": 0.8065, "step": 12910 }, { "epoch": 1.0256206711782334, "grad_norm": 0.8210947513580322, "learning_rate": 3.290862903866e-05, "loss": 0.8116, "step": 12920 }, { "epoch": 1.026414495227133, "grad_norm": 0.8079206347465515, "learning_rate": 3.289539837527454e-05, "loss": 0.8278, "step": 12930 }, { "epoch": 1.0272083192760324, "grad_norm": 0.8758916258811951, "learning_rate": 3.2882167711889076e-05, "loss": 0.8202, "step": 12940 }, { "epoch": 1.028002143324932, "grad_norm": 0.8359827399253845, "learning_rate": 3.2868937048503615e-05, "loss": 0.7738, "step": 12950 }, { "epoch": 1.0287959673738316, "grad_norm": 0.7513866424560547, "learning_rate": 3.2855706385118154e-05, "loss": 0.8644, "step": 12960 }, { "epoch": 1.0295897914227312, "grad_norm": 0.8743990063667297, "learning_rate": 3.284247572173269e-05, "loss": 0.8927, "step": 12970 }, { "epoch": 1.0303836154716308, "grad_norm": 0.8003587126731873, 
"learning_rate": 3.2829245058347225e-05, "loss": 0.8206, "step": 12980 }, { "epoch": 1.0311774395205302, "grad_norm": 0.7568554878234863, "learning_rate": 3.2816014394961764e-05, "loss": 0.8321, "step": 12990 }, { "epoch": 1.0319712635694298, "grad_norm": 0.7184220552444458, "learning_rate": 3.28027837315763e-05, "loss": 0.8007, "step": 13000 }, { "epoch": 1.0327650876183294, "grad_norm": 0.7958794236183167, "learning_rate": 3.278955306819084e-05, "loss": 0.886, "step": 13010 }, { "epoch": 1.033558911667229, "grad_norm": 0.8230629563331604, "learning_rate": 3.2776322404805374e-05, "loss": 0.8296, "step": 13020 }, { "epoch": 1.0343527357161286, "grad_norm": 0.8877654671669006, "learning_rate": 3.276309174141991e-05, "loss": 0.8461, "step": 13030 }, { "epoch": 1.035146559765028, "grad_norm": 0.7817438244819641, "learning_rate": 3.274986107803446e-05, "loss": 0.8301, "step": 13040 }, { "epoch": 1.0359403838139276, "grad_norm": 0.9634678959846497, "learning_rate": 3.2736630414649e-05, "loss": 0.8447, "step": 13050 }, { "epoch": 1.0367342078628272, "grad_norm": 0.8039039373397827, "learning_rate": 3.272339975126353e-05, "loss": 0.8416, "step": 13060 }, { "epoch": 1.0375280319117268, "grad_norm": 0.813490092754364, "learning_rate": 3.271016908787807e-05, "loss": 0.8281, "step": 13070 }, { "epoch": 1.0383218559606264, "grad_norm": 0.8581125736236572, "learning_rate": 3.269693842449261e-05, "loss": 0.8527, "step": 13080 }, { "epoch": 1.0391156800095258, "grad_norm": 0.7691791653633118, "learning_rate": 3.268370776110714e-05, "loss": 0.8385, "step": 13090 }, { "epoch": 1.0399095040584254, "grad_norm": 0.788347601890564, "learning_rate": 3.267047709772168e-05, "loss": 0.8372, "step": 13100 }, { "epoch": 1.040703328107325, "grad_norm": 0.8846760392189026, "learning_rate": 3.265724643433622e-05, "loss": 0.8794, "step": 13110 }, { "epoch": 1.0414971521562246, "grad_norm": 0.8345226645469666, "learning_rate": 3.2644015770950756e-05, "loss": 0.848, "step": 13120 }, { "epoch": 
1.0422909762051242, "grad_norm": 0.7519625425338745, "learning_rate": 3.2630785107565295e-05, "loss": 0.8409, "step": 13130 }, { "epoch": 1.0430848002540236, "grad_norm": 0.7269890904426575, "learning_rate": 3.2617554444179834e-05, "loss": 0.8638, "step": 13140 }, { "epoch": 1.0438786243029232, "grad_norm": 0.8326950669288635, "learning_rate": 3.260432378079437e-05, "loss": 0.7581, "step": 13150 }, { "epoch": 1.0446724483518228, "grad_norm": 0.8327310681343079, "learning_rate": 3.259109311740891e-05, "loss": 0.834, "step": 13160 }, { "epoch": 1.0454662724007224, "grad_norm": 0.9904863238334656, "learning_rate": 3.2577862454023444e-05, "loss": 0.7524, "step": 13170 }, { "epoch": 1.046260096449622, "grad_norm": 0.9039106965065002, "learning_rate": 3.256463179063798e-05, "loss": 0.7784, "step": 13180 }, { "epoch": 1.0470539204985214, "grad_norm": 0.6900113821029663, "learning_rate": 3.255140112725252e-05, "loss": 0.8065, "step": 13190 }, { "epoch": 1.047847744547421, "grad_norm": 0.8624986410140991, "learning_rate": 3.253817046386706e-05, "loss": 0.8827, "step": 13200 }, { "epoch": 1.0486415685963206, "grad_norm": 0.7626116275787354, "learning_rate": 3.25249398004816e-05, "loss": 0.8464, "step": 13210 }, { "epoch": 1.0494353926452202, "grad_norm": 0.7918316721916199, "learning_rate": 3.251170913709614e-05, "loss": 0.8258, "step": 13220 }, { "epoch": 1.0502292166941198, "grad_norm": 0.7877132892608643, "learning_rate": 3.249847847371068e-05, "loss": 0.8688, "step": 13230 }, { "epoch": 1.0510230407430192, "grad_norm": 0.7505654096603394, "learning_rate": 3.248524781032521e-05, "loss": 0.8173, "step": 13240 }, { "epoch": 1.0518168647919188, "grad_norm": 0.8359394073486328, "learning_rate": 3.247201714693975e-05, "loss": 0.8227, "step": 13250 }, { "epoch": 1.0526106888408184, "grad_norm": 0.9003983736038208, "learning_rate": 3.245878648355429e-05, "loss": 0.7917, "step": 13260 }, { "epoch": 1.053404512889718, "grad_norm": 0.7913084030151367, "learning_rate": 
3.2445555820168826e-05, "loss": 0.8602, "step": 13270 }, { "epoch": 1.0541983369386176, "grad_norm": 0.7529349327087402, "learning_rate": 3.243232515678336e-05, "loss": 0.7734, "step": 13280 }, { "epoch": 1.054992160987517, "grad_norm": 0.7655823230743408, "learning_rate": 3.24190944933979e-05, "loss": 0.8708, "step": 13290 }, { "epoch": 1.0557859850364166, "grad_norm": 0.7675358057022095, "learning_rate": 3.2405863830012436e-05, "loss": 0.7815, "step": 13300 }, { "epoch": 1.0565798090853162, "grad_norm": 0.8172122240066528, "learning_rate": 3.239263316662698e-05, "loss": 0.8038, "step": 13310 }, { "epoch": 1.0573736331342158, "grad_norm": 0.8098911643028259, "learning_rate": 3.2379402503241514e-05, "loss": 0.8107, "step": 13320 }, { "epoch": 1.0581674571831154, "grad_norm": 0.7456363439559937, "learning_rate": 3.236617183985605e-05, "loss": 0.8484, "step": 13330 }, { "epoch": 1.0589612812320148, "grad_norm": 0.8800321221351624, "learning_rate": 3.235294117647059e-05, "loss": 0.7985, "step": 13340 }, { "epoch": 1.0597551052809144, "grad_norm": 0.7727877497673035, "learning_rate": 3.2339710513085124e-05, "loss": 0.8827, "step": 13350 }, { "epoch": 1.060548929329814, "grad_norm": 0.8835548162460327, "learning_rate": 3.232647984969966e-05, "loss": 0.7698, "step": 13360 }, { "epoch": 1.0613427533787136, "grad_norm": 0.7106403708457947, "learning_rate": 3.23132491863142e-05, "loss": 0.863, "step": 13370 }, { "epoch": 1.0621365774276132, "grad_norm": 0.6507337689399719, "learning_rate": 3.230001852292874e-05, "loss": 0.7971, "step": 13380 }, { "epoch": 1.0629304014765126, "grad_norm": 0.7235475778579712, "learning_rate": 3.228678785954328e-05, "loss": 0.7664, "step": 13390 }, { "epoch": 1.0637242255254122, "grad_norm": 0.7101955413818359, "learning_rate": 3.227355719615782e-05, "loss": 0.7829, "step": 13400 }, { "epoch": 1.0645180495743118, "grad_norm": 0.7536014318466187, "learning_rate": 3.226032653277236e-05, "loss": 0.8732, "step": 13410 }, { "epoch": 
1.0653118736232114, "grad_norm": 0.7618986368179321, "learning_rate": 3.22470958693869e-05, "loss": 0.8343, "step": 13420 }, { "epoch": 1.066105697672111, "grad_norm": 0.8681114315986633, "learning_rate": 3.223386520600143e-05, "loss": 0.8696, "step": 13430 }, { "epoch": 1.0668995217210104, "grad_norm": 0.9769225120544434, "learning_rate": 3.222063454261597e-05, "loss": 0.8216, "step": 13440 }, { "epoch": 1.06769334576991, "grad_norm": 0.775951087474823, "learning_rate": 3.220872694556905e-05, "loss": 0.8409, "step": 13450 }, { "epoch": 1.0684871698188096, "grad_norm": 0.7240128517150879, "learning_rate": 3.219549628218359e-05, "loss": 0.881, "step": 13460 }, { "epoch": 1.0692809938677093, "grad_norm": 0.7799546122550964, "learning_rate": 3.218226561879813e-05, "loss": 0.8269, "step": 13470 }, { "epoch": 1.0700748179166089, "grad_norm": 0.7071607112884521, "learning_rate": 3.2169034955412666e-05, "loss": 0.8752, "step": 13480 }, { "epoch": 1.0708686419655082, "grad_norm": 0.8684601783752441, "learning_rate": 3.2155804292027205e-05, "loss": 0.8156, "step": 13490 }, { "epoch": 1.0716624660144078, "grad_norm": 0.7755899429321289, "learning_rate": 3.2142573628641743e-05, "loss": 0.8075, "step": 13500 }, { "epoch": 1.0724562900633074, "grad_norm": 0.7392835021018982, "learning_rate": 3.212934296525628e-05, "loss": 0.8711, "step": 13510 }, { "epoch": 1.073250114112207, "grad_norm": 0.8979812860488892, "learning_rate": 3.2116112301870815e-05, "loss": 0.8296, "step": 13520 }, { "epoch": 1.0740439381611067, "grad_norm": 0.9251773357391357, "learning_rate": 3.2102881638485354e-05, "loss": 0.8392, "step": 13530 }, { "epoch": 1.0748377622100063, "grad_norm": 0.6050375699996948, "learning_rate": 3.208965097509989e-05, "loss": 0.8432, "step": 13540 }, { "epoch": 1.0756315862589056, "grad_norm": 0.8619400858879089, "learning_rate": 3.207642031171443e-05, "loss": 0.8651, "step": 13550 }, { "epoch": 1.0764254103078053, "grad_norm": 0.9240981340408325, "learning_rate": 
3.206318964832897e-05, "loss": 0.8467, "step": 13560 }, { "epoch": 1.0772192343567049, "grad_norm": 0.9178832173347473, "learning_rate": 3.204995898494351e-05, "loss": 0.8079, "step": 13570 }, { "epoch": 1.0780130584056045, "grad_norm": 0.7604396343231201, "learning_rate": 3.203672832155805e-05, "loss": 0.7611, "step": 13580 }, { "epoch": 1.0788068824545038, "grad_norm": 0.5721964836120605, "learning_rate": 3.202349765817258e-05, "loss": 0.7863, "step": 13590 }, { "epoch": 1.0796007065034035, "grad_norm": 0.5710390210151672, "learning_rate": 3.201026699478712e-05, "loss": 0.873, "step": 13600 }, { "epoch": 1.080394530552303, "grad_norm": 0.72386234998703, "learning_rate": 3.199703633140166e-05, "loss": 0.866, "step": 13610 }, { "epoch": 1.0811883546012027, "grad_norm": 0.6764307618141174, "learning_rate": 3.19838056680162e-05, "loss": 0.8335, "step": 13620 }, { "epoch": 1.0819821786501023, "grad_norm": 0.796247124671936, "learning_rate": 3.197057500463073e-05, "loss": 0.8253, "step": 13630 }, { "epoch": 1.0827760026990019, "grad_norm": 0.8703547120094299, "learning_rate": 3.195734434124527e-05, "loss": 0.8459, "step": 13640 }, { "epoch": 1.0835698267479013, "grad_norm": 0.6727367639541626, "learning_rate": 3.194411367785981e-05, "loss": 0.8283, "step": 13650 }, { "epoch": 1.0843636507968009, "grad_norm": 0.8048222064971924, "learning_rate": 3.193088301447435e-05, "loss": 0.8383, "step": 13660 }, { "epoch": 1.0851574748457005, "grad_norm": 0.8705745339393616, "learning_rate": 3.1917652351088885e-05, "loss": 0.8146, "step": 13670 }, { "epoch": 1.0859512988946, "grad_norm": 0.7636762261390686, "learning_rate": 3.1904421687703424e-05, "loss": 0.9398, "step": 13680 }, { "epoch": 1.0867451229434997, "grad_norm": 0.806253969669342, "learning_rate": 3.189119102431796e-05, "loss": 0.8356, "step": 13690 }, { "epoch": 1.087538946992399, "grad_norm": 0.8001126646995544, "learning_rate": 3.1877960360932495e-05, "loss": 0.8216, "step": 13700 }, { "epoch": 1.0883327710412987, 
"grad_norm": 0.8212706446647644, "learning_rate": 3.1864729697547034e-05, "loss": 0.8063, "step": 13710 }, { "epoch": 1.0891265950901983, "grad_norm": 0.8403292894363403, "learning_rate": 3.185149903416157e-05, "loss": 0.7797, "step": 13720 }, { "epoch": 1.0899204191390979, "grad_norm": 0.876654326915741, "learning_rate": 3.183826837077611e-05, "loss": 0.8569, "step": 13730 }, { "epoch": 1.0907142431879975, "grad_norm": 0.69620680809021, "learning_rate": 3.182503770739065e-05, "loss": 0.8604, "step": 13740 }, { "epoch": 1.0915080672368969, "grad_norm": 0.6936290860176086, "learning_rate": 3.181180704400519e-05, "loss": 0.8722, "step": 13750 }, { "epoch": 1.0923018912857965, "grad_norm": 0.848301112651825, "learning_rate": 3.179857638061973e-05, "loss": 0.8742, "step": 13760 }, { "epoch": 1.093095715334696, "grad_norm": 0.9208377003669739, "learning_rate": 3.178534571723427e-05, "loss": 0.8333, "step": 13770 }, { "epoch": 1.0938895393835957, "grad_norm": 0.8173259496688843, "learning_rate": 3.17721150538488e-05, "loss": 0.8459, "step": 13780 }, { "epoch": 1.0946833634324953, "grad_norm": 0.8618404865264893, "learning_rate": 3.175888439046334e-05, "loss": 0.8343, "step": 13790 }, { "epoch": 1.0954771874813947, "grad_norm": 0.7455660104751587, "learning_rate": 3.174565372707788e-05, "loss": 0.7877, "step": 13800 }, { "epoch": 1.0962710115302943, "grad_norm": 0.7906826734542847, "learning_rate": 3.173242306369241e-05, "loss": 0.8711, "step": 13810 }, { "epoch": 1.0970648355791939, "grad_norm": 0.8172614574432373, "learning_rate": 3.171919240030695e-05, "loss": 0.8373, "step": 13820 }, { "epoch": 1.0978586596280935, "grad_norm": 0.7989473938941956, "learning_rate": 3.1705961736921494e-05, "loss": 0.8162, "step": 13830 }, { "epoch": 1.098652483676993, "grad_norm": 0.8977952003479004, "learning_rate": 3.169273107353603e-05, "loss": 0.8057, "step": 13840 }, { "epoch": 1.0994463077258925, "grad_norm": 0.6931014060974121, "learning_rate": 3.1679500410150565e-05, "loss": 
0.8232, "step": 13850 }, { "epoch": 1.100240131774792, "grad_norm": 0.8068207502365112, "learning_rate": 3.1666269746765104e-05, "loss": 0.8804, "step": 13860 }, { "epoch": 1.1010339558236917, "grad_norm": 0.7539369463920593, "learning_rate": 3.165303908337964e-05, "loss": 0.8351, "step": 13870 }, { "epoch": 1.1018277798725913, "grad_norm": 0.7581447958946228, "learning_rate": 3.163980841999418e-05, "loss": 0.846, "step": 13880 }, { "epoch": 1.102621603921491, "grad_norm": 0.9124611616134644, "learning_rate": 3.1626577756608714e-05, "loss": 0.8507, "step": 13890 }, { "epoch": 1.1034154279703903, "grad_norm": 0.7561845779418945, "learning_rate": 3.161334709322325e-05, "loss": 0.7908, "step": 13900 }, { "epoch": 1.1042092520192899, "grad_norm": 0.718763530254364, "learning_rate": 3.160011642983779e-05, "loss": 0.832, "step": 13910 }, { "epoch": 1.1050030760681895, "grad_norm": 0.873539924621582, "learning_rate": 3.158688576645233e-05, "loss": 0.7854, "step": 13920 }, { "epoch": 1.105796900117089, "grad_norm": 0.8948822021484375, "learning_rate": 3.157365510306687e-05, "loss": 0.7936, "step": 13930 }, { "epoch": 1.1065907241659887, "grad_norm": 0.7010993957519531, "learning_rate": 3.156042443968141e-05, "loss": 0.7964, "step": 13940 }, { "epoch": 1.107384548214888, "grad_norm": 0.8644427061080933, "learning_rate": 3.154719377629595e-05, "loss": 0.8249, "step": 13950 }, { "epoch": 1.1081783722637877, "grad_norm": 0.8382445573806763, "learning_rate": 3.153396311291048e-05, "loss": 0.8446, "step": 13960 }, { "epoch": 1.1089721963126873, "grad_norm": 0.9029629230499268, "learning_rate": 3.152073244952502e-05, "loss": 0.8784, "step": 13970 }, { "epoch": 1.109766020361587, "grad_norm": 0.8613706827163696, "learning_rate": 3.150750178613956e-05, "loss": 0.7617, "step": 13980 }, { "epoch": 1.1105598444104865, "grad_norm": 1.0319517850875854, "learning_rate": 3.14942711227541e-05, "loss": 0.8107, "step": 13990 }, { "epoch": 1.1113536684593859, "grad_norm": 0.8257167339324951, 
"learning_rate": 3.1481040459368636e-05, "loss": 0.7137, "step": 14000 }, { "epoch": 1.1121474925082855, "grad_norm": 0.8850013017654419, "learning_rate": 3.1467809795983175e-05, "loss": 0.8005, "step": 14010 }, { "epoch": 1.112941316557185, "grad_norm": 0.7616755366325378, "learning_rate": 3.1454579132597713e-05, "loss": 0.7917, "step": 14020 }, { "epoch": 1.1137351406060847, "grad_norm": 0.891237735748291, "learning_rate": 3.1441348469212246e-05, "loss": 0.7803, "step": 14030 }, { "epoch": 1.1145289646549843, "grad_norm": 0.7421796917915344, "learning_rate": 3.1428117805826785e-05, "loss": 0.8219, "step": 14040 }, { "epoch": 1.1153227887038837, "grad_norm": 0.8168928027153015, "learning_rate": 3.1414887142441324e-05, "loss": 0.7825, "step": 14050 }, { "epoch": 1.1161166127527833, "grad_norm": 0.7771959900856018, "learning_rate": 3.140165647905586e-05, "loss": 0.8165, "step": 14060 }, { "epoch": 1.116910436801683, "grad_norm": 0.7545336484909058, "learning_rate": 3.1388425815670395e-05, "loss": 0.9121, "step": 14070 }, { "epoch": 1.1177042608505825, "grad_norm": 0.626607358455658, "learning_rate": 3.1375195152284934e-05, "loss": 0.8899, "step": 14080 }, { "epoch": 1.118498084899482, "grad_norm": 0.8673813343048096, "learning_rate": 3.136196448889947e-05, "loss": 0.8339, "step": 14090 }, { "epoch": 1.1192919089483815, "grad_norm": 0.8372191786766052, "learning_rate": 3.134873382551402e-05, "loss": 0.7954, "step": 14100 }, { "epoch": 1.120085732997281, "grad_norm": 0.9092103838920593, "learning_rate": 3.133550316212855e-05, "loss": 0.8326, "step": 14110 }, { "epoch": 1.1208795570461807, "grad_norm": 0.7805484533309937, "learning_rate": 3.132227249874309e-05, "loss": 0.799, "step": 14120 }, { "epoch": 1.1216733810950803, "grad_norm": 0.8048092126846313, "learning_rate": 3.130904183535763e-05, "loss": 0.8497, "step": 14130 }, { "epoch": 1.12246720514398, "grad_norm": 0.922804594039917, "learning_rate": 3.129581117197217e-05, "loss": 0.7648, "step": 14140 }, { "epoch": 
1.1232610291928795, "grad_norm": 0.7149479985237122, "learning_rate": 3.12825805085867e-05, "loss": 0.8884, "step": 14150 }, { "epoch": 1.124054853241779, "grad_norm": 0.6718799471855164, "learning_rate": 3.126934984520124e-05, "loss": 0.7914, "step": 14160 }, { "epoch": 1.1248486772906785, "grad_norm": 0.6949440836906433, "learning_rate": 3.125611918181578e-05, "loss": 0.8319, "step": 14170 }, { "epoch": 1.125642501339578, "grad_norm": 0.8410897254943848, "learning_rate": 3.1242888518430316e-05, "loss": 0.8594, "step": 14180 }, { "epoch": 1.1264363253884777, "grad_norm": 0.6044743657112122, "learning_rate": 3.1229657855044855e-05, "loss": 0.8168, "step": 14190 }, { "epoch": 1.127230149437377, "grad_norm": 1.078133225440979, "learning_rate": 3.1216427191659394e-05, "loss": 0.834, "step": 14200 }, { "epoch": 1.1280239734862767, "grad_norm": 0.9174512624740601, "learning_rate": 3.120319652827393e-05, "loss": 0.7889, "step": 14210 }, { "epoch": 1.1288177975351763, "grad_norm": 0.5952799916267395, "learning_rate": 3.1189965864888465e-05, "loss": 0.79, "step": 14220 }, { "epoch": 1.129611621584076, "grad_norm": 0.8574535250663757, "learning_rate": 3.1176735201503004e-05, "loss": 0.8347, "step": 14230 }, { "epoch": 1.1304054456329755, "grad_norm": 0.882088303565979, "learning_rate": 3.116350453811754e-05, "loss": 0.8774, "step": 14240 }, { "epoch": 1.1311992696818751, "grad_norm": 0.8594134449958801, "learning_rate": 3.115027387473208e-05, "loss": 0.7897, "step": 14250 }, { "epoch": 1.1319930937307745, "grad_norm": 0.739986002445221, "learning_rate": 3.1137043211346614e-05, "loss": 0.8021, "step": 14260 }, { "epoch": 1.132786917779674, "grad_norm": 0.8186190724372864, "learning_rate": 3.112381254796116e-05, "loss": 0.8212, "step": 14270 }, { "epoch": 1.1335807418285737, "grad_norm": 0.5985064506530762, "learning_rate": 3.11105818845757e-05, "loss": 0.8579, "step": 14280 }, { "epoch": 1.1343745658774733, "grad_norm": 0.7839154601097107, "learning_rate": 
3.109735122119023e-05, "loss": 0.8212, "step": 14290 }, { "epoch": 1.1351683899263727, "grad_norm": 0.8646635413169861, "learning_rate": 3.108412055780477e-05, "loss": 0.8486, "step": 14300 }, { "epoch": 1.1359622139752723, "grad_norm": 0.6865241527557373, "learning_rate": 3.107088989441931e-05, "loss": 0.8799, "step": 14310 }, { "epoch": 1.136756038024172, "grad_norm": 0.7246029376983643, "learning_rate": 3.105765923103385e-05, "loss": 0.8432, "step": 14320 }, { "epoch": 1.1375498620730715, "grad_norm": 0.8030770421028137, "learning_rate": 3.104442856764838e-05, "loss": 0.7673, "step": 14330 }, { "epoch": 1.1383436861219711, "grad_norm": 0.7397379279136658, "learning_rate": 3.103119790426292e-05, "loss": 0.8608, "step": 14340 }, { "epoch": 1.1391375101708707, "grad_norm": 0.7867853045463562, "learning_rate": 3.101796724087746e-05, "loss": 0.8263, "step": 14350 }, { "epoch": 1.13993133421977, "grad_norm": 0.6703535914421082, "learning_rate": 3.1004736577491996e-05, "loss": 0.8149, "step": 14360 }, { "epoch": 1.1407251582686697, "grad_norm": 1.018248200416565, "learning_rate": 3.0991505914106535e-05, "loss": 0.8686, "step": 14370 }, { "epoch": 1.1415189823175693, "grad_norm": 0.8166862726211548, "learning_rate": 3.0978275250721074e-05, "loss": 0.8326, "step": 14380 }, { "epoch": 1.142312806366469, "grad_norm": 0.8093626499176025, "learning_rate": 3.096504458733561e-05, "loss": 0.7861, "step": 14390 }, { "epoch": 1.1431066304153685, "grad_norm": 1.1167017221450806, "learning_rate": 3.0951813923950145e-05, "loss": 0.8208, "step": 14400 }, { "epoch": 1.143900454464268, "grad_norm": 0.6292011141777039, "learning_rate": 3.0938583260564684e-05, "loss": 0.7954, "step": 14410 }, { "epoch": 1.1446942785131675, "grad_norm": 0.7709428071975708, "learning_rate": 3.092535259717922e-05, "loss": 0.8027, "step": 14420 }, { "epoch": 1.1454881025620671, "grad_norm": 0.8300706744194031, "learning_rate": 3.091212193379376e-05, "loss": 0.8297, "step": 14430 }, { "epoch": 
1.1462819266109667, "grad_norm": 0.7700504064559937, "learning_rate": 3.08988912704083e-05, "loss": 0.8367, "step": 14440 }, { "epoch": 1.1470757506598663, "grad_norm": 1.0189820528030396, "learning_rate": 3.088566060702284e-05, "loss": 0.7969, "step": 14450 }, { "epoch": 1.1478695747087657, "grad_norm": 0.7077481150627136, "learning_rate": 3.087242994363738e-05, "loss": 0.7619, "step": 14460 }, { "epoch": 1.1486633987576653, "grad_norm": 0.8861839175224304, "learning_rate": 3.085919928025192e-05, "loss": 0.7641, "step": 14470 }, { "epoch": 1.149457222806565, "grad_norm": 0.8999897837638855, "learning_rate": 3.084596861686645e-05, "loss": 0.7736, "step": 14480 }, { "epoch": 1.1502510468554645, "grad_norm": 0.9881305694580078, "learning_rate": 3.083273795348099e-05, "loss": 0.8508, "step": 14490 }, { "epoch": 1.1510448709043641, "grad_norm": 0.6451756358146667, "learning_rate": 3.081950729009553e-05, "loss": 0.7832, "step": 14500 }, { "epoch": 1.1518386949532635, "grad_norm": 0.9463132619857788, "learning_rate": 3.080627662671006e-05, "loss": 0.8006, "step": 14510 }, { "epoch": 1.1526325190021631, "grad_norm": 0.8050596117973328, "learning_rate": 3.07930459633246e-05, "loss": 0.8835, "step": 14520 }, { "epoch": 1.1534263430510627, "grad_norm": 0.6816156506538391, "learning_rate": 3.077981529993914e-05, "loss": 0.8281, "step": 14530 }, { "epoch": 1.1542201670999623, "grad_norm": 0.7849035263061523, "learning_rate": 3.0766584636553683e-05, "loss": 0.7944, "step": 14540 }, { "epoch": 1.155013991148862, "grad_norm": 0.7930827140808105, "learning_rate": 3.0753353973168216e-05, "loss": 0.8804, "step": 14550 }, { "epoch": 1.1558078151977613, "grad_norm": 0.8753893971443176, "learning_rate": 3.0740123309782755e-05, "loss": 0.8789, "step": 14560 }, { "epoch": 1.156601639246661, "grad_norm": 0.974399983882904, "learning_rate": 3.0726892646397293e-05, "loss": 0.8217, "step": 14570 }, { "epoch": 1.1573954632955605, "grad_norm": 0.7714707255363464, "learning_rate": 
3.071366198301183e-05, "loss": 0.767, "step": 14580 }, { "epoch": 1.1581892873444601, "grad_norm": 0.7159197926521301, "learning_rate": 3.0700431319626365e-05, "loss": 0.7676, "step": 14590 }, { "epoch": 1.1589831113933597, "grad_norm": 0.6954260468482971, "learning_rate": 3.0687200656240904e-05, "loss": 0.8319, "step": 14600 }, { "epoch": 1.1597769354422591, "grad_norm": 0.6127654910087585, "learning_rate": 3.067396999285544e-05, "loss": 0.8533, "step": 14610 }, { "epoch": 1.1605707594911587, "grad_norm": 0.6652526259422302, "learning_rate": 3.066073932946998e-05, "loss": 0.8525, "step": 14620 }, { "epoch": 1.1613645835400583, "grad_norm": 0.9769663214683533, "learning_rate": 3.064750866608452e-05, "loss": 0.8263, "step": 14630 }, { "epoch": 1.162158407588958, "grad_norm": 0.8271260261535645, "learning_rate": 3.063427800269906e-05, "loss": 0.8327, "step": 14640 }, { "epoch": 1.1629522316378575, "grad_norm": 0.7566442489624023, "learning_rate": 3.06210473393136e-05, "loss": 0.7574, "step": 14650 }, { "epoch": 1.1637460556867572, "grad_norm": 0.7964077591896057, "learning_rate": 3.060781667592813e-05, "loss": 0.7874, "step": 14660 }, { "epoch": 1.1645398797356565, "grad_norm": 0.7571009993553162, "learning_rate": 3.059458601254267e-05, "loss": 0.7855, "step": 14670 }, { "epoch": 1.1653337037845561, "grad_norm": 0.8210294842720032, "learning_rate": 3.058135534915721e-05, "loss": 0.8045, "step": 14680 }, { "epoch": 1.1661275278334557, "grad_norm": 0.6790111064910889, "learning_rate": 3.056812468577175e-05, "loss": 0.8583, "step": 14690 }, { "epoch": 1.1669213518823554, "grad_norm": 0.8836639523506165, "learning_rate": 3.055489402238628e-05, "loss": 0.81, "step": 14700 }, { "epoch": 1.1677151759312547, "grad_norm": 0.92630535364151, "learning_rate": 3.0541663359000825e-05, "loss": 0.8531, "step": 14710 }, { "epoch": 1.1685089999801543, "grad_norm": 0.681800365447998, "learning_rate": 3.0528432695615364e-05, "loss": 0.817, "step": 14720 }, { "epoch": 1.169302824029054, 
"grad_norm": 0.7862175107002258, "learning_rate": 3.0515202032229896e-05, "loss": 0.8026, "step": 14730 }, { "epoch": 1.1700966480779535, "grad_norm": 0.7809861898422241, "learning_rate": 3.0501971368844435e-05, "loss": 0.8225, "step": 14740 }, { "epoch": 1.1708904721268532, "grad_norm": 0.9233412146568298, "learning_rate": 3.0488740705458974e-05, "loss": 0.8066, "step": 14750 }, { "epoch": 1.1716842961757528, "grad_norm": 0.8254870772361755, "learning_rate": 3.0475510042073513e-05, "loss": 0.8538, "step": 14760 }, { "epoch": 1.1724781202246521, "grad_norm": 0.8267969489097595, "learning_rate": 3.046227937868805e-05, "loss": 0.8346, "step": 14770 }, { "epoch": 1.1732719442735517, "grad_norm": 0.7673428058624268, "learning_rate": 3.0449048715302587e-05, "loss": 0.8336, "step": 14780 }, { "epoch": 1.1740657683224514, "grad_norm": 0.8175097107887268, "learning_rate": 3.0435818051917126e-05, "loss": 0.8939, "step": 14790 }, { "epoch": 1.174859592371351, "grad_norm": 0.8476851582527161, "learning_rate": 3.0422587388531665e-05, "loss": 0.828, "step": 14800 }, { "epoch": 1.1756534164202503, "grad_norm": 0.8046305179595947, "learning_rate": 3.0409356725146197e-05, "loss": 0.8243, "step": 14810 }, { "epoch": 1.17644724046915, "grad_norm": 0.8776468634605408, "learning_rate": 3.039612606176074e-05, "loss": 0.8292, "step": 14820 }, { "epoch": 1.1772410645180496, "grad_norm": 0.7931563258171082, "learning_rate": 3.038289539837528e-05, "loss": 0.7834, "step": 14830 }, { "epoch": 1.1780348885669492, "grad_norm": 1.0599154233932495, "learning_rate": 3.036966473498981e-05, "loss": 0.7835, "step": 14840 }, { "epoch": 1.1788287126158488, "grad_norm": 0.902905285358429, "learning_rate": 3.035643407160435e-05, "loss": 0.8089, "step": 14850 }, { "epoch": 1.1796225366647484, "grad_norm": 0.670547604560852, "learning_rate": 3.034320340821889e-05, "loss": 0.7964, "step": 14860 }, { "epoch": 1.1804163607136477, "grad_norm": 0.7922139763832092, "learning_rate": 3.032997274483343e-05, 
"loss": 0.8179, "step": 14870 }, { "epoch": 1.1812101847625474, "grad_norm": 0.8534002304077148, "learning_rate": 3.0316742081447963e-05, "loss": 0.8396, "step": 14880 }, { "epoch": 1.182004008811447, "grad_norm": 0.8879272937774658, "learning_rate": 3.0303511418062502e-05, "loss": 0.7891, "step": 14890 }, { "epoch": 1.1827978328603466, "grad_norm": 0.711462676525116, "learning_rate": 3.029028075467704e-05, "loss": 0.7261, "step": 14900 }, { "epoch": 1.183591656909246, "grad_norm": 0.790237545967102, "learning_rate": 3.027705009129158e-05, "loss": 0.8176, "step": 14910 }, { "epoch": 1.1843854809581456, "grad_norm": 0.7441157102584839, "learning_rate": 3.0263819427906115e-05, "loss": 0.8004, "step": 14920 }, { "epoch": 1.1851793050070452, "grad_norm": 0.6485238075256348, "learning_rate": 3.0250588764520654e-05, "loss": 0.8286, "step": 14930 }, { "epoch": 1.1859731290559448, "grad_norm": 0.5834975242614746, "learning_rate": 3.0237358101135193e-05, "loss": 0.8362, "step": 14940 }, { "epoch": 1.1867669531048444, "grad_norm": 0.7251124382019043, "learning_rate": 3.0224127437749732e-05, "loss": 0.8968, "step": 14950 }, { "epoch": 1.187560777153744, "grad_norm": 0.9666823148727417, "learning_rate": 3.0210896774364268e-05, "loss": 0.8247, "step": 14960 }, { "epoch": 1.1883546012026434, "grad_norm": 0.839264988899231, "learning_rate": 3.0197666110978807e-05, "loss": 0.8347, "step": 14970 }, { "epoch": 1.189148425251543, "grad_norm": 0.6984066367149353, "learning_rate": 3.0184435447593345e-05, "loss": 0.9087, "step": 14980 }, { "epoch": 1.1899422493004426, "grad_norm": 0.6566882729530334, "learning_rate": 3.017120478420788e-05, "loss": 0.8933, "step": 14990 }, { "epoch": 1.1907360733493422, "grad_norm": 0.880733847618103, "learning_rate": 3.015797412082242e-05, "loss": 0.8226, "step": 15000 }, { "epoch": 1.1915298973982416, "grad_norm": 0.6445071697235107, "learning_rate": 3.014474345743696e-05, "loss": 0.8331, "step": 15010 }, { "epoch": 1.1923237214471412, "grad_norm": 
1.1646579504013062, "learning_rate": 3.0131512794051498e-05, "loss": 0.8809, "step": 15020 }, { "epoch": 1.1931175454960408, "grad_norm": 0.7741886973381042, "learning_rate": 3.011828213066603e-05, "loss": 0.8101, "step": 15030 }, { "epoch": 1.1939113695449404, "grad_norm": 0.760037899017334, "learning_rate": 3.0105051467280572e-05, "loss": 0.7767, "step": 15040 }, { "epoch": 1.19470519359384, "grad_norm": 1.031218409538269, "learning_rate": 3.009182080389511e-05, "loss": 0.8009, "step": 15050 }, { "epoch": 1.1954990176427396, "grad_norm": 0.6918702125549316, "learning_rate": 3.007859014050965e-05, "loss": 0.7885, "step": 15060 }, { "epoch": 1.196292841691639, "grad_norm": 1.0237363576889038, "learning_rate": 3.0065359477124182e-05, "loss": 0.8503, "step": 15070 }, { "epoch": 1.1970866657405386, "grad_norm": 1.026094913482666, "learning_rate": 3.005212881373872e-05, "loss": 0.7394, "step": 15080 }, { "epoch": 1.1978804897894382, "grad_norm": 0.8072192072868347, "learning_rate": 3.0038898150353263e-05, "loss": 0.8343, "step": 15090 }, { "epoch": 1.1986743138383378, "grad_norm": 0.8054350018501282, "learning_rate": 3.0025667486967796e-05, "loss": 0.8339, "step": 15100 }, { "epoch": 1.1994681378872374, "grad_norm": 0.7186553478240967, "learning_rate": 3.0012436823582335e-05, "loss": 0.7915, "step": 15110 }, { "epoch": 1.2002619619361368, "grad_norm": 0.9386051297187805, "learning_rate": 2.9999206160196874e-05, "loss": 0.8306, "step": 15120 }, { "epoch": 1.2010557859850364, "grad_norm": 0.7570390105247498, "learning_rate": 2.9985975496811412e-05, "loss": 0.832, "step": 15130 }, { "epoch": 1.201849610033936, "grad_norm": 0.8414233326911926, "learning_rate": 2.9972744833425948e-05, "loss": 0.8668, "step": 15140 }, { "epoch": 1.2026434340828356, "grad_norm": 0.790988564491272, "learning_rate": 2.9959514170040487e-05, "loss": 0.802, "step": 15150 }, { "epoch": 1.2034372581317352, "grad_norm": 1.0100650787353516, "learning_rate": 2.9946283506655026e-05, "loss": 0.8767, 
"step": 15160 }, { "epoch": 1.2042310821806346, "grad_norm": 0.8488495349884033, "learning_rate": 2.9933052843269565e-05, "loss": 0.8674, "step": 15170 }, { "epoch": 1.2050249062295342, "grad_norm": 0.8193151354789734, "learning_rate": 2.99198221798841e-05, "loss": 0.8027, "step": 15180 }, { "epoch": 1.2058187302784338, "grad_norm": 0.7268685698509216, "learning_rate": 2.990659151649864e-05, "loss": 0.8172, "step": 15190 }, { "epoch": 1.2066125543273334, "grad_norm": 0.8344203233718872, "learning_rate": 2.9893360853113178e-05, "loss": 0.8125, "step": 15200 }, { "epoch": 1.207406378376233, "grad_norm": 0.8553740978240967, "learning_rate": 2.988013018972771e-05, "loss": 0.7997, "step": 15210 }, { "epoch": 1.2082002024251324, "grad_norm": 0.7145830988883972, "learning_rate": 2.9866899526342253e-05, "loss": 0.7697, "step": 15220 }, { "epoch": 1.208994026474032, "grad_norm": 0.7507617473602295, "learning_rate": 2.985366886295679e-05, "loss": 0.8852, "step": 15230 }, { "epoch": 1.2097878505229316, "grad_norm": 0.9048145413398743, "learning_rate": 2.984043819957133e-05, "loss": 0.8967, "step": 15240 }, { "epoch": 1.2105816745718312, "grad_norm": 0.7048582434654236, "learning_rate": 2.9827207536185863e-05, "loss": 0.8613, "step": 15250 }, { "epoch": 1.2113754986207308, "grad_norm": 0.6709416508674622, "learning_rate": 2.98139768728004e-05, "loss": 0.8725, "step": 15260 }, { "epoch": 1.2121693226696304, "grad_norm": 0.7239158749580383, "learning_rate": 2.9800746209414944e-05, "loss": 0.8236, "step": 15270 }, { "epoch": 1.2129631467185298, "grad_norm": 0.7127059102058411, "learning_rate": 2.9787515546029483e-05, "loss": 0.8302, "step": 15280 }, { "epoch": 1.2137569707674294, "grad_norm": 0.8340943455696106, "learning_rate": 2.9774284882644015e-05, "loss": 0.7475, "step": 15290 }, { "epoch": 1.214550794816329, "grad_norm": 0.9518420696258545, "learning_rate": 2.9761054219258554e-05, "loss": 0.8673, "step": 15300 }, { "epoch": 1.2153446188652286, "grad_norm": 
0.6714077591896057, "learning_rate": 2.9747823555873093e-05, "loss": 0.7967, "step": 15310 }, { "epoch": 1.216138442914128, "grad_norm": 0.9402453303337097, "learning_rate": 2.973459289248763e-05, "loss": 0.8375, "step": 15320 }, { "epoch": 1.2169322669630276, "grad_norm": 0.8551952242851257, "learning_rate": 2.9721362229102167e-05, "loss": 0.8432, "step": 15330 }, { "epoch": 1.2177260910119272, "grad_norm": 0.9510105848312378, "learning_rate": 2.9708131565716706e-05, "loss": 0.8647, "step": 15340 }, { "epoch": 1.2185199150608268, "grad_norm": 0.6727361679077148, "learning_rate": 2.9694900902331245e-05, "loss": 0.8573, "step": 15350 }, { "epoch": 1.2193137391097264, "grad_norm": 0.8175778388977051, "learning_rate": 2.968167023894578e-05, "loss": 0.793, "step": 15360 }, { "epoch": 1.220107563158626, "grad_norm": 0.771083652973175, "learning_rate": 2.966843957556032e-05, "loss": 0.8401, "step": 15370 }, { "epoch": 1.2209013872075254, "grad_norm": 0.8669653534889221, "learning_rate": 2.965520891217486e-05, "loss": 0.7758, "step": 15380 }, { "epoch": 1.221695211256425, "grad_norm": 0.7607120871543884, "learning_rate": 2.9641978248789397e-05, "loss": 0.8172, "step": 15390 }, { "epoch": 1.2224890353053246, "grad_norm": 0.9306351542472839, "learning_rate": 2.9628747585403933e-05, "loss": 0.8082, "step": 15400 }, { "epoch": 1.2232828593542242, "grad_norm": 0.8709003329277039, "learning_rate": 2.9615516922018472e-05, "loss": 0.7716, "step": 15410 }, { "epoch": 1.2240766834031236, "grad_norm": 0.9127326011657715, "learning_rate": 2.960228625863301e-05, "loss": 0.8231, "step": 15420 }, { "epoch": 1.2248705074520232, "grad_norm": 0.7594894766807556, "learning_rate": 2.9589055595247543e-05, "loss": 0.7788, "step": 15430 }, { "epoch": 1.2256643315009228, "grad_norm": 0.7096917033195496, "learning_rate": 2.9575824931862085e-05, "loss": 0.8266, "step": 15440 }, { "epoch": 1.2264581555498224, "grad_norm": 0.8250033855438232, "learning_rate": 2.9562594268476624e-05, "loss": 0.8823, 
"step": 15450 }, { "epoch": 1.227251979598722, "grad_norm": 0.8161039352416992, "learning_rate": 2.9550686671429705e-05, "loss": 0.7683, "step": 15460 }, { "epoch": 1.2280458036476216, "grad_norm": 0.9151235222816467, "learning_rate": 2.9537456008044244e-05, "loss": 0.7988, "step": 15470 }, { "epoch": 1.228839627696521, "grad_norm": 0.696901798248291, "learning_rate": 2.9524225344658783e-05, "loss": 0.8231, "step": 15480 }, { "epoch": 1.2296334517454206, "grad_norm": null, "learning_rate": 2.9512317747611867e-05, "loss": 0.8496, "step": 15490 }, { "epoch": 1.2304272757943202, "grad_norm": 0.7194181084632874, "learning_rate": 2.9499087084226406e-05, "loss": 0.8621, "step": 15500 }, { "epoch": 1.2312210998432198, "grad_norm": 0.7951611280441284, "learning_rate": 2.9485856420840942e-05, "loss": 0.7799, "step": 15510 }, { "epoch": 1.2320149238921192, "grad_norm": 0.8867210149765015, "learning_rate": 2.947262575745548e-05, "loss": 0.8452, "step": 15520 }, { "epoch": 1.2328087479410188, "grad_norm": 0.842461109161377, "learning_rate": 2.945939509407002e-05, "loss": 0.8028, "step": 15530 }, { "epoch": 1.2336025719899184, "grad_norm": 0.7903575897216797, "learning_rate": 2.944616443068456e-05, "loss": 0.8713, "step": 15540 }, { "epoch": 1.234396396038818, "grad_norm": 0.7790343165397644, "learning_rate": 2.943293376729909e-05, "loss": 0.7852, "step": 15550 }, { "epoch": 1.2351902200877176, "grad_norm": 0.8206233978271484, "learning_rate": 2.9419703103913633e-05, "loss": 0.7899, "step": 15560 }, { "epoch": 1.2359840441366172, "grad_norm": 0.8968833684921265, "learning_rate": 2.9406472440528172e-05, "loss": 0.8477, "step": 15570 }, { "epoch": 1.2367778681855166, "grad_norm": 0.9603748917579651, "learning_rate": 2.9393241777142704e-05, "loss": 0.79, "step": 15580 }, { "epoch": 1.2375716922344162, "grad_norm": 0.7768341898918152, "learning_rate": 2.9380011113757243e-05, "loss": 0.7812, "step": 15590 }, { "epoch": 1.2383655162833158, "grad_norm": 0.7468785047531128, 
"learning_rate": 2.9366780450371782e-05, "loss": 0.8008, "step": 15600 }, { "epoch": 1.2391593403322154, "grad_norm": 1.0277005434036255, "learning_rate": 2.9353549786986324e-05, "loss": 0.8168, "step": 15610 }, { "epoch": 1.2399531643811148, "grad_norm": 0.7243433594703674, "learning_rate": 2.9340319123600857e-05, "loss": 0.8091, "step": 15620 }, { "epoch": 1.2407469884300144, "grad_norm": 0.8666514158248901, "learning_rate": 2.9327088460215395e-05, "loss": 0.8132, "step": 15630 }, { "epoch": 1.241540812478914, "grad_norm": 0.80784010887146, "learning_rate": 2.9313857796829934e-05, "loss": 0.8182, "step": 15640 }, { "epoch": 1.2423346365278136, "grad_norm": 0.743398129940033, "learning_rate": 2.9300627133444473e-05, "loss": 0.8264, "step": 15650 }, { "epoch": 1.2431284605767132, "grad_norm": 0.6970568895339966, "learning_rate": 2.928739647005901e-05, "loss": 0.8156, "step": 15660 }, { "epoch": 1.2439222846256128, "grad_norm": 0.640468955039978, "learning_rate": 2.9274165806673548e-05, "loss": 0.8191, "step": 15670 }, { "epoch": 1.2447161086745122, "grad_norm": 0.9808381795883179, "learning_rate": 2.9260935143288087e-05, "loss": 0.8104, "step": 15680 }, { "epoch": 1.2455099327234118, "grad_norm": 0.7460395693778992, "learning_rate": 2.9247704479902622e-05, "loss": 0.8131, "step": 15690 }, { "epoch": 1.2463037567723114, "grad_norm": 0.8486602306365967, "learning_rate": 2.923447381651716e-05, "loss": 0.7889, "step": 15700 }, { "epoch": 1.247097580821211, "grad_norm": 0.9149585962295532, "learning_rate": 2.92212431531317e-05, "loss": 0.782, "step": 15710 }, { "epoch": 1.2478914048701106, "grad_norm": 0.7191526889801025, "learning_rate": 2.920801248974624e-05, "loss": 0.8407, "step": 15720 }, { "epoch": 1.24868522891901, "grad_norm": 0.7577335834503174, "learning_rate": 2.9194781826360775e-05, "loss": 0.8428, "step": 15730 }, { "epoch": 1.2494790529679096, "grad_norm": 0.8915782570838928, "learning_rate": 2.9181551162975313e-05, "loss": 0.7993, "step": 15740 }, { 
"epoch": 1.2502728770168092, "grad_norm": 0.9318637847900391, "learning_rate": 2.9168320499589852e-05, "loss": 0.7586, "step": 15750 }, { "epoch": 1.2510667010657088, "grad_norm": 0.7829806208610535, "learning_rate": 2.915508983620439e-05, "loss": 0.8027, "step": 15760 }, { "epoch": 1.2518605251146084, "grad_norm": 0.7112168073654175, "learning_rate": 2.9141859172818923e-05, "loss": 0.8171, "step": 15770 }, { "epoch": 1.252654349163508, "grad_norm": 0.935157060623169, "learning_rate": 2.9128628509433466e-05, "loss": 0.7728, "step": 15780 }, { "epoch": 1.2534481732124074, "grad_norm": 0.8060632348060608, "learning_rate": 2.9115397846048005e-05, "loss": 0.835, "step": 15790 }, { "epoch": 1.254241997261307, "grad_norm": 0.7491572499275208, "learning_rate": 2.9102167182662537e-05, "loss": 0.7851, "step": 15800 }, { "epoch": 1.2550358213102066, "grad_norm": 0.844757080078125, "learning_rate": 2.9088936519277076e-05, "loss": 0.7442, "step": 15810 }, { "epoch": 1.255829645359106, "grad_norm": 0.5904254913330078, "learning_rate": 2.9075705855891615e-05, "loss": 0.77, "step": 15820 }, { "epoch": 1.2566234694080056, "grad_norm": 0.7826739549636841, "learning_rate": 2.9062475192506157e-05, "loss": 0.8496, "step": 15830 }, { "epoch": 1.2574172934569052, "grad_norm": 0.7734588980674744, "learning_rate": 2.904924452912069e-05, "loss": 0.8367, "step": 15840 }, { "epoch": 1.2582111175058048, "grad_norm": 0.712218701839447, "learning_rate": 2.9036013865735228e-05, "loss": 0.7709, "step": 15850 }, { "epoch": 1.2590049415547044, "grad_norm": 0.7345622181892395, "learning_rate": 2.9022783202349767e-05, "loss": 0.8509, "step": 15860 }, { "epoch": 1.259798765603604, "grad_norm": 0.9125804901123047, "learning_rate": 2.9009552538964306e-05, "loss": 0.8151, "step": 15870 }, { "epoch": 1.2605925896525036, "grad_norm": 0.7302107214927673, "learning_rate": 2.899632187557884e-05, "loss": 0.8181, "step": 15880 }, { "epoch": 1.261386413701403, "grad_norm": 0.921686589717865, "learning_rate": 
2.898309121219338e-05, "loss": 0.8133, "step": 15890 }, { "epoch": 1.2621802377503026, "grad_norm": 0.9053015112876892, "learning_rate": 2.896986054880792e-05, "loss": 0.822, "step": 15900 }, { "epoch": 1.2629740617992022, "grad_norm": 0.8016523718833923, "learning_rate": 2.8956629885422455e-05, "loss": 0.8147, "step": 15910 }, { "epoch": 1.2637678858481018, "grad_norm": 0.9724611043930054, "learning_rate": 2.8943399222036994e-05, "loss": 0.8608, "step": 15920 }, { "epoch": 1.2645617098970012, "grad_norm": 0.9794032573699951, "learning_rate": 2.8930168558651533e-05, "loss": 0.7779, "step": 15930 }, { "epoch": 1.2653555339459008, "grad_norm": 0.6316156387329102, "learning_rate": 2.891693789526607e-05, "loss": 0.8147, "step": 15940 }, { "epoch": 1.2661493579948004, "grad_norm": 0.835666298866272, "learning_rate": 2.8903707231880607e-05, "loss": 0.8002, "step": 15950 }, { "epoch": 1.2669431820437, "grad_norm": 0.6945637464523315, "learning_rate": 2.8890476568495146e-05, "loss": 0.9178, "step": 15960 }, { "epoch": 1.2677370060925996, "grad_norm": 0.6459661722183228, "learning_rate": 2.8877245905109685e-05, "loss": 0.8289, "step": 15970 }, { "epoch": 1.2685308301414993, "grad_norm": 0.8627393841743469, "learning_rate": 2.8864015241724224e-05, "loss": 0.8809, "step": 15980 }, { "epoch": 1.2693246541903986, "grad_norm": 0.8296471834182739, "learning_rate": 2.8850784578338756e-05, "loss": 0.8025, "step": 15990 }, { "epoch": 1.2701184782392982, "grad_norm": 0.8422446846961975, "learning_rate": 2.88375539149533e-05, "loss": 0.8378, "step": 16000 }, { "epoch": 1.2709123022881978, "grad_norm": 0.8606308102607727, "learning_rate": 2.8824323251567837e-05, "loss": 0.8543, "step": 16010 }, { "epoch": 1.2717061263370975, "grad_norm": 0.9312155842781067, "learning_rate": 2.881109258818237e-05, "loss": 0.8339, "step": 16020 }, { "epoch": 1.2724999503859968, "grad_norm": 0.782143771648407, "learning_rate": 2.879786192479691e-05, "loss": 0.7537, "step": 16030 }, { "epoch": 
1.2732937744348964, "grad_norm": 0.7882851958274841, "learning_rate": 2.8784631261411447e-05, "loss": 0.8494, "step": 16040 }, { "epoch": 1.274087598483796, "grad_norm": 0.8223291039466858, "learning_rate": 2.877140059802599e-05, "loss": 0.788, "step": 16050 }, { "epoch": 1.2748814225326957, "grad_norm": 0.7009278535842896, "learning_rate": 2.8758169934640522e-05, "loss": 0.8456, "step": 16060 }, { "epoch": 1.2756752465815953, "grad_norm": 0.7133784294128418, "learning_rate": 2.874493927125506e-05, "loss": 0.7725, "step": 16070 }, { "epoch": 1.2764690706304949, "grad_norm": 0.7193998098373413, "learning_rate": 2.87317086078696e-05, "loss": 0.8465, "step": 16080 }, { "epoch": 1.2772628946793942, "grad_norm": 0.6710327863693237, "learning_rate": 2.871847794448414e-05, "loss": 0.8016, "step": 16090 }, { "epoch": 1.2780567187282939, "grad_norm": 0.8942824006080627, "learning_rate": 2.8705247281098674e-05, "loss": 0.8741, "step": 16100 }, { "epoch": 1.2788505427771935, "grad_norm": 0.891715407371521, "learning_rate": 2.8692016617713213e-05, "loss": 0.792, "step": 16110 }, { "epoch": 1.279644366826093, "grad_norm": 0.7947127819061279, "learning_rate": 2.8678785954327752e-05, "loss": 0.8215, "step": 16120 }, { "epoch": 1.2804381908749924, "grad_norm": 0.7644555568695068, "learning_rate": 2.866555529094229e-05, "loss": 0.7671, "step": 16130 }, { "epoch": 1.281232014923892, "grad_norm": 0.9834242463111877, "learning_rate": 2.8652324627556827e-05, "loss": 0.7838, "step": 16140 }, { "epoch": 1.2820258389727917, "grad_norm": 0.851807177066803, "learning_rate": 2.8639093964171365e-05, "loss": 0.9058, "step": 16150 }, { "epoch": 1.2828196630216913, "grad_norm": 0.7503984570503235, "learning_rate": 2.8625863300785904e-05, "loss": 0.783, "step": 16160 }, { "epoch": 1.2836134870705909, "grad_norm": 0.7916014790534973, "learning_rate": 2.861263263740044e-05, "loss": 0.888, "step": 16170 }, { "epoch": 1.2844073111194905, "grad_norm": 0.8114141225814819, "learning_rate": 
2.859940197401498e-05, "loss": 0.8192, "step": 16180 }, { "epoch": 1.2852011351683899, "grad_norm": 0.7763829827308655, "learning_rate": 2.8586171310629518e-05, "loss": 0.8159, "step": 16190 }, { "epoch": 1.2859949592172895, "grad_norm": 0.7763867378234863, "learning_rate": 2.8572940647244057e-05, "loss": 0.83, "step": 16200 }, { "epoch": 1.286788783266189, "grad_norm": 0.969960629940033, "learning_rate": 2.855970998385859e-05, "loss": 0.8671, "step": 16210 }, { "epoch": 1.2875826073150887, "grad_norm": 0.7875334620475769, "learning_rate": 2.854647932047313e-05, "loss": 0.7545, "step": 16220 }, { "epoch": 1.288376431363988, "grad_norm": 0.9192537665367126, "learning_rate": 2.853324865708767e-05, "loss": 0.8174, "step": 16230 }, { "epoch": 1.2891702554128877, "grad_norm": 0.8982329368591309, "learning_rate": 2.852001799370221e-05, "loss": 0.8373, "step": 16240 }, { "epoch": 1.2899640794617873, "grad_norm": 0.7992169857025146, "learning_rate": 2.850678733031674e-05, "loss": 0.8858, "step": 16250 }, { "epoch": 1.2907579035106869, "grad_norm": 0.7234670519828796, "learning_rate": 2.849355666693128e-05, "loss": 0.8548, "step": 16260 }, { "epoch": 1.2915517275595865, "grad_norm": 0.803986132144928, "learning_rate": 2.8480326003545822e-05, "loss": 0.843, "step": 16270 }, { "epoch": 1.292345551608486, "grad_norm": 0.6676992177963257, "learning_rate": 2.8467095340160355e-05, "loss": 0.8432, "step": 16280 }, { "epoch": 1.2931393756573855, "grad_norm": 0.7712359428405762, "learning_rate": 2.8453864676774893e-05, "loss": 0.7889, "step": 16290 }, { "epoch": 1.293933199706285, "grad_norm": 0.9185086488723755, "learning_rate": 2.8440634013389432e-05, "loss": 0.8816, "step": 16300 }, { "epoch": 1.2947270237551847, "grad_norm": 0.7256115078926086, "learning_rate": 2.842740335000397e-05, "loss": 0.8636, "step": 16310 }, { "epoch": 1.2955208478040843, "grad_norm": 0.9436436295509338, "learning_rate": 2.8414172686618507e-05, "loss": 0.8138, "step": 16320 }, { "epoch": 
1.2963146718529837, "grad_norm": 0.8887737393379211, "learning_rate": 2.8400942023233046e-05, "loss": 0.7817, "step": 16330 }, { "epoch": 1.2971084959018833, "grad_norm": 0.8305734395980835, "learning_rate": 2.8387711359847585e-05, "loss": 0.7737, "step": 16340 }, { "epoch": 1.2979023199507829, "grad_norm": 0.8457329273223877, "learning_rate": 2.8374480696462124e-05, "loss": 0.7743, "step": 16350 }, { "epoch": 1.2986961439996825, "grad_norm": 0.7918816208839417, "learning_rate": 2.836125003307666e-05, "loss": 0.8273, "step": 16360 }, { "epoch": 1.299489968048582, "grad_norm": 0.6060484647750854, "learning_rate": 2.8348019369691198e-05, "loss": 0.7823, "step": 16370 }, { "epoch": 1.3002837920974817, "grad_norm": 0.9133262038230896, "learning_rate": 2.8334788706305737e-05, "loss": 0.7733, "step": 16380 }, { "epoch": 1.3010776161463813, "grad_norm": 0.8485836386680603, "learning_rate": 2.8321558042920273e-05, "loss": 0.7581, "step": 16390 }, { "epoch": 1.3018714401952807, "grad_norm": 0.7613677978515625, "learning_rate": 2.830832737953481e-05, "loss": 0.8436, "step": 16400 }, { "epoch": 1.3026652642441803, "grad_norm": 0.8914282917976379, "learning_rate": 2.829509671614935e-05, "loss": 0.8283, "step": 16410 }, { "epoch": 1.3034590882930799, "grad_norm": 0.8663108944892883, "learning_rate": 2.828186605276389e-05, "loss": 0.7854, "step": 16420 }, { "epoch": 1.3042529123419793, "grad_norm": 0.8851757049560547, "learning_rate": 2.826863538937842e-05, "loss": 0.824, "step": 16430 }, { "epoch": 1.3050467363908789, "grad_norm": 0.8618358969688416, "learning_rate": 2.8255404725992964e-05, "loss": 0.8277, "step": 16440 }, { "epoch": 1.3058405604397785, "grad_norm": 0.8172771334648132, "learning_rate": 2.8242174062607503e-05, "loss": 0.8929, "step": 16450 }, { "epoch": 1.306634384488678, "grad_norm": 0.898499608039856, "learning_rate": 2.822894339922204e-05, "loss": 0.8683, "step": 16460 }, { "epoch": 1.3074282085375777, "grad_norm": 0.7734599113464355, "learning_rate": 
2.8215712735836574e-05, "loss": 0.8588, "step": 16470 }, { "epoch": 1.3082220325864773, "grad_norm": 0.8357298374176025, "learning_rate": 2.8202482072451113e-05, "loss": 0.7965, "step": 16480 }, { "epoch": 1.309015856635377, "grad_norm": 0.8872872591018677, "learning_rate": 2.8189251409065655e-05, "loss": 0.7839, "step": 16490 }, { "epoch": 1.3098096806842763, "grad_norm": 0.9397867321968079, "learning_rate": 2.8176020745680187e-05, "loss": 0.8508, "step": 16500 }, { "epoch": 1.3106035047331759, "grad_norm": 0.790989339351654, "learning_rate": 2.8162790082294726e-05, "loss": 0.8086, "step": 16510 }, { "epoch": 1.3113973287820755, "grad_norm": 0.747873842716217, "learning_rate": 2.8149559418909265e-05, "loss": 0.8536, "step": 16520 }, { "epoch": 1.312191152830975, "grad_norm": 0.8547879457473755, "learning_rate": 2.8136328755523804e-05, "loss": 0.7932, "step": 16530 }, { "epoch": 1.3129849768798745, "grad_norm": 0.8623278141021729, "learning_rate": 2.812309809213834e-05, "loss": 0.765, "step": 16540 }, { "epoch": 1.313778800928774, "grad_norm": 0.7240385413169861, "learning_rate": 2.810986742875288e-05, "loss": 0.8292, "step": 16550 }, { "epoch": 1.3145726249776737, "grad_norm": 0.9323850274085999, "learning_rate": 2.8096636765367417e-05, "loss": 0.76, "step": 16560 }, { "epoch": 1.3153664490265733, "grad_norm": 0.695256769657135, "learning_rate": 2.8083406101981956e-05, "loss": 0.8158, "step": 16570 }, { "epoch": 1.316160273075473, "grad_norm": 0.6848450899124146, "learning_rate": 2.8070175438596492e-05, "loss": 0.7873, "step": 16580 }, { "epoch": 1.3169540971243725, "grad_norm": 0.7952008247375488, "learning_rate": 2.805694477521103e-05, "loss": 0.8798, "step": 16590 }, { "epoch": 1.3177479211732719, "grad_norm": 0.8499898314476013, "learning_rate": 2.804371411182557e-05, "loss": 0.7545, "step": 16600 }, { "epoch": 1.3185417452221715, "grad_norm": 0.8064092993736267, "learning_rate": 2.8030483448440102e-05, "loss": 0.7962, "step": 16610 }, { "epoch": 
1.319335569271071, "grad_norm": 0.9621743559837341, "learning_rate": 2.8017252785054644e-05, "loss": 0.853, "step": 16620 }, { "epoch": 1.3201293933199707, "grad_norm": 0.7509051561355591, "learning_rate": 2.8004022121669183e-05, "loss": 0.8417, "step": 16630 }, { "epoch": 1.32092321736887, "grad_norm": 0.7417434453964233, "learning_rate": 2.7990791458283722e-05, "loss": 0.8704, "step": 16640 }, { "epoch": 1.3217170414177697, "grad_norm": 0.8676928877830505, "learning_rate": 2.7977560794898254e-05, "loss": 0.789, "step": 16650 }, { "epoch": 1.3225108654666693, "grad_norm": 0.907887876033783, "learning_rate": 2.7964330131512793e-05, "loss": 0.8448, "step": 16660 }, { "epoch": 1.323304689515569, "grad_norm": 0.7819181084632874, "learning_rate": 2.7951099468127335e-05, "loss": 0.8467, "step": 16670 }, { "epoch": 1.3240985135644685, "grad_norm": 0.7135961651802063, "learning_rate": 2.7937868804741874e-05, "loss": 0.8322, "step": 16680 }, { "epoch": 1.324892337613368, "grad_norm": 0.7459518313407898, "learning_rate": 2.7924638141356407e-05, "loss": 0.8954, "step": 16690 }, { "epoch": 1.3256861616622675, "grad_norm": 0.8480586409568787, "learning_rate": 2.7911407477970945e-05, "loss": 0.8421, "step": 16700 }, { "epoch": 1.326479985711167, "grad_norm": 0.7946953773498535, "learning_rate": 2.7898176814585484e-05, "loss": 0.7894, "step": 16710 }, { "epoch": 1.3272738097600667, "grad_norm": 0.9292244911193848, "learning_rate": 2.788494615120002e-05, "loss": 0.8569, "step": 16720 }, { "epoch": 1.3280676338089663, "grad_norm": 0.8674454689025879, "learning_rate": 2.787171548781456e-05, "loss": 0.8093, "step": 16730 }, { "epoch": 1.3288614578578657, "grad_norm": 0.9656598567962646, "learning_rate": 2.7858484824429098e-05, "loss": 0.7962, "step": 16740 }, { "epoch": 1.3296552819067653, "grad_norm": 0.7918367385864258, "learning_rate": 2.7845254161043637e-05, "loss": 0.8784, "step": 16750 }, { "epoch": 1.330449105955665, "grad_norm": 0.8442765474319458, "learning_rate": 
2.7832023497658172e-05, "loss": 0.8143, "step": 16760 }, { "epoch": 1.3312429300045645, "grad_norm": 0.8403950333595276, "learning_rate": 2.781879283427271e-05, "loss": 0.8452, "step": 16770 }, { "epoch": 1.3320367540534641, "grad_norm": 0.8802147507667542, "learning_rate": 2.780556217088725e-05, "loss": 0.8828, "step": 16780 }, { "epoch": 1.3328305781023637, "grad_norm": 0.8321970105171204, "learning_rate": 2.779233150750179e-05, "loss": 0.8389, "step": 16790 }, { "epoch": 1.333624402151263, "grad_norm": 0.814605176448822, "learning_rate": 2.7779100844116325e-05, "loss": 0.8263, "step": 16800 }, { "epoch": 1.3344182262001627, "grad_norm": 0.8342418670654297, "learning_rate": 2.7765870180730863e-05, "loss": 0.7823, "step": 16810 }, { "epoch": 1.3352120502490623, "grad_norm": 0.8523891568183899, "learning_rate": 2.7752639517345402e-05, "loss": 0.8527, "step": 16820 }, { "epoch": 1.336005874297962, "grad_norm": 0.9738366603851318, "learning_rate": 2.7739408853959935e-05, "loss": 0.8452, "step": 16830 }, { "epoch": 1.3367996983468613, "grad_norm": 0.6618583798408508, "learning_rate": 2.7726178190574477e-05, "loss": 0.9022, "step": 16840 }, { "epoch": 1.337593522395761, "grad_norm": 0.6916811466217041, "learning_rate": 2.7712947527189016e-05, "loss": 0.8484, "step": 16850 }, { "epoch": 1.3383873464446605, "grad_norm": 0.8240322470664978, "learning_rate": 2.7699716863803555e-05, "loss": 0.7851, "step": 16860 }, { "epoch": 1.3391811704935601, "grad_norm": 0.8770764470100403, "learning_rate": 2.7686486200418087e-05, "loss": 0.8042, "step": 16870 }, { "epoch": 1.3399749945424597, "grad_norm": 0.8255194425582886, "learning_rate": 2.7673255537032626e-05, "loss": 0.8021, "step": 16880 }, { "epoch": 1.3407688185913593, "grad_norm": 0.822505533695221, "learning_rate": 2.7660024873647168e-05, "loss": 0.7602, "step": 16890 }, { "epoch": 1.3415626426402587, "grad_norm": 1.0661057233810425, "learning_rate": 2.7646794210261707e-05, "loss": 0.8531, "step": 16900 }, { "epoch": 
1.3423564666891583, "grad_norm": 0.8882428407669067, "learning_rate": 2.763356354687624e-05, "loss": 0.8681, "step": 16910 }, { "epoch": 1.343150290738058, "grad_norm": 0.8333000540733337, "learning_rate": 2.7620332883490778e-05, "loss": 0.8016, "step": 16920 }, { "epoch": 1.3439441147869575, "grad_norm": 0.7466040849685669, "learning_rate": 2.7607102220105317e-05, "loss": 0.8253, "step": 16930 }, { "epoch": 1.344737938835857, "grad_norm": 0.9921952486038208, "learning_rate": 2.759387155671986e-05, "loss": 0.8058, "step": 16940 }, { "epoch": 1.3455317628847565, "grad_norm": 0.7671175599098206, "learning_rate": 2.758064089333439e-05, "loss": 0.8876, "step": 16950 }, { "epoch": 1.3463255869336561, "grad_norm": 0.5953800678253174, "learning_rate": 2.756741022994893e-05, "loss": 0.8096, "step": 16960 }, { "epoch": 1.3471194109825557, "grad_norm": 0.8293882012367249, "learning_rate": 2.755417956656347e-05, "loss": 0.7875, "step": 16970 }, { "epoch": 1.3479132350314553, "grad_norm": 0.7418809533119202, "learning_rate": 2.7540948903178005e-05, "loss": 0.7676, "step": 16980 }, { "epoch": 1.348707059080355, "grad_norm": 0.8036984205245972, "learning_rate": 2.7527718239792544e-05, "loss": 0.8258, "step": 16990 }, { "epoch": 1.3495008831292545, "grad_norm": 0.9726935625076294, "learning_rate": 2.7514487576407083e-05, "loss": 0.8252, "step": 17000 }, { "epoch": 1.350294707178154, "grad_norm": 0.9548497796058655, "learning_rate": 2.750125691302162e-05, "loss": 0.7797, "step": 17010 }, { "epoch": 1.3510885312270535, "grad_norm": 0.84930819272995, "learning_rate": 2.7488026249636157e-05, "loss": 0.8566, "step": 17020 }, { "epoch": 1.3518823552759531, "grad_norm": 0.8506318926811218, "learning_rate": 2.7474795586250696e-05, "loss": 0.8499, "step": 17030 }, { "epoch": 1.3526761793248525, "grad_norm": 0.7205038070678711, "learning_rate": 2.7461564922865235e-05, "loss": 0.8265, "step": 17040 }, { "epoch": 1.3534700033737521, "grad_norm": 0.8020601868629456, "learning_rate": 
2.7448334259479774e-05, "loss": 0.7768, "step": 17050 }, { "epoch": 1.3542638274226517, "grad_norm": 1.0212651491165161, "learning_rate": 2.743510359609431e-05, "loss": 0.8562, "step": 17060 }, { "epoch": 1.3550576514715513, "grad_norm": 0.9957228302955627, "learning_rate": 2.742187293270885e-05, "loss": 0.8412, "step": 17070 }, { "epoch": 1.355851475520451, "grad_norm": 0.8726456165313721, "learning_rate": 2.7408642269323387e-05, "loss": 0.8624, "step": 17080 }, { "epoch": 1.3566452995693505, "grad_norm": 0.9366957545280457, "learning_rate": 2.739541160593792e-05, "loss": 0.8055, "step": 17090 }, { "epoch": 1.3574391236182501, "grad_norm": 0.7526766657829285, "learning_rate": 2.738218094255246e-05, "loss": 0.901, "step": 17100 }, { "epoch": 1.3582329476671495, "grad_norm": 0.8756746053695679, "learning_rate": 2.7368950279167e-05, "loss": 0.8179, "step": 17110 }, { "epoch": 1.3590267717160491, "grad_norm": 0.7645390629768372, "learning_rate": 2.735571961578154e-05, "loss": 0.8382, "step": 17120 }, { "epoch": 1.3598205957649487, "grad_norm": 0.8134385943412781, "learning_rate": 2.7342488952396072e-05, "loss": 0.8029, "step": 17130 }, { "epoch": 1.3606144198138483, "grad_norm": 0.8174660205841064, "learning_rate": 2.732925828901061e-05, "loss": 0.8108, "step": 17140 }, { "epoch": 1.3614082438627477, "grad_norm": 0.8981174230575562, "learning_rate": 2.731602762562515e-05, "loss": 0.8636, "step": 17150 }, { "epoch": 1.3622020679116473, "grad_norm": 0.7757834792137146, "learning_rate": 2.7302796962239692e-05, "loss": 0.8198, "step": 17160 }, { "epoch": 1.362995891960547, "grad_norm": 0.850306510925293, "learning_rate": 2.7289566298854224e-05, "loss": 0.8118, "step": 17170 }, { "epoch": 1.3637897160094465, "grad_norm": 0.7254217863082886, "learning_rate": 2.7276335635468763e-05, "loss": 0.827, "step": 17180 }, { "epoch": 1.3645835400583461, "grad_norm": 1.0736218690872192, "learning_rate": 2.7263104972083302e-05, "loss": 0.8839, "step": 17190 }, { "epoch": 
1.3653773641072458, "grad_norm": 0.8943197131156921, "learning_rate": 2.7249874308697838e-05, "loss": 0.7996, "step": 17200 }, { "epoch": 1.3661711881561451, "grad_norm": 0.9427874684333801, "learning_rate": 2.7236643645312377e-05, "loss": 0.8843, "step": 17210 }, { "epoch": 1.3669650122050447, "grad_norm": 1.0540951490402222, "learning_rate": 2.7223412981926915e-05, "loss": 0.8258, "step": 17220 }, { "epoch": 1.3677588362539443, "grad_norm": 0.827373206615448, "learning_rate": 2.7210182318541454e-05, "loss": 0.8492, "step": 17230 }, { "epoch": 1.368552660302844, "grad_norm": 0.7452979683876038, "learning_rate": 2.719695165515599e-05, "loss": 0.8457, "step": 17240 }, { "epoch": 1.3693464843517433, "grad_norm": 1.0606896877288818, "learning_rate": 2.718372099177053e-05, "loss": 0.8411, "step": 17250 }, { "epoch": 1.370140308400643, "grad_norm": 0.6817187070846558, "learning_rate": 2.7170490328385068e-05, "loss": 0.7858, "step": 17260 }, { "epoch": 1.3709341324495425, "grad_norm": 0.9781541228294373, "learning_rate": 2.7157259664999607e-05, "loss": 0.8377, "step": 17270 }, { "epoch": 1.3717279564984421, "grad_norm": 0.787799060344696, "learning_rate": 2.7144029001614142e-05, "loss": 0.8435, "step": 17280 }, { "epoch": 1.3725217805473418, "grad_norm": 0.7571372389793396, "learning_rate": 2.713079833822868e-05, "loss": 0.8434, "step": 17290 }, { "epoch": 1.3733156045962414, "grad_norm": 0.6738792061805725, "learning_rate": 2.711756767484322e-05, "loss": 0.8412, "step": 17300 }, { "epoch": 1.3741094286451407, "grad_norm": 1.037701964378357, "learning_rate": 2.7104337011457752e-05, "loss": 0.831, "step": 17310 }, { "epoch": 1.3749032526940403, "grad_norm": 0.7781122326850891, "learning_rate": 2.709110634807229e-05, "loss": 0.8592, "step": 17320 }, { "epoch": 1.37569707674294, "grad_norm": 0.9618025422096252, "learning_rate": 2.7077875684686833e-05, "loss": 0.808, "step": 17330 }, { "epoch": 1.3764909007918396, "grad_norm": 0.768357515335083, "learning_rate": 
2.7064645021301372e-05, "loss": 0.8701, "step": 17340 }, { "epoch": 1.377284724840739, "grad_norm": 0.8587696552276611, "learning_rate": 2.7051414357915905e-05, "loss": 0.8703, "step": 17350 }, { "epoch": 1.3780785488896385, "grad_norm": 0.9739010334014893, "learning_rate": 2.7038183694530443e-05, "loss": 0.7343, "step": 17360 }, { "epoch": 1.3788723729385381, "grad_norm": 0.7748016119003296, "learning_rate": 2.7024953031144982e-05, "loss": 0.7732, "step": 17370 }, { "epoch": 1.3796661969874378, "grad_norm": 0.7629042863845825, "learning_rate": 2.7011722367759525e-05, "loss": 0.7921, "step": 17380 }, { "epoch": 1.3804600210363374, "grad_norm": 0.9152623414993286, "learning_rate": 2.6998491704374057e-05, "loss": 0.8426, "step": 17390 }, { "epoch": 1.381253845085237, "grad_norm": 0.8051658868789673, "learning_rate": 2.6985261040988596e-05, "loss": 0.8433, "step": 17400 }, { "epoch": 1.3820476691341363, "grad_norm": 0.7106397151947021, "learning_rate": 2.6972030377603135e-05, "loss": 0.8297, "step": 17410 }, { "epoch": 1.382841493183036, "grad_norm": 0.7966054677963257, "learning_rate": 2.695879971421767e-05, "loss": 0.9135, "step": 17420 }, { "epoch": 1.3836353172319356, "grad_norm": 0.8528908491134644, "learning_rate": 2.694556905083221e-05, "loss": 0.7658, "step": 17430 }, { "epoch": 1.3844291412808352, "grad_norm": 0.7444737553596497, "learning_rate": 2.6932338387446748e-05, "loss": 0.7901, "step": 17440 }, { "epoch": 1.3852229653297345, "grad_norm": 0.6893477439880371, "learning_rate": 2.6919107724061287e-05, "loss": 0.8823, "step": 17450 }, { "epoch": 1.3860167893786342, "grad_norm": 0.8451089859008789, "learning_rate": 2.6905877060675823e-05, "loss": 0.8699, "step": 17460 }, { "epoch": 1.3868106134275338, "grad_norm": 0.7939800024032593, "learning_rate": 2.689264639729036e-05, "loss": 0.8529, "step": 17470 }, { "epoch": 1.3876044374764334, "grad_norm": 0.7365521788597107, "learning_rate": 2.68794157339049e-05, "loss": 0.8272, "step": 17480 }, { "epoch": 
1.388398261525333, "grad_norm": 0.8047791123390198, "learning_rate": 2.686618507051944e-05, "loss": 0.7692, "step": 17490 }, { "epoch": 1.3891920855742326, "grad_norm": 0.7910019755363464, "learning_rate": 2.6852954407133975e-05, "loss": 0.8384, "step": 17500 }, { "epoch": 1.389985909623132, "grad_norm": 0.8277596235275269, "learning_rate": 2.6839723743748514e-05, "loss": 0.7949, "step": 17510 }, { "epoch": 1.3907797336720316, "grad_norm": 1.008076786994934, "learning_rate": 2.6826493080363053e-05, "loss": 0.8341, "step": 17520 }, { "epoch": 1.3915735577209312, "grad_norm": 0.7078754305839539, "learning_rate": 2.6813262416977585e-05, "loss": 0.8233, "step": 17530 }, { "epoch": 1.3923673817698308, "grad_norm": 0.8263536095619202, "learning_rate": 2.6800031753592124e-05, "loss": 0.8366, "step": 17540 }, { "epoch": 1.3931612058187302, "grad_norm": 0.8170019388198853, "learning_rate": 2.6786801090206666e-05, "loss": 0.83, "step": 17550 }, { "epoch": 1.3939550298676298, "grad_norm": 0.8896247744560242, "learning_rate": 2.6773570426821205e-05, "loss": 0.8493, "step": 17560 }, { "epoch": 1.3947488539165294, "grad_norm": 0.6989545822143555, "learning_rate": 2.6760339763435737e-05, "loss": 0.8151, "step": 17570 }, { "epoch": 1.395542677965429, "grad_norm": 0.956941545009613, "learning_rate": 2.6747109100050276e-05, "loss": 0.8058, "step": 17580 }, { "epoch": 1.3963365020143286, "grad_norm": 0.8271597027778625, "learning_rate": 2.6733878436664815e-05, "loss": 0.7853, "step": 17590 }, { "epoch": 1.3971303260632282, "grad_norm": 0.7426630258560181, "learning_rate": 2.6720647773279357e-05, "loss": 0.7689, "step": 17600 }, { "epoch": 1.3979241501121278, "grad_norm": 0.8329174518585205, "learning_rate": 2.670741710989389e-05, "loss": 0.8266, "step": 17610 }, { "epoch": 1.3987179741610272, "grad_norm": 0.918170154094696, "learning_rate": 2.669418644650843e-05, "loss": 0.8444, "step": 17620 }, { "epoch": 1.3995117982099268, "grad_norm": 0.6863502264022827, "learning_rate": 
2.6680955783122967e-05, "loss": 0.8654, "step": 17630 }, { "epoch": 1.4003056222588264, "grad_norm": 0.8955294489860535, "learning_rate": 2.6667725119737503e-05, "loss": 0.8649, "step": 17640 }, { "epoch": 1.4010994463077258, "grad_norm": 0.8615051507949829, "learning_rate": 2.6654494456352042e-05, "loss": 0.7899, "step": 17650 }, { "epoch": 1.4018932703566254, "grad_norm": 0.9451674222946167, "learning_rate": 2.664126379296658e-05, "loss": 0.8824, "step": 17660 }, { "epoch": 1.402687094405525, "grad_norm": 0.8407998085021973, "learning_rate": 2.662803312958112e-05, "loss": 0.7696, "step": 17670 }, { "epoch": 1.4034809184544246, "grad_norm": 0.8204694390296936, "learning_rate": 2.6614802466195655e-05, "loss": 0.7892, "step": 17680 }, { "epoch": 1.4042747425033242, "grad_norm": 0.7346405982971191, "learning_rate": 2.6601571802810194e-05, "loss": 0.794, "step": 17690 }, { "epoch": 1.4050685665522238, "grad_norm": 0.7684462070465088, "learning_rate": 2.6588341139424733e-05, "loss": 0.8656, "step": 17700 }, { "epoch": 1.4058623906011234, "grad_norm": 0.7464351058006287, "learning_rate": 2.6575110476039272e-05, "loss": 0.8215, "step": 17710 }, { "epoch": 1.4066562146500228, "grad_norm": 0.8222455382347107, "learning_rate": 2.6561879812653808e-05, "loss": 0.835, "step": 17720 }, { "epoch": 1.4074500386989224, "grad_norm": 0.9029430150985718, "learning_rate": 2.6548649149268347e-05, "loss": 0.8038, "step": 17730 }, { "epoch": 1.408243862747822, "grad_norm": 0.6402363777160645, "learning_rate": 2.6535418485882885e-05, "loss": 0.8557, "step": 17740 }, { "epoch": 1.4090376867967216, "grad_norm": 0.7686623930931091, "learning_rate": 2.6522187822497424e-05, "loss": 0.7969, "step": 17750 }, { "epoch": 1.409831510845621, "grad_norm": 0.8864317536354065, "learning_rate": 2.6508957159111957e-05, "loss": 0.8525, "step": 17760 }, { "epoch": 1.4106253348945206, "grad_norm": 0.8594456911087036, "learning_rate": 2.64957264957265e-05, "loss": 0.8271, "step": 17770 }, { "epoch": 
1.4114191589434202, "grad_norm": 0.8445634841918945, "learning_rate": 2.6482495832341038e-05, "loss": 0.761, "step": 17780 }, { "epoch": 1.4122129829923198, "grad_norm": 0.8919235467910767, "learning_rate": 2.646926516895557e-05, "loss": 0.8448, "step": 17790 }, { "epoch": 1.4130068070412194, "grad_norm": 0.9002474546432495, "learning_rate": 2.645603450557011e-05, "loss": 0.873, "step": 17800 }, { "epoch": 1.413800631090119, "grad_norm": 0.8635540008544922, "learning_rate": 2.6442803842184648e-05, "loss": 0.8874, "step": 17810 }, { "epoch": 1.4145944551390184, "grad_norm": 0.8504475355148315, "learning_rate": 2.642957317879919e-05, "loss": 0.8296, "step": 17820 }, { "epoch": 1.415388279187918, "grad_norm": 0.8239719867706299, "learning_rate": 2.6416342515413722e-05, "loss": 0.7893, "step": 17830 }, { "epoch": 1.4161821032368176, "grad_norm": 0.7520464062690735, "learning_rate": 2.640311185202826e-05, "loss": 0.8056, "step": 17840 }, { "epoch": 1.4169759272857172, "grad_norm": 0.8690072894096375, "learning_rate": 2.63898811886428e-05, "loss": 0.8513, "step": 17850 }, { "epoch": 1.4177697513346166, "grad_norm": 0.6335851550102234, "learning_rate": 2.637665052525734e-05, "loss": 0.8591, "step": 17860 }, { "epoch": 1.4185635753835162, "grad_norm": 0.9497599005699158, "learning_rate": 2.6363419861871875e-05, "loss": 0.8655, "step": 17870 }, { "epoch": 1.4193573994324158, "grad_norm": 0.7551174163818359, "learning_rate": 2.6350189198486413e-05, "loss": 0.7851, "step": 17880 }, { "epoch": 1.4201512234813154, "grad_norm": 0.9593257904052734, "learning_rate": 2.6336958535100952e-05, "loss": 0.8049, "step": 17890 }, { "epoch": 1.420945047530215, "grad_norm": 0.8998299837112427, "learning_rate": 2.6323727871715488e-05, "loss": 0.8266, "step": 17900 }, { "epoch": 1.4217388715791146, "grad_norm": 0.8189812302589417, "learning_rate": 2.6310497208330027e-05, "loss": 0.8333, "step": 17910 }, { "epoch": 1.422532695628014, "grad_norm": 0.7574518322944641, "learning_rate": 
2.6297266544944566e-05, "loss": 0.8794, "step": 17920 }, { "epoch": 1.4233265196769136, "grad_norm": 0.815168559551239, "learning_rate": 2.6284035881559105e-05, "loss": 0.8662, "step": 17930 }, { "epoch": 1.4241203437258132, "grad_norm": 0.7286579012870789, "learning_rate": 2.627080521817364e-05, "loss": 0.8239, "step": 17940 }, { "epoch": 1.4249141677747128, "grad_norm": 0.8906771540641785, "learning_rate": 2.625757455478818e-05, "loss": 0.8331, "step": 17950 }, { "epoch": 1.4257079918236122, "grad_norm": 0.7556918859481812, "learning_rate": 2.6244343891402718e-05, "loss": 0.8623, "step": 17960 }, { "epoch": 1.4265018158725118, "grad_norm": 0.8590714931488037, "learning_rate": 2.6231113228017257e-05, "loss": 0.7373, "step": 17970 }, { "epoch": 1.4272956399214114, "grad_norm": 0.827892541885376, "learning_rate": 2.621788256463179e-05, "loss": 0.7714, "step": 17980 }, { "epoch": 1.428089463970311, "grad_norm": 0.7814650535583496, "learning_rate": 2.620465190124633e-05, "loss": 0.8154, "step": 17990 }, { "epoch": 1.4288832880192106, "grad_norm": 0.8106557130813599, "learning_rate": 2.619142123786087e-05, "loss": 0.7834, "step": 18000 }, { "epoch": 1.4296771120681102, "grad_norm": 0.7437555193901062, "learning_rate": 2.6178190574475403e-05, "loss": 0.7773, "step": 18010 }, { "epoch": 1.4304709361170096, "grad_norm": 0.9069659113883972, "learning_rate": 2.616495991108994e-05, "loss": 0.7767, "step": 18020 }, { "epoch": 1.4312647601659092, "grad_norm": 0.8167043924331665, "learning_rate": 2.615172924770448e-05, "loss": 0.8079, "step": 18030 }, { "epoch": 1.4320585842148088, "grad_norm": 0.8131256103515625, "learning_rate": 2.6138498584319023e-05, "loss": 0.806, "step": 18040 }, { "epoch": 1.4328524082637084, "grad_norm": 0.8274345993995667, "learning_rate": 2.6125267920933555e-05, "loss": 0.8135, "step": 18050 }, { "epoch": 1.4336462323126078, "grad_norm": 0.9551146626472473, "learning_rate": 2.6112037257548094e-05, "loss": 0.852, "step": 18060 }, { "epoch": 
1.4344400563615074, "grad_norm": 0.8088938593864441, "learning_rate": 2.6098806594162633e-05, "loss": 0.7876, "step": 18070 }, { "epoch": 1.435233880410407, "grad_norm": 0.7884169816970825, "learning_rate": 2.608557593077717e-05, "loss": 0.7942, "step": 18080 }, { "epoch": 1.4360277044593066, "grad_norm": 0.8303605914115906, "learning_rate": 2.6072345267391707e-05, "loss": 0.8792, "step": 18090 }, { "epoch": 1.4368215285082062, "grad_norm": 0.75099778175354, "learning_rate": 2.6059114604006246e-05, "loss": 0.7786, "step": 18100 }, { "epoch": 1.4376153525571058, "grad_norm": 0.7984867095947266, "learning_rate": 2.6045883940620785e-05, "loss": 0.786, "step": 18110 }, { "epoch": 1.4384091766060052, "grad_norm": 0.7262865900993347, "learning_rate": 2.603265327723532e-05, "loss": 0.8207, "step": 18120 }, { "epoch": 1.4392030006549048, "grad_norm": 0.8592838048934937, "learning_rate": 2.601942261384986e-05, "loss": 0.8127, "step": 18130 }, { "epoch": 1.4399968247038044, "grad_norm": 0.6876896619796753, "learning_rate": 2.60061919504644e-05, "loss": 0.8638, "step": 18140 }, { "epoch": 1.440790648752704, "grad_norm": 0.9342442154884338, "learning_rate": 2.5992961287078937e-05, "loss": 0.8032, "step": 18150 }, { "epoch": 1.4415844728016034, "grad_norm": 0.9930727481842041, "learning_rate": 2.597973062369347e-05, "loss": 0.7582, "step": 18160 }, { "epoch": 1.442378296850503, "grad_norm": 0.8912200331687927, "learning_rate": 2.5966499960308012e-05, "loss": 0.8152, "step": 18170 }, { "epoch": 1.4431721208994026, "grad_norm": 0.7123863697052002, "learning_rate": 2.595326929692255e-05, "loss": 0.7756, "step": 18180 }, { "epoch": 1.4439659449483022, "grad_norm": 0.8635473251342773, "learning_rate": 2.594003863353709e-05, "loss": 0.8072, "step": 18190 }, { "epoch": 1.4447597689972018, "grad_norm": 0.7606240510940552, "learning_rate": 2.5926807970151622e-05, "loss": 0.8457, "step": 18200 }, { "epoch": 1.4455535930461014, "grad_norm": 0.7793667912483215, "learning_rate": 
2.591357730676616e-05, "loss": 0.8179, "step": 18210 }, { "epoch": 1.446347417095001, "grad_norm": 0.6772461533546448, "learning_rate": 2.5900346643380703e-05, "loss": 0.8746, "step": 18220 }, { "epoch": 1.4471412411439004, "grad_norm": 0.9796277284622192, "learning_rate": 2.5887115979995235e-05, "loss": 0.8008, "step": 18230 }, { "epoch": 1.4479350651928, "grad_norm": 0.8049467206001282, "learning_rate": 2.5873885316609774e-05, "loss": 0.7848, "step": 18240 }, { "epoch": 1.4487288892416996, "grad_norm": 0.8529013991355896, "learning_rate": 2.5860654653224313e-05, "loss": 0.7956, "step": 18250 }, { "epoch": 1.449522713290599, "grad_norm": 1.007569432258606, "learning_rate": 2.5847423989838852e-05, "loss": 0.7736, "step": 18260 }, { "epoch": 1.4503165373394986, "grad_norm": 0.8529340624809265, "learning_rate": 2.5834193326453388e-05, "loss": 0.8122, "step": 18270 }, { "epoch": 1.4511103613883982, "grad_norm": 0.8169769644737244, "learning_rate": 2.5820962663067927e-05, "loss": 0.841, "step": 18280 }, { "epoch": 1.4519041854372978, "grad_norm": 0.7556354999542236, "learning_rate": 2.5807731999682465e-05, "loss": 0.8136, "step": 18290 }, { "epoch": 1.4526980094861974, "grad_norm": 0.867939293384552, "learning_rate": 2.5794501336297004e-05, "loss": 0.8153, "step": 18300 }, { "epoch": 1.453491833535097, "grad_norm": 0.9463819861412048, "learning_rate": 2.578127067291154e-05, "loss": 0.8175, "step": 18310 }, { "epoch": 1.4542856575839966, "grad_norm": 0.9238904714584351, "learning_rate": 2.576804000952608e-05, "loss": 0.7291, "step": 18320 }, { "epoch": 1.455079481632896, "grad_norm": 0.8403037786483765, "learning_rate": 2.5754809346140618e-05, "loss": 0.864, "step": 18330 }, { "epoch": 1.4558733056817956, "grad_norm": 0.8228583931922913, "learning_rate": 2.5741578682755153e-05, "loss": 0.8357, "step": 18340 }, { "epoch": 1.4566671297306952, "grad_norm": 0.730689287185669, "learning_rate": 2.5728348019369692e-05, "loss": 0.8575, "step": 18350 }, { "epoch": 
1.4574609537795946, "grad_norm": 0.7443234920501709, "learning_rate": 2.571511735598423e-05, "loss": 0.8414, "step": 18360 }, { "epoch": 1.4582547778284942, "grad_norm": 0.9750908017158508, "learning_rate": 2.570188669259877e-05, "loss": 0.8359, "step": 18370 }, { "epoch": 1.4590486018773938, "grad_norm": 0.9453748464584351, "learning_rate": 2.5688656029213302e-05, "loss": 0.7975, "step": 18380 }, { "epoch": 1.4598424259262934, "grad_norm": 0.826844334602356, "learning_rate": 2.5675425365827845e-05, "loss": 0.7952, "step": 18390 }, { "epoch": 1.460636249975193, "grad_norm": 0.937788724899292, "learning_rate": 2.5662194702442383e-05, "loss": 0.8504, "step": 18400 }, { "epoch": 1.4614300740240926, "grad_norm": 0.9471660852432251, "learning_rate": 2.5648964039056922e-05, "loss": 0.8499, "step": 18410 }, { "epoch": 1.4622238980729922, "grad_norm": 0.8096206784248352, "learning_rate": 2.5635733375671455e-05, "loss": 0.8297, "step": 18420 }, { "epoch": 1.4630177221218916, "grad_norm": 1.0029072761535645, "learning_rate": 2.5622502712285993e-05, "loss": 0.8496, "step": 18430 }, { "epoch": 1.4638115461707912, "grad_norm": 0.9172188639640808, "learning_rate": 2.5609272048900536e-05, "loss": 0.823, "step": 18440 }, { "epoch": 1.4646053702196908, "grad_norm": 0.8738644123077393, "learning_rate": 2.5596041385515068e-05, "loss": 0.8175, "step": 18450 }, { "epoch": 1.4653991942685904, "grad_norm": 0.7507184743881226, "learning_rate": 2.5582810722129607e-05, "loss": 0.7805, "step": 18460 }, { "epoch": 1.4661930183174898, "grad_norm": 0.8251667022705078, "learning_rate": 2.5569580058744146e-05, "loss": 0.8151, "step": 18470 }, { "epoch": 1.4669868423663894, "grad_norm": 0.9038745164871216, "learning_rate": 2.5556349395358685e-05, "loss": 0.8222, "step": 18480 }, { "epoch": 1.467780666415289, "grad_norm": 0.7549251914024353, "learning_rate": 2.554311873197322e-05, "loss": 0.7928, "step": 18490 }, { "epoch": 1.4685744904641886, "grad_norm": 0.8208056688308716, "learning_rate": 
2.552988806858776e-05, "loss": 0.8973, "step": 18500 }, { "epoch": 1.4693683145130882, "grad_norm": 1.0672497749328613, "learning_rate": 2.5516657405202298e-05, "loss": 0.8407, "step": 18510 }, { "epoch": 1.4701621385619879, "grad_norm": 0.8073993921279907, "learning_rate": 2.5503426741816837e-05, "loss": 0.8038, "step": 18520 }, { "epoch": 1.4709559626108872, "grad_norm": 0.9057318568229675, "learning_rate": 2.5490196078431373e-05, "loss": 0.829, "step": 18530 }, { "epoch": 1.4717497866597868, "grad_norm": 0.7318175435066223, "learning_rate": 2.547696541504591e-05, "loss": 0.8253, "step": 18540 }, { "epoch": 1.4725436107086864, "grad_norm": 0.819756805896759, "learning_rate": 2.546373475166045e-05, "loss": 0.8265, "step": 18550 }, { "epoch": 1.473337434757586, "grad_norm": 0.852188766002655, "learning_rate": 2.5450504088274986e-05, "loss": 0.7702, "step": 18560 }, { "epoch": 1.4741312588064854, "grad_norm": 0.8986577391624451, "learning_rate": 2.5437273424889525e-05, "loss": 0.8488, "step": 18570 }, { "epoch": 1.474925082855385, "grad_norm": 0.6667974591255188, "learning_rate": 2.5424042761504064e-05, "loss": 0.8424, "step": 18580 }, { "epoch": 1.4757189069042846, "grad_norm": 0.7348021864891052, "learning_rate": 2.5410812098118603e-05, "loss": 0.7877, "step": 18590 }, { "epoch": 1.4765127309531842, "grad_norm": 0.7171440720558167, "learning_rate": 2.5397581434733135e-05, "loss": 0.8142, "step": 18600 }, { "epoch": 1.4773065550020839, "grad_norm": 0.7128421068191528, "learning_rate": 2.5384350771347677e-05, "loss": 0.8373, "step": 18610 }, { "epoch": 1.4781003790509835, "grad_norm": 0.7781753540039062, "learning_rate": 2.5371120107962216e-05, "loss": 0.7994, "step": 18620 }, { "epoch": 1.4788942030998828, "grad_norm": 0.8336313366889954, "learning_rate": 2.5357889444576755e-05, "loss": 0.8565, "step": 18630 }, { "epoch": 1.4796880271487824, "grad_norm": 0.9292792677879333, "learning_rate": 2.5344658781191287e-05, "loss": 0.7621, "step": 18640 }, { "epoch": 
1.480481851197682, "grad_norm": 0.81519615650177, "learning_rate": 2.5331428117805826e-05, "loss": 0.8161, "step": 18650 }, { "epoch": 1.4812756752465817, "grad_norm": 0.7871677279472351, "learning_rate": 2.531819745442037e-05, "loss": 0.8304, "step": 18660 }, { "epoch": 1.482069499295481, "grad_norm": 0.8033188581466675, "learning_rate": 2.5304966791034907e-05, "loss": 0.859, "step": 18670 }, { "epoch": 1.4828633233443806, "grad_norm": 0.8543998003005981, "learning_rate": 2.529173612764944e-05, "loss": 0.8521, "step": 18680 }, { "epoch": 1.4836571473932803, "grad_norm": 0.7945504188537598, "learning_rate": 2.527850546426398e-05, "loss": 0.8477, "step": 18690 }, { "epoch": 1.4844509714421799, "grad_norm": 0.9741247892379761, "learning_rate": 2.5265274800878517e-05, "loss": 0.8306, "step": 18700 }, { "epoch": 1.4852447954910795, "grad_norm": 1.0190260410308838, "learning_rate": 2.5252044137493053e-05, "loss": 0.7352, "step": 18710 }, { "epoch": 1.486038619539979, "grad_norm": 0.8218889832496643, "learning_rate": 2.5238813474107592e-05, "loss": 0.8306, "step": 18720 }, { "epoch": 1.4868324435888784, "grad_norm": 0.9101859331130981, "learning_rate": 2.522558281072213e-05, "loss": 0.8644, "step": 18730 }, { "epoch": 1.487626267637778, "grad_norm": 0.8741206526756287, "learning_rate": 2.521235214733667e-05, "loss": 0.811, "step": 18740 }, { "epoch": 1.4884200916866777, "grad_norm": 0.7072809338569641, "learning_rate": 2.5199121483951205e-05, "loss": 0.8437, "step": 18750 }, { "epoch": 1.4892139157355773, "grad_norm": 0.8772575855255127, "learning_rate": 2.5185890820565744e-05, "loss": 0.7746, "step": 18760 }, { "epoch": 1.4900077397844766, "grad_norm": 0.8278160095214844, "learning_rate": 2.5172660157180283e-05, "loss": 0.8073, "step": 18770 }, { "epoch": 1.4908015638333763, "grad_norm": 0.8926985859870911, "learning_rate": 2.5159429493794822e-05, "loss": 0.8103, "step": 18780 }, { "epoch": 1.4915953878822759, "grad_norm": 1.0303518772125244, "learning_rate": 
2.5146198830409358e-05, "loss": 0.8209, "step": 18790 }, { "epoch": 1.4923892119311755, "grad_norm": 0.7462199926376343, "learning_rate": 2.5132968167023897e-05, "loss": 0.7996, "step": 18800 }, { "epoch": 1.493183035980075, "grad_norm": 0.8517622351646423, "learning_rate": 2.5119737503638435e-05, "loss": 0.7538, "step": 18810 }, { "epoch": 1.4939768600289747, "grad_norm": 0.8433437347412109, "learning_rate": 2.5106506840252968e-05, "loss": 0.8481, "step": 18820 }, { "epoch": 1.494770684077874, "grad_norm": 0.8017115592956543, "learning_rate": 2.509327617686751e-05, "loss": 0.7835, "step": 18830 }, { "epoch": 1.4955645081267737, "grad_norm": 0.8956550359725952, "learning_rate": 2.508004551348205e-05, "loss": 0.746, "step": 18840 }, { "epoch": 1.4963583321756733, "grad_norm": 0.8071773648262024, "learning_rate": 2.5066814850096588e-05, "loss": 0.8029, "step": 18850 }, { "epoch": 1.4971521562245729, "grad_norm": 0.8614389300346375, "learning_rate": 2.505358418671112e-05, "loss": 0.8419, "step": 18860 }, { "epoch": 1.4979459802734723, "grad_norm": 0.8073408007621765, "learning_rate": 2.504035352332566e-05, "loss": 0.8295, "step": 18870 }, { "epoch": 1.4987398043223719, "grad_norm": 0.8368566036224365, "learning_rate": 2.50271228599402e-05, "loss": 0.8155, "step": 18880 }, { "epoch": 1.4995336283712715, "grad_norm": 0.7750986814498901, "learning_rate": 2.501389219655474e-05, "loss": 0.7991, "step": 18890 }, { "epoch": 1.500327452420171, "grad_norm": 0.843612015247345, "learning_rate": 2.5000661533169272e-05, "loss": 0.8506, "step": 18900 }, { "epoch": 1.5011212764690707, "grad_norm": 0.9953761100769043, "learning_rate": 2.498743086978381e-05, "loss": 0.8303, "step": 18910 }, { "epoch": 1.5019151005179703, "grad_norm": 0.8304370641708374, "learning_rate": 2.497420020639835e-05, "loss": 0.7987, "step": 18920 }, { "epoch": 1.5027089245668699, "grad_norm": 0.6083853840827942, "learning_rate": 2.496096954301289e-05, "loss": 0.7744, "step": 18930 }, { "epoch": 
1.5035027486157693, "grad_norm": 0.8940452933311462, "learning_rate": 2.4947738879627425e-05, "loss": 0.7647, "step": 18940 }, { "epoch": 1.5042965726646689, "grad_norm": 0.723276674747467, "learning_rate": 2.4934508216241963e-05, "loss": 0.8258, "step": 18950 }, { "epoch": 1.5050903967135685, "grad_norm": 0.8840285539627075, "learning_rate": 2.49212775528565e-05, "loss": 0.8022, "step": 18960 }, { "epoch": 1.5058842207624679, "grad_norm": 0.775147020816803, "learning_rate": 2.490804688947104e-05, "loss": 0.8434, "step": 18970 }, { "epoch": 1.5066780448113675, "grad_norm": 0.9234639406204224, "learning_rate": 2.4894816226085577e-05, "loss": 0.8079, "step": 18980 }, { "epoch": 1.507471868860267, "grad_norm": 0.7386789321899414, "learning_rate": 2.4881585562700116e-05, "loss": 0.8194, "step": 18990 }, { "epoch": 1.5082656929091667, "grad_norm": 0.9541153311729431, "learning_rate": 2.486835489931465e-05, "loss": 0.7625, "step": 19000 }, { "epoch": 1.5090595169580663, "grad_norm": 0.9702879190444946, "learning_rate": 2.485512423592919e-05, "loss": 0.8113, "step": 19010 }, { "epoch": 1.5098533410069659, "grad_norm": 0.7624096870422363, "learning_rate": 2.484189357254373e-05, "loss": 0.8299, "step": 19020 }, { "epoch": 1.5106471650558655, "grad_norm": 0.734418511390686, "learning_rate": 2.4828662909158265e-05, "loss": 0.8394, "step": 19030 }, { "epoch": 1.5114409891047649, "grad_norm": 0.8682621717453003, "learning_rate": 2.4815432245772804e-05, "loss": 0.8202, "step": 19040 }, { "epoch": 1.5122348131536645, "grad_norm": 0.7648998498916626, "learning_rate": 2.4802201582387343e-05, "loss": 0.8419, "step": 19050 }, { "epoch": 1.513028637202564, "grad_norm": 0.702462911605835, "learning_rate": 2.478897091900188e-05, "loss": 0.8125, "step": 19060 }, { "epoch": 1.5138224612514635, "grad_norm": 0.7904557585716248, "learning_rate": 2.4775740255616417e-05, "loss": 0.8679, "step": 19070 }, { "epoch": 1.514616285300363, "grad_norm": 1.0039995908737183, "learning_rate": 
2.4762509592230956e-05, "loss": 0.8623, "step": 19080 }, { "epoch": 1.5154101093492627, "grad_norm": 0.8769594430923462, "learning_rate": 2.474927892884549e-05, "loss": 0.8309, "step": 19090 }, { "epoch": 1.5162039333981623, "grad_norm": 0.742843508720398, "learning_rate": 2.473737133179858e-05, "loss": 0.8068, "step": 19100 }, { "epoch": 1.516997757447062, "grad_norm": 0.7758870720863342, "learning_rate": 2.4724140668413115e-05, "loss": 0.8617, "step": 19110 }, { "epoch": 1.5177915814959615, "grad_norm": 0.8473050594329834, "learning_rate": 2.4710910005027654e-05, "loss": 0.8175, "step": 19120 }, { "epoch": 1.518585405544861, "grad_norm": 0.6727568507194519, "learning_rate": 2.469767934164219e-05, "loss": 0.8389, "step": 19130 }, { "epoch": 1.5193792295937607, "grad_norm": 0.7093884944915771, "learning_rate": 2.468444867825673e-05, "loss": 0.8226, "step": 19140 }, { "epoch": 1.52017305364266, "grad_norm": 0.8359051942825317, "learning_rate": 2.4671218014871267e-05, "loss": 0.7772, "step": 19150 }, { "epoch": 1.5209668776915597, "grad_norm": 0.6940776705741882, "learning_rate": 2.4657987351485803e-05, "loss": 0.7137, "step": 19160 }, { "epoch": 1.521760701740459, "grad_norm": 0.8422114253044128, "learning_rate": 2.464475668810034e-05, "loss": 0.8425, "step": 19170 }, { "epoch": 1.5225545257893587, "grad_norm": 1.108744740486145, "learning_rate": 2.463152602471488e-05, "loss": 0.8392, "step": 19180 }, { "epoch": 1.5233483498382583, "grad_norm": 0.9370563626289368, "learning_rate": 2.461829536132942e-05, "loss": 0.8278, "step": 19190 }, { "epoch": 1.524142173887158, "grad_norm": 0.8219224214553833, "learning_rate": 2.4605064697943955e-05, "loss": 0.7916, "step": 19200 }, { "epoch": 1.5249359979360575, "grad_norm": 0.8902950882911682, "learning_rate": 2.4591834034558494e-05, "loss": 0.8494, "step": 19210 }, { "epoch": 1.525729821984957, "grad_norm": 0.884885311126709, "learning_rate": 2.4578603371173033e-05, "loss": 0.8258, "step": 19220 }, { "epoch": 
1.5265236460338567, "grad_norm": 0.6714193224906921, "learning_rate": 2.4565372707787572e-05, "loss": 0.8377, "step": 19230 }, { "epoch": 1.5273174700827563, "grad_norm": 0.9097526669502258, "learning_rate": 2.4552142044402107e-05, "loss": 0.8093, "step": 19240 }, { "epoch": 1.5281112941316557, "grad_norm": 0.6362065672874451, "learning_rate": 2.4538911381016646e-05, "loss": 0.8245, "step": 19250 }, { "epoch": 1.5289051181805553, "grad_norm": 0.7581408619880676, "learning_rate": 2.4525680717631182e-05, "loss": 0.8677, "step": 19260 }, { "epoch": 1.5296989422294547, "grad_norm": 0.7132633328437805, "learning_rate": 2.451245005424572e-05, "loss": 0.8338, "step": 19270 }, { "epoch": 1.5304927662783543, "grad_norm": 0.8469735383987427, "learning_rate": 2.449921939086026e-05, "loss": 0.9025, "step": 19280 }, { "epoch": 1.531286590327254, "grad_norm": 0.8352085947990417, "learning_rate": 2.4485988727474795e-05, "loss": 0.8216, "step": 19290 }, { "epoch": 1.5320804143761535, "grad_norm": 0.7701964378356934, "learning_rate": 2.4472758064089334e-05, "loss": 0.7969, "step": 19300 }, { "epoch": 1.532874238425053, "grad_norm": 0.8008362054824829, "learning_rate": 2.4459527400703873e-05, "loss": 0.781, "step": 19310 }, { "epoch": 1.5336680624739527, "grad_norm": 0.7611208558082581, "learning_rate": 2.4446296737318412e-05, "loss": 0.7799, "step": 19320 }, { "epoch": 1.5344618865228523, "grad_norm": 0.7576231360435486, "learning_rate": 2.4433066073932947e-05, "loss": 0.8071, "step": 19330 }, { "epoch": 1.535255710571752, "grad_norm": 0.9794348478317261, "learning_rate": 2.4419835410547486e-05, "loss": 0.8337, "step": 19340 }, { "epoch": 1.5360495346206513, "grad_norm": 0.8349586725234985, "learning_rate": 2.4406604747162022e-05, "loss": 0.7267, "step": 19350 }, { "epoch": 1.536843358669551, "grad_norm": 0.794275164604187, "learning_rate": 2.4393374083776564e-05, "loss": 0.7525, "step": 19360 }, { "epoch": 1.5376371827184503, "grad_norm": 0.9255030155181885, "learning_rate": 
2.43801434203911e-05, "loss": 0.7845, "step": 19370 }, { "epoch": 1.53843100676735, "grad_norm": 0.7629058957099915, "learning_rate": 2.436691275700564e-05, "loss": 0.7849, "step": 19380 }, { "epoch": 1.5392248308162495, "grad_norm": 0.8714119791984558, "learning_rate": 2.4353682093620174e-05, "loss": 0.8106, "step": 19390 }, { "epoch": 1.540018654865149, "grad_norm": 0.9720879793167114, "learning_rate": 2.4340451430234713e-05, "loss": 0.868, "step": 19400 }, { "epoch": 1.5408124789140487, "grad_norm": 0.848136842250824, "learning_rate": 2.4327220766849252e-05, "loss": 0.8204, "step": 19410 }, { "epoch": 1.5416063029629483, "grad_norm": 0.8804022073745728, "learning_rate": 2.4313990103463788e-05, "loss": 0.8423, "step": 19420 }, { "epoch": 1.542400127011848, "grad_norm": 0.7391209602355957, "learning_rate": 2.4300759440078327e-05, "loss": 0.8378, "step": 19430 }, { "epoch": 1.5431939510607475, "grad_norm": 0.6705385446548462, "learning_rate": 2.4287528776692866e-05, "loss": 0.7564, "step": 19440 }, { "epoch": 1.543987775109647, "grad_norm": 0.8526281118392944, "learning_rate": 2.4274298113307404e-05, "loss": 0.7472, "step": 19450 }, { "epoch": 1.5447815991585465, "grad_norm": 0.7899938225746155, "learning_rate": 2.426106744992194e-05, "loss": 0.8292, "step": 19460 }, { "epoch": 1.5455754232074461, "grad_norm": 0.9177964925765991, "learning_rate": 2.424783678653648e-05, "loss": 0.829, "step": 19470 }, { "epoch": 1.5463692472563455, "grad_norm": 0.8367957472801208, "learning_rate": 2.4234606123151014e-05, "loss": 0.8395, "step": 19480 }, { "epoch": 1.547163071305245, "grad_norm": 0.8440412878990173, "learning_rate": 2.4221375459765557e-05, "loss": 0.7762, "step": 19490 }, { "epoch": 1.5479568953541447, "grad_norm": 0.8187562823295593, "learning_rate": 2.4208144796380092e-05, "loss": 0.8345, "step": 19500 }, { "epoch": 1.5487507194030443, "grad_norm": 0.8997789025306702, "learning_rate": 2.4194914132994628e-05, "loss": 0.8156, "step": 19510 }, { "epoch": 
1.549544543451944, "grad_norm": 0.8826847672462463, "learning_rate": 2.4181683469609167e-05, "loss": 0.8558, "step": 19520 }, { "epoch": 1.5503383675008435, "grad_norm": 0.8336629867553711, "learning_rate": 2.4168452806223706e-05, "loss": 0.9098, "step": 19530 }, { "epoch": 1.5511321915497431, "grad_norm": 0.9518599510192871, "learning_rate": 2.4155222142838245e-05, "loss": 0.7868, "step": 19540 }, { "epoch": 1.5519260155986425, "grad_norm": 0.6656529903411865, "learning_rate": 2.414199147945278e-05, "loss": 0.8164, "step": 19550 }, { "epoch": 1.5527198396475421, "grad_norm": 0.8456233739852905, "learning_rate": 2.412876081606732e-05, "loss": 0.7531, "step": 19560 }, { "epoch": 1.5535136636964417, "grad_norm": 0.8264173269271851, "learning_rate": 2.4115530152681855e-05, "loss": 0.8304, "step": 19570 }, { "epoch": 1.554307487745341, "grad_norm": 0.9435857534408569, "learning_rate": 2.4102299489296397e-05, "loss": 0.8006, "step": 19580 }, { "epoch": 1.5551013117942407, "grad_norm": 0.7315828204154968, "learning_rate": 2.4089068825910932e-05, "loss": 0.8037, "step": 19590 }, { "epoch": 1.5558951358431403, "grad_norm": 0.8221999406814575, "learning_rate": 2.407583816252547e-05, "loss": 0.8318, "step": 19600 }, { "epoch": 1.55668895989204, "grad_norm": 0.7483288645744324, "learning_rate": 2.4062607499140007e-05, "loss": 0.8202, "step": 19610 }, { "epoch": 1.5574827839409395, "grad_norm": 0.7994224429130554, "learning_rate": 2.4049376835754546e-05, "loss": 0.7977, "step": 19620 }, { "epoch": 1.5582766079898391, "grad_norm": 0.7807520031929016, "learning_rate": 2.4036146172369085e-05, "loss": 0.7979, "step": 19630 }, { "epoch": 1.5590704320387387, "grad_norm": 0.771969199180603, "learning_rate": 2.402291550898362e-05, "loss": 0.8071, "step": 19640 }, { "epoch": 1.5598642560876381, "grad_norm": 0.7460570931434631, "learning_rate": 2.400968484559816e-05, "loss": 0.7829, "step": 19650 }, { "epoch": 1.5606580801365377, "grad_norm": 0.737625241279602, "learning_rate": 
2.3996454182212695e-05, "loss": 0.8223, "step": 19660 }, { "epoch": 1.5614519041854373, "grad_norm": 0.9518927335739136, "learning_rate": 2.3983223518827237e-05, "loss": 0.7475, "step": 19670 }, { "epoch": 1.5622457282343367, "grad_norm": 0.719597578048706, "learning_rate": 2.3969992855441773e-05, "loss": 0.8264, "step": 19680 }, { "epoch": 1.5630395522832363, "grad_norm": 0.8887612223625183, "learning_rate": 2.395676219205631e-05, "loss": 0.7934, "step": 19690 }, { "epoch": 1.563833376332136, "grad_norm": 0.801966667175293, "learning_rate": 2.3943531528670847e-05, "loss": 0.7757, "step": 19700 }, { "epoch": 1.5646272003810355, "grad_norm": 0.9171623587608337, "learning_rate": 2.3930300865285386e-05, "loss": 0.7704, "step": 19710 }, { "epoch": 1.5654210244299351, "grad_norm": 0.8287226557731628, "learning_rate": 2.3917070201899925e-05, "loss": 0.8069, "step": 19720 }, { "epoch": 1.5662148484788347, "grad_norm": 0.8021081686019897, "learning_rate": 2.390383953851446e-05, "loss": 0.8093, "step": 19730 }, { "epoch": 1.5670086725277343, "grad_norm": 0.8900328874588013, "learning_rate": 2.3890608875129e-05, "loss": 0.7636, "step": 19740 }, { "epoch": 1.567802496576634, "grad_norm": 0.9409016966819763, "learning_rate": 2.387737821174354e-05, "loss": 0.7915, "step": 19750 }, { "epoch": 1.5685963206255333, "grad_norm": 0.8233746290206909, "learning_rate": 2.3864147548358077e-05, "loss": 0.8201, "step": 19760 }, { "epoch": 1.569390144674433, "grad_norm": 0.8792815804481506, "learning_rate": 2.3850916884972613e-05, "loss": 0.7767, "step": 19770 }, { "epoch": 1.5701839687233323, "grad_norm": 0.8257175087928772, "learning_rate": 2.3837686221587152e-05, "loss": 0.7879, "step": 19780 }, { "epoch": 1.570977792772232, "grad_norm": 0.9405742883682251, "learning_rate": 2.3824455558201687e-05, "loss": 0.7247, "step": 19790 }, { "epoch": 1.5717716168211315, "grad_norm": 0.8703904747962952, "learning_rate": 2.381122489481623e-05, "loss": 0.8193, "step": 19800 }, { "epoch": 
1.5725654408700311, "grad_norm": 0.9303200244903564, "learning_rate": 2.3797994231430765e-05, "loss": 0.7814, "step": 19810 }, { "epoch": 1.5733592649189307, "grad_norm": 0.8841179609298706, "learning_rate": 2.3784763568045304e-05, "loss": 0.8572, "step": 19820 }, { "epoch": 1.5741530889678303, "grad_norm": 0.7013158798217773, "learning_rate": 2.377153290465984e-05, "loss": 0.8371, "step": 19830 }, { "epoch": 1.57494691301673, "grad_norm": 0.8166126012802124, "learning_rate": 2.375830224127438e-05, "loss": 0.7561, "step": 19840 }, { "epoch": 1.5757407370656296, "grad_norm": 0.8469328284263611, "learning_rate": 2.3745071577888917e-05, "loss": 0.8203, "step": 19850 }, { "epoch": 1.576534561114529, "grad_norm": 0.90618497133255, "learning_rate": 2.3731840914503453e-05, "loss": 0.808, "step": 19860 }, { "epoch": 1.5773283851634285, "grad_norm": 0.9630061984062195, "learning_rate": 2.3718610251117992e-05, "loss": 0.7956, "step": 19870 }, { "epoch": 1.578122209212328, "grad_norm": 0.8445176482200623, "learning_rate": 2.3705379587732527e-05, "loss": 0.8172, "step": 19880 }, { "epoch": 1.5789160332612275, "grad_norm": 0.834750235080719, "learning_rate": 2.369214892434707e-05, "loss": 0.9094, "step": 19890 }, { "epoch": 1.5797098573101271, "grad_norm": 0.8799012303352356, "learning_rate": 2.3678918260961605e-05, "loss": 0.8587, "step": 19900 }, { "epoch": 1.5805036813590267, "grad_norm": 0.9250380992889404, "learning_rate": 2.3665687597576144e-05, "loss": 0.8404, "step": 19910 }, { "epoch": 1.5812975054079264, "grad_norm": 0.9735758304595947, "learning_rate": 2.365245693419068e-05, "loss": 0.8144, "step": 19920 }, { "epoch": 1.582091329456826, "grad_norm": 0.7764338850975037, "learning_rate": 2.363922627080522e-05, "loss": 0.7696, "step": 19930 }, { "epoch": 1.5828851535057256, "grad_norm": 0.7918556332588196, "learning_rate": 2.3625995607419758e-05, "loss": 0.7751, "step": 19940 }, { "epoch": 1.5836789775546252, "grad_norm": 0.980975866317749, "learning_rate": 
2.3612764944034297e-05, "loss": 0.8112, "step": 19950 }, { "epoch": 1.5844728016035245, "grad_norm": 0.6612667441368103, "learning_rate": 2.3599534280648832e-05, "loss": 0.7655, "step": 19960 }, { "epoch": 1.5852666256524242, "grad_norm": 0.7851887941360474, "learning_rate": 2.358630361726337e-05, "loss": 0.7865, "step": 19970 }, { "epoch": 1.5860604497013235, "grad_norm": 0.8260042071342468, "learning_rate": 2.357307295387791e-05, "loss": 0.7995, "step": 19980 }, { "epoch": 1.5868542737502231, "grad_norm": 0.918483555316925, "learning_rate": 2.3559842290492446e-05, "loss": 0.7792, "step": 19990 }, { "epoch": 1.5876480977991227, "grad_norm": 0.9035216569900513, "learning_rate": 2.3546611627106984e-05, "loss": 0.8465, "step": 20000 }, { "epoch": 1.5884419218480224, "grad_norm": 0.7393106818199158, "learning_rate": 2.353338096372152e-05, "loss": 0.7915, "step": 20010 }, { "epoch": 1.589235745896922, "grad_norm": 0.9715613722801208, "learning_rate": 2.3520150300336062e-05, "loss": 0.8255, "step": 20020 }, { "epoch": 1.5900295699458216, "grad_norm": 0.79570472240448, "learning_rate": 2.3506919636950598e-05, "loss": 0.8283, "step": 20030 }, { "epoch": 1.5908233939947212, "grad_norm": 0.738107442855835, "learning_rate": 2.3493688973565137e-05, "loss": 0.8292, "step": 20040 }, { "epoch": 1.5916172180436208, "grad_norm": 1.0753180980682373, "learning_rate": 2.3480458310179672e-05, "loss": 0.8189, "step": 20050 }, { "epoch": 1.5924110420925202, "grad_norm": 0.9600769877433777, "learning_rate": 2.346722764679421e-05, "loss": 0.855, "step": 20060 }, { "epoch": 1.5932048661414198, "grad_norm": 0.7264878749847412, "learning_rate": 2.345399698340875e-05, "loss": 0.8333, "step": 20070 }, { "epoch": 1.5939986901903194, "grad_norm": 1.0567606687545776, "learning_rate": 2.3440766320023286e-05, "loss": 0.842, "step": 20080 }, { "epoch": 1.5947925142392188, "grad_norm": 0.8307549953460693, "learning_rate": 2.3427535656637825e-05, "loss": 0.8132, "step": 20090 }, { "epoch": 
1.5955863382881184, "grad_norm": 0.7764892578125, "learning_rate": 2.341430499325236e-05, "loss": 0.783, "step": 20100 }, { "epoch": 1.596380162337018, "grad_norm": 0.8395814895629883, "learning_rate": 2.3401074329866902e-05, "loss": 0.8022, "step": 20110 }, { "epoch": 1.5971739863859176, "grad_norm": 0.7872236371040344, "learning_rate": 2.3387843666481438e-05, "loss": 0.8471, "step": 20120 }, { "epoch": 1.5979678104348172, "grad_norm": 0.9380735158920288, "learning_rate": 2.3374613003095977e-05, "loss": 0.7703, "step": 20130 }, { "epoch": 1.5987616344837168, "grad_norm": 1.0980794429779053, "learning_rate": 2.3361382339710512e-05, "loss": 0.7735, "step": 20140 }, { "epoch": 1.5995554585326164, "grad_norm": 0.8633868098258972, "learning_rate": 2.334815167632505e-05, "loss": 0.7795, "step": 20150 }, { "epoch": 1.6003492825815158, "grad_norm": 0.8296395540237427, "learning_rate": 2.333492101293959e-05, "loss": 0.783, "step": 20160 }, { "epoch": 1.6011431066304154, "grad_norm": 0.9974943995475769, "learning_rate": 2.332169034955413e-05, "loss": 0.8203, "step": 20170 }, { "epoch": 1.601936930679315, "grad_norm": 0.932245671749115, "learning_rate": 2.3308459686168665e-05, "loss": 0.8744, "step": 20180 }, { "epoch": 1.6027307547282144, "grad_norm": 0.8958556652069092, "learning_rate": 2.3295229022783204e-05, "loss": 0.7615, "step": 20190 }, { "epoch": 1.603524578777114, "grad_norm": 0.8927448391914368, "learning_rate": 2.3281998359397743e-05, "loss": 0.8409, "step": 20200 }, { "epoch": 1.6043184028260136, "grad_norm": 0.8587812185287476, "learning_rate": 2.3268767696012278e-05, "loss": 0.7895, "step": 20210 }, { "epoch": 1.6051122268749132, "grad_norm": 0.9127293825149536, "learning_rate": 2.3255537032626817e-05, "loss": 0.7561, "step": 20220 }, { "epoch": 1.6059060509238128, "grad_norm": 0.8766940832138062, "learning_rate": 2.3242306369241353e-05, "loss": 0.8178, "step": 20230 }, { "epoch": 1.6066998749727124, "grad_norm": 0.9021517634391785, "learning_rate": 
2.3229075705855895e-05, "loss": 0.7978, "step": 20240 }, { "epoch": 1.607493699021612, "grad_norm": 0.7711321711540222, "learning_rate": 2.321584504247043e-05, "loss": 0.8566, "step": 20250 }, { "epoch": 1.6082875230705114, "grad_norm": 0.7530736327171326, "learning_rate": 2.320261437908497e-05, "loss": 0.772, "step": 20260 }, { "epoch": 1.609081347119411, "grad_norm": 0.7428927421569824, "learning_rate": 2.3189383715699505e-05, "loss": 0.8944, "step": 20270 }, { "epoch": 1.6098751711683106, "grad_norm": 0.7444825768470764, "learning_rate": 2.3176153052314044e-05, "loss": 0.8346, "step": 20280 }, { "epoch": 1.61066899521721, "grad_norm": 1.0045958757400513, "learning_rate": 2.3162922388928583e-05, "loss": 0.8444, "step": 20290 }, { "epoch": 1.6114628192661096, "grad_norm": 1.0150105953216553, "learning_rate": 2.3149691725543122e-05, "loss": 0.7973, "step": 20300 }, { "epoch": 1.6122566433150092, "grad_norm": 0.9908817410469055, "learning_rate": 2.3136461062157657e-05, "loss": 0.7855, "step": 20310 }, { "epoch": 1.6130504673639088, "grad_norm": 0.8784881830215454, "learning_rate": 2.3123230398772193e-05, "loss": 0.7707, "step": 20320 }, { "epoch": 1.6138442914128084, "grad_norm": 0.7329348921775818, "learning_rate": 2.3109999735386735e-05, "loss": 0.8465, "step": 20330 }, { "epoch": 1.614638115461708, "grad_norm": 0.9049598574638367, "learning_rate": 2.309676907200127e-05, "loss": 0.8031, "step": 20340 }, { "epoch": 1.6154319395106076, "grad_norm": 0.8993352651596069, "learning_rate": 2.308353840861581e-05, "loss": 0.8029, "step": 20350 }, { "epoch": 1.6162257635595072, "grad_norm": 0.7339730858802795, "learning_rate": 2.3070307745230345e-05, "loss": 0.8168, "step": 20360 }, { "epoch": 1.6170195876084066, "grad_norm": 0.8246588706970215, "learning_rate": 2.3057077081844884e-05, "loss": 0.8413, "step": 20370 }, { "epoch": 1.6178134116573062, "grad_norm": 0.8873500227928162, "learning_rate": 2.3043846418459423e-05, "loss": 0.822, "step": 20380 }, { "epoch": 
1.6186072357062056, "grad_norm": 0.7149674892425537, "learning_rate": 2.3030615755073962e-05, "loss": 0.8343, "step": 20390 }, { "epoch": 1.6194010597551052, "grad_norm": 0.8390538096427917, "learning_rate": 2.3017385091688497e-05, "loss": 0.7945, "step": 20400 }, { "epoch": 1.6201948838040048, "grad_norm": 0.7026415467262268, "learning_rate": 2.3004154428303036e-05, "loss": 0.7255, "step": 20410 }, { "epoch": 1.6209887078529044, "grad_norm": 0.8849766254425049, "learning_rate": 2.2990923764917575e-05, "loss": 0.7589, "step": 20420 }, { "epoch": 1.621782531901804, "grad_norm": 0.8478531241416931, "learning_rate": 2.297769310153211e-05, "loss": 0.7977, "step": 20430 }, { "epoch": 1.6225763559507036, "grad_norm": 0.8617717027664185, "learning_rate": 2.296446243814665e-05, "loss": 0.8657, "step": 20440 }, { "epoch": 1.6233701799996032, "grad_norm": 0.913943350315094, "learning_rate": 2.2951231774761185e-05, "loss": 0.8567, "step": 20450 }, { "epoch": 1.6241640040485028, "grad_norm": 0.6891298294067383, "learning_rate": 2.2938001111375724e-05, "loss": 0.806, "step": 20460 }, { "epoch": 1.6249578280974022, "grad_norm": 0.914467990398407, "learning_rate": 2.2924770447990263e-05, "loss": 0.7876, "step": 20470 }, { "epoch": 1.6257516521463018, "grad_norm": 0.8670864701271057, "learning_rate": 2.2911539784604802e-05, "loss": 0.7589, "step": 20480 }, { "epoch": 1.6265454761952012, "grad_norm": 0.8266043663024902, "learning_rate": 2.2898309121219338e-05, "loss": 0.8038, "step": 20490 }, { "epoch": 1.6273393002441008, "grad_norm": 0.7241661548614502, "learning_rate": 2.2885078457833877e-05, "loss": 0.8191, "step": 20500 }, { "epoch": 1.6281331242930004, "grad_norm": 0.7914606332778931, "learning_rate": 2.2871847794448416e-05, "loss": 0.759, "step": 20510 }, { "epoch": 1.6289269483419, "grad_norm": 0.8614340424537659, "learning_rate": 2.2858617131062954e-05, "loss": 0.7668, "step": 20520 }, { "epoch": 1.6297207723907996, "grad_norm": 0.9252728223800659, "learning_rate": 
2.284538646767749e-05, "loss": 0.7982, "step": 20530 }, { "epoch": 1.6305145964396992, "grad_norm": 0.808665931224823, "learning_rate": 2.2832155804292026e-05, "loss": 0.8616, "step": 20540 }, { "epoch": 1.6313084204885988, "grad_norm": 0.6369495987892151, "learning_rate": 2.2818925140906568e-05, "loss": 0.8265, "step": 20550 }, { "epoch": 1.6321022445374984, "grad_norm": 0.8415317535400391, "learning_rate": 2.2805694477521103e-05, "loss": 0.8126, "step": 20560 }, { "epoch": 1.6328960685863978, "grad_norm": 0.9225695133209229, "learning_rate": 2.2792463814135642e-05, "loss": 0.8399, "step": 20570 }, { "epoch": 1.6336898926352974, "grad_norm": 0.6714004278182983, "learning_rate": 2.2779233150750178e-05, "loss": 0.8543, "step": 20580 }, { "epoch": 1.6344837166841968, "grad_norm": 0.8270373940467834, "learning_rate": 2.2766002487364717e-05, "loss": 0.8255, "step": 20590 }, { "epoch": 1.6352775407330964, "grad_norm": 0.7550914883613586, "learning_rate": 2.2752771823979256e-05, "loss": 0.7879, "step": 20600 }, { "epoch": 1.636071364781996, "grad_norm": 0.865746796131134, "learning_rate": 2.2739541160593795e-05, "loss": 0.857, "step": 20610 }, { "epoch": 1.6368651888308956, "grad_norm": 0.8743048906326294, "learning_rate": 2.272631049720833e-05, "loss": 0.8459, "step": 20620 }, { "epoch": 1.6376590128797952, "grad_norm": 0.7982882261276245, "learning_rate": 2.271307983382287e-05, "loss": 0.8263, "step": 20630 }, { "epoch": 1.6384528369286948, "grad_norm": 0.7997922897338867, "learning_rate": 2.2699849170437408e-05, "loss": 0.7892, "step": 20640 }, { "epoch": 1.6392466609775944, "grad_norm": 1.0065598487854004, "learning_rate": 2.2686618507051947e-05, "loss": 0.7943, "step": 20650 }, { "epoch": 1.640040485026494, "grad_norm": 0.828996479511261, "learning_rate": 2.2673387843666482e-05, "loss": 0.7368, "step": 20660 }, { "epoch": 1.6408343090753934, "grad_norm": 0.8274086117744446, "learning_rate": 2.2660157180281018e-05, "loss": 0.8166, "step": 20670 }, { "epoch": 
1.641628133124293, "grad_norm": 0.8134365081787109, "learning_rate": 2.2646926516895557e-05, "loss": 0.8618, "step": 20680 }, { "epoch": 1.6424219571731926, "grad_norm": 0.8807488679885864, "learning_rate": 2.2633695853510096e-05, "loss": 0.8119, "step": 20690 }, { "epoch": 1.643215781222092, "grad_norm": 0.93861985206604, "learning_rate": 2.2620465190124635e-05, "loss": 0.8055, "step": 20700 }, { "epoch": 1.6440096052709916, "grad_norm": 0.8421952724456787, "learning_rate": 2.260723452673917e-05, "loss": 0.8429, "step": 20710 }, { "epoch": 1.6448034293198912, "grad_norm": 0.9165804982185364, "learning_rate": 2.259400386335371e-05, "loss": 0.7859, "step": 20720 }, { "epoch": 1.6455972533687908, "grad_norm": 0.7905272841453552, "learning_rate": 2.2580773199968248e-05, "loss": 0.8527, "step": 20730 }, { "epoch": 1.6463910774176904, "grad_norm": 0.9320306777954102, "learning_rate": 2.2567542536582787e-05, "loss": 0.7826, "step": 20740 }, { "epoch": 1.64718490146659, "grad_norm": 0.8871251940727234, "learning_rate": 2.2554311873197323e-05, "loss": 0.7819, "step": 20750 }, { "epoch": 1.6479787255154896, "grad_norm": 1.1218063831329346, "learning_rate": 2.254108120981186e-05, "loss": 0.8509, "step": 20760 }, { "epoch": 1.648772549564389, "grad_norm": 0.890609622001648, "learning_rate": 2.25278505464264e-05, "loss": 0.8236, "step": 20770 }, { "epoch": 1.6495663736132886, "grad_norm": 0.8825326561927795, "learning_rate": 2.2514619883040936e-05, "loss": 0.7444, "step": 20780 }, { "epoch": 1.6503601976621882, "grad_norm": 0.8295640349388123, "learning_rate": 2.2501389219655475e-05, "loss": 0.7725, "step": 20790 }, { "epoch": 1.6511540217110876, "grad_norm": 0.9995619654655457, "learning_rate": 2.248815855627001e-05, "loss": 0.7952, "step": 20800 }, { "epoch": 1.6519478457599872, "grad_norm": 1.0375027656555176, "learning_rate": 2.247492789288455e-05, "loss": 0.7575, "step": 20810 }, { "epoch": 1.6527416698088868, "grad_norm": 0.7434347867965698, "learning_rate": 
2.246169722949909e-05, "loss": 0.7775, "step": 20820 }, { "epoch": 1.6535354938577864, "grad_norm": 0.7898569107055664, "learning_rate": 2.2448466566113627e-05, "loss": 0.8458, "step": 20830 }, { "epoch": 1.654329317906686, "grad_norm": 0.8854445815086365, "learning_rate": 2.2435235902728163e-05, "loss": 0.8005, "step": 20840 }, { "epoch": 1.6551231419555856, "grad_norm": 0.8585209846496582, "learning_rate": 2.2422005239342702e-05, "loss": 0.7762, "step": 20850 }, { "epoch": 1.6559169660044852, "grad_norm": 0.7355554103851318, "learning_rate": 2.240877457595724e-05, "loss": 0.7967, "step": 20860 }, { "epoch": 1.6567107900533846, "grad_norm": 0.8480395078659058, "learning_rate": 2.239554391257178e-05, "loss": 0.8113, "step": 20870 }, { "epoch": 1.6575046141022842, "grad_norm": 0.7819201946258545, "learning_rate": 2.2382313249186315e-05, "loss": 0.7618, "step": 20880 }, { "epoch": 1.6582984381511838, "grad_norm": 0.7378450036048889, "learning_rate": 2.236908258580085e-05, "loss": 0.8375, "step": 20890 }, { "epoch": 1.6590922622000832, "grad_norm": 0.8621236085891724, "learning_rate": 2.235585192241539e-05, "loss": 0.7767, "step": 20900 }, { "epoch": 1.6598860862489828, "grad_norm": 0.720856785774231, "learning_rate": 2.234262125902993e-05, "loss": 0.7847, "step": 20910 }, { "epoch": 1.6606799102978824, "grad_norm": 0.9445878863334656, "learning_rate": 2.2329390595644467e-05, "loss": 0.832, "step": 20920 }, { "epoch": 1.661473734346782, "grad_norm": 0.7476319670677185, "learning_rate": 2.2316159932259003e-05, "loss": 0.8122, "step": 20930 }, { "epoch": 1.6622675583956816, "grad_norm": 0.7706652283668518, "learning_rate": 2.2302929268873542e-05, "loss": 0.7505, "step": 20940 }, { "epoch": 1.6630613824445812, "grad_norm": 1.0088225603103638, "learning_rate": 2.228969860548808e-05, "loss": 0.8042, "step": 20950 }, { "epoch": 1.6638552064934808, "grad_norm": 0.906871497631073, "learning_rate": 2.227646794210262e-05, "loss": 0.8492, "step": 20960 }, { "epoch": 
1.6646490305423804, "grad_norm": 0.8510369062423706, "learning_rate": 2.2263237278717155e-05, "loss": 0.789, "step": 20970 }, { "epoch": 1.6654428545912798, "grad_norm": 0.842056930065155, "learning_rate": 2.2250006615331694e-05, "loss": 0.7597, "step": 20980 }, { "epoch": 1.6662366786401794, "grad_norm": 0.8941336274147034, "learning_rate": 2.2236775951946233e-05, "loss": 0.7614, "step": 20990 }, { "epoch": 1.6670305026890788, "grad_norm": 0.6981929540634155, "learning_rate": 2.222354528856077e-05, "loss": 0.8553, "step": 21000 }, { "epoch": 1.6678243267379784, "grad_norm": 0.7219326496124268, "learning_rate": 2.2210314625175308e-05, "loss": 0.8378, "step": 21010 }, { "epoch": 1.668618150786878, "grad_norm": 0.6784833073616028, "learning_rate": 2.2197083961789843e-05, "loss": 0.87, "step": 21020 }, { "epoch": 1.6694119748357776, "grad_norm": 0.7664601802825928, "learning_rate": 2.2183853298404382e-05, "loss": 0.8162, "step": 21030 }, { "epoch": 1.6702057988846772, "grad_norm": 0.7743760347366333, "learning_rate": 2.217062263501892e-05, "loss": 0.7797, "step": 21040 }, { "epoch": 1.6709996229335768, "grad_norm": 0.9794851541519165, "learning_rate": 2.215739197163346e-05, "loss": 0.8093, "step": 21050 }, { "epoch": 1.6717934469824764, "grad_norm": 0.8463415503501892, "learning_rate": 2.2144161308247996e-05, "loss": 0.7824, "step": 21060 }, { "epoch": 1.672587271031376, "grad_norm": 0.8329752087593079, "learning_rate": 2.2130930644862534e-05, "loss": 0.824, "step": 21070 }, { "epoch": 1.6733810950802754, "grad_norm": 0.7661581039428711, "learning_rate": 2.2117699981477073e-05, "loss": 0.8068, "step": 21080 }, { "epoch": 1.674174919129175, "grad_norm": 0.8593927621841431, "learning_rate": 2.2104469318091612e-05, "loss": 0.8332, "step": 21090 }, { "epoch": 1.6749687431780744, "grad_norm": 0.7758104801177979, "learning_rate": 2.2091238654706148e-05, "loss": 0.8511, "step": 21100 }, { "epoch": 1.675762567226974, "grad_norm": 0.8966826796531677, "learning_rate": 
2.2078007991320687e-05, "loss": 0.8041, "step": 21110 }, { "epoch": 1.6765563912758736, "grad_norm": 0.8776934146881104, "learning_rate": 2.2064777327935222e-05, "loss": 0.8093, "step": 21120 }, { "epoch": 1.6773502153247732, "grad_norm": 0.6183434724807739, "learning_rate": 2.205154666454976e-05, "loss": 0.8896, "step": 21130 }, { "epoch": 1.6781440393736728, "grad_norm": 0.7311633825302124, "learning_rate": 2.20383160011643e-05, "loss": 0.7782, "step": 21140 }, { "epoch": 1.6789378634225725, "grad_norm": 0.8047720789909363, "learning_rate": 2.2025085337778836e-05, "loss": 0.8534, "step": 21150 }, { "epoch": 1.679731687471472, "grad_norm": 0.7538871169090271, "learning_rate": 2.2011854674393375e-05, "loss": 0.791, "step": 21160 }, { "epoch": 1.6805255115203717, "grad_norm": 0.7678347826004028, "learning_rate": 2.1998624011007914e-05, "loss": 0.8283, "step": 21170 }, { "epoch": 1.681319335569271, "grad_norm": 0.7614843249320984, "learning_rate": 2.1985393347622452e-05, "loss": 0.7892, "step": 21180 }, { "epoch": 1.6821131596181707, "grad_norm": 0.7208516597747803, "learning_rate": 2.1972162684236988e-05, "loss": 0.7965, "step": 21190 }, { "epoch": 1.68290698366707, "grad_norm": 0.7032837867736816, "learning_rate": 2.1958932020851527e-05, "loss": 0.7784, "step": 21200 }, { "epoch": 1.6837008077159696, "grad_norm": 0.705690860748291, "learning_rate": 2.1945701357466062e-05, "loss": 0.829, "step": 21210 }, { "epoch": 1.6844946317648692, "grad_norm": 0.8606507778167725, "learning_rate": 2.1932470694080605e-05, "loss": 0.8002, "step": 21220 }, { "epoch": 1.6852884558137688, "grad_norm": 0.7552010416984558, "learning_rate": 2.191924003069514e-05, "loss": 0.7753, "step": 21230 }, { "epoch": 1.6860822798626685, "grad_norm": 0.9295687079429626, "learning_rate": 2.1906009367309676e-05, "loss": 0.8016, "step": 21240 }, { "epoch": 1.686876103911568, "grad_norm": 0.8093982934951782, "learning_rate": 2.1892778703924215e-05, "loss": 0.8157, "step": 21250 }, { "epoch": 
1.6876699279604677, "grad_norm": 0.8069568872451782, "learning_rate": 2.1879548040538754e-05, "loss": 0.836, "step": 21260 }, { "epoch": 1.6884637520093673, "grad_norm": 0.799312174320221, "learning_rate": 2.1866317377153293e-05, "loss": 0.8299, "step": 21270 }, { "epoch": 1.6892575760582667, "grad_norm": 0.8665412068367004, "learning_rate": 2.1853086713767828e-05, "loss": 0.795, "step": 21280 }, { "epoch": 1.6900514001071663, "grad_norm": 0.8677714467048645, "learning_rate": 2.1839856050382367e-05, "loss": 0.8833, "step": 21290 }, { "epoch": 1.6908452241560659, "grad_norm": 0.7136849164962769, "learning_rate": 2.1826625386996906e-05, "loss": 0.83, "step": 21300 }, { "epoch": 1.6916390482049652, "grad_norm": 0.8250473737716675, "learning_rate": 2.1813394723611445e-05, "loss": 0.8308, "step": 21310 }, { "epoch": 1.6924328722538649, "grad_norm": 0.7840633988380432, "learning_rate": 2.180016406022598e-05, "loss": 0.8283, "step": 21320 }, { "epoch": 1.6932266963027645, "grad_norm": 0.8209207653999329, "learning_rate": 2.178693339684052e-05, "loss": 0.7752, "step": 21330 }, { "epoch": 1.694020520351664, "grad_norm": 0.7074013948440552, "learning_rate": 2.1773702733455055e-05, "loss": 0.7849, "step": 21340 }, { "epoch": 1.6948143444005637, "grad_norm": 0.8547490835189819, "learning_rate": 2.1760472070069594e-05, "loss": 0.8125, "step": 21350 }, { "epoch": 1.6956081684494633, "grad_norm": 0.7698444724082947, "learning_rate": 2.1747241406684133e-05, "loss": 0.766, "step": 21360 }, { "epoch": 1.6964019924983629, "grad_norm": 0.9371459484100342, "learning_rate": 2.173401074329867e-05, "loss": 0.7663, "step": 21370 }, { "epoch": 1.6971958165472623, "grad_norm": 0.8051425218582153, "learning_rate": 2.1720780079913207e-05, "loss": 0.8333, "step": 21380 }, { "epoch": 1.6979896405961619, "grad_norm": 0.7680243253707886, "learning_rate": 2.1707549416527746e-05, "loss": 0.8129, "step": 21390 }, { "epoch": 1.6987834646450615, "grad_norm": 0.6620742082595825, "learning_rate": 
2.1694318753142285e-05, "loss": 0.8581, "step": 21400 }, { "epoch": 1.6995772886939609, "grad_norm": 0.8950489163398743, "learning_rate": 2.168108808975682e-05, "loss": 0.8185, "step": 21410 }, { "epoch": 1.7003711127428605, "grad_norm": 0.7099031805992126, "learning_rate": 2.166785742637136e-05, "loss": 0.8097, "step": 21420 }, { "epoch": 1.70116493679176, "grad_norm": 0.9240164756774902, "learning_rate": 2.1654626762985895e-05, "loss": 0.8416, "step": 21430 }, { "epoch": 1.7019587608406597, "grad_norm": 0.7131415009498596, "learning_rate": 2.1641396099600437e-05, "loss": 0.7934, "step": 21440 }, { "epoch": 1.7027525848895593, "grad_norm": 0.8093000650405884, "learning_rate": 2.1628165436214973e-05, "loss": 0.8598, "step": 21450 }, { "epoch": 1.7035464089384589, "grad_norm": 0.8821578025817871, "learning_rate": 2.1614934772829512e-05, "loss": 0.8821, "step": 21460 }, { "epoch": 1.7043402329873585, "grad_norm": 0.835568904876709, "learning_rate": 2.1601704109444047e-05, "loss": 0.8338, "step": 21470 }, { "epoch": 1.7051340570362579, "grad_norm": 0.9917017817497253, "learning_rate": 2.1588473446058586e-05, "loss": 0.8007, "step": 21480 }, { "epoch": 1.7059278810851575, "grad_norm": 0.8382050395011902, "learning_rate": 2.1575242782673125e-05, "loss": 0.733, "step": 21490 }, { "epoch": 1.706721705134057, "grad_norm": 0.9868109226226807, "learning_rate": 2.156201211928766e-05, "loss": 0.8729, "step": 21500 }, { "epoch": 1.7075155291829565, "grad_norm": 0.8536074161529541, "learning_rate": 2.15487814559022e-05, "loss": 0.8462, "step": 21510 }, { "epoch": 1.708309353231856, "grad_norm": 0.7851694822311401, "learning_rate": 2.153555079251674e-05, "loss": 0.8142, "step": 21520 }, { "epoch": 1.7091031772807557, "grad_norm": 0.9547113180160522, "learning_rate": 2.1522320129131278e-05, "loss": 0.8597, "step": 21530 }, { "epoch": 1.7098970013296553, "grad_norm": 0.9159030914306641, "learning_rate": 2.1509089465745813e-05, "loss": 0.8335, "step": 21540 }, { "epoch": 
1.7106908253785549, "grad_norm": 0.6976479887962341, "learning_rate": 2.1495858802360352e-05, "loss": 0.8117, "step": 21550 }, { "epoch": 1.7114846494274545, "grad_norm": 0.7213908433914185, "learning_rate": 2.1482628138974888e-05, "loss": 0.8176, "step": 21560 }, { "epoch": 1.712278473476354, "grad_norm": 0.8861874341964722, "learning_rate": 2.146939747558943e-05, "loss": 0.7886, "step": 21570 }, { "epoch": 1.7130722975252535, "grad_norm": 0.87417072057724, "learning_rate": 2.1456166812203966e-05, "loss": 0.7848, "step": 21580 }, { "epoch": 1.713866121574153, "grad_norm": 0.9183176755905151, "learning_rate": 2.14429361488185e-05, "loss": 0.8826, "step": 21590 }, { "epoch": 1.7146599456230527, "grad_norm": 0.7000020146369934, "learning_rate": 2.142970548543304e-05, "loss": 0.8566, "step": 21600 }, { "epoch": 1.715453769671952, "grad_norm": 0.7864513993263245, "learning_rate": 2.141647482204758e-05, "loss": 0.7844, "step": 21610 }, { "epoch": 1.7162475937208517, "grad_norm": 0.8153032660484314, "learning_rate": 2.1403244158662118e-05, "loss": 0.8072, "step": 21620 }, { "epoch": 1.7170414177697513, "grad_norm": 0.8709026575088501, "learning_rate": 2.1390013495276653e-05, "loss": 0.8276, "step": 21630 }, { "epoch": 1.7178352418186509, "grad_norm": 0.8884004950523376, "learning_rate": 2.1376782831891192e-05, "loss": 0.8013, "step": 21640 }, { "epoch": 1.7186290658675505, "grad_norm": 0.7052041292190552, "learning_rate": 2.1363552168505728e-05, "loss": 0.8273, "step": 21650 }, { "epoch": 1.71942288991645, "grad_norm": 0.8561339378356934, "learning_rate": 2.135032150512027e-05, "loss": 0.8321, "step": 21660 }, { "epoch": 1.7202167139653497, "grad_norm": 0.8964945077896118, "learning_rate": 2.1337090841734806e-05, "loss": 0.7822, "step": 21670 }, { "epoch": 1.7210105380142493, "grad_norm": 0.702278733253479, "learning_rate": 2.1323860178349345e-05, "loss": 0.8043, "step": 21680 }, { "epoch": 1.7218043620631487, "grad_norm": 0.8085154891014099, "learning_rate": 
2.131062951496388e-05, "loss": 0.8019, "step": 21690 }, { "epoch": 1.7225981861120483, "grad_norm": 0.8028178811073303, "learning_rate": 2.129739885157842e-05, "loss": 0.8023, "step": 21700 }, { "epoch": 1.7233920101609477, "grad_norm": 0.8829013705253601, "learning_rate": 2.1284168188192958e-05, "loss": 0.7858, "step": 21710 }, { "epoch": 1.7241858342098473, "grad_norm": 0.8301076889038086, "learning_rate": 2.1270937524807494e-05, "loss": 0.8117, "step": 21720 }, { "epoch": 1.7249796582587469, "grad_norm": 0.8090790510177612, "learning_rate": 2.1257706861422032e-05, "loss": 0.7803, "step": 21730 }, { "epoch": 1.7257734823076465, "grad_norm": 0.8940252065658569, "learning_rate": 2.124447619803657e-05, "loss": 0.7733, "step": 21740 }, { "epoch": 1.726567306356546, "grad_norm": 0.8481318950653076, "learning_rate": 2.123124553465111e-05, "loss": 0.826, "step": 21750 }, { "epoch": 1.7273611304054457, "grad_norm": 0.7668688297271729, "learning_rate": 2.1218014871265646e-05, "loss": 0.8235, "step": 21760 }, { "epoch": 1.7281549544543453, "grad_norm": 0.8397562503814697, "learning_rate": 2.1204784207880185e-05, "loss": 0.8275, "step": 21770 }, { "epoch": 1.728948778503245, "grad_norm": 0.786025881767273, "learning_rate": 2.119155354449472e-05, "loss": 0.862, "step": 21780 }, { "epoch": 1.7297426025521443, "grad_norm": 0.7707594633102417, "learning_rate": 2.1178322881109263e-05, "loss": 0.8089, "step": 21790 }, { "epoch": 1.730536426601044, "grad_norm": 0.7764220237731934, "learning_rate": 2.1165092217723798e-05, "loss": 0.7513, "step": 21800 }, { "epoch": 1.7313302506499433, "grad_norm": 0.9090325832366943, "learning_rate": 2.1151861554338334e-05, "loss": 0.7636, "step": 21810 }, { "epoch": 1.7321240746988429, "grad_norm": 0.7767049074172974, "learning_rate": 2.1138630890952873e-05, "loss": 0.8391, "step": 21820 }, { "epoch": 1.7329178987477425, "grad_norm": 0.7917195558547974, "learning_rate": 2.112540022756741e-05, "loss": 0.8291, "step": 21830 }, { "epoch": 
1.733711722796642, "grad_norm": 0.9477527141571045, "learning_rate": 2.111216956418195e-05, "loss": 0.7697, "step": 21840 }, { "epoch": 1.7345055468455417, "grad_norm": 0.8348498940467834, "learning_rate": 2.1098938900796486e-05, "loss": 0.7575, "step": 21850 }, { "epoch": 1.7352993708944413, "grad_norm": 0.7253052592277527, "learning_rate": 2.1085708237411025e-05, "loss": 0.7476, "step": 21860 }, { "epoch": 1.736093194943341, "grad_norm": 0.8680984973907471, "learning_rate": 2.107247757402556e-05, "loss": 0.7865, "step": 21870 }, { "epoch": 1.7368870189922405, "grad_norm": 0.7522729635238647, "learning_rate": 2.1059246910640103e-05, "loss": 0.8271, "step": 21880 }, { "epoch": 1.73768084304114, "grad_norm": 0.6878653764724731, "learning_rate": 2.104601624725464e-05, "loss": 0.8229, "step": 21890 }, { "epoch": 1.7384746670900395, "grad_norm": 0.9120025038719177, "learning_rate": 2.1032785583869177e-05, "loss": 0.8646, "step": 21900 }, { "epoch": 1.7392684911389389, "grad_norm": 0.7929124236106873, "learning_rate": 2.1019554920483713e-05, "loss": 0.8654, "step": 21910 }, { "epoch": 1.7400623151878385, "grad_norm": 0.8900664448738098, "learning_rate": 2.1006324257098252e-05, "loss": 0.8068, "step": 21920 }, { "epoch": 1.740856139236738, "grad_norm": 0.8378934860229492, "learning_rate": 2.099309359371279e-05, "loss": 0.7829, "step": 21930 }, { "epoch": 1.7416499632856377, "grad_norm": 0.8364038467407227, "learning_rate": 2.0979862930327326e-05, "loss": 0.8372, "step": 21940 }, { "epoch": 1.7424437873345373, "grad_norm": 0.7890764474868774, "learning_rate": 2.0966632266941865e-05, "loss": 0.8067, "step": 21950 }, { "epoch": 1.743237611383437, "grad_norm": 0.8029731512069702, "learning_rate": 2.09534016035564e-05, "loss": 0.8142, "step": 21960 }, { "epoch": 1.7440314354323365, "grad_norm": 0.9675547480583191, "learning_rate": 2.0940170940170943e-05, "loss": 0.8511, "step": 21970 }, { "epoch": 1.7448252594812361, "grad_norm": 0.866611897945404, "learning_rate": 
2.092694027678548e-05, "loss": 0.7468, "step": 21980 }, { "epoch": 1.7456190835301355, "grad_norm": 0.7923464775085449, "learning_rate": 2.0913709613400017e-05, "loss": 0.8396, "step": 21990 }, { "epoch": 1.7464129075790351, "grad_norm": 0.8513892292976379, "learning_rate": 2.0900478950014553e-05, "loss": 0.8128, "step": 22000 }, { "epoch": 1.7472067316279347, "grad_norm": 0.7640386819839478, "learning_rate": 2.0887248286629092e-05, "loss": 0.7826, "step": 22010 }, { "epoch": 1.748000555676834, "grad_norm": 0.8362756967544556, "learning_rate": 2.087401762324363e-05, "loss": 0.7284, "step": 22020 }, { "epoch": 1.7487943797257337, "grad_norm": 0.78795325756073, "learning_rate": 2.086078695985817e-05, "loss": 0.8005, "step": 22030 }, { "epoch": 1.7495882037746333, "grad_norm": 0.9655894637107849, "learning_rate": 2.0847556296472705e-05, "loss": 0.8236, "step": 22040 }, { "epoch": 1.750382027823533, "grad_norm": 0.8761208653450012, "learning_rate": 2.0834325633087244e-05, "loss": 0.8187, "step": 22050 }, { "epoch": 1.7511758518724325, "grad_norm": 0.6492605209350586, "learning_rate": 2.0821094969701783e-05, "loss": 0.922, "step": 22060 }, { "epoch": 1.7519696759213321, "grad_norm": 0.6981455683708191, "learning_rate": 2.080786430631632e-05, "loss": 0.7863, "step": 22070 }, { "epoch": 1.7527634999702317, "grad_norm": 0.978775680065155, "learning_rate": 2.0794633642930858e-05, "loss": 0.7996, "step": 22080 }, { "epoch": 1.7535573240191311, "grad_norm": 0.7167590260505676, "learning_rate": 2.0781402979545393e-05, "loss": 0.8866, "step": 22090 }, { "epoch": 1.7543511480680307, "grad_norm": 0.7744993567466736, "learning_rate": 2.0768172316159935e-05, "loss": 0.7559, "step": 22100 }, { "epoch": 1.7551449721169303, "grad_norm": 0.8939769268035889, "learning_rate": 2.075494165277447e-05, "loss": 0.7956, "step": 22110 }, { "epoch": 1.7559387961658297, "grad_norm": 0.74879390001297, "learning_rate": 2.074171098938901e-05, "loss": 0.8159, "step": 22120 }, { "epoch": 
1.7567326202147293, "grad_norm": 0.7568157911300659, "learning_rate": 2.0728480326003546e-05, "loss": 0.7673, "step": 22130 }, { "epoch": 1.757526444263629, "grad_norm": 0.9827935695648193, "learning_rate": 2.0715249662618084e-05, "loss": 0.8079, "step": 22140 }, { "epoch": 1.7583202683125285, "grad_norm": 0.8912889957427979, "learning_rate": 2.0702018999232623e-05, "loss": 0.821, "step": 22150 }, { "epoch": 1.7591140923614281, "grad_norm": 0.8157911896705627, "learning_rate": 2.068878833584716e-05, "loss": 0.7612, "step": 22160 }, { "epoch": 1.7599079164103277, "grad_norm": 0.745104968547821, "learning_rate": 2.0675557672461698e-05, "loss": 0.8183, "step": 22170 }, { "epoch": 1.7607017404592273, "grad_norm": 0.846377432346344, "learning_rate": 2.0662327009076233e-05, "loss": 0.7756, "step": 22180 }, { "epoch": 1.7614955645081267, "grad_norm": 0.8381850719451904, "learning_rate": 2.0649096345690776e-05, "loss": 0.8417, "step": 22190 }, { "epoch": 1.7622893885570263, "grad_norm": 0.8737789392471313, "learning_rate": 2.063586568230531e-05, "loss": 0.7929, "step": 22200 }, { "epoch": 1.763083212605926, "grad_norm": 0.8165319561958313, "learning_rate": 2.062263501891985e-05, "loss": 0.7364, "step": 22210 }, { "epoch": 1.7638770366548253, "grad_norm": 0.9064796566963196, "learning_rate": 2.0609404355534386e-05, "loss": 0.8336, "step": 22220 }, { "epoch": 1.764670860703725, "grad_norm": 0.8361899256706238, "learning_rate": 2.0596173692148925e-05, "loss": 0.7976, "step": 22230 }, { "epoch": 1.7654646847526245, "grad_norm": 0.955283522605896, "learning_rate": 2.0582943028763464e-05, "loss": 0.781, "step": 22240 }, { "epoch": 1.7662585088015241, "grad_norm": 0.87896329164505, "learning_rate": 2.0569712365378002e-05, "loss": 0.8037, "step": 22250 }, { "epoch": 1.7670523328504237, "grad_norm": 0.91143399477005, "learning_rate": 2.0556481701992538e-05, "loss": 0.748, "step": 22260 }, { "epoch": 1.7678461568993233, "grad_norm": 0.8325607180595398, "learning_rate": 
2.0543251038607077e-05, "loss": 0.8072, "step": 22270 }, { "epoch": 1.768639980948223, "grad_norm": 0.904513955116272, "learning_rate": 2.0530020375221616e-05, "loss": 0.7791, "step": 22280 }, { "epoch": 1.7694338049971226, "grad_norm": 0.9098820686340332, "learning_rate": 2.051678971183615e-05, "loss": 0.8965, "step": 22290 }, { "epoch": 1.770227629046022, "grad_norm": 0.900877833366394, "learning_rate": 2.050355904845069e-05, "loss": 0.8368, "step": 22300 }, { "epoch": 1.7710214530949215, "grad_norm": 0.9795036911964417, "learning_rate": 2.0490328385065226e-05, "loss": 0.8214, "step": 22310 }, { "epoch": 1.771815277143821, "grad_norm": 0.7888408303260803, "learning_rate": 2.0477097721679768e-05, "loss": 0.829, "step": 22320 }, { "epoch": 1.7726091011927205, "grad_norm": 0.6869066953659058, "learning_rate": 2.0463867058294304e-05, "loss": 0.8656, "step": 22330 }, { "epoch": 1.7734029252416201, "grad_norm": 0.891853928565979, "learning_rate": 2.0450636394908843e-05, "loss": 0.8304, "step": 22340 }, { "epoch": 1.7741967492905197, "grad_norm": 0.6878489255905151, "learning_rate": 2.0437405731523378e-05, "loss": 0.8594, "step": 22350 }, { "epoch": 1.7749905733394193, "grad_norm": 0.9317987561225891, "learning_rate": 2.0424175068137917e-05, "loss": 0.8681, "step": 22360 }, { "epoch": 1.775784397388319, "grad_norm": 0.8260341882705688, "learning_rate": 2.0410944404752456e-05, "loss": 0.8144, "step": 22370 }, { "epoch": 1.7765782214372186, "grad_norm": 0.7893334031105042, "learning_rate": 2.0397713741366995e-05, "loss": 0.8375, "step": 22380 }, { "epoch": 1.7773720454861182, "grad_norm": 0.8867517113685608, "learning_rate": 2.038448307798153e-05, "loss": 0.817, "step": 22390 }, { "epoch": 1.7781658695350175, "grad_norm": 0.843120813369751, "learning_rate": 2.0371252414596066e-05, "loss": 0.8542, "step": 22400 }, { "epoch": 1.7789596935839171, "grad_norm": 0.8140029311180115, "learning_rate": 2.035802175121061e-05, "loss": 0.9021, "step": 22410 }, { "epoch": 
1.7797535176328165, "grad_norm": 0.9623441100120544, "learning_rate": 2.0344791087825144e-05, "loss": 0.8447, "step": 22420 }, { "epoch": 1.7805473416817161, "grad_norm": 0.9128091335296631, "learning_rate": 2.0331560424439683e-05, "loss": 0.794, "step": 22430 }, { "epoch": 1.7813411657306157, "grad_norm": 0.8841097354888916, "learning_rate": 2.031832976105422e-05, "loss": 0.8707, "step": 22440 }, { "epoch": 1.7821349897795153, "grad_norm": 0.7501084208488464, "learning_rate": 2.0305099097668757e-05, "loss": 0.8113, "step": 22450 }, { "epoch": 1.782928813828415, "grad_norm": 0.9023767113685608, "learning_rate": 2.0291868434283296e-05, "loss": 0.8584, "step": 22460 }, { "epoch": 1.7837226378773146, "grad_norm": 0.9524368047714233, "learning_rate": 2.0278637770897835e-05, "loss": 0.7864, "step": 22470 }, { "epoch": 1.7845164619262142, "grad_norm": 0.7797056436538696, "learning_rate": 2.026540710751237e-05, "loss": 0.7762, "step": 22480 }, { "epoch": 1.7853102859751138, "grad_norm": 0.918021559715271, "learning_rate": 2.025217644412691e-05, "loss": 0.7703, "step": 22490 }, { "epoch": 1.7861041100240131, "grad_norm": 0.9234730005264282, "learning_rate": 2.023894578074145e-05, "loss": 0.8419, "step": 22500 }, { "epoch": 1.7868979340729128, "grad_norm": 0.6988440155982971, "learning_rate": 2.0225715117355984e-05, "loss": 0.8038, "step": 22510 }, { "epoch": 1.7876917581218121, "grad_norm": 0.96622633934021, "learning_rate": 2.0212484453970523e-05, "loss": 0.792, "step": 22520 }, { "epoch": 1.7884855821707117, "grad_norm": 0.7496207356452942, "learning_rate": 2.019925379058506e-05, "loss": 0.7702, "step": 22530 }, { "epoch": 1.7892794062196113, "grad_norm": 0.9251072406768799, "learning_rate": 2.01860231271996e-05, "loss": 0.8013, "step": 22540 }, { "epoch": 1.790073230268511, "grad_norm": 0.8455217480659485, "learning_rate": 2.0172792463814136e-05, "loss": 0.8374, "step": 22550 }, { "epoch": 1.7908670543174106, "grad_norm": 0.8806858062744141, "learning_rate": 
2.0159561800428675e-05, "loss": 0.8292, "step": 22560 }, { "epoch": 1.7916608783663102, "grad_norm": 0.8762655258178711, "learning_rate": 2.014633113704321e-05, "loss": 0.774, "step": 22570 }, { "epoch": 1.7924547024152098, "grad_norm": 0.8209755420684814, "learning_rate": 2.013310047365775e-05, "loss": 0.7664, "step": 22580 }, { "epoch": 1.7932485264641094, "grad_norm": 0.8102571964263916, "learning_rate": 2.011986981027229e-05, "loss": 0.8032, "step": 22590 }, { "epoch": 1.7940423505130088, "grad_norm": 0.7247342467308044, "learning_rate": 2.0106639146886828e-05, "loss": 0.832, "step": 22600 }, { "epoch": 1.7948361745619084, "grad_norm": 0.9266355633735657, "learning_rate": 2.0093408483501363e-05, "loss": 0.7909, "step": 22610 }, { "epoch": 1.795629998610808, "grad_norm": 0.7393051981925964, "learning_rate": 2.00801778201159e-05, "loss": 0.8408, "step": 22620 }, { "epoch": 1.7964238226597073, "grad_norm": 0.8639968633651733, "learning_rate": 2.006694715673044e-05, "loss": 0.8002, "step": 22630 }, { "epoch": 1.797217646708607, "grad_norm": 0.9779701232910156, "learning_rate": 2.0053716493344977e-05, "loss": 0.7608, "step": 22640 }, { "epoch": 1.7980114707575066, "grad_norm": 0.8327713012695312, "learning_rate": 2.0040485829959516e-05, "loss": 0.8247, "step": 22650 }, { "epoch": 1.7988052948064062, "grad_norm": 0.8809974193572998, "learning_rate": 2.002725516657405e-05, "loss": 0.8666, "step": 22660 }, { "epoch": 1.7995991188553058, "grad_norm": 0.8804794549942017, "learning_rate": 2.001402450318859e-05, "loss": 0.8138, "step": 22670 }, { "epoch": 1.8003929429042054, "grad_norm": 0.8360373377799988, "learning_rate": 2.000079383980313e-05, "loss": 0.8091, "step": 22680 }, { "epoch": 1.801186766953105, "grad_norm": 0.8613433241844177, "learning_rate": 1.9987563176417668e-05, "loss": 0.7882, "step": 22690 }, { "epoch": 1.8019805910020044, "grad_norm": 0.8613284826278687, "learning_rate": 1.9974332513032203e-05, "loss": 0.8012, "step": 22700 }, { "epoch": 
1.802774415050904, "grad_norm": 0.8369642496109009, "learning_rate": 1.9961101849646742e-05, "loss": 0.7765, "step": 22710 }, { "epoch": 1.8035682390998036, "grad_norm": 0.8004109859466553, "learning_rate": 1.994787118626128e-05, "loss": 0.7961, "step": 22720 }, { "epoch": 1.804362063148703, "grad_norm": 0.792927086353302, "learning_rate": 1.993464052287582e-05, "loss": 0.848, "step": 22730 }, { "epoch": 1.8051558871976026, "grad_norm": 0.7693809866905212, "learning_rate": 1.9921409859490356e-05, "loss": 0.786, "step": 22740 }, { "epoch": 1.8059497112465022, "grad_norm": 1.0815696716308594, "learning_rate": 1.990817919610489e-05, "loss": 0.8142, "step": 22750 }, { "epoch": 1.8067435352954018, "grad_norm": 0.9764348268508911, "learning_rate": 1.989494853271943e-05, "loss": 0.8801, "step": 22760 }, { "epoch": 1.8075373593443014, "grad_norm": 0.9634998440742493, "learning_rate": 1.988171786933397e-05, "loss": 0.7793, "step": 22770 }, { "epoch": 1.808331183393201, "grad_norm": 0.8265504837036133, "learning_rate": 1.9868487205948508e-05, "loss": 0.7417, "step": 22780 }, { "epoch": 1.8091250074421006, "grad_norm": 0.8806119561195374, "learning_rate": 1.9855256542563044e-05, "loss": 0.8474, "step": 22790 }, { "epoch": 1.809918831491, "grad_norm": 1.0471200942993164, "learning_rate": 1.9842025879177582e-05, "loss": 0.8872, "step": 22800 }, { "epoch": 1.8107126555398996, "grad_norm": 0.7680615782737732, "learning_rate": 1.982879521579212e-05, "loss": 0.8097, "step": 22810 }, { "epoch": 1.8115064795887992, "grad_norm": 0.9899775385856628, "learning_rate": 1.981556455240666e-05, "loss": 0.8203, "step": 22820 }, { "epoch": 1.8123003036376986, "grad_norm": 0.8771310448646545, "learning_rate": 1.9802333889021196e-05, "loss": 0.783, "step": 22830 }, { "epoch": 1.8130941276865982, "grad_norm": 0.7819077372550964, "learning_rate": 1.9789103225635735e-05, "loss": 0.8192, "step": 22840 }, { "epoch": 1.8138879517354978, "grad_norm": 0.9690881371498108, "learning_rate": 
1.9775872562250274e-05, "loss": 0.7972, "step": 22850 }, { "epoch": 1.8146817757843974, "grad_norm": 0.9323807954788208, "learning_rate": 1.976264189886481e-05, "loss": 0.8351, "step": 22860 }, { "epoch": 1.815475599833297, "grad_norm": 0.7597420811653137, "learning_rate": 1.9749411235479348e-05, "loss": 0.7927, "step": 22870 }, { "epoch": 1.8162694238821966, "grad_norm": 0.614967942237854, "learning_rate": 1.9736180572093884e-05, "loss": 0.8414, "step": 22880 }, { "epoch": 1.8170632479310962, "grad_norm": 0.9562491178512573, "learning_rate": 1.9722949908708423e-05, "loss": 0.7689, "step": 22890 }, { "epoch": 1.8178570719799958, "grad_norm": 0.8071816563606262, "learning_rate": 1.970971924532296e-05, "loss": 0.7618, "step": 22900 }, { "epoch": 1.8186508960288952, "grad_norm": 0.9454165697097778, "learning_rate": 1.96964885819375e-05, "loss": 0.8387, "step": 22910 }, { "epoch": 1.8194447200777948, "grad_norm": 0.8835422992706299, "learning_rate": 1.9683257918552036e-05, "loss": 0.8217, "step": 22920 }, { "epoch": 1.8202385441266942, "grad_norm": 0.9334397912025452, "learning_rate": 1.9670027255166575e-05, "loss": 0.7968, "step": 22930 }, { "epoch": 1.8210323681755938, "grad_norm": 0.9972975254058838, "learning_rate": 1.9656796591781114e-05, "loss": 0.7708, "step": 22940 }, { "epoch": 1.8218261922244934, "grad_norm": 1.0508019924163818, "learning_rate": 1.9643565928395653e-05, "loss": 0.795, "step": 22950 }, { "epoch": 1.822620016273393, "grad_norm": 0.8678830862045288, "learning_rate": 1.963033526501019e-05, "loss": 0.8326, "step": 22960 }, { "epoch": 1.8234138403222926, "grad_norm": 0.7960614562034607, "learning_rate": 1.9617104601624724e-05, "loss": 0.8116, "step": 22970 }, { "epoch": 1.8242076643711922, "grad_norm": 0.8850997090339661, "learning_rate": 1.9603873938239263e-05, "loss": 0.7878, "step": 22980 }, { "epoch": 1.8250014884200918, "grad_norm": 0.7653430700302124, "learning_rate": 1.9590643274853802e-05, "loss": 0.7775, "step": 22990 }, { "epoch": 
1.8257953124689914, "grad_norm": 0.87236487865448, "learning_rate": 1.957741261146834e-05, "loss": 0.7627, "step": 23000 }, { "epoch": 1.8265891365178908, "grad_norm": 1.0964977741241455, "learning_rate": 1.9564181948082876e-05, "loss": 0.7447, "step": 23010 }, { "epoch": 1.8273829605667904, "grad_norm": 0.8569262623786926, "learning_rate": 1.9550951284697415e-05, "loss": 0.8153, "step": 23020 }, { "epoch": 1.8281767846156898, "grad_norm": 0.9798825979232788, "learning_rate": 1.9537720621311954e-05, "loss": 0.7993, "step": 23030 }, { "epoch": 1.8289706086645894, "grad_norm": 0.8593613505363464, "learning_rate": 1.9524489957926493e-05, "loss": 0.8205, "step": 23040 }, { "epoch": 1.829764432713489, "grad_norm": 0.6907086372375488, "learning_rate": 1.951125929454103e-05, "loss": 0.7571, "step": 23050 }, { "epoch": 1.8305582567623886, "grad_norm": 0.8665714263916016, "learning_rate": 1.9498028631155567e-05, "loss": 0.8783, "step": 23060 }, { "epoch": 1.8313520808112882, "grad_norm": 0.8479406833648682, "learning_rate": 1.9484797967770106e-05, "loss": 0.8075, "step": 23070 }, { "epoch": 1.8321459048601878, "grad_norm": 1.0376914739608765, "learning_rate": 1.9471567304384645e-05, "loss": 0.8745, "step": 23080 }, { "epoch": 1.8329397289090874, "grad_norm": 0.7835286855697632, "learning_rate": 1.945833664099918e-05, "loss": 0.7769, "step": 23090 }, { "epoch": 1.833733552957987, "grad_norm": 0.7931967973709106, "learning_rate": 1.9445105977613716e-05, "loss": 0.7833, "step": 23100 }, { "epoch": 1.8345273770068864, "grad_norm": 0.9185943007469177, "learning_rate": 1.9433198380566804e-05, "loss": 0.8216, "step": 23110 }, { "epoch": 1.835321201055786, "grad_norm": 0.9194105267524719, "learning_rate": 1.941996771718134e-05, "loss": 0.8119, "step": 23120 }, { "epoch": 1.8361150251046854, "grad_norm": 0.9397788047790527, "learning_rate": 1.940673705379588e-05, "loss": 0.7952, "step": 23130 }, { "epoch": 1.836908849153585, "grad_norm": 0.9412715435028076, "learning_rate": 
1.9393506390410414e-05, "loss": 0.8362, "step": 23140 }, { "epoch": 1.8377026732024846, "grad_norm": 0.8924200534820557, "learning_rate": 1.9380275727024953e-05, "loss": 0.8191, "step": 23150 }, { "epoch": 1.8384964972513842, "grad_norm": 0.9370428323745728, "learning_rate": 1.9367045063639492e-05, "loss": 0.7919, "step": 23160 }, { "epoch": 1.8392903213002838, "grad_norm": 0.9872609376907349, "learning_rate": 1.935381440025403e-05, "loss": 0.7711, "step": 23170 }, { "epoch": 1.8400841453491834, "grad_norm": 0.7494041323661804, "learning_rate": 1.9340583736868566e-05, "loss": 0.8043, "step": 23180 }, { "epoch": 1.840877969398083, "grad_norm": 0.768407940864563, "learning_rate": 1.9327353073483105e-05, "loss": 0.7851, "step": 23190 }, { "epoch": 1.8416717934469826, "grad_norm": 0.7478283047676086, "learning_rate": 1.9314122410097644e-05, "loss": 0.8769, "step": 23200 }, { "epoch": 1.842465617495882, "grad_norm": 1.001345157623291, "learning_rate": 1.930089174671218e-05, "loss": 0.8168, "step": 23210 }, { "epoch": 1.8432594415447816, "grad_norm": 0.9208034873008728, "learning_rate": 1.928766108332672e-05, "loss": 0.7763, "step": 23220 }, { "epoch": 1.8440532655936812, "grad_norm": 0.7579790949821472, "learning_rate": 1.9274430419941254e-05, "loss": 0.8256, "step": 23230 }, { "epoch": 1.8448470896425806, "grad_norm": 0.8360084891319275, "learning_rate": 1.9261199756555797e-05, "loss": 0.8554, "step": 23240 }, { "epoch": 1.8456409136914802, "grad_norm": 0.8228520154953003, "learning_rate": 1.9247969093170332e-05, "loss": 0.7981, "step": 23250 }, { "epoch": 1.8464347377403798, "grad_norm": 0.7764120697975159, "learning_rate": 1.923473842978487e-05, "loss": 0.8218, "step": 23260 }, { "epoch": 1.8472285617892794, "grad_norm": 0.730930507183075, "learning_rate": 1.9221507766399407e-05, "loss": 0.8442, "step": 23270 }, { "epoch": 1.848022385838179, "grad_norm": 0.8573958277702332, "learning_rate": 1.9208277103013946e-05, "loss": 0.8032, "step": 23280 }, { "epoch": 
1.8488162098870786, "grad_norm": 1.118808388710022, "learning_rate": 1.9195046439628485e-05, "loss": 0.8619, "step": 23290 }, { "epoch": 1.8496100339359782, "grad_norm": 0.7608010768890381, "learning_rate": 1.9181815776243023e-05, "loss": 0.8218, "step": 23300 }, { "epoch": 1.8504038579848776, "grad_norm": 0.8794158697128296, "learning_rate": 1.916858511285756e-05, "loss": 0.8562, "step": 23310 }, { "epoch": 1.8511976820337772, "grad_norm": 0.8464013934135437, "learning_rate": 1.9155354449472098e-05, "loss": 0.836, "step": 23320 }, { "epoch": 1.8519915060826768, "grad_norm": 0.7255980372428894, "learning_rate": 1.9142123786086637e-05, "loss": 0.8553, "step": 23330 }, { "epoch": 1.8527853301315762, "grad_norm": 0.9724112153053284, "learning_rate": 1.9128893122701172e-05, "loss": 0.8473, "step": 23340 }, { "epoch": 1.8535791541804758, "grad_norm": 0.7873225212097168, "learning_rate": 1.911566245931571e-05, "loss": 0.8406, "step": 23350 }, { "epoch": 1.8543729782293754, "grad_norm": 0.8537048697471619, "learning_rate": 1.9102431795930247e-05, "loss": 0.8082, "step": 23360 }, { "epoch": 1.855166802278275, "grad_norm": 0.7261394262313843, "learning_rate": 1.9089201132544786e-05, "loss": 0.8488, "step": 23370 }, { "epoch": 1.8559606263271746, "grad_norm": 0.8339587450027466, "learning_rate": 1.9075970469159325e-05, "loss": 0.8027, "step": 23380 }, { "epoch": 1.8567544503760742, "grad_norm": 1.3965240716934204, "learning_rate": 1.9062739805773864e-05, "loss": 0.7967, "step": 23390 }, { "epoch": 1.8575482744249738, "grad_norm": 0.789261519908905, "learning_rate": 1.90495091423884e-05, "loss": 0.7911, "step": 23400 }, { "epoch": 1.8583420984738732, "grad_norm": 0.6335147619247437, "learning_rate": 1.9036278479002938e-05, "loss": 0.8299, "step": 23410 }, { "epoch": 1.8591359225227728, "grad_norm": 0.7321164608001709, "learning_rate": 1.9023047815617477e-05, "loss": 0.8545, "step": 23420 }, { "epoch": 1.8599297465716724, "grad_norm": 0.9617056250572205, "learning_rate": 
1.9009817152232016e-05, "loss": 0.7811, "step": 23430 }, { "epoch": 1.8607235706205718, "grad_norm": 0.8931480050086975, "learning_rate": 1.899658648884655e-05, "loss": 0.8112, "step": 23440 }, { "epoch": 1.8615173946694714, "grad_norm": 1.094520926475525, "learning_rate": 1.8983355825461087e-05, "loss": 0.8078, "step": 23450 }, { "epoch": 1.862311218718371, "grad_norm": 0.9582784175872803, "learning_rate": 1.8970125162075626e-05, "loss": 0.763, "step": 23460 }, { "epoch": 1.8631050427672706, "grad_norm": 0.9741404056549072, "learning_rate": 1.8956894498690165e-05, "loss": 0.7808, "step": 23470 }, { "epoch": 1.8638988668161702, "grad_norm": 0.758374035358429, "learning_rate": 1.8943663835304704e-05, "loss": 0.8281, "step": 23480 }, { "epoch": 1.8646926908650698, "grad_norm": 0.815403938293457, "learning_rate": 1.893043317191924e-05, "loss": 0.7594, "step": 23490 }, { "epoch": 1.8654865149139694, "grad_norm": 0.7608597278594971, "learning_rate": 1.8917202508533778e-05, "loss": 0.8072, "step": 23500 }, { "epoch": 1.866280338962869, "grad_norm": 0.8437971472740173, "learning_rate": 1.8903971845148317e-05, "loss": 0.8153, "step": 23510 }, { "epoch": 1.8670741630117684, "grad_norm": 0.8459624648094177, "learning_rate": 1.8890741181762856e-05, "loss": 0.7585, "step": 23520 }, { "epoch": 1.867867987060668, "grad_norm": 0.9859129786491394, "learning_rate": 1.887751051837739e-05, "loss": 0.7822, "step": 23530 }, { "epoch": 1.8686618111095674, "grad_norm": 0.7122694849967957, "learning_rate": 1.886427985499193e-05, "loss": 0.8052, "step": 23540 }, { "epoch": 1.869455635158467, "grad_norm": 0.8045480251312256, "learning_rate": 1.885104919160647e-05, "loss": 0.7523, "step": 23550 }, { "epoch": 1.8702494592073666, "grad_norm": 0.8591973781585693, "learning_rate": 1.8837818528221005e-05, "loss": 0.8087, "step": 23560 }, { "epoch": 1.8710432832562662, "grad_norm": 0.8517307043075562, "learning_rate": 1.8824587864835544e-05, "loss": 0.7844, "step": 23570 }, { "epoch": 
1.8718371073051658, "grad_norm": 0.96706223487854, "learning_rate": 1.881135720145008e-05, "loss": 0.7595, "step": 23580 }, { "epoch": 1.8726309313540654, "grad_norm": 0.966511070728302, "learning_rate": 1.879812653806462e-05, "loss": 0.8569, "step": 23590 }, { "epoch": 1.873424755402965, "grad_norm": 0.8597899675369263, "learning_rate": 1.8784895874679157e-05, "loss": 0.7934, "step": 23600 }, { "epoch": 1.8742185794518647, "grad_norm": 0.7398180961608887, "learning_rate": 1.8771665211293696e-05, "loss": 0.818, "step": 23610 }, { "epoch": 1.875012403500764, "grad_norm": 0.8387320637702942, "learning_rate": 1.8758434547908232e-05, "loss": 0.8067, "step": 23620 }, { "epoch": 1.8758062275496636, "grad_norm": 0.8745064735412598, "learning_rate": 1.874520388452277e-05, "loss": 0.7378, "step": 23630 }, { "epoch": 1.876600051598563, "grad_norm": 0.7294278740882874, "learning_rate": 1.873197322113731e-05, "loss": 0.8538, "step": 23640 }, { "epoch": 1.8773938756474626, "grad_norm": 0.8370188474655151, "learning_rate": 1.871874255775185e-05, "loss": 0.837, "step": 23650 }, { "epoch": 1.8781876996963622, "grad_norm": 0.8423630595207214, "learning_rate": 1.8705511894366384e-05, "loss": 0.7942, "step": 23660 }, { "epoch": 1.8789815237452618, "grad_norm": 0.8420581817626953, "learning_rate": 1.8692281230980923e-05, "loss": 0.8273, "step": 23670 }, { "epoch": 1.8797753477941614, "grad_norm": 0.9311301708221436, "learning_rate": 1.867905056759546e-05, "loss": 0.832, "step": 23680 }, { "epoch": 1.880569171843061, "grad_norm": 0.7972429990768433, "learning_rate": 1.8665819904209998e-05, "loss": 0.8131, "step": 23690 }, { "epoch": 1.8813629958919607, "grad_norm": 0.7214037775993347, "learning_rate": 1.8652589240824536e-05, "loss": 0.8298, "step": 23700 }, { "epoch": 1.8821568199408603, "grad_norm": 0.8672298789024353, "learning_rate": 1.8639358577439072e-05, "loss": 0.8283, "step": 23710 }, { "epoch": 1.8829506439897596, "grad_norm": 0.8380793333053589, "learning_rate": 
1.862612791405361e-05, "loss": 0.7733, "step": 23720 }, { "epoch": 1.8837444680386592, "grad_norm": 0.8176583051681519, "learning_rate": 1.861289725066815e-05, "loss": 0.8774, "step": 23730 }, { "epoch": 1.8845382920875586, "grad_norm": 0.8315280079841614, "learning_rate": 1.859966658728269e-05, "loss": 0.8133, "step": 23740 }, { "epoch": 1.8853321161364582, "grad_norm": 0.7591937184333801, "learning_rate": 1.8586435923897224e-05, "loss": 0.8648, "step": 23750 }, { "epoch": 1.8861259401853578, "grad_norm": 0.9065371155738831, "learning_rate": 1.8573205260511763e-05, "loss": 0.7526, "step": 23760 }, { "epoch": 1.8869197642342574, "grad_norm": 0.8125611543655396, "learning_rate": 1.8559974597126302e-05, "loss": 0.7813, "step": 23770 }, { "epoch": 1.887713588283157, "grad_norm": 0.7974992990493774, "learning_rate": 1.854674393374084e-05, "loss": 0.8515, "step": 23780 }, { "epoch": 1.8885074123320567, "grad_norm": 0.6835601925849915, "learning_rate": 1.8533513270355377e-05, "loss": 0.8259, "step": 23790 }, { "epoch": 1.8893012363809563, "grad_norm": 0.82981938123703, "learning_rate": 1.8520282606969912e-05, "loss": 0.7867, "step": 23800 }, { "epoch": 1.8900950604298559, "grad_norm": 0.7962732911109924, "learning_rate": 1.850705194358445e-05, "loss": 0.8032, "step": 23810 }, { "epoch": 1.8908888844787552, "grad_norm": 0.7996245622634888, "learning_rate": 1.849382128019899e-05, "loss": 0.7464, "step": 23820 }, { "epoch": 1.8916827085276549, "grad_norm": 0.8889716267585754, "learning_rate": 1.848059061681353e-05, "loss": 0.7978, "step": 23830 }, { "epoch": 1.8924765325765545, "grad_norm": 0.8882633447647095, "learning_rate": 1.8467359953428065e-05, "loss": 0.7114, "step": 23840 }, { "epoch": 1.8932703566254538, "grad_norm": 0.8519427180290222, "learning_rate": 1.8454129290042603e-05, "loss": 0.8326, "step": 23850 }, { "epoch": 1.8940641806743534, "grad_norm": 1.117575764656067, "learning_rate": 1.8440898626657142e-05, "loss": 0.8373, "step": 23860 }, { "epoch": 
1.894858004723253, "grad_norm": 0.871019184589386, "learning_rate": 1.842766796327168e-05, "loss": 0.8082, "step": 23870 }, { "epoch": 1.8956518287721527, "grad_norm": 0.8169146776199341, "learning_rate": 1.8414437299886217e-05, "loss": 0.7634, "step": 23880 }, { "epoch": 1.8964456528210523, "grad_norm": 0.8163456320762634, "learning_rate": 1.8401206636500756e-05, "loss": 0.8704, "step": 23890 }, { "epoch": 1.8972394768699519, "grad_norm": 0.912982165813446, "learning_rate": 1.838797597311529e-05, "loss": 0.7831, "step": 23900 }, { "epoch": 1.8980333009188515, "grad_norm": 0.8707506656646729, "learning_rate": 1.837474530972983e-05, "loss": 0.7522, "step": 23910 }, { "epoch": 1.8988271249677509, "grad_norm": 0.7773687839508057, "learning_rate": 1.836151464634437e-05, "loss": 0.8301, "step": 23920 }, { "epoch": 1.8996209490166505, "grad_norm": 0.846040666103363, "learning_rate": 1.8348283982958905e-05, "loss": 0.8274, "step": 23930 }, { "epoch": 1.90041477306555, "grad_norm": 0.890678882598877, "learning_rate": 1.8335053319573444e-05, "loss": 0.8097, "step": 23940 }, { "epoch": 1.9012085971144495, "grad_norm": 0.8786852955818176, "learning_rate": 1.8321822656187983e-05, "loss": 0.8448, "step": 23950 }, { "epoch": 1.902002421163349, "grad_norm": 0.8807387948036194, "learning_rate": 1.830859199280252e-05, "loss": 0.8331, "step": 23960 }, { "epoch": 1.9027962452122487, "grad_norm": 0.6707026958465576, "learning_rate": 1.8295361329417057e-05, "loss": 0.8109, "step": 23970 }, { "epoch": 1.9035900692611483, "grad_norm": 0.9399641156196594, "learning_rate": 1.8282130666031596e-05, "loss": 0.7463, "step": 23980 }, { "epoch": 1.9043838933100479, "grad_norm": 0.6597705483436584, "learning_rate": 1.8268900002646135e-05, "loss": 0.8391, "step": 23990 }, { "epoch": 1.9051777173589475, "grad_norm": 0.869948148727417, "learning_rate": 1.8255669339260674e-05, "loss": 0.8085, "step": 24000 }, { "epoch": 1.905971541407847, "grad_norm": 0.9739574193954468, "learning_rate": 
1.824243867587521e-05, "loss": 0.8292, "step": 24010 }, { "epoch": 1.9067653654567465, "grad_norm": 0.8347158432006836, "learning_rate": 1.8229208012489745e-05, "loss": 0.8148, "step": 24020 }, { "epoch": 1.907559189505646, "grad_norm": 0.8817285299301147, "learning_rate": 1.8215977349104284e-05, "loss": 0.8662, "step": 24030 }, { "epoch": 1.9083530135545457, "grad_norm": 0.8079895377159119, "learning_rate": 1.8202746685718823e-05, "loss": 0.8082, "step": 24040 }, { "epoch": 1.909146837603445, "grad_norm": 0.9502777457237244, "learning_rate": 1.818951602233336e-05, "loss": 0.8375, "step": 24050 }, { "epoch": 1.9099406616523447, "grad_norm": 0.7885468602180481, "learning_rate": 1.8176285358947897e-05, "loss": 0.7887, "step": 24060 }, { "epoch": 1.9107344857012443, "grad_norm": 0.861359179019928, "learning_rate": 1.8163054695562436e-05, "loss": 0.8925, "step": 24070 }, { "epoch": 1.9115283097501439, "grad_norm": 0.8542492389678955, "learning_rate": 1.8149824032176975e-05, "loss": 0.7872, "step": 24080 }, { "epoch": 1.9123221337990435, "grad_norm": 0.97325199842453, "learning_rate": 1.8136593368791514e-05, "loss": 0.8129, "step": 24090 }, { "epoch": 1.913115957847943, "grad_norm": 0.8216763734817505, "learning_rate": 1.812336270540605e-05, "loss": 0.8226, "step": 24100 }, { "epoch": 1.9139097818968427, "grad_norm": 0.8220085501670837, "learning_rate": 1.811013204202059e-05, "loss": 0.8222, "step": 24110 }, { "epoch": 1.914703605945742, "grad_norm": 0.8182690739631653, "learning_rate": 1.8096901378635124e-05, "loss": 0.843, "step": 24120 }, { "epoch": 1.9154974299946417, "grad_norm": 1.0019487142562866, "learning_rate": 1.8083670715249666e-05, "loss": 0.826, "step": 24130 }, { "epoch": 1.9162912540435413, "grad_norm": 0.9827296137809753, "learning_rate": 1.8070440051864202e-05, "loss": 0.876, "step": 24140 }, { "epoch": 1.9170850780924407, "grad_norm": 0.848641037940979, "learning_rate": 1.8057209388478737e-05, "loss": 0.7753, "step": 24150 }, { "epoch": 
1.9178789021413403, "grad_norm": 0.9888964891433716, "learning_rate": 1.8043978725093276e-05, "loss": 0.7491, "step": 24160 }, { "epoch": 1.9186727261902399, "grad_norm": 0.8973375558853149, "learning_rate": 1.8030748061707815e-05, "loss": 0.7887, "step": 24170 }, { "epoch": 1.9194665502391395, "grad_norm": 0.8632115721702576, "learning_rate": 1.8017517398322354e-05, "loss": 0.8052, "step": 24180 }, { "epoch": 1.920260374288039, "grad_norm": 0.9406533241271973, "learning_rate": 1.800428673493689e-05, "loss": 0.7834, "step": 24190 }, { "epoch": 1.9210541983369387, "grad_norm": 0.7621222734451294, "learning_rate": 1.799105607155143e-05, "loss": 0.8211, "step": 24200 }, { "epoch": 1.9218480223858383, "grad_norm": 0.7019477486610413, "learning_rate": 1.7977825408165964e-05, "loss": 0.7818, "step": 24210 }, { "epoch": 1.922641846434738, "grad_norm": 0.7617840766906738, "learning_rate": 1.7964594744780506e-05, "loss": 0.7691, "step": 24220 }, { "epoch": 1.9234356704836373, "grad_norm": 0.8671677112579346, "learning_rate": 1.7951364081395042e-05, "loss": 0.8076, "step": 24230 }, { "epoch": 1.9242294945325369, "grad_norm": 0.7963345646858215, "learning_rate": 1.793813341800958e-05, "loss": 0.8549, "step": 24240 }, { "epoch": 1.9250233185814363, "grad_norm": 0.7418803572654724, "learning_rate": 1.7924902754624116e-05, "loss": 0.8776, "step": 24250 }, { "epoch": 1.9258171426303359, "grad_norm": 0.991839587688446, "learning_rate": 1.7911672091238655e-05, "loss": 0.8098, "step": 24260 }, { "epoch": 1.9266109666792355, "grad_norm": 0.8886794447898865, "learning_rate": 1.7898441427853194e-05, "loss": 0.7906, "step": 24270 }, { "epoch": 1.927404790728135, "grad_norm": 0.8058855533599854, "learning_rate": 1.788521076446773e-05, "loss": 0.7909, "step": 24280 }, { "epoch": 1.9281986147770347, "grad_norm": 0.9611883163452148, "learning_rate": 1.787198010108227e-05, "loss": 0.7466, "step": 24290 }, { "epoch": 1.9289924388259343, "grad_norm": 0.9158162474632263, "learning_rate": 
1.7858749437696808e-05, "loss": 0.772, "step": 24300 }, { "epoch": 1.929786262874834, "grad_norm": 0.7327207922935486, "learning_rate": 1.7845518774311347e-05, "loss": 0.832, "step": 24310 }, { "epoch": 1.9305800869237335, "grad_norm": 0.8581303954124451, "learning_rate": 1.7832288110925882e-05, "loss": 0.8193, "step": 24320 }, { "epoch": 1.931373910972633, "grad_norm": 0.8568522930145264, "learning_rate": 1.781905744754042e-05, "loss": 0.8105, "step": 24330 }, { "epoch": 1.9321677350215325, "grad_norm": 0.9713951945304871, "learning_rate": 1.7805826784154957e-05, "loss": 0.7699, "step": 24340 }, { "epoch": 1.9329615590704319, "grad_norm": 0.8259567618370056, "learning_rate": 1.77925961207695e-05, "loss": 0.8278, "step": 24350 }, { "epoch": 1.9337553831193315, "grad_norm": 0.9013030529022217, "learning_rate": 1.7779365457384035e-05, "loss": 0.8677, "step": 24360 }, { "epoch": 1.934549207168231, "grad_norm": 0.8050611019134521, "learning_rate": 1.776613479399857e-05, "loss": 0.7818, "step": 24370 }, { "epoch": 1.9353430312171307, "grad_norm": 0.8828538060188293, "learning_rate": 1.775290413061311e-05, "loss": 0.8062, "step": 24380 }, { "epoch": 1.9361368552660303, "grad_norm": 0.9584046602249146, "learning_rate": 1.7739673467227648e-05, "loss": 0.8004, "step": 24390 }, { "epoch": 1.93693067931493, "grad_norm": 1.0166678428649902, "learning_rate": 1.7726442803842187e-05, "loss": 0.8063, "step": 24400 }, { "epoch": 1.9377245033638295, "grad_norm": 0.8781585693359375, "learning_rate": 1.7713212140456722e-05, "loss": 0.8428, "step": 24410 }, { "epoch": 1.9385183274127291, "grad_norm": 0.9329415559768677, "learning_rate": 1.769998147707126e-05, "loss": 0.8114, "step": 24420 }, { "epoch": 1.9393121514616285, "grad_norm": 0.9052295684814453, "learning_rate": 1.7686750813685797e-05, "loss": 0.8206, "step": 24430 }, { "epoch": 1.940105975510528, "grad_norm": 0.8311532139778137, "learning_rate": 1.767352015030034e-05, "loss": 0.83, "step": 24440 }, { "epoch": 
1.9408997995594277, "grad_norm": 0.8196767568588257, "learning_rate": 1.7660289486914875e-05, "loss": 0.8425, "step": 24450 }, { "epoch": 1.941693623608327, "grad_norm": 0.7230108976364136, "learning_rate": 1.7647058823529414e-05, "loss": 0.8507, "step": 24460 }, { "epoch": 1.9424874476572267, "grad_norm": 1.056864857673645, "learning_rate": 1.763382816014395e-05, "loss": 0.7798, "step": 24470 }, { "epoch": 1.9432812717061263, "grad_norm": 1.0241467952728271, "learning_rate": 1.7620597496758488e-05, "loss": 0.8083, "step": 24480 }, { "epoch": 1.944075095755026, "grad_norm": 1.016441822052002, "learning_rate": 1.7607366833373027e-05, "loss": 0.8201, "step": 24490 }, { "epoch": 1.9448689198039255, "grad_norm": 0.9095987677574158, "learning_rate": 1.7594136169987563e-05, "loss": 0.8211, "step": 24500 }, { "epoch": 1.9456627438528251, "grad_norm": 0.7234585881233215, "learning_rate": 1.75809055066021e-05, "loss": 0.8225, "step": 24510 }, { "epoch": 1.9464565679017247, "grad_norm": 0.8463301062583923, "learning_rate": 1.756767484321664e-05, "loss": 0.8663, "step": 24520 }, { "epoch": 1.947250391950624, "grad_norm": 0.8381713628768921, "learning_rate": 1.755444417983118e-05, "loss": 0.8309, "step": 24530 }, { "epoch": 1.9480442159995237, "grad_norm": 1.2629505395889282, "learning_rate": 1.7541213516445715e-05, "loss": 0.8193, "step": 24540 }, { "epoch": 1.9488380400484233, "grad_norm": 0.889024555683136, "learning_rate": 1.7527982853060254e-05, "loss": 0.7851, "step": 24550 }, { "epoch": 1.9496318640973227, "grad_norm": 1.1809180974960327, "learning_rate": 1.751475218967479e-05, "loss": 0.8625, "step": 24560 }, { "epoch": 1.9504256881462223, "grad_norm": 0.973340630531311, "learning_rate": 1.750152152628933e-05, "loss": 0.7819, "step": 24570 }, { "epoch": 1.951219512195122, "grad_norm": 0.9036343693733215, "learning_rate": 1.7488290862903867e-05, "loss": 0.7523, "step": 24580 }, { "epoch": 1.9520133362440215, "grad_norm": 0.9107938408851624, "learning_rate": 
1.7475060199518406e-05, "loss": 0.8277, "step": 24590 }, { "epoch": 1.9528071602929211, "grad_norm": 0.7978836297988892, "learning_rate": 1.746182953613294e-05, "loss": 0.8058, "step": 24600 }, { "epoch": 1.9536009843418207, "grad_norm": 0.9246600270271301, "learning_rate": 1.744859887274748e-05, "loss": 0.862, "step": 24610 }, { "epoch": 1.9543948083907203, "grad_norm": 0.96808922290802, "learning_rate": 1.743536820936202e-05, "loss": 0.9027, "step": 24620 }, { "epoch": 1.9551886324396197, "grad_norm": 0.7712015509605408, "learning_rate": 1.7422137545976555e-05, "loss": 0.836, "step": 24630 }, { "epoch": 1.9559824564885193, "grad_norm": 0.9321126937866211, "learning_rate": 1.7408906882591094e-05, "loss": 0.836, "step": 24640 }, { "epoch": 1.956776280537419, "grad_norm": 0.8697831630706787, "learning_rate": 1.739567621920563e-05, "loss": 0.8271, "step": 24650 }, { "epoch": 1.9575701045863183, "grad_norm": 0.8572379350662231, "learning_rate": 1.7382445555820172e-05, "loss": 0.7872, "step": 24660 }, { "epoch": 1.958363928635218, "grad_norm": 0.7709526419639587, "learning_rate": 1.7369214892434707e-05, "loss": 0.8315, "step": 24670 }, { "epoch": 1.9591577526841175, "grad_norm": 0.867958128452301, "learning_rate": 1.7355984229049246e-05, "loss": 0.7875, "step": 24680 }, { "epoch": 1.9599515767330171, "grad_norm": 0.883253276348114, "learning_rate": 1.7342753565663782e-05, "loss": 0.8871, "step": 24690 }, { "epoch": 1.9607454007819167, "grad_norm": 0.8314810395240784, "learning_rate": 1.732952290227832e-05, "loss": 0.7559, "step": 24700 }, { "epoch": 1.9615392248308163, "grad_norm": 0.7700093984603882, "learning_rate": 1.731629223889286e-05, "loss": 0.8198, "step": 24710 }, { "epoch": 1.962333048879716, "grad_norm": 0.7861685156822205, "learning_rate": 1.7303061575507395e-05, "loss": 0.8823, "step": 24720 }, { "epoch": 1.9631268729286153, "grad_norm": 0.9376586675643921, "learning_rate": 1.7289830912121934e-05, "loss": 0.8059, "step": 24730 }, { "epoch": 
1.963920696977515, "grad_norm": 0.9346411228179932, "learning_rate": 1.7276600248736473e-05, "loss": 0.8167, "step": 24740 }, { "epoch": 1.9647145210264145, "grad_norm": 0.7460438013076782, "learning_rate": 1.7263369585351012e-05, "loss": 0.7703, "step": 24750 }, { "epoch": 1.965508345075314, "grad_norm": 0.7430590391159058, "learning_rate": 1.7250138921965548e-05, "loss": 0.7716, "step": 24760 }, { "epoch": 1.9663021691242135, "grad_norm": 0.9722541570663452, "learning_rate": 1.7236908258580086e-05, "loss": 0.7665, "step": 24770 }, { "epoch": 1.9670959931731131, "grad_norm": 0.8619949817657471, "learning_rate": 1.7223677595194622e-05, "loss": 0.7947, "step": 24780 }, { "epoch": 1.9678898172220127, "grad_norm": 0.864704966545105, "learning_rate": 1.7210446931809164e-05, "loss": 0.8233, "step": 24790 }, { "epoch": 1.9686836412709123, "grad_norm": 0.8963067531585693, "learning_rate": 1.71972162684237e-05, "loss": 0.8249, "step": 24800 }, { "epoch": 1.969477465319812, "grad_norm": 0.7496664524078369, "learning_rate": 1.718398560503824e-05, "loss": 0.7904, "step": 24810 }, { "epoch": 1.9702712893687115, "grad_norm": 0.9141893982887268, "learning_rate": 1.7170754941652774e-05, "loss": 0.8094, "step": 24820 }, { "epoch": 1.9710651134176111, "grad_norm": 0.8252711296081543, "learning_rate": 1.7157524278267313e-05, "loss": 0.744, "step": 24830 }, { "epoch": 1.9718589374665105, "grad_norm": 0.7899823188781738, "learning_rate": 1.7144293614881852e-05, "loss": 0.7799, "step": 24840 }, { "epoch": 1.9726527615154101, "grad_norm": 0.7791532874107361, "learning_rate": 1.7131062951496388e-05, "loss": 0.8205, "step": 24850 }, { "epoch": 1.9734465855643095, "grad_norm": 0.8147314786911011, "learning_rate": 1.7117832288110927e-05, "loss": 0.7866, "step": 24860 }, { "epoch": 1.9742404096132091, "grad_norm": 0.9811384677886963, "learning_rate": 1.7104601624725462e-05, "loss": 0.8046, "step": 24870 }, { "epoch": 1.9750342336621087, "grad_norm": 0.6570988893508911, "learning_rate": 
1.7091370961340004e-05, "loss": 0.8115, "step": 24880 }, { "epoch": 1.9758280577110083, "grad_norm": 0.8411725759506226, "learning_rate": 1.707814029795454e-05, "loss": 0.8163, "step": 24890 }, { "epoch": 1.976621881759908, "grad_norm": 0.9080935120582581, "learning_rate": 1.706490963456908e-05, "loss": 0.7894, "step": 24900 }, { "epoch": 1.9774157058088075, "grad_norm": 0.8399326801300049, "learning_rate": 1.7051678971183615e-05, "loss": 0.8409, "step": 24910 }, { "epoch": 1.9782095298577071, "grad_norm": 0.7995646595954895, "learning_rate": 1.7038448307798153e-05, "loss": 0.8039, "step": 24920 }, { "epoch": 1.9790033539066068, "grad_norm": 0.7588172554969788, "learning_rate": 1.7025217644412692e-05, "loss": 0.8694, "step": 24930 }, { "epoch": 1.9797971779555061, "grad_norm": 0.8422114253044128, "learning_rate": 1.701198698102723e-05, "loss": 0.8497, "step": 24940 }, { "epoch": 1.9805910020044057, "grad_norm": 0.9750681519508362, "learning_rate": 1.6998756317641767e-05, "loss": 0.809, "step": 24950 }, { "epoch": 1.9813848260533051, "grad_norm": 0.8295944929122925, "learning_rate": 1.6985525654256302e-05, "loss": 0.828, "step": 24960 }, { "epoch": 1.9821786501022047, "grad_norm": 0.7455227375030518, "learning_rate": 1.6972294990870845e-05, "loss": 0.774, "step": 24970 }, { "epoch": 1.9829724741511043, "grad_norm": 0.7973991632461548, "learning_rate": 1.695906432748538e-05, "loss": 0.833, "step": 24980 }, { "epoch": 1.983766298200004, "grad_norm": 0.9970287084579468, "learning_rate": 1.694583366409992e-05, "loss": 0.7929, "step": 24990 }, { "epoch": 1.9845601222489035, "grad_norm": 1.006111741065979, "learning_rate": 1.6932603000714455e-05, "loss": 0.7835, "step": 25000 }, { "epoch": 1.9853539462978032, "grad_norm": 0.7539806962013245, "learning_rate": 1.6919372337328994e-05, "loss": 0.791, "step": 25010 }, { "epoch": 1.9861477703467028, "grad_norm": 0.6668902635574341, "learning_rate": 1.6906141673943533e-05, "loss": 0.8413, "step": 25020 }, { "epoch": 
1.9869415943956024, "grad_norm": 0.7992756366729736, "learning_rate": 1.689291101055807e-05, "loss": 0.8181, "step": 25030 }, { "epoch": 1.9877354184445017, "grad_norm": 0.836247980594635, "learning_rate": 1.6879680347172607e-05, "loss": 0.8255, "step": 25040 }, { "epoch": 1.9885292424934013, "grad_norm": 0.701686680316925, "learning_rate": 1.6866449683787146e-05, "loss": 0.8523, "step": 25050 }, { "epoch": 1.9893230665423007, "grad_norm": 0.8680205941200256, "learning_rate": 1.6853219020401685e-05, "loss": 0.8737, "step": 25060 }, { "epoch": 1.9901168905912003, "grad_norm": 1.0040700435638428, "learning_rate": 1.683998835701622e-05, "loss": 0.8068, "step": 25070 }, { "epoch": 1.9909107146401, "grad_norm": 0.7312654852867126, "learning_rate": 1.682675769363076e-05, "loss": 0.7744, "step": 25080 }, { "epoch": 1.9917045386889995, "grad_norm": 0.8141999244689941, "learning_rate": 1.6813527030245295e-05, "loss": 0.8031, "step": 25090 }, { "epoch": 1.9924983627378992, "grad_norm": 0.9395736455917358, "learning_rate": 1.6800296366859837e-05, "loss": 0.8556, "step": 25100 }, { "epoch": 1.9932921867867988, "grad_norm": 1.0903936624526978, "learning_rate": 1.6788388769812918e-05, "loss": 0.7715, "step": 25110 }, { "epoch": 1.9940860108356984, "grad_norm": 0.9055798053741455, "learning_rate": 1.6775158106427457e-05, "loss": 0.8161, "step": 25120 }, { "epoch": 1.994879834884598, "grad_norm": 1.0567915439605713, "learning_rate": 1.6761927443041993e-05, "loss": 0.7872, "step": 25130 }, { "epoch": 1.9956736589334974, "grad_norm": 0.8715507388114929, "learning_rate": 1.6748696779656535e-05, "loss": 0.847, "step": 25140 }, { "epoch": 1.996467482982397, "grad_norm": 0.9350098371505737, "learning_rate": 1.673546611627107e-05, "loss": 0.7932, "step": 25150 }, { "epoch": 1.9972613070312966, "grad_norm": 0.8718578219413757, "learning_rate": 1.672223545288561e-05, "loss": 0.8038, "step": 25160 }, { "epoch": 1.998055131080196, "grad_norm": 0.987808883190155, "learning_rate": 
1.6709004789500145e-05, "loss": 0.8141, "step": 25170 }, { "epoch": 1.9988489551290956, "grad_norm": 0.8378719091415405, "learning_rate": 1.6695774126114684e-05, "loss": 0.7715, "step": 25180 }, { "epoch": 1.9996427791779952, "grad_norm": 0.8598037958145142, "learning_rate": 1.6682543462729223e-05, "loss": 0.7998, "step": 25190 }, { "epoch": 2.0004366032268948, "grad_norm": 0.8673446774482727, "learning_rate": 1.666931279934376e-05, "loss": 0.8147, "step": 25200 }, { "epoch": 2.0012304272757944, "grad_norm": 0.8430289030075073, "learning_rate": 1.6656082135958297e-05, "loss": 0.801, "step": 25210 }, { "epoch": 2.002024251324694, "grad_norm": 0.8227368593215942, "learning_rate": 1.6642851472572836e-05, "loss": 0.7521, "step": 25220 }, { "epoch": 2.0028180753735936, "grad_norm": 0.8883048295974731, "learning_rate": 1.6629620809187375e-05, "loss": 0.8145, "step": 25230 }, { "epoch": 2.003611899422493, "grad_norm": 0.6865272521972656, "learning_rate": 1.661639014580191e-05, "loss": 0.776, "step": 25240 }, { "epoch": 2.004405723471393, "grad_norm": 0.8548721075057983, "learning_rate": 1.660315948241645e-05, "loss": 0.7732, "step": 25250 }, { "epoch": 2.005199547520292, "grad_norm": 0.8403987884521484, "learning_rate": 1.6589928819030985e-05, "loss": 0.7927, "step": 25260 }, { "epoch": 2.0059933715691916, "grad_norm": 0.7534387111663818, "learning_rate": 1.6576698155645527e-05, "loss": 0.8369, "step": 25270 }, { "epoch": 2.006787195618091, "grad_norm": 0.8503125309944153, "learning_rate": 1.6563467492260063e-05, "loss": 0.7955, "step": 25280 }, { "epoch": 2.0075810196669908, "grad_norm": 0.6855154633522034, "learning_rate": 1.6550236828874602e-05, "loss": 0.7958, "step": 25290 }, { "epoch": 2.0083748437158904, "grad_norm": 0.7701917290687561, "learning_rate": 1.6537006165489137e-05, "loss": 0.7644, "step": 25300 }, { "epoch": 2.00916866776479, "grad_norm": 0.9168187379837036, "learning_rate": 1.6523775502103676e-05, "loss": 0.8316, "step": 25310 }, { "epoch": 
2.0099624918136896, "grad_norm": 0.833348274230957, "learning_rate": 1.6510544838718215e-05, "loss": 0.7625, "step": 25320 }, { "epoch": 2.010756315862589, "grad_norm": 0.8914290070533752, "learning_rate": 1.649731417533275e-05, "loss": 0.7627, "step": 25330 }, { "epoch": 2.011550139911489, "grad_norm": 1.0038100481033325, "learning_rate": 1.648408351194729e-05, "loss": 0.784, "step": 25340 }, { "epoch": 2.0123439639603884, "grad_norm": 0.8875173330307007, "learning_rate": 1.6470852848561825e-05, "loss": 0.8098, "step": 25350 }, { "epoch": 2.0131377880092876, "grad_norm": 0.9379634857177734, "learning_rate": 1.6457622185176368e-05, "loss": 0.8373, "step": 25360 }, { "epoch": 2.013931612058187, "grad_norm": 0.7726089954376221, "learning_rate": 1.6444391521790903e-05, "loss": 0.7727, "step": 25370 }, { "epoch": 2.0147254361070868, "grad_norm": 0.7188129425048828, "learning_rate": 1.6431160858405442e-05, "loss": 0.7634, "step": 25380 }, { "epoch": 2.0155192601559864, "grad_norm": 0.7986645698547363, "learning_rate": 1.6417930195019978e-05, "loss": 0.8123, "step": 25390 }, { "epoch": 2.016313084204886, "grad_norm": 0.720370888710022, "learning_rate": 1.6404699531634517e-05, "loss": 0.7428, "step": 25400 }, { "epoch": 2.0171069082537856, "grad_norm": 0.9025735259056091, "learning_rate": 1.6391468868249055e-05, "loss": 0.817, "step": 25410 }, { "epoch": 2.017900732302685, "grad_norm": 0.8990045785903931, "learning_rate": 1.637823820486359e-05, "loss": 0.7254, "step": 25420 }, { "epoch": 2.018694556351585, "grad_norm": 0.9403473138809204, "learning_rate": 1.636500754147813e-05, "loss": 0.7958, "step": 25430 }, { "epoch": 2.0194883804004844, "grad_norm": 1.0497170686721802, "learning_rate": 1.635177687809267e-05, "loss": 0.7647, "step": 25440 }, { "epoch": 2.020282204449384, "grad_norm": 1.1026662588119507, "learning_rate": 1.6338546214707208e-05, "loss": 0.7541, "step": 25450 }, { "epoch": 2.021076028498283, "grad_norm": 0.9010255336761475, "learning_rate": 
1.6325315551321743e-05, "loss": 0.7501, "step": 25460 }, { "epoch": 2.0218698525471828, "grad_norm": 0.6994714736938477, "learning_rate": 1.6312084887936282e-05, "loss": 0.7985, "step": 25470 }, { "epoch": 2.0226636765960824, "grad_norm": 0.7155155539512634, "learning_rate": 1.6298854224550818e-05, "loss": 0.7136, "step": 25480 }, { "epoch": 2.023457500644982, "grad_norm": 0.9782127141952515, "learning_rate": 1.628562356116536e-05, "loss": 0.782, "step": 25490 }, { "epoch": 2.0242513246938816, "grad_norm": 0.8731641173362732, "learning_rate": 1.6272392897779896e-05, "loss": 0.7801, "step": 25500 }, { "epoch": 2.025045148742781, "grad_norm": 0.8580078482627869, "learning_rate": 1.6259162234394435e-05, "loss": 0.833, "step": 25510 }, { "epoch": 2.025838972791681, "grad_norm": 0.9071916937828064, "learning_rate": 1.624593157100897e-05, "loss": 0.8026, "step": 25520 }, { "epoch": 2.0266327968405804, "grad_norm": 0.9354455471038818, "learning_rate": 1.623270090762351e-05, "loss": 0.7792, "step": 25530 }, { "epoch": 2.02742662088948, "grad_norm": 0.8776410222053528, "learning_rate": 1.6219470244238048e-05, "loss": 0.7958, "step": 25540 }, { "epoch": 2.0282204449383796, "grad_norm": 0.7995261549949646, "learning_rate": 1.6206239580852584e-05, "loss": 0.7811, "step": 25550 }, { "epoch": 2.0290142689872788, "grad_norm": 0.8553417325019836, "learning_rate": 1.6193008917467122e-05, "loss": 0.7713, "step": 25560 }, { "epoch": 2.0298080930361784, "grad_norm": 0.9338173270225525, "learning_rate": 1.6179778254081658e-05, "loss": 0.7451, "step": 25570 }, { "epoch": 2.030601917085078, "grad_norm": 0.9235241413116455, "learning_rate": 1.61665475906962e-05, "loss": 0.7126, "step": 25580 }, { "epoch": 2.0313957411339776, "grad_norm": 0.864325761795044, "learning_rate": 1.6153316927310736e-05, "loss": 0.7988, "step": 25590 }, { "epoch": 2.032189565182877, "grad_norm": 0.7986742854118347, "learning_rate": 1.6140086263925275e-05, "loss": 0.7959, "step": 25600 }, { "epoch": 
2.032983389231777, "grad_norm": 0.9655138254165649, "learning_rate": 1.612685560053981e-05, "loss": 0.8415, "step": 25610 }, { "epoch": 2.0337772132806764, "grad_norm": 0.6859591603279114, "learning_rate": 1.611362493715435e-05, "loss": 0.7661, "step": 25620 }, { "epoch": 2.034571037329576, "grad_norm": 0.7954578399658203, "learning_rate": 1.6100394273768888e-05, "loss": 0.811, "step": 25630 }, { "epoch": 2.0353648613784756, "grad_norm": 0.9830471873283386, "learning_rate": 1.6087163610383427e-05, "loss": 0.7648, "step": 25640 }, { "epoch": 2.036158685427375, "grad_norm": 0.9153994917869568, "learning_rate": 1.6073932946997963e-05, "loss": 0.7447, "step": 25650 }, { "epoch": 2.0369525094762744, "grad_norm": 0.9104596972465515, "learning_rate": 1.60607022836125e-05, "loss": 0.8326, "step": 25660 }, { "epoch": 2.037746333525174, "grad_norm": 0.8871384859085083, "learning_rate": 1.604747162022704e-05, "loss": 0.8012, "step": 25670 }, { "epoch": 2.0385401575740736, "grad_norm": 0.800845205783844, "learning_rate": 1.6034240956841576e-05, "loss": 0.7772, "step": 25680 }, { "epoch": 2.039333981622973, "grad_norm": 1.036254644393921, "learning_rate": 1.6021010293456115e-05, "loss": 0.7825, "step": 25690 }, { "epoch": 2.040127805671873, "grad_norm": 0.8596107363700867, "learning_rate": 1.600777963007065e-05, "loss": 0.7614, "step": 25700 }, { "epoch": 2.0409216297207724, "grad_norm": 0.7805830240249634, "learning_rate": 1.5994548966685193e-05, "loss": 0.8176, "step": 25710 }, { "epoch": 2.041715453769672, "grad_norm": 1.0922433137893677, "learning_rate": 1.598131830329973e-05, "loss": 0.7965, "step": 25720 }, { "epoch": 2.0425092778185716, "grad_norm": 0.7439164519309998, "learning_rate": 1.5968087639914267e-05, "loss": 0.8144, "step": 25730 }, { "epoch": 2.043303101867471, "grad_norm": 0.7576305270195007, "learning_rate": 1.5954856976528803e-05, "loss": 0.8107, "step": 25740 }, { "epoch": 2.044096925916371, "grad_norm": 0.7325326800346375, "learning_rate": 
1.5941626313143342e-05, "loss": 0.7942, "step": 25750 }, { "epoch": 2.04489074996527, "grad_norm": 0.7904483079910278, "learning_rate": 1.592839564975788e-05, "loss": 0.8301, "step": 25760 }, { "epoch": 2.0456845740141696, "grad_norm": 0.8427342772483826, "learning_rate": 1.5915164986372416e-05, "loss": 0.7408, "step": 25770 }, { "epoch": 2.046478398063069, "grad_norm": 0.9607779383659363, "learning_rate": 1.5901934322986955e-05, "loss": 0.7796, "step": 25780 }, { "epoch": 2.047272222111969, "grad_norm": 0.9316497445106506, "learning_rate": 1.588870365960149e-05, "loss": 0.8237, "step": 25790 }, { "epoch": 2.0480660461608684, "grad_norm": 0.8862087726593018, "learning_rate": 1.5875472996216033e-05, "loss": 0.7505, "step": 25800 }, { "epoch": 2.048859870209768, "grad_norm": 0.9712191820144653, "learning_rate": 1.586224233283057e-05, "loss": 0.7987, "step": 25810 }, { "epoch": 2.0496536942586676, "grad_norm": 0.8523909449577332, "learning_rate": 1.5849011669445107e-05, "loss": 0.7585, "step": 25820 }, { "epoch": 2.050447518307567, "grad_norm": 0.9257214069366455, "learning_rate": 1.5835781006059643e-05, "loss": 0.7431, "step": 25830 }, { "epoch": 2.051241342356467, "grad_norm": 0.9414131045341492, "learning_rate": 1.5822550342674182e-05, "loss": 0.7289, "step": 25840 }, { "epoch": 2.0520351664053664, "grad_norm": 0.7903403639793396, "learning_rate": 1.580931967928872e-05, "loss": 0.7282, "step": 25850 }, { "epoch": 2.052828990454266, "grad_norm": 0.7305004000663757, "learning_rate": 1.579608901590326e-05, "loss": 0.7352, "step": 25860 }, { "epoch": 2.053622814503165, "grad_norm": 0.8797780275344849, "learning_rate": 1.5782858352517795e-05, "loss": 0.7377, "step": 25870 }, { "epoch": 2.054416638552065, "grad_norm": 1.0097732543945312, "learning_rate": 1.5769627689132334e-05, "loss": 0.7732, "step": 25880 }, { "epoch": 2.0552104626009644, "grad_norm": 0.8882811069488525, "learning_rate": 1.5756397025746873e-05, "loss": 0.8136, "step": 25890 }, { "epoch": 
2.056004286649864, "grad_norm": 0.7051288485527039, "learning_rate": 1.574316636236141e-05, "loss": 0.7464, "step": 25900 }, { "epoch": 2.0567981106987636, "grad_norm": 0.9617118239402771, "learning_rate": 1.5729935698975948e-05, "loss": 0.7851, "step": 25910 }, { "epoch": 2.057591934747663, "grad_norm": 0.8823207020759583, "learning_rate": 1.5716705035590483e-05, "loss": 0.8155, "step": 25920 }, { "epoch": 2.058385758796563, "grad_norm": 1.0698038339614868, "learning_rate": 1.5703474372205022e-05, "loss": 0.798, "step": 25930 }, { "epoch": 2.0591795828454624, "grad_norm": 1.0213465690612793, "learning_rate": 1.569024370881956e-05, "loss": 0.7397, "step": 25940 }, { "epoch": 2.059973406894362, "grad_norm": 0.8252881765365601, "learning_rate": 1.56770130454341e-05, "loss": 0.7786, "step": 25950 }, { "epoch": 2.0607672309432616, "grad_norm": 0.79705810546875, "learning_rate": 1.5663782382048635e-05, "loss": 0.7649, "step": 25960 }, { "epoch": 2.061561054992161, "grad_norm": 0.7776365280151367, "learning_rate": 1.5650551718663174e-05, "loss": 0.7646, "step": 25970 }, { "epoch": 2.0623548790410604, "grad_norm": 0.7812894582748413, "learning_rate": 1.5637321055277713e-05, "loss": 0.7598, "step": 25980 }, { "epoch": 2.06314870308996, "grad_norm": 0.9928243160247803, "learning_rate": 1.5624090391892252e-05, "loss": 0.7991, "step": 25990 }, { "epoch": 2.0639425271388596, "grad_norm": 0.9473987221717834, "learning_rate": 1.5610859728506788e-05, "loss": 0.7417, "step": 26000 }, { "epoch": 2.0647363511877592, "grad_norm": 0.9683297276496887, "learning_rate": 1.5597629065121323e-05, "loss": 0.7986, "step": 26010 }, { "epoch": 2.065530175236659, "grad_norm": 0.9280180335044861, "learning_rate": 1.5584398401735866e-05, "loss": 0.7455, "step": 26020 }, { "epoch": 2.0663239992855584, "grad_norm": 0.8765127658843994, "learning_rate": 1.55711677383504e-05, "loss": 0.8731, "step": 26030 }, { "epoch": 2.067117823334458, "grad_norm": 0.7415598034858704, "learning_rate": 
1.555793707496494e-05, "loss": 0.8217, "step": 26040 }, { "epoch": 2.0679116473833576, "grad_norm": 0.8780352473258972, "learning_rate": 1.554602947791802e-05, "loss": 0.8236, "step": 26050 }, { "epoch": 2.0687054714322572, "grad_norm": 0.9112974405288696, "learning_rate": 1.5532798814532563e-05, "loss": 0.7893, "step": 26060 }, { "epoch": 2.0694992954811564, "grad_norm": 0.726959228515625, "learning_rate": 1.55195681511471e-05, "loss": 0.7708, "step": 26070 }, { "epoch": 2.070293119530056, "grad_norm": 0.9605491161346436, "learning_rate": 1.5506337487761638e-05, "loss": 0.7306, "step": 26080 }, { "epoch": 2.0710869435789556, "grad_norm": 0.6991214752197266, "learning_rate": 1.5493106824376173e-05, "loss": 0.8029, "step": 26090 }, { "epoch": 2.0718807676278552, "grad_norm": 0.947589099407196, "learning_rate": 1.5479876160990712e-05, "loss": 0.7885, "step": 26100 }, { "epoch": 2.072674591676755, "grad_norm": 0.9293213486671448, "learning_rate": 1.546664549760525e-05, "loss": 0.7285, "step": 26110 }, { "epoch": 2.0734684157256544, "grad_norm": 1.076295018196106, "learning_rate": 1.545341483421979e-05, "loss": 0.7704, "step": 26120 }, { "epoch": 2.074262239774554, "grad_norm": 0.894410252571106, "learning_rate": 1.5440184170834326e-05, "loss": 0.7298, "step": 26130 }, { "epoch": 2.0750560638234536, "grad_norm": 0.9316887855529785, "learning_rate": 1.5426953507448865e-05, "loss": 0.7989, "step": 26140 }, { "epoch": 2.0758498878723532, "grad_norm": 0.8590110540390015, "learning_rate": 1.5413722844063404e-05, "loss": 0.7369, "step": 26150 }, { "epoch": 2.076643711921253, "grad_norm": 0.8195733428001404, "learning_rate": 1.540049218067794e-05, "loss": 0.7837, "step": 26160 }, { "epoch": 2.077437535970152, "grad_norm": 0.8563514351844788, "learning_rate": 1.5387261517292478e-05, "loss": 0.7514, "step": 26170 }, { "epoch": 2.0782313600190516, "grad_norm": 0.8873001933097839, "learning_rate": 1.5374030853907014e-05, "loss": 0.7356, "step": 26180 }, { "epoch": 
2.0790251840679512, "grad_norm": 0.9553200006484985, "learning_rate": 1.5360800190521556e-05, "loss": 0.7611, "step": 26190 }, { "epoch": 2.079819008116851, "grad_norm": 0.782842218875885, "learning_rate": 1.534756952713609e-05, "loss": 0.7383, "step": 26200 }, { "epoch": 2.0806128321657504, "grad_norm": 0.9627733826637268, "learning_rate": 1.533433886375063e-05, "loss": 0.7796, "step": 26210 }, { "epoch": 2.08140665621465, "grad_norm": 0.7810627818107605, "learning_rate": 1.5321108200365166e-05, "loss": 0.7968, "step": 26220 }, { "epoch": 2.0822004802635496, "grad_norm": 0.8176783919334412, "learning_rate": 1.5307877536979705e-05, "loss": 0.7525, "step": 26230 }, { "epoch": 2.0829943043124493, "grad_norm": 0.9359534382820129, "learning_rate": 1.5294646873594244e-05, "loss": 0.8248, "step": 26240 }, { "epoch": 2.083788128361349, "grad_norm": 0.7079921960830688, "learning_rate": 1.528141621020878e-05, "loss": 0.8115, "step": 26250 }, { "epoch": 2.0845819524102485, "grad_norm": 0.8684226870536804, "learning_rate": 1.5268185546823318e-05, "loss": 0.7866, "step": 26260 }, { "epoch": 2.0853757764591476, "grad_norm": 0.8505408763885498, "learning_rate": 1.5254954883437855e-05, "loss": 0.7144, "step": 26270 }, { "epoch": 2.0861696005080472, "grad_norm": 0.953670084476471, "learning_rate": 1.5241724220052394e-05, "loss": 0.7999, "step": 26280 }, { "epoch": 2.086963424556947, "grad_norm": 1.1968601942062378, "learning_rate": 1.5228493556666932e-05, "loss": 0.7863, "step": 26290 }, { "epoch": 2.0877572486058464, "grad_norm": 0.7209346294403076, "learning_rate": 1.521526289328147e-05, "loss": 0.843, "step": 26300 }, { "epoch": 2.088551072654746, "grad_norm": 0.8510392904281616, "learning_rate": 1.5202032229896008e-05, "loss": 0.831, "step": 26310 }, { "epoch": 2.0893448967036456, "grad_norm": 0.899815559387207, "learning_rate": 1.5188801566510547e-05, "loss": 0.7577, "step": 26320 }, { "epoch": 2.0901387207525453, "grad_norm": 0.8000263571739197, "learning_rate": 
1.5175570903125084e-05, "loss": 0.7918, "step": 26330 }, { "epoch": 2.090932544801445, "grad_norm": 0.7844998240470886, "learning_rate": 1.5162340239739623e-05, "loss": 0.7903, "step": 26340 }, { "epoch": 2.0917263688503445, "grad_norm": 1.0130077600479126, "learning_rate": 1.5149109576354158e-05, "loss": 0.7545, "step": 26350 }, { "epoch": 2.092520192899244, "grad_norm": 0.8703743815422058, "learning_rate": 1.5135878912968696e-05, "loss": 0.8129, "step": 26360 }, { "epoch": 2.0933140169481432, "grad_norm": 0.6894111633300781, "learning_rate": 1.5122648249583235e-05, "loss": 0.7017, "step": 26370 }, { "epoch": 2.094107840997043, "grad_norm": 0.9596577882766724, "learning_rate": 1.5109417586197772e-05, "loss": 0.6868, "step": 26380 }, { "epoch": 2.0949016650459424, "grad_norm": 0.878044843673706, "learning_rate": 1.509618692281231e-05, "loss": 0.8629, "step": 26390 }, { "epoch": 2.095695489094842, "grad_norm": 0.9057536125183105, "learning_rate": 1.5082956259426848e-05, "loss": 0.7812, "step": 26400 }, { "epoch": 2.0964893131437417, "grad_norm": 0.8363611698150635, "learning_rate": 1.5069725596041387e-05, "loss": 0.7738, "step": 26410 }, { "epoch": 2.0972831371926413, "grad_norm": 0.7889995574951172, "learning_rate": 1.5056494932655924e-05, "loss": 0.8144, "step": 26420 }, { "epoch": 2.098076961241541, "grad_norm": 0.9280676245689392, "learning_rate": 1.5043264269270463e-05, "loss": 0.7792, "step": 26430 }, { "epoch": 2.0988707852904405, "grad_norm": 1.015638828277588, "learning_rate": 1.5030033605885e-05, "loss": 0.7745, "step": 26440 }, { "epoch": 2.09966460933934, "grad_norm": 0.9778096079826355, "learning_rate": 1.501680294249954e-05, "loss": 0.7636, "step": 26450 }, { "epoch": 2.1004584333882397, "grad_norm": 0.9082860946655273, "learning_rate": 1.5003572279114075e-05, "loss": 0.7399, "step": 26460 }, { "epoch": 2.101252257437139, "grad_norm": 0.8969444632530212, "learning_rate": 1.4990341615728615e-05, "loss": 0.7326, "step": 26470 }, { "epoch": 
2.1020460814860384, "grad_norm": 0.9246445298194885, "learning_rate": 1.4977110952343151e-05, "loss": 0.789, "step": 26480 }, { "epoch": 2.102839905534938, "grad_norm": 0.9216938614845276, "learning_rate": 1.4963880288957688e-05, "loss": 0.821, "step": 26490 }, { "epoch": 2.1036337295838377, "grad_norm": 0.7816628813743591, "learning_rate": 1.4950649625572227e-05, "loss": 0.7705, "step": 26500 }, { "epoch": 2.1044275536327373, "grad_norm": 1.04630446434021, "learning_rate": 1.4937418962186764e-05, "loss": 0.7733, "step": 26510 }, { "epoch": 2.105221377681637, "grad_norm": 0.9890500903129578, "learning_rate": 1.4924188298801303e-05, "loss": 0.8013, "step": 26520 }, { "epoch": 2.1060152017305365, "grad_norm": 0.8225802779197693, "learning_rate": 1.491095763541584e-05, "loss": 0.7539, "step": 26530 }, { "epoch": 2.106809025779436, "grad_norm": 0.7966319918632507, "learning_rate": 1.489772697203038e-05, "loss": 0.7814, "step": 26540 }, { "epoch": 2.1076028498283357, "grad_norm": 0.9166170954704285, "learning_rate": 1.4884496308644915e-05, "loss": 0.6954, "step": 26550 }, { "epoch": 2.1083966738772353, "grad_norm": 1.0586912631988525, "learning_rate": 1.4871265645259456e-05, "loss": 0.7287, "step": 26560 }, { "epoch": 2.109190497926135, "grad_norm": 0.9350046515464783, "learning_rate": 1.4858034981873991e-05, "loss": 0.7704, "step": 26570 }, { "epoch": 2.109984321975034, "grad_norm": 0.9269388914108276, "learning_rate": 1.4844804318488532e-05, "loss": 0.8284, "step": 26580 }, { "epoch": 2.1107781460239337, "grad_norm": 0.7918737530708313, "learning_rate": 1.4831573655103067e-05, "loss": 0.7947, "step": 26590 }, { "epoch": 2.1115719700728333, "grad_norm": 0.8778436183929443, "learning_rate": 1.4818342991717604e-05, "loss": 0.7774, "step": 26600 }, { "epoch": 2.112365794121733, "grad_norm": 1.0038212537765503, "learning_rate": 1.4805112328332143e-05, "loss": 0.7356, "step": 26610 }, { "epoch": 2.1131596181706325, "grad_norm": 0.811038076877594, "learning_rate": 
1.479188166494668e-05, "loss": 0.811, "step": 26620 }, { "epoch": 2.113953442219532, "grad_norm": 1.1587707996368408, "learning_rate": 1.477865100156122e-05, "loss": 0.8549, "step": 26630 }, { "epoch": 2.1147472662684317, "grad_norm": 0.74503493309021, "learning_rate": 1.4765420338175757e-05, "loss": 0.7897, "step": 26640 }, { "epoch": 2.1155410903173313, "grad_norm": 0.9020684361457825, "learning_rate": 1.4752189674790296e-05, "loss": 0.7894, "step": 26650 }, { "epoch": 2.116334914366231, "grad_norm": 0.7925030589103699, "learning_rate": 1.4738959011404831e-05, "loss": 0.8061, "step": 26660 }, { "epoch": 2.1171287384151305, "grad_norm": 0.8654730916023254, "learning_rate": 1.4725728348019372e-05, "loss": 0.7569, "step": 26670 }, { "epoch": 2.1179225624640297, "grad_norm": 0.9839343428611755, "learning_rate": 1.4712497684633907e-05, "loss": 0.8012, "step": 26680 }, { "epoch": 2.1187163865129293, "grad_norm": 0.8837997317314148, "learning_rate": 1.4699267021248448e-05, "loss": 0.8263, "step": 26690 }, { "epoch": 2.119510210561829, "grad_norm": 1.0431504249572754, "learning_rate": 1.4686036357862984e-05, "loss": 0.8411, "step": 26700 }, { "epoch": 2.1203040346107285, "grad_norm": 0.9431108236312866, "learning_rate": 1.467280569447752e-05, "loss": 0.786, "step": 26710 }, { "epoch": 2.121097858659628, "grad_norm": 0.8475701212882996, "learning_rate": 1.465957503109206e-05, "loss": 0.8422, "step": 26720 }, { "epoch": 2.1218916827085277, "grad_norm": 0.9545729160308838, "learning_rate": 1.4646344367706597e-05, "loss": 0.7607, "step": 26730 }, { "epoch": 2.1226855067574273, "grad_norm": 0.9317882061004639, "learning_rate": 1.4633113704321136e-05, "loss": 0.7591, "step": 26740 }, { "epoch": 2.123479330806327, "grad_norm": 0.8636899590492249, "learning_rate": 1.4619883040935673e-05, "loss": 0.7443, "step": 26750 }, { "epoch": 2.1242731548552265, "grad_norm": 0.9780907034873962, "learning_rate": 1.4606652377550212e-05, "loss": 0.7513, "step": 26760 }, { "epoch": 
2.125066978904126, "grad_norm": 0.9252169132232666, "learning_rate": 1.4593421714164748e-05, "loss": 0.8244, "step": 26770 }, { "epoch": 2.1258608029530253, "grad_norm": 0.8407522439956665, "learning_rate": 1.4580191050779288e-05, "loss": 0.7896, "step": 26780 }, { "epoch": 2.126654627001925, "grad_norm": 0.8842277526855469, "learning_rate": 1.4566960387393824e-05, "loss": 0.8509, "step": 26790 }, { "epoch": 2.1274484510508245, "grad_norm": 0.8318625092506409, "learning_rate": 1.4553729724008364e-05, "loss": 0.7577, "step": 26800 }, { "epoch": 2.128242275099724, "grad_norm": 1.0206186771392822, "learning_rate": 1.45404990606229e-05, "loss": 0.7706, "step": 26810 }, { "epoch": 2.1290360991486237, "grad_norm": 0.7975717782974243, "learning_rate": 1.4527268397237437e-05, "loss": 0.7187, "step": 26820 }, { "epoch": 2.1298299231975233, "grad_norm": 0.9346230030059814, "learning_rate": 1.4514037733851976e-05, "loss": 0.7833, "step": 26830 }, { "epoch": 2.130623747246423, "grad_norm": 0.8083703517913818, "learning_rate": 1.4500807070466513e-05, "loss": 0.8184, "step": 26840 }, { "epoch": 2.1314175712953225, "grad_norm": 1.0353641510009766, "learning_rate": 1.4487576407081052e-05, "loss": 0.8394, "step": 26850 }, { "epoch": 2.132211395344222, "grad_norm": 0.8950655460357666, "learning_rate": 1.447434574369559e-05, "loss": 0.8077, "step": 26860 }, { "epoch": 2.1330052193931217, "grad_norm": 1.034495234489441, "learning_rate": 1.4461115080310128e-05, "loss": 0.7665, "step": 26870 }, { "epoch": 2.133799043442021, "grad_norm": 0.8213537335395813, "learning_rate": 1.4447884416924664e-05, "loss": 0.8224, "step": 26880 }, { "epoch": 2.1345928674909205, "grad_norm": 1.1027780771255493, "learning_rate": 1.4434653753539205e-05, "loss": 0.7717, "step": 26890 }, { "epoch": 2.13538669153982, "grad_norm": 1.02985417842865, "learning_rate": 1.442142309015374e-05, "loss": 0.7465, "step": 26900 }, { "epoch": 2.1361805155887197, "grad_norm": 0.7433731555938721, "learning_rate": 
1.440819242676828e-05, "loss": 0.7882, "step": 26910 }, { "epoch": 2.1369743396376193, "grad_norm": 1.0033385753631592, "learning_rate": 1.4394961763382816e-05, "loss": 0.8659, "step": 26920 }, { "epoch": 2.137768163686519, "grad_norm": 0.7365638613700867, "learning_rate": 1.4381731099997355e-05, "loss": 0.7062, "step": 26930 }, { "epoch": 2.1385619877354185, "grad_norm": 1.0388671159744263, "learning_rate": 1.4368500436611892e-05, "loss": 0.8148, "step": 26940 }, { "epoch": 2.139355811784318, "grad_norm": 0.9410164952278137, "learning_rate": 1.435526977322643e-05, "loss": 0.6894, "step": 26950 }, { "epoch": 2.1401496358332177, "grad_norm": 0.903447151184082, "learning_rate": 1.4342039109840969e-05, "loss": 0.7527, "step": 26960 }, { "epoch": 2.1409434598821173, "grad_norm": 1.121335744857788, "learning_rate": 1.4328808446455506e-05, "loss": 0.8063, "step": 26970 }, { "epoch": 2.1417372839310165, "grad_norm": 0.8166680335998535, "learning_rate": 1.4315577783070045e-05, "loss": 0.8217, "step": 26980 }, { "epoch": 2.142531107979916, "grad_norm": 0.8533350825309753, "learning_rate": 1.430234711968458e-05, "loss": 0.8218, "step": 26990 }, { "epoch": 2.1433249320288157, "grad_norm": 0.9793769121170044, "learning_rate": 1.4289116456299121e-05, "loss": 0.7345, "step": 27000 }, { "epoch": 2.1441187560777153, "grad_norm": 0.9998401403427124, "learning_rate": 1.4275885792913656e-05, "loss": 0.7736, "step": 27010 }, { "epoch": 2.144912580126615, "grad_norm": 1.0122779607772827, "learning_rate": 1.4262655129528197e-05, "loss": 0.7703, "step": 27020 }, { "epoch": 2.1457064041755145, "grad_norm": 1.040964961051941, "learning_rate": 1.4249424466142733e-05, "loss": 0.7899, "step": 27030 }, { "epoch": 2.146500228224414, "grad_norm": 0.8178542852401733, "learning_rate": 1.4236193802757272e-05, "loss": 0.7742, "step": 27040 }, { "epoch": 2.1472940522733137, "grad_norm": 0.8384308815002441, "learning_rate": 1.4222963139371809e-05, "loss": 0.8275, "step": 27050 }, { "epoch": 
2.1480878763222133, "grad_norm": 0.9705979228019714, "learning_rate": 1.4209732475986346e-05, "loss": 0.7532, "step": 27060 }, { "epoch": 2.148881700371113, "grad_norm": 0.9038788080215454, "learning_rate": 1.4196501812600885e-05, "loss": 0.8162, "step": 27070 }, { "epoch": 2.1496755244200125, "grad_norm": 0.9565449953079224, "learning_rate": 1.4183271149215422e-05, "loss": 0.7742, "step": 27080 }, { "epoch": 2.1504693484689117, "grad_norm": 0.9967783093452454, "learning_rate": 1.4170040485829961e-05, "loss": 0.8269, "step": 27090 }, { "epoch": 2.1512631725178113, "grad_norm": 0.8932458162307739, "learning_rate": 1.4156809822444497e-05, "loss": 0.746, "step": 27100 }, { "epoch": 2.152056996566711, "grad_norm": 0.9235972762107849, "learning_rate": 1.4143579159059037e-05, "loss": 0.8389, "step": 27110 }, { "epoch": 2.1528508206156105, "grad_norm": 0.9918680787086487, "learning_rate": 1.4130348495673573e-05, "loss": 0.7368, "step": 27120 }, { "epoch": 2.15364464466451, "grad_norm": 1.0589817762374878, "learning_rate": 1.4117117832288113e-05, "loss": 0.708, "step": 27130 }, { "epoch": 2.1544384687134097, "grad_norm": 0.8974472284317017, "learning_rate": 1.4103887168902649e-05, "loss": 0.8292, "step": 27140 }, { "epoch": 2.1552322927623093, "grad_norm": 0.8955181837081909, "learning_rate": 1.4090656505517188e-05, "loss": 0.7798, "step": 27150 }, { "epoch": 2.156026116811209, "grad_norm": 1.0225322246551514, "learning_rate": 1.4077425842131725e-05, "loss": 0.7567, "step": 27160 }, { "epoch": 2.1568199408601085, "grad_norm": 0.7848957180976868, "learning_rate": 1.4064195178746262e-05, "loss": 0.8015, "step": 27170 }, { "epoch": 2.1576137649090077, "grad_norm": 0.916002631187439, "learning_rate": 1.4050964515360801e-05, "loss": 0.8266, "step": 27180 }, { "epoch": 2.1584075889579073, "grad_norm": 0.8571447134017944, "learning_rate": 1.4037733851975337e-05, "loss": 0.7878, "step": 27190 }, { "epoch": 2.159201413006807, "grad_norm": 0.8290430903434753, "learning_rate": 
1.4024503188589877e-05, "loss": 0.7503, "step": 27200 }, { "epoch": 2.1599952370557065, "grad_norm": 0.9717968702316284, "learning_rate": 1.4011272525204413e-05, "loss": 0.7839, "step": 27210 }, { "epoch": 2.160789061104606, "grad_norm": 0.8097538948059082, "learning_rate": 1.3998041861818954e-05, "loss": 0.784, "step": 27220 }, { "epoch": 2.1615828851535057, "grad_norm": 1.0527442693710327, "learning_rate": 1.3984811198433489e-05, "loss": 0.7795, "step": 27230 }, { "epoch": 2.1623767092024053, "grad_norm": 0.7761133909225464, "learning_rate": 1.397158053504803e-05, "loss": 0.796, "step": 27240 }, { "epoch": 2.163170533251305, "grad_norm": 1.009090781211853, "learning_rate": 1.3958349871662565e-05, "loss": 0.7895, "step": 27250 }, { "epoch": 2.1639643573002045, "grad_norm": 0.9476297497749329, "learning_rate": 1.3945119208277104e-05, "loss": 0.805, "step": 27260 }, { "epoch": 2.164758181349104, "grad_norm": 0.8424776792526245, "learning_rate": 1.3931888544891641e-05, "loss": 0.7752, "step": 27270 }, { "epoch": 2.1655520053980037, "grad_norm": 1.0687159299850464, "learning_rate": 1.391865788150618e-05, "loss": 0.7333, "step": 27280 }, { "epoch": 2.166345829446903, "grad_norm": 0.7562177777290344, "learning_rate": 1.3905427218120718e-05, "loss": 0.766, "step": 27290 }, { "epoch": 2.1671396534958025, "grad_norm": 0.9093291163444519, "learning_rate": 1.3892196554735253e-05, "loss": 0.7164, "step": 27300 }, { "epoch": 2.167933477544702, "grad_norm": 0.8972833156585693, "learning_rate": 1.3878965891349794e-05, "loss": 0.7667, "step": 27310 }, { "epoch": 2.1687273015936017, "grad_norm": 0.9251981377601624, "learning_rate": 1.386573522796433e-05, "loss": 0.7996, "step": 27320 }, { "epoch": 2.1695211256425013, "grad_norm": 0.8145211338996887, "learning_rate": 1.385250456457887e-05, "loss": 0.7725, "step": 27330 }, { "epoch": 2.170314949691401, "grad_norm": 0.8409168124198914, "learning_rate": 1.3839273901193405e-05, "loss": 0.7804, "step": 27340 }, { "epoch": 
2.1711087737403005, "grad_norm": 0.9111427068710327, "learning_rate": 1.3826043237807944e-05, "loss": 0.7775, "step": 27350 }, { "epoch": 2.1719025977892, "grad_norm": 0.91303551197052, "learning_rate": 1.3812812574422482e-05, "loss": 0.8227, "step": 27360 }, { "epoch": 2.1726964218380997, "grad_norm": 0.8870943188667297, "learning_rate": 1.379958191103702e-05, "loss": 0.7965, "step": 27370 }, { "epoch": 2.1734902458869994, "grad_norm": 0.8687139749526978, "learning_rate": 1.3786351247651558e-05, "loss": 0.799, "step": 27380 }, { "epoch": 2.1742840699358985, "grad_norm": 0.9708724617958069, "learning_rate": 1.3773120584266097e-05, "loss": 0.6977, "step": 27390 }, { "epoch": 2.175077893984798, "grad_norm": 0.8507083654403687, "learning_rate": 1.3759889920880634e-05, "loss": 0.8515, "step": 27400 }, { "epoch": 2.1758717180336977, "grad_norm": 0.9613845348358154, "learning_rate": 1.374665925749517e-05, "loss": 0.8635, "step": 27410 }, { "epoch": 2.1766655420825973, "grad_norm": 0.8473123908042908, "learning_rate": 1.373342859410971e-05, "loss": 0.7451, "step": 27420 }, { "epoch": 2.177459366131497, "grad_norm": 0.9432036280632019, "learning_rate": 1.3720197930724246e-05, "loss": 0.8018, "step": 27430 }, { "epoch": 2.1782531901803965, "grad_norm": 0.8323158025741577, "learning_rate": 1.3706967267338786e-05, "loss": 0.7799, "step": 27440 }, { "epoch": 2.179047014229296, "grad_norm": 1.166176438331604, "learning_rate": 1.3693736603953322e-05, "loss": 0.7891, "step": 27450 }, { "epoch": 2.1798408382781957, "grad_norm": 0.781601071357727, "learning_rate": 1.368050594056786e-05, "loss": 0.7851, "step": 27460 }, { "epoch": 2.1806346623270954, "grad_norm": 1.0222710371017456, "learning_rate": 1.3667275277182398e-05, "loss": 0.7159, "step": 27470 }, { "epoch": 2.181428486375995, "grad_norm": 0.758436381816864, "learning_rate": 1.3654044613796937e-05, "loss": 0.7342, "step": 27480 }, { "epoch": 2.182222310424894, "grad_norm": 0.8809502124786377, "learning_rate": 
1.3640813950411474e-05, "loss": 0.8097, "step": 27490 }, { "epoch": 2.1830161344737937, "grad_norm": 0.8321342468261719, "learning_rate": 1.3627583287026013e-05, "loss": 0.756, "step": 27500 }, { "epoch": 2.1838099585226933, "grad_norm": 0.8868312835693359, "learning_rate": 1.361435262364055e-05, "loss": 0.8246, "step": 27510 }, { "epoch": 2.184603782571593, "grad_norm": 0.8987812995910645, "learning_rate": 1.3601121960255086e-05, "loss": 0.8034, "step": 27520 }, { "epoch": 2.1853976066204925, "grad_norm": 0.8325653076171875, "learning_rate": 1.3587891296869626e-05, "loss": 0.8197, "step": 27530 }, { "epoch": 2.186191430669392, "grad_norm": 0.9166516065597534, "learning_rate": 1.3574660633484162e-05, "loss": 0.7413, "step": 27540 }, { "epoch": 2.1869852547182917, "grad_norm": 0.9565515518188477, "learning_rate": 1.3561429970098703e-05, "loss": 0.7971, "step": 27550 }, { "epoch": 2.1877790787671914, "grad_norm": 0.8458524346351624, "learning_rate": 1.3548199306713238e-05, "loss": 0.756, "step": 27560 }, { "epoch": 2.188572902816091, "grad_norm": 0.7264953255653381, "learning_rate": 1.3534968643327777e-05, "loss": 0.798, "step": 27570 }, { "epoch": 2.1893667268649906, "grad_norm": 0.9435407519340515, "learning_rate": 1.3521737979942314e-05, "loss": 0.7956, "step": 27580 }, { "epoch": 2.19016055091389, "grad_norm": 0.9455521106719971, "learning_rate": 1.3508507316556853e-05, "loss": 0.7712, "step": 27590 }, { "epoch": 2.1909543749627893, "grad_norm": 1.1080188751220703, "learning_rate": 1.349527665317139e-05, "loss": 0.7544, "step": 27600 }, { "epoch": 2.191748199011689, "grad_norm": 0.98456871509552, "learning_rate": 1.348204598978593e-05, "loss": 0.8196, "step": 27610 }, { "epoch": 2.1925420230605885, "grad_norm": 0.8413136005401611, "learning_rate": 1.3468815326400467e-05, "loss": 0.7864, "step": 27620 }, { "epoch": 2.193335847109488, "grad_norm": 0.7672378420829773, "learning_rate": 1.3455584663015002e-05, "loss": 0.7624, "step": 27630 }, { "epoch": 
2.1941296711583878, "grad_norm": 0.9120532274246216, "learning_rate": 1.3442353999629543e-05, "loss": 0.7425, "step": 27640 }, { "epoch": 2.1949234952072874, "grad_norm": 0.9745380878448486, "learning_rate": 1.3429123336244078e-05, "loss": 0.8149, "step": 27650 }, { "epoch": 2.195717319256187, "grad_norm": 0.8304915428161621, "learning_rate": 1.3415892672858619e-05, "loss": 0.7609, "step": 27660 }, { "epoch": 2.1965111433050866, "grad_norm": 0.9230349659919739, "learning_rate": 1.3402662009473154e-05, "loss": 0.7648, "step": 27670 }, { "epoch": 2.197304967353986, "grad_norm": 1.0619224309921265, "learning_rate": 1.3389431346087693e-05, "loss": 0.7887, "step": 27680 }, { "epoch": 2.1980987914028853, "grad_norm": 1.0033013820648193, "learning_rate": 1.337620068270223e-05, "loss": 0.805, "step": 27690 }, { "epoch": 2.198892615451785, "grad_norm": 0.9245965480804443, "learning_rate": 1.336297001931677e-05, "loss": 0.8012, "step": 27700 }, { "epoch": 2.1996864395006845, "grad_norm": 1.0526070594787598, "learning_rate": 1.3349739355931307e-05, "loss": 0.7792, "step": 27710 }, { "epoch": 2.200480263549584, "grad_norm": 0.7733340859413147, "learning_rate": 1.3336508692545846e-05, "loss": 0.7708, "step": 27720 }, { "epoch": 2.2012740875984838, "grad_norm": 1.1390515565872192, "learning_rate": 1.3323278029160383e-05, "loss": 0.7797, "step": 27730 }, { "epoch": 2.2020679116473834, "grad_norm": 0.8812433481216431, "learning_rate": 1.3310047365774922e-05, "loss": 0.719, "step": 27740 }, { "epoch": 2.202861735696283, "grad_norm": 0.8924884796142578, "learning_rate": 1.3296816702389459e-05, "loss": 0.836, "step": 27750 }, { "epoch": 2.2036555597451826, "grad_norm": 0.9707908630371094, "learning_rate": 1.3283586039003995e-05, "loss": 0.7732, "step": 27760 }, { "epoch": 2.204449383794082, "grad_norm": 0.904464602470398, "learning_rate": 1.3270355375618535e-05, "loss": 0.8084, "step": 27770 }, { "epoch": 2.205243207842982, "grad_norm": 0.8692517280578613, "learning_rate": 
1.325712471223307e-05, "loss": 0.8074, "step": 27780 }, { "epoch": 2.2060370318918814, "grad_norm": 0.9948639869689941, "learning_rate": 1.324389404884761e-05, "loss": 0.7936, "step": 27790 }, { "epoch": 2.2068308559407805, "grad_norm": 0.915266215801239, "learning_rate": 1.3230663385462147e-05, "loss": 0.8084, "step": 27800 }, { "epoch": 2.20762467998968, "grad_norm": 0.8853785395622253, "learning_rate": 1.3217432722076686e-05, "loss": 0.774, "step": 27810 }, { "epoch": 2.2084185040385798, "grad_norm": 1.0445677042007446, "learning_rate": 1.3204202058691223e-05, "loss": 0.8096, "step": 27820 }, { "epoch": 2.2092123280874794, "grad_norm": 0.9219695925712585, "learning_rate": 1.3190971395305762e-05, "loss": 0.8174, "step": 27830 }, { "epoch": 2.210006152136379, "grad_norm": 0.727891206741333, "learning_rate": 1.31777407319203e-05, "loss": 0.7495, "step": 27840 }, { "epoch": 2.2107999761852786, "grad_norm": 0.8732167482376099, "learning_rate": 1.3164510068534838e-05, "loss": 0.7907, "step": 27850 }, { "epoch": 2.211593800234178, "grad_norm": 0.9940975904464722, "learning_rate": 1.3151279405149375e-05, "loss": 0.8013, "step": 27860 }, { "epoch": 2.212387624283078, "grad_norm": 0.9644079804420471, "learning_rate": 1.3138048741763911e-05, "loss": 0.7781, "step": 27870 }, { "epoch": 2.2131814483319774, "grad_norm": 0.957815945148468, "learning_rate": 1.3124818078378452e-05, "loss": 0.7853, "step": 27880 }, { "epoch": 2.2139752723808765, "grad_norm": 0.9299752712249756, "learning_rate": 1.3111587414992987e-05, "loss": 0.7654, "step": 27890 }, { "epoch": 2.214769096429776, "grad_norm": 0.763888418674469, "learning_rate": 1.3098356751607526e-05, "loss": 0.8254, "step": 27900 }, { "epoch": 2.2155629204786758, "grad_norm": 0.9485118389129639, "learning_rate": 1.3085126088222063e-05, "loss": 0.7565, "step": 27910 }, { "epoch": 2.2163567445275754, "grad_norm": 0.7794759273529053, "learning_rate": 1.3071895424836602e-05, "loss": 0.7794, "step": 27920 }, { "epoch": 
2.217150568576475, "grad_norm": 0.8612314462661743, "learning_rate": 1.305866476145114e-05, "loss": 0.8535, "step": 27930 }, { "epoch": 2.2179443926253746, "grad_norm": 0.926816463470459, "learning_rate": 1.3045434098065678e-05, "loss": 0.7421, "step": 27940 }, { "epoch": 2.218738216674274, "grad_norm": 0.8914726376533508, "learning_rate": 1.3032203434680216e-05, "loss": 0.7918, "step": 27950 }, { "epoch": 2.219532040723174, "grad_norm": 0.7953301072120667, "learning_rate": 1.3018972771294755e-05, "loss": 0.7813, "step": 27960 }, { "epoch": 2.2203258647720734, "grad_norm": 1.100185513496399, "learning_rate": 1.3005742107909292e-05, "loss": 0.8074, "step": 27970 }, { "epoch": 2.221119688820973, "grad_norm": 0.9314887523651123, "learning_rate": 1.2992511444523827e-05, "loss": 0.7986, "step": 27980 }, { "epoch": 2.2219135128698726, "grad_norm": 0.9188593626022339, "learning_rate": 1.2979280781138366e-05, "loss": 0.7485, "step": 27990 }, { "epoch": 2.2227073369187718, "grad_norm": 1.0341116189956665, "learning_rate": 1.2966050117752903e-05, "loss": 0.7407, "step": 28000 }, { "epoch": 2.2235011609676714, "grad_norm": 0.8388242125511169, "learning_rate": 1.2952819454367442e-05, "loss": 0.8283, "step": 28010 }, { "epoch": 2.224294985016571, "grad_norm": 0.9826040267944336, "learning_rate": 1.293958879098198e-05, "loss": 0.7214, "step": 28020 }, { "epoch": 2.2250888090654706, "grad_norm": 0.7271533608436584, "learning_rate": 1.2926358127596519e-05, "loss": 0.836, "step": 28030 }, { "epoch": 2.22588263311437, "grad_norm": 0.795552134513855, "learning_rate": 1.2913127464211056e-05, "loss": 0.8255, "step": 28040 }, { "epoch": 2.22667645716327, "grad_norm": 0.8182919025421143, "learning_rate": 1.2899896800825595e-05, "loss": 0.7078, "step": 28050 }, { "epoch": 2.2274702812121694, "grad_norm": 0.9969826340675354, "learning_rate": 1.2886666137440132e-05, "loss": 0.7659, "step": 28060 }, { "epoch": 2.228264105261069, "grad_norm": 0.8955338001251221, "learning_rate": 
1.2873435474054671e-05, "loss": 0.8228, "step": 28070 }, { "epoch": 2.2290579293099686, "grad_norm": 0.9254652261734009, "learning_rate": 1.2860204810669208e-05, "loss": 0.8226, "step": 28080 }, { "epoch": 2.229851753358868, "grad_norm": 0.8870140910148621, "learning_rate": 1.2846974147283744e-05, "loss": 0.8239, "step": 28090 }, { "epoch": 2.2306455774077674, "grad_norm": 1.0438228845596313, "learning_rate": 1.2833743483898283e-05, "loss": 0.7559, "step": 28100 }, { "epoch": 2.231439401456667, "grad_norm": 1.0301085710525513, "learning_rate": 1.282051282051282e-05, "loss": 0.7513, "step": 28110 }, { "epoch": 2.2322332255055666, "grad_norm": 0.9675314426422119, "learning_rate": 1.2807282157127359e-05, "loss": 0.8013, "step": 28120 }, { "epoch": 2.233027049554466, "grad_norm": 1.017149806022644, "learning_rate": 1.2794051493741896e-05, "loss": 0.7762, "step": 28130 }, { "epoch": 2.233820873603366, "grad_norm": 0.8916369080543518, "learning_rate": 1.2780820830356435e-05, "loss": 0.8421, "step": 28140 }, { "epoch": 2.2346146976522654, "grad_norm": 0.8855636119842529, "learning_rate": 1.2767590166970972e-05, "loss": 0.792, "step": 28150 }, { "epoch": 2.235408521701165, "grad_norm": 0.758443295955658, "learning_rate": 1.2754359503585511e-05, "loss": 0.7603, "step": 28160 }, { "epoch": 2.2362023457500646, "grad_norm": 1.0338447093963623, "learning_rate": 1.2741128840200048e-05, "loss": 0.787, "step": 28170 }, { "epoch": 2.236996169798964, "grad_norm": 1.0957199335098267, "learning_rate": 1.2727898176814587e-05, "loss": 0.7863, "step": 28180 }, { "epoch": 2.237789993847864, "grad_norm": 0.8449088335037231, "learning_rate": 1.2714667513429124e-05, "loss": 0.782, "step": 28190 }, { "epoch": 2.238583817896763, "grad_norm": 0.9781031012535095, "learning_rate": 1.2701436850043663e-05, "loss": 0.777, "step": 28200 }, { "epoch": 2.2393776419456626, "grad_norm": 0.9978182315826416, "learning_rate": 1.2688206186658199e-05, "loss": 0.7937, "step": 28210 }, { "epoch": 
2.240171465994562, "grad_norm": 1.0001929998397827, "learning_rate": 1.2674975523272736e-05, "loss": 0.8321, "step": 28220 }, { "epoch": 2.240965290043462, "grad_norm": 0.9343948364257812, "learning_rate": 1.2661744859887275e-05, "loss": 0.7676, "step": 28230 }, { "epoch": 2.2417591140923614, "grad_norm": 1.0100466012954712, "learning_rate": 1.2648514196501812e-05, "loss": 0.7627, "step": 28240 }, { "epoch": 2.242552938141261, "grad_norm": 0.7554891109466553, "learning_rate": 1.2635283533116351e-05, "loss": 0.7973, "step": 28250 }, { "epoch": 2.2433467621901606, "grad_norm": 0.9241369366645813, "learning_rate": 1.2622052869730888e-05, "loss": 0.7823, "step": 28260 }, { "epoch": 2.24414058623906, "grad_norm": 0.9405050277709961, "learning_rate": 1.2608822206345427e-05, "loss": 0.7655, "step": 28270 }, { "epoch": 2.24493441028796, "grad_norm": 0.8088013529777527, "learning_rate": 1.2595591542959965e-05, "loss": 0.7726, "step": 28280 }, { "epoch": 2.2457282343368594, "grad_norm": 0.9160634279251099, "learning_rate": 1.2582360879574504e-05, "loss": 0.7966, "step": 28290 }, { "epoch": 2.246522058385759, "grad_norm": 0.9134334921836853, "learning_rate": 1.256913021618904e-05, "loss": 0.7318, "step": 28300 }, { "epoch": 2.247315882434658, "grad_norm": 0.9910647869110107, "learning_rate": 1.255589955280358e-05, "loss": 0.8372, "step": 28310 }, { "epoch": 2.248109706483558, "grad_norm": 0.9692395329475403, "learning_rate": 1.2542668889418115e-05, "loss": 0.7783, "step": 28320 }, { "epoch": 2.2489035305324574, "grad_norm": 0.9558752179145813, "learning_rate": 1.2529438226032653e-05, "loss": 0.7304, "step": 28330 }, { "epoch": 2.249697354581357, "grad_norm": 1.0415302515029907, "learning_rate": 1.2516207562647191e-05, "loss": 0.8635, "step": 28340 }, { "epoch": 2.2504911786302566, "grad_norm": 0.7867638468742371, "learning_rate": 1.2502976899261729e-05, "loss": 0.8164, "step": 28350 }, { "epoch": 2.251285002679156, "grad_norm": 0.7861484885215759, "learning_rate": 
1.2489746235876268e-05, "loss": 0.8794, "step": 28360 }, { "epoch": 2.252078826728056, "grad_norm": 0.8788695335388184, "learning_rate": 1.2476515572490807e-05, "loss": 0.8134, "step": 28370 }, { "epoch": 2.2528726507769554, "grad_norm": 0.9511646032333374, "learning_rate": 1.2463284909105342e-05, "loss": 0.7574, "step": 28380 }, { "epoch": 2.253666474825855, "grad_norm": 1.1273367404937744, "learning_rate": 1.2450054245719881e-05, "loss": 0.7506, "step": 28390 }, { "epoch": 2.254460298874754, "grad_norm": 0.773710310459137, "learning_rate": 1.2436823582334418e-05, "loss": 0.8182, "step": 28400 }, { "epoch": 2.255254122923654, "grad_norm": 0.8454147577285767, "learning_rate": 1.2423592918948957e-05, "loss": 0.7827, "step": 28410 }, { "epoch": 2.2560479469725534, "grad_norm": 0.7909261584281921, "learning_rate": 1.2410362255563494e-05, "loss": 0.854, "step": 28420 }, { "epoch": 2.256841771021453, "grad_norm": 1.1431065797805786, "learning_rate": 1.2397131592178032e-05, "loss": 0.7735, "step": 28430 }, { "epoch": 2.2576355950703526, "grad_norm": 0.8554138541221619, "learning_rate": 1.238390092879257e-05, "loss": 0.7886, "step": 28440 }, { "epoch": 2.258429419119252, "grad_norm": 0.801655113697052, "learning_rate": 1.2370670265407108e-05, "loss": 0.8245, "step": 28450 }, { "epoch": 2.259223243168152, "grad_norm": 0.8901875019073486, "learning_rate": 1.2357439602021647e-05, "loss": 0.7802, "step": 28460 }, { "epoch": 2.2600170672170514, "grad_norm": 1.0075801610946655, "learning_rate": 1.2344208938636184e-05, "loss": 0.763, "step": 28470 }, { "epoch": 2.260810891265951, "grad_norm": 1.1321099996566772, "learning_rate": 1.2330978275250723e-05, "loss": 0.8004, "step": 28480 }, { "epoch": 2.2616047153148506, "grad_norm": 0.7835102081298828, "learning_rate": 1.231774761186526e-05, "loss": 0.7653, "step": 28490 }, { "epoch": 2.2623985393637502, "grad_norm": 0.9481699466705322, "learning_rate": 1.2304516948479797e-05, "loss": 0.7393, "step": 28500 }, { "epoch": 
2.2631923634126494, "grad_norm": 0.876270055770874, "learning_rate": 1.2291286285094335e-05, "loss": 0.7817, "step": 28510 }, { "epoch": 2.263986187461549, "grad_norm": 0.9594854712486267, "learning_rate": 1.2278055621708873e-05, "loss": 0.8727, "step": 28520 }, { "epoch": 2.2647800115104486, "grad_norm": 0.919558584690094, "learning_rate": 1.226482495832341e-05, "loss": 0.8242, "step": 28530 }, { "epoch": 2.265573835559348, "grad_norm": 0.7271903157234192, "learning_rate": 1.2251594294937948e-05, "loss": 0.7551, "step": 28540 }, { "epoch": 2.266367659608248, "grad_norm": 1.0002189874649048, "learning_rate": 1.2238363631552487e-05, "loss": 0.7631, "step": 28550 }, { "epoch": 2.2671614836571474, "grad_norm": 0.8755606412887573, "learning_rate": 1.2225132968167024e-05, "loss": 0.7551, "step": 28560 }, { "epoch": 2.267955307706047, "grad_norm": 0.8737064599990845, "learning_rate": 1.2211902304781563e-05, "loss": 0.7563, "step": 28570 }, { "epoch": 2.2687491317549466, "grad_norm": 1.0743333101272583, "learning_rate": 1.21986716413961e-05, "loss": 0.7898, "step": 28580 }, { "epoch": 2.2695429558038462, "grad_norm": 0.7818424105644226, "learning_rate": 1.218544097801064e-05, "loss": 0.8337, "step": 28590 }, { "epoch": 2.2703367798527454, "grad_norm": 0.8167740702629089, "learning_rate": 1.2172210314625176e-05, "loss": 0.7188, "step": 28600 }, { "epoch": 2.271130603901645, "grad_norm": 1.0544767379760742, "learning_rate": 1.2158979651239714e-05, "loss": 0.7083, "step": 28610 }, { "epoch": 2.2719244279505446, "grad_norm": 0.9623509049415588, "learning_rate": 1.2145748987854251e-05, "loss": 0.8609, "step": 28620 }, { "epoch": 2.272718251999444, "grad_norm": 0.9412724375724792, "learning_rate": 1.213251832446879e-05, "loss": 0.7785, "step": 28630 }, { "epoch": 2.273512076048344, "grad_norm": 0.9624713063240051, "learning_rate": 1.2119287661083327e-05, "loss": 0.7225, "step": 28640 }, { "epoch": 2.2743059000972434, "grad_norm": 0.958265483379364, "learning_rate": 
1.2106056997697864e-05, "loss": 0.7591, "step": 28650 }, { "epoch": 2.275099724146143, "grad_norm": 1.0588797330856323, "learning_rate": 1.2092826334312403e-05, "loss": 0.7488, "step": 28660 }, { "epoch": 2.2758935481950426, "grad_norm": 0.8400679230690002, "learning_rate": 1.207959567092694e-05, "loss": 0.7603, "step": 28670 }, { "epoch": 2.2766873722439422, "grad_norm": 0.8208017349243164, "learning_rate": 1.206636500754148e-05, "loss": 0.7729, "step": 28680 }, { "epoch": 2.277481196292842, "grad_norm": 0.8255361914634705, "learning_rate": 1.2053134344156017e-05, "loss": 0.7548, "step": 28690 }, { "epoch": 2.2782750203417415, "grad_norm": 1.0541584491729736, "learning_rate": 1.2039903680770556e-05, "loss": 0.7856, "step": 28700 }, { "epoch": 2.2790688443906406, "grad_norm": 1.1183170080184937, "learning_rate": 1.2026673017385093e-05, "loss": 0.7884, "step": 28710 }, { "epoch": 2.27986266843954, "grad_norm": 0.8430930376052856, "learning_rate": 1.201344235399963e-05, "loss": 0.8003, "step": 28720 }, { "epoch": 2.28065649248844, "grad_norm": 0.872404932975769, "learning_rate": 1.2000211690614167e-05, "loss": 0.7212, "step": 28730 }, { "epoch": 2.2814503165373394, "grad_norm": 0.8843950033187866, "learning_rate": 1.1986981027228704e-05, "loss": 0.7738, "step": 28740 }, { "epoch": 2.282244140586239, "grad_norm": 0.8415934443473816, "learning_rate": 1.1973750363843243e-05, "loss": 0.7609, "step": 28750 }, { "epoch": 2.2830379646351386, "grad_norm": 0.7827284932136536, "learning_rate": 1.196051970045778e-05, "loss": 0.7876, "step": 28760 }, { "epoch": 2.2838317886840382, "grad_norm": 1.0107696056365967, "learning_rate": 1.194728903707232e-05, "loss": 0.7452, "step": 28770 }, { "epoch": 2.284625612732938, "grad_norm": 0.802697479724884, "learning_rate": 1.1934058373686857e-05, "loss": 0.8176, "step": 28780 }, { "epoch": 2.2854194367818375, "grad_norm": 0.8944302797317505, "learning_rate": 1.1920827710301396e-05, "loss": 0.7346, "step": 28790 }, { "epoch": 
2.286213260830737, "grad_norm": 0.8373863101005554, "learning_rate": 1.1907597046915933e-05, "loss": 0.7585, "step": 28800 }, { "epoch": 2.2870070848796367, "grad_norm": 0.964019238948822, "learning_rate": 1.1894366383530472e-05, "loss": 0.7473, "step": 28810 }, { "epoch": 2.287800908928536, "grad_norm": 1.0196411609649658, "learning_rate": 1.1881135720145009e-05, "loss": 0.7552, "step": 28820 }, { "epoch": 2.2885947329774354, "grad_norm": 0.758881688117981, "learning_rate": 1.1867905056759546e-05, "loss": 0.8031, "step": 28830 }, { "epoch": 2.289388557026335, "grad_norm": 0.768775999546051, "learning_rate": 1.1854674393374085e-05, "loss": 0.7993, "step": 28840 }, { "epoch": 2.2901823810752346, "grad_norm": 0.9451555013656616, "learning_rate": 1.184144372998862e-05, "loss": 0.8086, "step": 28850 }, { "epoch": 2.2909762051241342, "grad_norm": 0.9371961951255798, "learning_rate": 1.182821306660316e-05, "loss": 0.7808, "step": 28860 }, { "epoch": 2.291770029173034, "grad_norm": 0.7734199166297913, "learning_rate": 1.1814982403217697e-05, "loss": 0.7633, "step": 28870 }, { "epoch": 2.2925638532219335, "grad_norm": 0.787481427192688, "learning_rate": 1.1801751739832236e-05, "loss": 0.837, "step": 28880 }, { "epoch": 2.293357677270833, "grad_norm": 0.9613273739814758, "learning_rate": 1.1788521076446773e-05, "loss": 0.726, "step": 28890 }, { "epoch": 2.2941515013197327, "grad_norm": 0.9527220129966736, "learning_rate": 1.1775290413061312e-05, "loss": 0.7615, "step": 28900 }, { "epoch": 2.294945325368632, "grad_norm": 0.9209556579589844, "learning_rate": 1.176205974967585e-05, "loss": 0.8216, "step": 28910 }, { "epoch": 2.2957391494175314, "grad_norm": 0.9544232487678528, "learning_rate": 1.1748829086290388e-05, "loss": 0.794, "step": 28920 }, { "epoch": 2.296532973466431, "grad_norm": 0.9990895390510559, "learning_rate": 1.1735598422904925e-05, "loss": 0.7551, "step": 28930 }, { "epoch": 2.2973267975153306, "grad_norm": 1.1743451356887817, "learning_rate": 
1.1722367759519463e-05, "loss": 0.7476, "step": 28940 }, { "epoch": 2.2981206215642302, "grad_norm": 0.9132225513458252, "learning_rate": 1.1709137096134002e-05, "loss": 0.7687, "step": 28950 }, { "epoch": 2.29891444561313, "grad_norm": 0.7507365345954895, "learning_rate": 1.1695906432748537e-05, "loss": 0.7469, "step": 28960 }, { "epoch": 2.2997082696620295, "grad_norm": 0.8947992920875549, "learning_rate": 1.1682675769363076e-05, "loss": 0.8074, "step": 28970 }, { "epoch": 2.300502093710929, "grad_norm": 1.213215708732605, "learning_rate": 1.1669445105977613e-05, "loss": 0.7489, "step": 28980 }, { "epoch": 2.3012959177598287, "grad_norm": 0.8981990218162537, "learning_rate": 1.1656214442592152e-05, "loss": 0.7278, "step": 28990 }, { "epoch": 2.3020897418087283, "grad_norm": 0.820511519908905, "learning_rate": 1.164298377920669e-05, "loss": 0.7792, "step": 29000 }, { "epoch": 2.302883565857628, "grad_norm": 1.1118419170379639, "learning_rate": 1.1629753115821228e-05, "loss": 0.7267, "step": 29010 }, { "epoch": 2.303677389906527, "grad_norm": 1.0339609384536743, "learning_rate": 1.1616522452435766e-05, "loss": 0.7933, "step": 29020 }, { "epoch": 2.3044712139554266, "grad_norm": 0.8476637601852417, "learning_rate": 1.1603291789050305e-05, "loss": 0.7775, "step": 29030 }, { "epoch": 2.3052650380043263, "grad_norm": 0.7746336460113525, "learning_rate": 1.1590061125664842e-05, "loss": 0.7619, "step": 29040 }, { "epoch": 2.306058862053226, "grad_norm": 1.1253118515014648, "learning_rate": 1.1576830462279379e-05, "loss": 0.7724, "step": 29050 }, { "epoch": 2.3068526861021255, "grad_norm": 1.1791478395462036, "learning_rate": 1.1563599798893918e-05, "loss": 0.794, "step": 29060 }, { "epoch": 2.307646510151025, "grad_norm": 0.935023307800293, "learning_rate": 1.1550369135508455e-05, "loss": 0.7548, "step": 29070 }, { "epoch": 2.3084403341999247, "grad_norm": 0.8298285603523254, "learning_rate": 1.1537138472122992e-05, "loss": 0.7939, "step": 29080 }, { "epoch": 
2.3092341582488243, "grad_norm": 0.8414050340652466, "learning_rate": 1.152390780873753e-05, "loss": 0.8132, "step": 29090 }, { "epoch": 2.310027982297724, "grad_norm": 0.9045025110244751, "learning_rate": 1.1510677145352069e-05, "loss": 0.7828, "step": 29100 }, { "epoch": 2.310821806346623, "grad_norm": 0.8417425751686096, "learning_rate": 1.1497446481966606e-05, "loss": 0.8046, "step": 29110 }, { "epoch": 2.3116156303955226, "grad_norm": 0.9332298040390015, "learning_rate": 1.1484215818581145e-05, "loss": 0.8187, "step": 29120 }, { "epoch": 2.3124094544444223, "grad_norm": 1.017712116241455, "learning_rate": 1.1470985155195682e-05, "loss": 0.8168, "step": 29130 }, { "epoch": 2.313203278493322, "grad_norm": 0.9712084531784058, "learning_rate": 1.145775449181022e-05, "loss": 0.7914, "step": 29140 }, { "epoch": 2.3139971025422215, "grad_norm": 0.8326614499092102, "learning_rate": 1.1444523828424758e-05, "loss": 0.757, "step": 29150 }, { "epoch": 2.314790926591121, "grad_norm": 0.7456022500991821, "learning_rate": 1.1431293165039295e-05, "loss": 0.8309, "step": 29160 }, { "epoch": 2.3155847506400207, "grad_norm": 0.895487368106842, "learning_rate": 1.1418062501653834e-05, "loss": 0.7881, "step": 29170 }, { "epoch": 2.3163785746889203, "grad_norm": 0.9077318906784058, "learning_rate": 1.1404831838268372e-05, "loss": 0.8497, "step": 29180 }, { "epoch": 2.31717239873782, "grad_norm": 0.7892618775367737, "learning_rate": 1.1391601174882909e-05, "loss": 0.7859, "step": 29190 }, { "epoch": 2.3179662227867195, "grad_norm": 0.8005306720733643, "learning_rate": 1.1378370511497446e-05, "loss": 0.7707, "step": 29200 }, { "epoch": 2.318760046835619, "grad_norm": 1.0797233581542969, "learning_rate": 1.1365139848111985e-05, "loss": 0.7446, "step": 29210 }, { "epoch": 2.3195538708845183, "grad_norm": 0.9487477540969849, "learning_rate": 1.135323225106507e-05, "loss": 0.8089, "step": 29220 }, { "epoch": 2.320347694933418, "grad_norm": 0.9373265504837036, "learning_rate": 
1.1340001587679607e-05, "loss": 0.7926, "step": 29230 }, { "epoch": 2.3211415189823175, "grad_norm": 1.0408942699432373, "learning_rate": 1.1326770924294144e-05, "loss": 0.8057, "step": 29240 }, { "epoch": 2.321935343031217, "grad_norm": 0.9446254968643188, "learning_rate": 1.1313540260908683e-05, "loss": 0.8202, "step": 29250 }, { "epoch": 2.3227291670801167, "grad_norm": 1.1571518182754517, "learning_rate": 1.130030959752322e-05, "loss": 0.7437, "step": 29260 }, { "epoch": 2.3235229911290163, "grad_norm": 0.8599859476089478, "learning_rate": 1.1287078934137759e-05, "loss": 0.7217, "step": 29270 }, { "epoch": 2.324316815177916, "grad_norm": 0.9582074880599976, "learning_rate": 1.1273848270752296e-05, "loss": 0.8472, "step": 29280 }, { "epoch": 2.3251106392268155, "grad_norm": 0.9550096392631531, "learning_rate": 1.1260617607366835e-05, "loss": 0.7503, "step": 29290 }, { "epoch": 2.325904463275715, "grad_norm": 0.972760796546936, "learning_rate": 1.1247386943981372e-05, "loss": 0.7296, "step": 29300 }, { "epoch": 2.3266982873246143, "grad_norm": 0.9969944953918457, "learning_rate": 1.123415628059591e-05, "loss": 0.7982, "step": 29310 }, { "epoch": 2.3274921113735143, "grad_norm": 0.9653575420379639, "learning_rate": 1.1220925617210447e-05, "loss": 0.8543, "step": 29320 }, { "epoch": 2.3282859354224135, "grad_norm": 0.8924707174301147, "learning_rate": 1.1207694953824986e-05, "loss": 0.7601, "step": 29330 }, { "epoch": 2.329079759471313, "grad_norm": 0.9261003136634827, "learning_rate": 1.1194464290439523e-05, "loss": 0.7984, "step": 29340 }, { "epoch": 2.3298735835202127, "grad_norm": 0.678521454334259, "learning_rate": 1.118123362705406e-05, "loss": 0.8383, "step": 29350 }, { "epoch": 2.3306674075691123, "grad_norm": 1.122550368309021, "learning_rate": 1.1168002963668599e-05, "loss": 0.7964, "step": 29360 }, { "epoch": 2.331461231618012, "grad_norm": 0.8651812672615051, "learning_rate": 1.1154772300283136e-05, "loss": 0.7974, "step": 29370 }, { "epoch": 
2.3322550556669115, "grad_norm": 1.018391489982605, "learning_rate": 1.1141541636897675e-05, "loss": 0.7908, "step": 29380 }, { "epoch": 2.333048879715811, "grad_norm": 0.8569789528846741, "learning_rate": 1.1128310973512212e-05, "loss": 0.7332, "step": 29390 }, { "epoch": 2.3338427037647107, "grad_norm": 0.7940601706504822, "learning_rate": 1.1115080310126751e-05, "loss": 0.8628, "step": 29400 }, { "epoch": 2.3346365278136103, "grad_norm": 0.9954172372817993, "learning_rate": 1.1101849646741289e-05, "loss": 0.7837, "step": 29410 }, { "epoch": 2.3354303518625095, "grad_norm": 0.9305082559585571, "learning_rate": 1.1088618983355826e-05, "loss": 0.7587, "step": 29420 }, { "epoch": 2.336224175911409, "grad_norm": 1.003757357597351, "learning_rate": 1.1075388319970365e-05, "loss": 0.8239, "step": 29430 }, { "epoch": 2.3370179999603087, "grad_norm": 0.8799276947975159, "learning_rate": 1.1062157656584902e-05, "loss": 0.728, "step": 29440 }, { "epoch": 2.3378118240092083, "grad_norm": 0.758741021156311, "learning_rate": 1.104892699319944e-05, "loss": 0.806, "step": 29450 }, { "epoch": 2.338605648058108, "grad_norm": 0.9049004912376404, "learning_rate": 1.1035696329813976e-05, "loss": 0.7504, "step": 29460 }, { "epoch": 2.3393994721070075, "grad_norm": 1.0414828062057495, "learning_rate": 1.1022465666428515e-05, "loss": 0.77, "step": 29470 }, { "epoch": 2.340193296155907, "grad_norm": 0.8456864356994629, "learning_rate": 1.1009235003043053e-05, "loss": 0.7447, "step": 29480 }, { "epoch": 2.3409871202048067, "grad_norm": 0.7757733464241028, "learning_rate": 1.0996004339657591e-05, "loss": 0.7494, "step": 29490 }, { "epoch": 2.3417809442537063, "grad_norm": 1.131042718887329, "learning_rate": 1.0982773676272129e-05, "loss": 0.8584, "step": 29500 }, { "epoch": 2.342574768302606, "grad_norm": 0.8263581395149231, "learning_rate": 1.0969543012886668e-05, "loss": 0.741, "step": 29510 }, { "epoch": 2.3433685923515055, "grad_norm": 1.135141134262085, "learning_rate": 
1.0956312349501205e-05, "loss": 0.7473, "step": 29520 }, { "epoch": 2.3441624164004047, "grad_norm": 1.2775719165802002, "learning_rate": 1.0943081686115742e-05, "loss": 0.7666, "step": 29530 }, { "epoch": 2.3449562404493043, "grad_norm": 0.8533919453620911, "learning_rate": 1.0929851022730281e-05, "loss": 0.7309, "step": 29540 }, { "epoch": 2.345750064498204, "grad_norm": 0.7968776822090149, "learning_rate": 1.0916620359344817e-05, "loss": 0.8046, "step": 29550 }, { "epoch": 2.3465438885471035, "grad_norm": 0.9204419851303101, "learning_rate": 1.0903389695959356e-05, "loss": 0.7839, "step": 29560 }, { "epoch": 2.347337712596003, "grad_norm": 1.0441004037857056, "learning_rate": 1.0890159032573893e-05, "loss": 0.7742, "step": 29570 }, { "epoch": 2.3481315366449027, "grad_norm": 0.9476767182350159, "learning_rate": 1.0876928369188432e-05, "loss": 0.7271, "step": 29580 }, { "epoch": 2.3489253606938023, "grad_norm": 0.8613219857215881, "learning_rate": 1.0863697705802969e-05, "loss": 0.7584, "step": 29590 }, { "epoch": 2.349719184742702, "grad_norm": 0.8703140616416931, "learning_rate": 1.0850467042417508e-05, "loss": 0.7311, "step": 29600 }, { "epoch": 2.3505130087916015, "grad_norm": 0.8106393218040466, "learning_rate": 1.0837236379032045e-05, "loss": 0.8058, "step": 29610 }, { "epoch": 2.3513068328405007, "grad_norm": 1.057108759880066, "learning_rate": 1.0824005715646584e-05, "loss": 0.79, "step": 29620 }, { "epoch": 2.3521006568894003, "grad_norm": 1.0854650735855103, "learning_rate": 1.0810775052261121e-05, "loss": 0.8291, "step": 29630 }, { "epoch": 2.3528944809383, "grad_norm": 0.9281707406044006, "learning_rate": 1.0797544388875658e-05, "loss": 0.7736, "step": 29640 }, { "epoch": 2.3536883049871995, "grad_norm": 0.7807760238647461, "learning_rate": 1.0784313725490197e-05, "loss": 0.7909, "step": 29650 }, { "epoch": 2.354482129036099, "grad_norm": 0.9227698445320129, "learning_rate": 1.0771083062104735e-05, "loss": 0.8431, "step": 29660 }, { "epoch": 
2.3552759530849987, "grad_norm": 0.8814806938171387, "learning_rate": 1.0757852398719272e-05, "loss": 0.8081, "step": 29670 }, { "epoch": 2.3560697771338983, "grad_norm": 0.7971763014793396, "learning_rate": 1.0744621735333809e-05, "loss": 0.7334, "step": 29680 }, { "epoch": 2.356863601182798, "grad_norm": 0.8405244946479797, "learning_rate": 1.0731391071948348e-05, "loss": 0.7273, "step": 29690 }, { "epoch": 2.3576574252316975, "grad_norm": 1.0113767385482788, "learning_rate": 1.0718160408562885e-05, "loss": 0.832, "step": 29700 }, { "epoch": 2.358451249280597, "grad_norm": 0.9246639013290405, "learning_rate": 1.0704929745177424e-05, "loss": 0.8362, "step": 29710 }, { "epoch": 2.3592450733294967, "grad_norm": 1.0259578227996826, "learning_rate": 1.0691699081791961e-05, "loss": 0.7198, "step": 29720 }, { "epoch": 2.360038897378396, "grad_norm": 0.9511812925338745, "learning_rate": 1.06784684184065e-05, "loss": 0.7828, "step": 29730 }, { "epoch": 2.3608327214272955, "grad_norm": 0.87998366355896, "learning_rate": 1.0665237755021038e-05, "loss": 0.7572, "step": 29740 }, { "epoch": 2.361626545476195, "grad_norm": 0.6550692915916443, "learning_rate": 1.0652007091635575e-05, "loss": 0.7808, "step": 29750 }, { "epoch": 2.3624203695250947, "grad_norm": 0.8000962734222412, "learning_rate": 1.0638776428250114e-05, "loss": 0.784, "step": 29760 }, { "epoch": 2.3632141935739943, "grad_norm": 0.8203931450843811, "learning_rate": 1.0625545764864651e-05, "loss": 0.8397, "step": 29770 }, { "epoch": 2.364008017622894, "grad_norm": 0.9608734250068665, "learning_rate": 1.0612315101479188e-05, "loss": 0.7318, "step": 29780 }, { "epoch": 2.3648018416717935, "grad_norm": 0.9658749103546143, "learning_rate": 1.0599084438093725e-05, "loss": 0.8222, "step": 29790 }, { "epoch": 2.365595665720693, "grad_norm": 0.9885733127593994, "learning_rate": 1.0585853774708264e-05, "loss": 0.7645, "step": 29800 }, { "epoch": 2.3663894897695927, "grad_norm": 0.9032514691352844, "learning_rate": 
1.0572623111322802e-05, "loss": 0.8039, "step": 29810 }, { "epoch": 2.367183313818492, "grad_norm": 0.9113055467605591, "learning_rate": 1.055939244793734e-05, "loss": 0.8142, "step": 29820 }, { "epoch": 2.3679771378673915, "grad_norm": 0.9187504649162292, "learning_rate": 1.0546161784551878e-05, "loss": 0.7926, "step": 29830 }, { "epoch": 2.368770961916291, "grad_norm": 0.8882543444633484, "learning_rate": 1.0532931121166417e-05, "loss": 0.7267, "step": 29840 }, { "epoch": 2.3695647859651907, "grad_norm": 0.9198811054229736, "learning_rate": 1.0519700457780954e-05, "loss": 0.7839, "step": 29850 }, { "epoch": 2.3703586100140903, "grad_norm": 0.8237046003341675, "learning_rate": 1.0506469794395491e-05, "loss": 0.7327, "step": 29860 }, { "epoch": 2.37115243406299, "grad_norm": 0.9879904985427856, "learning_rate": 1.049323913101003e-05, "loss": 0.759, "step": 29870 }, { "epoch": 2.3719462581118895, "grad_norm": 0.7641343474388123, "learning_rate": 1.0480008467624567e-05, "loss": 0.7876, "step": 29880 }, { "epoch": 2.372740082160789, "grad_norm": 0.8497799634933472, "learning_rate": 1.0466777804239106e-05, "loss": 0.788, "step": 29890 }, { "epoch": 2.3735339062096887, "grad_norm": 0.7273193001747131, "learning_rate": 1.0453547140853642e-05, "loss": 0.8189, "step": 29900 }, { "epoch": 2.3743277302585883, "grad_norm": 0.9262228012084961, "learning_rate": 1.044031647746818e-05, "loss": 0.8058, "step": 29910 }, { "epoch": 2.375121554307488, "grad_norm": 0.8716014623641968, "learning_rate": 1.0427085814082718e-05, "loss": 0.7813, "step": 29920 }, { "epoch": 2.375915378356387, "grad_norm": 1.047127366065979, "learning_rate": 1.0413855150697257e-05, "loss": 0.8362, "step": 29930 }, { "epoch": 2.3767092024052867, "grad_norm": 1.0029412508010864, "learning_rate": 1.0400624487311794e-05, "loss": 0.7892, "step": 29940 }, { "epoch": 2.3775030264541863, "grad_norm": 0.8537968397140503, "learning_rate": 1.0387393823926331e-05, "loss": 0.7837, "step": 29950 }, { "epoch": 
2.378296850503086, "grad_norm": 0.8679432272911072, "learning_rate": 1.037416316054087e-05, "loss": 0.7785, "step": 29960 }, { "epoch": 2.3790906745519855, "grad_norm": 1.063770055770874, "learning_rate": 1.0360932497155407e-05, "loss": 0.7029, "step": 29970 }, { "epoch": 2.379884498600885, "grad_norm": 0.8927010297775269, "learning_rate": 1.0347701833769946e-05, "loss": 0.7595, "step": 29980 }, { "epoch": 2.3806783226497847, "grad_norm": 0.8152598738670349, "learning_rate": 1.0334471170384484e-05, "loss": 0.8162, "step": 29990 }, { "epoch": 2.3814721466986843, "grad_norm": 0.9757214188575745, "learning_rate": 1.0321240506999023e-05, "loss": 0.7675, "step": 30000 }, { "epoch": 2.382265970747584, "grad_norm": 0.8075728416442871, "learning_rate": 1.0308009843613558e-05, "loss": 0.8226, "step": 30010 }, { "epoch": 2.383059794796483, "grad_norm": 0.7815035581588745, "learning_rate": 1.0294779180228097e-05, "loss": 0.8277, "step": 30020 }, { "epoch": 2.383853618845383, "grad_norm": 0.8908788561820984, "learning_rate": 1.0281548516842634e-05, "loss": 0.773, "step": 30030 }, { "epoch": 2.3846474428942823, "grad_norm": 0.8850157260894775, "learning_rate": 1.0268317853457173e-05, "loss": 0.7643, "step": 30040 }, { "epoch": 2.385441266943182, "grad_norm": 0.802002489566803, "learning_rate": 1.025508719007171e-05, "loss": 0.8312, "step": 30050 }, { "epoch": 2.3862350909920815, "grad_norm": 0.9820096492767334, "learning_rate": 1.0241856526686248e-05, "loss": 0.8148, "step": 30060 }, { "epoch": 2.387028915040981, "grad_norm": 0.934891402721405, "learning_rate": 1.0228625863300787e-05, "loss": 0.7546, "step": 30070 }, { "epoch": 2.3878227390898807, "grad_norm": 0.9393154978752136, "learning_rate": 1.0215395199915324e-05, "loss": 0.7793, "step": 30080 }, { "epoch": 2.3886165631387803, "grad_norm": 0.9475399255752563, "learning_rate": 1.0202164536529863e-05, "loss": 0.7756, "step": 30090 }, { "epoch": 2.38941038718768, "grad_norm": 0.8171314001083374, "learning_rate": 
1.01889338731444e-05, "loss": 0.78, "step": 30100 }, { "epoch": 2.3902042112365796, "grad_norm": 0.85038822889328, "learning_rate": 1.0175703209758939e-05, "loss": 0.7777, "step": 30110 }, { "epoch": 2.390998035285479, "grad_norm": 0.9316538572311401, "learning_rate": 1.0162472546373476e-05, "loss": 0.8058, "step": 30120 }, { "epoch": 2.3917918593343783, "grad_norm": 0.7901173233985901, "learning_rate": 1.0149241882988013e-05, "loss": 0.7778, "step": 30130 }, { "epoch": 2.392585683383278, "grad_norm": 0.907632052898407, "learning_rate": 1.013601121960255e-05, "loss": 0.7577, "step": 30140 }, { "epoch": 2.3933795074321775, "grad_norm": 0.7853071689605713, "learning_rate": 1.012278055621709e-05, "loss": 0.7905, "step": 30150 }, { "epoch": 2.394173331481077, "grad_norm": 0.9845629930496216, "learning_rate": 1.0109549892831627e-05, "loss": 0.8268, "step": 30160 }, { "epoch": 2.3949671555299767, "grad_norm": 0.8937403559684753, "learning_rate": 1.0096319229446164e-05, "loss": 0.842, "step": 30170 }, { "epoch": 2.3957609795788763, "grad_norm": 0.9141705632209778, "learning_rate": 1.0083088566060703e-05, "loss": 0.7376, "step": 30180 }, { "epoch": 2.396554803627776, "grad_norm": 0.8283079266548157, "learning_rate": 1.006985790267524e-05, "loss": 0.7864, "step": 30190 }, { "epoch": 2.3973486276766756, "grad_norm": 1.2408968210220337, "learning_rate": 1.0056627239289779e-05, "loss": 0.8002, "step": 30200 }, { "epoch": 2.398142451725575, "grad_norm": 0.8985006809234619, "learning_rate": 1.0043396575904316e-05, "loss": 0.7865, "step": 30210 }, { "epoch": 2.3989362757744748, "grad_norm": 0.8631697297096252, "learning_rate": 1.0030165912518855e-05, "loss": 0.8899, "step": 30220 }, { "epoch": 2.3997300998233744, "grad_norm": 0.8567438721656799, "learning_rate": 1.0016935249133392e-05, "loss": 0.7726, "step": 30230 }, { "epoch": 2.4005239238722735, "grad_norm": 0.780764102935791, "learning_rate": 1.0003704585747931e-05, "loss": 0.8075, "step": 30240 }, { "epoch": 
2.401317747921173, "grad_norm": 0.9728577136993408, "learning_rate": 9.990473922362467e-06, "loss": 0.7686, "step": 30250 }, { "epoch": 2.4021115719700727, "grad_norm": 0.9801949858665466, "learning_rate": 9.977243258977006e-06, "loss": 0.7584, "step": 30260 }, { "epoch": 2.4029053960189724, "grad_norm": 1.1175134181976318, "learning_rate": 9.964012595591543e-06, "loss": 0.7502, "step": 30270 }, { "epoch": 2.403699220067872, "grad_norm": 1.078339695930481, "learning_rate": 9.95078193220608e-06, "loss": 0.8411, "step": 30280 }, { "epoch": 2.4044930441167716, "grad_norm": 1.1007136106491089, "learning_rate": 9.93755126882062e-06, "loss": 0.8423, "step": 30290 }, { "epoch": 2.405286868165671, "grad_norm": 0.8614094853401184, "learning_rate": 9.924320605435157e-06, "loss": 0.8043, "step": 30300 }, { "epoch": 2.4060806922145708, "grad_norm": 0.723034679889679, "learning_rate": 9.911089942049695e-06, "loss": 0.822, "step": 30310 }, { "epoch": 2.4068745162634704, "grad_norm": 0.899814784526825, "learning_rate": 9.897859278664233e-06, "loss": 0.8256, "step": 30320 }, { "epoch": 2.4076683403123695, "grad_norm": 0.8916921615600586, "learning_rate": 9.884628615278772e-06, "loss": 0.8253, "step": 30330 }, { "epoch": 2.408462164361269, "grad_norm": 0.8144512176513672, "learning_rate": 9.871397951893309e-06, "loss": 0.8021, "step": 30340 }, { "epoch": 2.4092559884101687, "grad_norm": 0.9181259274482727, "learning_rate": 9.858167288507846e-06, "loss": 0.8385, "step": 30350 }, { "epoch": 2.4100498124590684, "grad_norm": 0.9962033629417419, "learning_rate": 9.844936625122383e-06, "loss": 0.8356, "step": 30360 }, { "epoch": 2.410843636507968, "grad_norm": 1.0186583995819092, "learning_rate": 9.831705961736922e-06, "loss": 0.7458, "step": 30370 }, { "epoch": 2.4116374605568676, "grad_norm": 0.9851090312004089, "learning_rate": 9.81847529835146e-06, "loss": 0.8241, "step": 30380 }, { "epoch": 2.412431284605767, "grad_norm": 0.8016131520271301, "learning_rate": 9.805244634965997e-06, 
"loss": 0.7743, "step": 30390 }, { "epoch": 2.4132251086546668, "grad_norm": 0.937201976776123, "learning_rate": 9.792013971580536e-06, "loss": 0.7902, "step": 30400 }, { "epoch": 2.4140189327035664, "grad_norm": 0.6954747438430786, "learning_rate": 9.778783308195073e-06, "loss": 0.8559, "step": 30410 }, { "epoch": 2.414812756752466, "grad_norm": 0.8315379023551941, "learning_rate": 9.765552644809612e-06, "loss": 0.7645, "step": 30420 }, { "epoch": 2.4156065808013656, "grad_norm": 0.86857008934021, "learning_rate": 9.752321981424149e-06, "loss": 0.7826, "step": 30430 }, { "epoch": 2.4164004048502647, "grad_norm": 0.9260841012001038, "learning_rate": 9.739091318038688e-06, "loss": 0.8496, "step": 30440 }, { "epoch": 2.4171942288991644, "grad_norm": 1.021768569946289, "learning_rate": 9.725860654653225e-06, "loss": 0.7887, "step": 30450 }, { "epoch": 2.417988052948064, "grad_norm": 0.9660463929176331, "learning_rate": 9.712629991267762e-06, "loss": 0.7776, "step": 30460 }, { "epoch": 2.4187818769969636, "grad_norm": 0.7337642908096313, "learning_rate": 9.699399327882301e-06, "loss": 0.786, "step": 30470 }, { "epoch": 2.419575701045863, "grad_norm": 1.0514928102493286, "learning_rate": 9.686168664496839e-06, "loss": 0.7785, "step": 30480 }, { "epoch": 2.4203695250947628, "grad_norm": 1.1055537462234497, "learning_rate": 9.672938001111376e-06, "loss": 0.7879, "step": 30490 }, { "epoch": 2.4211633491436624, "grad_norm": 0.9621986746788025, "learning_rate": 9.659707337725913e-06, "loss": 0.7828, "step": 30500 }, { "epoch": 2.421957173192562, "grad_norm": 0.9570415019989014, "learning_rate": 9.646476674340452e-06, "loss": 0.7945, "step": 30510 }, { "epoch": 2.4227509972414616, "grad_norm": 0.831770122051239, "learning_rate": 9.63324601095499e-06, "loss": 0.772, "step": 30520 }, { "epoch": 2.4235448212903608, "grad_norm": 0.7580466270446777, "learning_rate": 9.620015347569528e-06, "loss": 0.8456, "step": 30530 }, { "epoch": 2.424338645339261, "grad_norm": 
1.027631163597107, "learning_rate": 9.606784684184065e-06, "loss": 0.7945, "step": 30540 }, { "epoch": 2.42513246938816, "grad_norm": 1.0360970497131348, "learning_rate": 9.593554020798604e-06, "loss": 0.8, "step": 30550 }, { "epoch": 2.4259262934370596, "grad_norm": 0.9129590392112732, "learning_rate": 9.580323357413142e-06, "loss": 0.8057, "step": 30560 }, { "epoch": 2.426720117485959, "grad_norm": 0.7934035658836365, "learning_rate": 9.567092694027679e-06, "loss": 0.8296, "step": 30570 }, { "epoch": 2.4275139415348588, "grad_norm": 0.9812830090522766, "learning_rate": 9.553862030642218e-06, "loss": 0.7877, "step": 30580 }, { "epoch": 2.4283077655837584, "grad_norm": 0.9974318146705627, "learning_rate": 9.540631367256755e-06, "loss": 0.7479, "step": 30590 }, { "epoch": 2.429101589632658, "grad_norm": 0.9436841011047363, "learning_rate": 9.527400703871292e-06, "loss": 0.8279, "step": 30600 }, { "epoch": 2.4298954136815576, "grad_norm": 0.9886923432350159, "learning_rate": 9.51417004048583e-06, "loss": 0.7854, "step": 30610 }, { "epoch": 2.430689237730457, "grad_norm": 0.9457038044929504, "learning_rate": 9.500939377100368e-06, "loss": 0.827, "step": 30620 }, { "epoch": 2.431483061779357, "grad_norm": 0.93290776014328, "learning_rate": 9.487708713714906e-06, "loss": 0.7439, "step": 30630 }, { "epoch": 2.432276885828256, "grad_norm": 0.9726458787918091, "learning_rate": 9.474478050329444e-06, "loss": 0.7855, "step": 30640 }, { "epoch": 2.4330707098771556, "grad_norm": 1.1130033731460571, "learning_rate": 9.461247386943982e-06, "loss": 0.7976, "step": 30650 }, { "epoch": 2.433864533926055, "grad_norm": 0.7888174653053284, "learning_rate": 9.44801672355852e-06, "loss": 0.7857, "step": 30660 }, { "epoch": 2.434658357974955, "grad_norm": 0.9603310227394104, "learning_rate": 9.434786060173058e-06, "loss": 0.7558, "step": 30670 }, { "epoch": 2.4354521820238544, "grad_norm": 0.9045382738113403, "learning_rate": 9.421555396787595e-06, "loss": 0.7363, "step": 30680 }, { 
"epoch": 2.436246006072754, "grad_norm": 1.1511313915252686, "learning_rate": 9.408324733402134e-06, "loss": 0.8566, "step": 30690 }, { "epoch": 2.4370398301216536, "grad_norm": 0.9795113205909729, "learning_rate": 9.395094070016671e-06, "loss": 0.7888, "step": 30700 }, { "epoch": 2.437833654170553, "grad_norm": 1.0283374786376953, "learning_rate": 9.381863406631208e-06, "loss": 0.8044, "step": 30710 }, { "epoch": 2.438627478219453, "grad_norm": 1.023207664489746, "learning_rate": 9.368632743245746e-06, "loss": 0.818, "step": 30720 }, { "epoch": 2.4394213022683524, "grad_norm": 0.7632606029510498, "learning_rate": 9.355402079860285e-06, "loss": 0.8481, "step": 30730 }, { "epoch": 2.440215126317252, "grad_norm": 0.8076527118682861, "learning_rate": 9.342171416474822e-06, "loss": 0.772, "step": 30740 }, { "epoch": 2.441008950366151, "grad_norm": 0.9960797429084778, "learning_rate": 9.32894075308936e-06, "loss": 0.7411, "step": 30750 }, { "epoch": 2.441802774415051, "grad_norm": 0.8365696668624878, "learning_rate": 9.315710089703898e-06, "loss": 0.804, "step": 30760 }, { "epoch": 2.4425965984639504, "grad_norm": 0.9081131815910339, "learning_rate": 9.302479426318437e-06, "loss": 0.797, "step": 30770 }, { "epoch": 2.44339042251285, "grad_norm": 0.9612648487091064, "learning_rate": 9.289248762932974e-06, "loss": 0.7829, "step": 30780 }, { "epoch": 2.4441842465617496, "grad_norm": 0.8103266954421997, "learning_rate": 9.276018099547511e-06, "loss": 0.8183, "step": 30790 }, { "epoch": 2.444978070610649, "grad_norm": 0.8949236273765564, "learning_rate": 9.26278743616205e-06, "loss": 0.7561, "step": 30800 }, { "epoch": 2.445771894659549, "grad_norm": 0.8719674348831177, "learning_rate": 9.249556772776588e-06, "loss": 0.7777, "step": 30810 }, { "epoch": 2.4465657187084484, "grad_norm": 0.959321916103363, "learning_rate": 9.236326109391125e-06, "loss": 0.7687, "step": 30820 }, { "epoch": 2.447359542757348, "grad_norm": 0.8504953980445862, "learning_rate": 
9.223095446005662e-06, "loss": 0.7785, "step": 30830 }, { "epoch": 2.448153366806247, "grad_norm": 1.073606014251709, "learning_rate": 9.209864782620201e-06, "loss": 0.8652, "step": 30840 }, { "epoch": 2.448947190855147, "grad_norm": 1.1981532573699951, "learning_rate": 9.196634119234738e-06, "loss": 0.7746, "step": 30850 }, { "epoch": 2.4497410149040464, "grad_norm": 0.819255530834198, "learning_rate": 9.183403455849277e-06, "loss": 0.8058, "step": 30860 }, { "epoch": 2.450534838952946, "grad_norm": 1.0335006713867188, "learning_rate": 9.170172792463814e-06, "loss": 0.8036, "step": 30870 }, { "epoch": 2.4513286630018456, "grad_norm": 0.7458271980285645, "learning_rate": 9.156942129078353e-06, "loss": 0.7471, "step": 30880 }, { "epoch": 2.452122487050745, "grad_norm": 0.7569029331207275, "learning_rate": 9.14371146569289e-06, "loss": 0.768, "step": 30890 }, { "epoch": 2.452916311099645, "grad_norm": 1.0082522630691528, "learning_rate": 9.130480802307428e-06, "loss": 0.7802, "step": 30900 }, { "epoch": 2.4537101351485444, "grad_norm": 1.024379014968872, "learning_rate": 9.117250138921967e-06, "loss": 0.7639, "step": 30910 }, { "epoch": 2.454503959197444, "grad_norm": 1.1425983905792236, "learning_rate": 9.104019475536504e-06, "loss": 0.7593, "step": 30920 }, { "epoch": 2.4552977832463436, "grad_norm": 0.9849503040313721, "learning_rate": 9.090788812151043e-06, "loss": 0.8317, "step": 30930 }, { "epoch": 2.4560916072952432, "grad_norm": 1.0086652040481567, "learning_rate": 9.077558148765578e-06, "loss": 0.7548, "step": 30940 }, { "epoch": 2.4568854313441424, "grad_norm": 0.8764373660087585, "learning_rate": 9.064327485380117e-06, "loss": 0.7411, "step": 30950 }, { "epoch": 2.457679255393042, "grad_norm": 1.1179012060165405, "learning_rate": 9.051096821994655e-06, "loss": 0.734, "step": 30960 }, { "epoch": 2.4584730794419416, "grad_norm": 1.0787594318389893, "learning_rate": 9.037866158609193e-06, "loss": 0.8439, "step": 30970 }, { "epoch": 2.459266903490841, 
"grad_norm": 0.9047144651412964, "learning_rate": 9.02463549522373e-06, "loss": 0.7876, "step": 30980 }, { "epoch": 2.460060727539741, "grad_norm": 0.771873414516449, "learning_rate": 9.01140483183827e-06, "loss": 0.8258, "step": 30990 }, { "epoch": 2.4608545515886404, "grad_norm": 0.8397606611251831, "learning_rate": 8.998174168452807e-06, "loss": 0.8014, "step": 31000 }, { "epoch": 2.46164837563754, "grad_norm": 1.1035096645355225, "learning_rate": 8.984943505067344e-06, "loss": 0.7098, "step": 31010 }, { "epoch": 2.4624421996864396, "grad_norm": 0.8682432770729065, "learning_rate": 8.971712841681883e-06, "loss": 0.7, "step": 31020 }, { "epoch": 2.4632360237353392, "grad_norm": 0.9221312403678894, "learning_rate": 8.95848217829642e-06, "loss": 0.8001, "step": 31030 }, { "epoch": 2.4640298477842384, "grad_norm": 1.064479947090149, "learning_rate": 8.94525151491096e-06, "loss": 0.727, "step": 31040 }, { "epoch": 2.464823671833138, "grad_norm": 1.1401010751724243, "learning_rate": 8.932020851525495e-06, "loss": 0.7556, "step": 31050 }, { "epoch": 2.4656174958820376, "grad_norm": 0.9812748432159424, "learning_rate": 8.918790188140034e-06, "loss": 0.7788, "step": 31060 }, { "epoch": 2.466411319930937, "grad_norm": 0.8414878845214844, "learning_rate": 8.905559524754571e-06, "loss": 0.751, "step": 31070 }, { "epoch": 2.467205143979837, "grad_norm": 0.9719010591506958, "learning_rate": 8.89232886136911e-06, "loss": 0.7413, "step": 31080 }, { "epoch": 2.4679989680287364, "grad_norm": 0.8958083987236023, "learning_rate": 8.879098197983647e-06, "loss": 0.7575, "step": 31090 }, { "epoch": 2.468792792077636, "grad_norm": 0.94130939245224, "learning_rate": 8.865867534598184e-06, "loss": 0.7973, "step": 31100 }, { "epoch": 2.4695866161265356, "grad_norm": 0.8929863572120667, "learning_rate": 8.852636871212723e-06, "loss": 0.8096, "step": 31110 }, { "epoch": 2.4703804401754352, "grad_norm": 0.9234357476234436, "learning_rate": 8.83940620782726e-06, "loss": 0.8093, "step": 31120 
}, { "epoch": 2.471174264224335, "grad_norm": 0.8647427558898926, "learning_rate": 8.8261755444418e-06, "loss": 0.7498, "step": 31130 }, { "epoch": 2.4719680882732344, "grad_norm": 0.9515289068222046, "learning_rate": 8.812944881056337e-06, "loss": 0.814, "step": 31140 }, { "epoch": 2.4727619123221336, "grad_norm": 1.1594178676605225, "learning_rate": 8.799714217670876e-06, "loss": 0.8102, "step": 31150 }, { "epoch": 2.473555736371033, "grad_norm": 0.7505339980125427, "learning_rate": 8.786483554285413e-06, "loss": 0.8073, "step": 31160 }, { "epoch": 2.474349560419933, "grad_norm": 1.0167568922042847, "learning_rate": 8.77325289089995e-06, "loss": 0.7955, "step": 31170 }, { "epoch": 2.4751433844688324, "grad_norm": 0.8681299090385437, "learning_rate": 8.760022227514487e-06, "loss": 0.8455, "step": 31180 }, { "epoch": 2.475937208517732, "grad_norm": 0.9660631418228149, "learning_rate": 8.746791564129026e-06, "loss": 0.7721, "step": 31190 }, { "epoch": 2.4767310325666316, "grad_norm": 1.0215500593185425, "learning_rate": 8.733560900743563e-06, "loss": 0.7798, "step": 31200 }, { "epoch": 2.4775248566155312, "grad_norm": 1.1119939088821411, "learning_rate": 8.7203302373581e-06, "loss": 0.7486, "step": 31210 }, { "epoch": 2.478318680664431, "grad_norm": 0.8434147238731384, "learning_rate": 8.70709957397264e-06, "loss": 0.7727, "step": 31220 }, { "epoch": 2.4791125047133304, "grad_norm": 0.9059457778930664, "learning_rate": 8.693868910587177e-06, "loss": 0.7878, "step": 31230 }, { "epoch": 2.4799063287622296, "grad_norm": 0.9416773319244385, "learning_rate": 8.680638247201716e-06, "loss": 0.7827, "step": 31240 }, { "epoch": 2.4807001528111297, "grad_norm": 1.0320826768875122, "learning_rate": 8.667407583816253e-06, "loss": 0.803, "step": 31250 }, { "epoch": 2.481493976860029, "grad_norm": 1.0947669744491577, "learning_rate": 8.654176920430792e-06, "loss": 0.7813, "step": 31260 }, { "epoch": 2.4822878009089284, "grad_norm": 0.9328486919403076, "learning_rate": 
8.640946257045329e-06, "loss": 0.7644, "step": 31270 }, { "epoch": 2.483081624957828, "grad_norm": 1.0494132041931152, "learning_rate": 8.627715593659868e-06, "loss": 0.8421, "step": 31280 }, { "epoch": 2.4838754490067276, "grad_norm": 0.7501383423805237, "learning_rate": 8.614484930274404e-06, "loss": 0.7802, "step": 31290 }, { "epoch": 2.4846692730556272, "grad_norm": 0.9962626099586487, "learning_rate": 8.601254266888942e-06, "loss": 0.7712, "step": 31300 }, { "epoch": 2.485463097104527, "grad_norm": 0.8576439023017883, "learning_rate": 8.58802360350348e-06, "loss": 0.8233, "step": 31310 }, { "epoch": 2.4862569211534264, "grad_norm": 1.1776164770126343, "learning_rate": 8.574792940118017e-06, "loss": 0.7753, "step": 31320 }, { "epoch": 2.487050745202326, "grad_norm": 0.8087422847747803, "learning_rate": 8.561562276732556e-06, "loss": 0.7315, "step": 31330 }, { "epoch": 2.4878445692512257, "grad_norm": 0.8869718909263611, "learning_rate": 8.548331613347093e-06, "loss": 0.7489, "step": 31340 }, { "epoch": 2.488638393300125, "grad_norm": 0.9648873805999756, "learning_rate": 8.535100949961632e-06, "loss": 0.7823, "step": 31350 }, { "epoch": 2.4894322173490244, "grad_norm": 0.9656558632850647, "learning_rate": 8.52187028657617e-06, "loss": 0.7554, "step": 31360 }, { "epoch": 2.490226041397924, "grad_norm": 1.0111730098724365, "learning_rate": 8.508639623190708e-06, "loss": 0.7656, "step": 31370 }, { "epoch": 2.4910198654468236, "grad_norm": 0.9099489450454712, "learning_rate": 8.495408959805245e-06, "loss": 0.8378, "step": 31380 }, { "epoch": 2.4918136894957232, "grad_norm": 0.9399808645248413, "learning_rate": 8.482178296419784e-06, "loss": 0.8008, "step": 31390 }, { "epoch": 2.492607513544623, "grad_norm": 0.6809582710266113, "learning_rate": 8.46894763303432e-06, "loss": 0.8124, "step": 31400 }, { "epoch": 2.4934013375935224, "grad_norm": 0.8552061915397644, "learning_rate": 8.455716969648859e-06, "loss": 0.7942, "step": 31410 }, { "epoch": 2.494195161642422, 
"grad_norm": 0.8102262616157532, "learning_rate": 8.442486306263396e-06, "loss": 0.7476, "step": 31420 }, { "epoch": 2.4949889856913217, "grad_norm": 0.8236813545227051, "learning_rate": 8.429255642877933e-06, "loss": 0.8093, "step": 31430 }, { "epoch": 2.4957828097402213, "grad_norm": 0.8333600163459778, "learning_rate": 8.416024979492472e-06, "loss": 0.7869, "step": 31440 }, { "epoch": 2.496576633789121, "grad_norm": 1.0917775630950928, "learning_rate": 8.40279431610701e-06, "loss": 0.7584, "step": 31450 }, { "epoch": 2.49737045783802, "grad_norm": 1.0433661937713623, "learning_rate": 8.389563652721548e-06, "loss": 0.8002, "step": 31460 }, { "epoch": 2.4981642818869196, "grad_norm": 0.8462589979171753, "learning_rate": 8.376332989336086e-06, "loss": 0.7642, "step": 31470 }, { "epoch": 2.4989581059358192, "grad_norm": 0.8918420076370239, "learning_rate": 8.363102325950625e-06, "loss": 0.7486, "step": 31480 }, { "epoch": 2.499751929984719, "grad_norm": 0.9466501474380493, "learning_rate": 8.349871662565162e-06, "loss": 0.8042, "step": 31490 }, { "epoch": 2.5005457540336185, "grad_norm": 0.9120933413505554, "learning_rate": 8.336640999179699e-06, "loss": 0.761, "step": 31500 }, { "epoch": 2.501339578082518, "grad_norm": 0.8320806622505188, "learning_rate": 8.323410335794238e-06, "loss": 0.7678, "step": 31510 }, { "epoch": 2.5021334021314177, "grad_norm": 0.9464184641838074, "learning_rate": 8.310179672408775e-06, "loss": 0.728, "step": 31520 }, { "epoch": 2.5029272261803173, "grad_norm": 0.831498384475708, "learning_rate": 8.296949009023312e-06, "loss": 0.7438, "step": 31530 }, { "epoch": 2.503721050229217, "grad_norm": 0.7958276271820068, "learning_rate": 8.28371834563785e-06, "loss": 0.7579, "step": 31540 }, { "epoch": 2.504514874278116, "grad_norm": 0.859502375125885, "learning_rate": 8.270487682252389e-06, "loss": 0.8299, "step": 31550 }, { "epoch": 2.505308698327016, "grad_norm": 0.9718039035797119, "learning_rate": 8.257257018866926e-06, "loss": 0.7772, 
"step": 31560 }, { "epoch": 2.5061025223759152, "grad_norm": 0.8938490152359009, "learning_rate": 8.244026355481465e-06, "loss": 0.823, "step": 31570 }, { "epoch": 2.506896346424815, "grad_norm": 0.9374427199363708, "learning_rate": 8.230795692096002e-06, "loss": 0.8246, "step": 31580 }, { "epoch": 2.5076901704737145, "grad_norm": 0.8214637637138367, "learning_rate": 8.217565028710541e-06, "loss": 0.7747, "step": 31590 }, { "epoch": 2.508483994522614, "grad_norm": 1.0425516366958618, "learning_rate": 8.204334365325078e-06, "loss": 0.7211, "step": 31600 }, { "epoch": 2.5092778185715137, "grad_norm": 0.8850832581520081, "learning_rate": 8.191103701939615e-06, "loss": 0.7707, "step": 31610 }, { "epoch": 2.5100716426204133, "grad_norm": 0.7210223078727722, "learning_rate": 8.177873038554154e-06, "loss": 0.7833, "step": 31620 }, { "epoch": 2.510865466669313, "grad_norm": 0.9423604011535645, "learning_rate": 8.164642375168692e-06, "loss": 0.7697, "step": 31630 }, { "epoch": 2.511659290718212, "grad_norm": 0.937473475933075, "learning_rate": 8.151411711783229e-06, "loss": 0.7924, "step": 31640 }, { "epoch": 2.512453114767112, "grad_norm": 1.0225754976272583, "learning_rate": 8.138181048397766e-06, "loss": 0.8051, "step": 31650 }, { "epoch": 2.5132469388160112, "grad_norm": 0.9610739350318909, "learning_rate": 8.124950385012305e-06, "loss": 0.7844, "step": 31660 }, { "epoch": 2.514040762864911, "grad_norm": 0.6740348935127258, "learning_rate": 8.111719721626842e-06, "loss": 0.7502, "step": 31670 }, { "epoch": 2.5148345869138105, "grad_norm": 0.9693344235420227, "learning_rate": 8.098489058241381e-06, "loss": 0.7821, "step": 31680 }, { "epoch": 2.51562841096271, "grad_norm": 1.042784333229065, "learning_rate": 8.085258394855918e-06, "loss": 0.8042, "step": 31690 }, { "epoch": 2.5164222350116097, "grad_norm": 0.726180911064148, "learning_rate": 8.072027731470457e-06, "loss": 0.7687, "step": 31700 }, { "epoch": 2.5172160590605093, "grad_norm": 0.8935758471488953, 
"learning_rate": 8.058797068084994e-06, "loss": 0.7846, "step": 31710 }, { "epoch": 2.518009883109409, "grad_norm": 0.884282648563385, "learning_rate": 8.045566404699532e-06, "loss": 0.7613, "step": 31720 }, { "epoch": 2.5188037071583085, "grad_norm": 0.8129022717475891, "learning_rate": 8.03233574131407e-06, "loss": 0.8585, "step": 31730 }, { "epoch": 2.519597531207208, "grad_norm": 0.9726604223251343, "learning_rate": 8.019105077928608e-06, "loss": 0.7648, "step": 31740 }, { "epoch": 2.5203913552561072, "grad_norm": 0.7345698475837708, "learning_rate": 8.005874414543145e-06, "loss": 0.8031, "step": 31750 }, { "epoch": 2.5211851793050073, "grad_norm": 0.9011138081550598, "learning_rate": 7.992643751157682e-06, "loss": 0.7917, "step": 31760 }, { "epoch": 2.5219790033539065, "grad_norm": 0.9308391213417053, "learning_rate": 7.979413087772221e-06, "loss": 0.8107, "step": 31770 }, { "epoch": 2.522772827402806, "grad_norm": 0.9620044827461243, "learning_rate": 7.966182424386758e-06, "loss": 0.7515, "step": 31780 }, { "epoch": 2.5235666514517057, "grad_norm": 0.7784889936447144, "learning_rate": 7.952951761001297e-06, "loss": 0.7873, "step": 31790 }, { "epoch": 2.5243604755006053, "grad_norm": 0.9141636490821838, "learning_rate": 7.939721097615835e-06, "loss": 0.8912, "step": 31800 }, { "epoch": 2.525154299549505, "grad_norm": 0.7752735614776611, "learning_rate": 7.926490434230374e-06, "loss": 0.7585, "step": 31810 }, { "epoch": 2.5259481235984045, "grad_norm": 0.7791392803192139, "learning_rate": 7.91325977084491e-06, "loss": 0.8186, "step": 31820 }, { "epoch": 2.526741947647304, "grad_norm": 1.1256967782974243, "learning_rate": 7.900029107459448e-06, "loss": 0.7574, "step": 31830 }, { "epoch": 2.5275357716962037, "grad_norm": 1.1258056163787842, "learning_rate": 7.886798444073987e-06, "loss": 0.7826, "step": 31840 }, { "epoch": 2.5283295957451033, "grad_norm": 1.118212103843689, "learning_rate": 7.873567780688524e-06, "loss": 0.8267, "step": 31850 }, { "epoch": 
2.5291234197940025, "grad_norm": 1.0277642011642456, "learning_rate": 7.860337117303061e-06, "loss": 0.7603, "step": 31860 }, { "epoch": 2.529917243842902, "grad_norm": 0.9534600377082825, "learning_rate": 7.847106453917599e-06, "loss": 0.7872, "step": 31870 }, { "epoch": 2.5307110678918017, "grad_norm": 0.9541226625442505, "learning_rate": 7.833875790532138e-06, "loss": 0.7756, "step": 31880 }, { "epoch": 2.5315048919407013, "grad_norm": 0.8130003213882446, "learning_rate": 7.820645127146675e-06, "loss": 0.8042, "step": 31890 }, { "epoch": 2.532298715989601, "grad_norm": 1.0782946348190308, "learning_rate": 7.807414463761214e-06, "loss": 0.7568, "step": 31900 }, { "epoch": 2.5330925400385005, "grad_norm": 1.0216178894042969, "learning_rate": 7.794183800375751e-06, "loss": 0.7805, "step": 31910 }, { "epoch": 2.5338863640874, "grad_norm": 1.0545295476913452, "learning_rate": 7.78095313699029e-06, "loss": 0.7286, "step": 31920 }, { "epoch": 2.5346801881362997, "grad_norm": 0.8440236449241638, "learning_rate": 7.767722473604827e-06, "loss": 0.731, "step": 31930 }, { "epoch": 2.5354740121851993, "grad_norm": 1.0648548603057861, "learning_rate": 7.754491810219364e-06, "loss": 0.8121, "step": 31940 }, { "epoch": 2.5362678362340985, "grad_norm": 1.0364627838134766, "learning_rate": 7.741261146833903e-06, "loss": 0.7799, "step": 31950 }, { "epoch": 2.5370616602829985, "grad_norm": 1.0399836301803589, "learning_rate": 7.72803048344844e-06, "loss": 0.8083, "step": 31960 }, { "epoch": 2.5378554843318977, "grad_norm": 0.8876070380210876, "learning_rate": 7.71479982006298e-06, "loss": 0.8043, "step": 31970 }, { "epoch": 2.5386493083807973, "grad_norm": 0.9416910409927368, "learning_rate": 7.701569156677515e-06, "loss": 0.7913, "step": 31980 }, { "epoch": 2.539443132429697, "grad_norm": 0.7986873984336853, "learning_rate": 7.688338493292054e-06, "loss": 0.7693, "step": 31990 }, { "epoch": 2.5402369564785965, "grad_norm": 1.0350062847137451, "learning_rate": 
7.675107829906591e-06, "loss": 0.7858, "step": 32000 }, { "epoch": 2.541030780527496, "grad_norm": 0.820899486541748, "learning_rate": 7.66187716652113e-06, "loss": 0.781, "step": 32010 }, { "epoch": 2.5418246045763957, "grad_norm": 0.9244166612625122, "learning_rate": 7.648646503135667e-06, "loss": 0.7373, "step": 32020 }, { "epoch": 2.5426184286252953, "grad_norm": 0.9774866104125977, "learning_rate": 7.635415839750206e-06, "loss": 0.7804, "step": 32030 }, { "epoch": 2.543412252674195, "grad_norm": 0.9632622599601746, "learning_rate": 7.6221851763647435e-06, "loss": 0.8138, "step": 32040 }, { "epoch": 2.5442060767230945, "grad_norm": 0.9499511122703552, "learning_rate": 7.6089545129792815e-06, "loss": 0.8123, "step": 32050 }, { "epoch": 2.5449999007719937, "grad_norm": 0.9002213478088379, "learning_rate": 7.59572384959382e-06, "loss": 0.7807, "step": 32060 }, { "epoch": 2.5457937248208937, "grad_norm": 0.965908944606781, "learning_rate": 7.582493186208358e-06, "loss": 0.8199, "step": 32070 }, { "epoch": 2.546587548869793, "grad_norm": 0.9211716055870056, "learning_rate": 7.569262522822895e-06, "loss": 0.6892, "step": 32080 }, { "epoch": 2.5473813729186925, "grad_norm": 1.01425039768219, "learning_rate": 7.556031859437433e-06, "loss": 0.7395, "step": 32090 }, { "epoch": 2.548175196967592, "grad_norm": 0.9339504241943359, "learning_rate": 7.54280119605197e-06, "loss": 0.7235, "step": 32100 }, { "epoch": 2.5489690210164917, "grad_norm": 1.2052608728408813, "learning_rate": 7.5295705326665075e-06, "loss": 0.7632, "step": 32110 }, { "epoch": 2.5497628450653913, "grad_norm": 0.9481042623519897, "learning_rate": 7.5163398692810456e-06, "loss": 0.7757, "step": 32120 }, { "epoch": 2.550556669114291, "grad_norm": 1.060491681098938, "learning_rate": 7.503109205895584e-06, "loss": 0.8118, "step": 32130 }, { "epoch": 2.5513504931631905, "grad_norm": 0.9081485867500305, "learning_rate": 7.489878542510122e-06, "loss": 0.7584, "step": 32140 }, { "epoch": 2.5521443172120897, 
"grad_norm": 0.8219790458679199, "learning_rate": 7.47664787912466e-06, "loss": 0.8306, "step": 32150 }, { "epoch": 2.5529381412609897, "grad_norm": 0.9022849202156067, "learning_rate": 7.463417215739198e-06, "loss": 0.7768, "step": 32160 }, { "epoch": 2.553731965309889, "grad_norm": 1.0158053636550903, "learning_rate": 7.450186552353736e-06, "loss": 0.7782, "step": 32170 }, { "epoch": 2.5545257893587885, "grad_norm": 0.8737016916275024, "learning_rate": 7.436955888968273e-06, "loss": 0.744, "step": 32180 }, { "epoch": 2.555319613407688, "grad_norm": 0.9380881190299988, "learning_rate": 7.423725225582811e-06, "loss": 0.7312, "step": 32190 }, { "epoch": 2.5561134374565877, "grad_norm": 0.9312672019004822, "learning_rate": 7.410494562197349e-06, "loss": 0.7955, "step": 32200 }, { "epoch": 2.5569072615054873, "grad_norm": 1.055057168006897, "learning_rate": 7.397263898811886e-06, "loss": 0.7535, "step": 32210 }, { "epoch": 2.557701085554387, "grad_norm": 0.9864413142204285, "learning_rate": 7.384033235426424e-06, "loss": 0.8113, "step": 32220 }, { "epoch": 2.5584949096032865, "grad_norm": 0.8412654995918274, "learning_rate": 7.370802572040962e-06, "loss": 0.77, "step": 32230 }, { "epoch": 2.559288733652186, "grad_norm": 0.8634358048439026, "learning_rate": 7.3575719086555e-06, "loss": 0.7868, "step": 32240 }, { "epoch": 2.5600825577010857, "grad_norm": 1.081010103225708, "learning_rate": 7.344341245270038e-06, "loss": 0.8323, "step": 32250 }, { "epoch": 2.560876381749985, "grad_norm": 0.9374126195907593, "learning_rate": 7.331110581884576e-06, "loss": 0.78, "step": 32260 }, { "epoch": 2.561670205798885, "grad_norm": 1.0620479583740234, "learning_rate": 7.317879918499114e-06, "loss": 0.7825, "step": 32270 }, { "epoch": 2.562464029847784, "grad_norm": 0.8730692863464355, "learning_rate": 7.304649255113652e-06, "loss": 0.7922, "step": 32280 }, { "epoch": 2.5632578538966837, "grad_norm": 1.0309388637542725, "learning_rate": 7.2914185917281895e-06, "loss": 0.79, "step": 
32290 }, { "epoch": 2.5640516779455833, "grad_norm": 0.8236114382743835, "learning_rate": 7.278187928342728e-06, "loss": 0.7463, "step": 32300 }, { "epoch": 2.564845501994483, "grad_norm": 0.9980899095535278, "learning_rate": 7.264957264957266e-06, "loss": 0.7699, "step": 32310 }, { "epoch": 2.5656393260433825, "grad_norm": 0.8803249597549438, "learning_rate": 7.251726601571804e-06, "loss": 0.8637, "step": 32320 }, { "epoch": 2.566433150092282, "grad_norm": 0.9461043477058411, "learning_rate": 7.23849593818634e-06, "loss": 0.8058, "step": 32330 }, { "epoch": 2.5672269741411817, "grad_norm": 1.0341662168502808, "learning_rate": 7.225265274800878e-06, "loss": 0.8059, "step": 32340 }, { "epoch": 2.5680207981900813, "grad_norm": 0.7638232111930847, "learning_rate": 7.212034611415416e-06, "loss": 0.8157, "step": 32350 }, { "epoch": 2.568814622238981, "grad_norm": 0.8748646974563599, "learning_rate": 7.198803948029954e-06, "loss": 0.7982, "step": 32360 }, { "epoch": 2.56960844628788, "grad_norm": 0.9040183424949646, "learning_rate": 7.1855732846444925e-06, "loss": 0.8113, "step": 32370 }, { "epoch": 2.5704022703367797, "grad_norm": 0.7992666959762573, "learning_rate": 7.1723426212590306e-06, "loss": 0.791, "step": 32380 }, { "epoch": 2.5711960943856793, "grad_norm": 0.9834904074668884, "learning_rate": 7.159111957873569e-06, "loss": 0.8094, "step": 32390 }, { "epoch": 2.571989918434579, "grad_norm": 1.0673480033874512, "learning_rate": 7.145881294488106e-06, "loss": 0.7499, "step": 32400 }, { "epoch": 2.5727837424834785, "grad_norm": 1.0334373712539673, "learning_rate": 7.132650631102644e-06, "loss": 0.7715, "step": 32410 }, { "epoch": 2.573577566532378, "grad_norm": 0.9567040205001831, "learning_rate": 7.119419967717182e-06, "loss": 0.7818, "step": 32420 }, { "epoch": 2.5743713905812777, "grad_norm": 0.890590488910675, "learning_rate": 7.10618930433172e-06, "loss": 0.7509, "step": 32430 }, { "epoch": 2.5751652146301773, "grad_norm": 0.8644120693206787, "learning_rate": 
7.0929586409462565e-06, "loss": 0.7556, "step": 32440 }, { "epoch": 2.575959038679077, "grad_norm": 0.8161708116531372, "learning_rate": 7.079727977560795e-06, "loss": 0.8253, "step": 32450 }, { "epoch": 2.576752862727976, "grad_norm": 0.8945040106773376, "learning_rate": 7.066497314175333e-06, "loss": 0.754, "step": 32460 }, { "epoch": 2.577546686776876, "grad_norm": 0.9900794625282288, "learning_rate": 7.053266650789871e-06, "loss": 0.7775, "step": 32470 }, { "epoch": 2.5783405108257753, "grad_norm": 0.7387442588806152, "learning_rate": 7.040035987404409e-06, "loss": 0.8277, "step": 32480 }, { "epoch": 2.579134334874675, "grad_norm": 0.8887678980827332, "learning_rate": 7.026805324018947e-06, "loss": 0.8033, "step": 32490 }, { "epoch": 2.5799281589235745, "grad_norm": 1.0268609523773193, "learning_rate": 7.013574660633485e-06, "loss": 0.7224, "step": 32500 }, { "epoch": 2.580721982972474, "grad_norm": 0.7121632099151611, "learning_rate": 7.000343997248022e-06, "loss": 0.8311, "step": 32510 }, { "epoch": 2.5815158070213737, "grad_norm": 0.9221680164337158, "learning_rate": 6.98711333386256e-06, "loss": 0.8105, "step": 32520 }, { "epoch": 2.5823096310702733, "grad_norm": 0.8981099128723145, "learning_rate": 6.973882670477098e-06, "loss": 0.7886, "step": 32530 }, { "epoch": 2.583103455119173, "grad_norm": 0.8529419302940369, "learning_rate": 6.9606520070916365e-06, "loss": 0.7879, "step": 32540 }, { "epoch": 2.5838972791680725, "grad_norm": 0.9864525198936462, "learning_rate": 6.9474213437061745e-06, "loss": 0.7829, "step": 32550 }, { "epoch": 2.584691103216972, "grad_norm": 1.1552358865737915, "learning_rate": 6.934190680320711e-06, "loss": 0.737, "step": 32560 }, { "epoch": 2.5854849272658713, "grad_norm": 0.9277712106704712, "learning_rate": 6.920960016935249e-06, "loss": 0.773, "step": 32570 }, { "epoch": 2.586278751314771, "grad_norm": 1.1222898960113525, "learning_rate": 6.907729353549787e-06, "loss": 0.7899, "step": 32580 }, { "epoch": 2.5870725753636705, 
"grad_norm": 0.9069304466247559, "learning_rate": 6.894498690164325e-06, "loss": 0.7647, "step": 32590 }, { "epoch": 2.58786639941257, "grad_norm": 1.0726667642593384, "learning_rate": 6.881268026778863e-06, "loss": 0.752, "step": 32600 }, { "epoch": 2.5886602234614697, "grad_norm": 1.020435094833374, "learning_rate": 6.8680373633934005e-06, "loss": 0.7844, "step": 32610 }, { "epoch": 2.5894540475103693, "grad_norm": 0.928993821144104, "learning_rate": 6.8548067000079386e-06, "loss": 0.8021, "step": 32620 }, { "epoch": 2.590247871559269, "grad_norm": 0.8852306604385376, "learning_rate": 6.841576036622477e-06, "loss": 0.7672, "step": 32630 }, { "epoch": 2.5910416956081685, "grad_norm": 0.8123989701271057, "learning_rate": 6.828345373237015e-06, "loss": 0.8049, "step": 32640 }, { "epoch": 2.591835519657068, "grad_norm": 0.8990057110786438, "learning_rate": 6.815114709851553e-06, "loss": 0.72, "step": 32650 }, { "epoch": 2.5926293437059673, "grad_norm": 1.0066652297973633, "learning_rate": 6.801884046466091e-06, "loss": 0.7786, "step": 32660 }, { "epoch": 2.5934231677548674, "grad_norm": 0.984686017036438, "learning_rate": 6.788653383080627e-06, "loss": 0.7876, "step": 32670 }, { "epoch": 2.5942169918037665, "grad_norm": 0.9421548247337341, "learning_rate": 6.775422719695165e-06, "loss": 0.7983, "step": 32680 }, { "epoch": 2.595010815852666, "grad_norm": 0.9953796863555908, "learning_rate": 6.762192056309703e-06, "loss": 0.7933, "step": 32690 }, { "epoch": 2.5958046399015657, "grad_norm": 1.0435398817062378, "learning_rate": 6.7489613929242415e-06, "loss": 0.8087, "step": 32700 }, { "epoch": 2.5965984639504653, "grad_norm": 0.8738863468170166, "learning_rate": 6.73573072953878e-06, "loss": 0.7782, "step": 32710 }, { "epoch": 2.597392287999365, "grad_norm": 1.0245190858840942, "learning_rate": 6.722500066153317e-06, "loss": 0.8013, "step": 32720 }, { "epoch": 2.5981861120482646, "grad_norm": 0.965116560459137, "learning_rate": 6.709269402767855e-06, "loss": 0.8208, 
"step": 32730 }, { "epoch": 2.598979936097164, "grad_norm": 1.0972223281860352, "learning_rate": 6.696038739382393e-06, "loss": 0.7588, "step": 32740 }, { "epoch": 2.5997737601460638, "grad_norm": 1.1245334148406982, "learning_rate": 6.682808075996931e-06, "loss": 0.8014, "step": 32750 }, { "epoch": 2.6005675841949634, "grad_norm": 0.9737082719802856, "learning_rate": 6.669577412611469e-06, "loss": 0.7959, "step": 32760 }, { "epoch": 2.6013614082438625, "grad_norm": 0.9256748557090759, "learning_rate": 6.656346749226007e-06, "loss": 0.7476, "step": 32770 }, { "epoch": 2.6021552322927626, "grad_norm": 0.9990182518959045, "learning_rate": 6.643116085840545e-06, "loss": 0.8093, "step": 32780 }, { "epoch": 2.6029490563416617, "grad_norm": 1.174641489982605, "learning_rate": 6.629885422455082e-06, "loss": 0.7579, "step": 32790 }, { "epoch": 2.6037428803905613, "grad_norm": 0.8486537933349609, "learning_rate": 6.61665475906962e-06, "loss": 0.755, "step": 32800 }, { "epoch": 2.604536704439461, "grad_norm": 1.0289491415023804, "learning_rate": 6.603424095684158e-06, "loss": 0.7426, "step": 32810 }, { "epoch": 2.6053305284883606, "grad_norm": 0.9940427541732788, "learning_rate": 6.590193432298696e-06, "loss": 0.802, "step": 32820 }, { "epoch": 2.60612435253726, "grad_norm": 0.9397895336151123, "learning_rate": 6.576962768913233e-06, "loss": 0.7455, "step": 32830 }, { "epoch": 2.6069181765861598, "grad_norm": 0.9215680360794067, "learning_rate": 6.563732105527771e-06, "loss": 0.8156, "step": 32840 }, { "epoch": 2.6077120006350594, "grad_norm": 0.9983633160591125, "learning_rate": 6.550501442142309e-06, "loss": 0.749, "step": 32850 }, { "epoch": 2.6085058246839585, "grad_norm": 0.9694678783416748, "learning_rate": 6.537270778756847e-06, "loss": 0.7625, "step": 32860 }, { "epoch": 2.6092996487328586, "grad_norm": 0.9712207913398743, "learning_rate": 6.5240401153713855e-06, "loss": 0.7712, "step": 32870 }, { "epoch": 2.6100934727817577, "grad_norm": 0.8602631688117981, 
"learning_rate": 6.5108094519859236e-06, "loss": 0.7941, "step": 32880 }, { "epoch": 2.6108872968306573, "grad_norm": 1.1966452598571777, "learning_rate": 6.497578788600462e-06, "loss": 0.7736, "step": 32890 }, { "epoch": 2.611681120879557, "grad_norm": 1.0157437324523926, "learning_rate": 6.484348125214998e-06, "loss": 0.7439, "step": 32900 }, { "epoch": 2.6124749449284566, "grad_norm": 0.9984763264656067, "learning_rate": 6.471117461829536e-06, "loss": 0.7874, "step": 32910 }, { "epoch": 2.613268768977356, "grad_norm": 0.9150610566139221, "learning_rate": 6.457886798444074e-06, "loss": 0.6659, "step": 32920 }, { "epoch": 2.6140625930262558, "grad_norm": 1.074073076248169, "learning_rate": 6.444656135058611e-06, "loss": 0.8417, "step": 32930 }, { "epoch": 2.6148564170751554, "grad_norm": 0.8862860798835754, "learning_rate": 6.4314254716731495e-06, "loss": 0.7755, "step": 32940 }, { "epoch": 2.615650241124055, "grad_norm": 0.9611344933509827, "learning_rate": 6.4181948082876876e-06, "loss": 0.7053, "step": 32950 }, { "epoch": 2.6164440651729546, "grad_norm": 0.8691285252571106, "learning_rate": 6.404964144902226e-06, "loss": 0.7922, "step": 32960 }, { "epoch": 2.6172378892218537, "grad_norm": 0.7891274690628052, "learning_rate": 6.391733481516764e-06, "loss": 0.7849, "step": 32970 }, { "epoch": 2.618031713270754, "grad_norm": 0.9660747647285461, "learning_rate": 6.378502818131302e-06, "loss": 0.8229, "step": 32980 }, { "epoch": 2.618825537319653, "grad_norm": 0.9171955585479736, "learning_rate": 6.36527215474584e-06, "loss": 0.8054, "step": 32990 }, { "epoch": 2.6196193613685526, "grad_norm": 1.118066668510437, "learning_rate": 6.352041491360378e-06, "loss": 0.7319, "step": 33000 }, { "epoch": 2.620413185417452, "grad_norm": 1.0057705640792847, "learning_rate": 6.338810827974915e-06, "loss": 0.7856, "step": 33010 }, { "epoch": 2.6212070094663518, "grad_norm": 0.8103092908859253, "learning_rate": 6.3255801645894524e-06, "loss": 0.804, "step": 33020 }, { "epoch": 
2.6220008335152514, "grad_norm": 0.926285982131958, "learning_rate": 6.3123495012039905e-06, "loss": 0.8147, "step": 33030 }, { "epoch": 2.622794657564151, "grad_norm": 0.8489839434623718, "learning_rate": 6.299118837818528e-06, "loss": 0.7151, "step": 33040 }, { "epoch": 2.6235884816130506, "grad_norm": 0.8944807648658752, "learning_rate": 6.285888174433066e-06, "loss": 0.7859, "step": 33050 }, { "epoch": 2.62438230566195, "grad_norm": 0.8567325472831726, "learning_rate": 6.272657511047604e-06, "loss": 0.8029, "step": 33060 }, { "epoch": 2.62517612971085, "grad_norm": 0.9069671034812927, "learning_rate": 6.259426847662142e-06, "loss": 0.7756, "step": 33070 }, { "epoch": 2.625969953759749, "grad_norm": 0.924453854560852, "learning_rate": 6.24619618427668e-06, "loss": 0.7929, "step": 33080 }, { "epoch": 2.6267637778086486, "grad_norm": 0.9638664126396179, "learning_rate": 6.232965520891218e-06, "loss": 0.8279, "step": 33090 }, { "epoch": 2.627557601857548, "grad_norm": 0.9644804000854492, "learning_rate": 6.219734857505756e-06, "loss": 0.7289, "step": 33100 }, { "epoch": 2.6283514259064478, "grad_norm": 0.8256565928459167, "learning_rate": 6.2065041941202935e-06, "loss": 0.7667, "step": 33110 }, { "epoch": 2.6291452499553474, "grad_norm": 0.9192927479743958, "learning_rate": 6.1932735307348315e-06, "loss": 0.7884, "step": 33120 }, { "epoch": 2.629939074004247, "grad_norm": 0.8394224047660828, "learning_rate": 6.180042867349369e-06, "loss": 0.7822, "step": 33130 }, { "epoch": 2.6307328980531466, "grad_norm": 0.9371026754379272, "learning_rate": 6.166812203963907e-06, "loss": 0.7951, "step": 33140 }, { "epoch": 2.631526722102046, "grad_norm": 0.9811797738075256, "learning_rate": 6.153581540578445e-06, "loss": 0.7974, "step": 33150 }, { "epoch": 2.632320546150946, "grad_norm": 1.0210554599761963, "learning_rate": 6.140350877192982e-06, "loss": 0.7797, "step": 33160 }, { "epoch": 2.633114370199845, "grad_norm": 1.1501883268356323, "learning_rate": 6.12712021380752e-06, 
"loss": 0.8254, "step": 33170 }, { "epoch": 2.633908194248745, "grad_norm": 1.004550576210022, "learning_rate": 6.113889550422058e-06, "loss": 0.7635, "step": 33180 }, { "epoch": 2.634702018297644, "grad_norm": 0.8310564756393433, "learning_rate": 6.100658887036596e-06, "loss": 0.835, "step": 33190 }, { "epoch": 2.6354958423465438, "grad_norm": 0.8766066431999207, "learning_rate": 6.0874282236511345e-06, "loss": 0.8109, "step": 33200 }, { "epoch": 2.6362896663954434, "grad_norm": 1.1657209396362305, "learning_rate": 6.0741975602656726e-06, "loss": 0.7254, "step": 33210 }, { "epoch": 2.637083490444343, "grad_norm": 0.9214816689491272, "learning_rate": 6.062289963218756e-06, "loss": 0.7934, "step": 33220 }, { "epoch": 2.6378773144932426, "grad_norm": 0.9850200414657593, "learning_rate": 6.049059299833293e-06, "loss": 0.814, "step": 33230 }, { "epoch": 2.638671138542142, "grad_norm": 0.7920827865600586, "learning_rate": 6.035828636447831e-06, "loss": 0.7924, "step": 33240 }, { "epoch": 2.639464962591042, "grad_norm": 0.9591189026832581, "learning_rate": 6.0225979730623695e-06, "loss": 0.793, "step": 33250 }, { "epoch": 2.6402587866399414, "grad_norm": 0.8499795198440552, "learning_rate": 6.0093673096769076e-06, "loss": 0.8147, "step": 33260 }, { "epoch": 2.641052610688841, "grad_norm": 0.9025987982749939, "learning_rate": 5.996136646291446e-06, "loss": 0.7892, "step": 33270 }, { "epoch": 2.64184643473774, "grad_norm": 0.8324596881866455, "learning_rate": 5.982905982905984e-06, "loss": 0.7845, "step": 33280 }, { "epoch": 2.64264025878664, "grad_norm": 0.948970377445221, "learning_rate": 5.969675319520521e-06, "loss": 0.7481, "step": 33290 }, { "epoch": 2.6434340828355394, "grad_norm": 0.7791500687599182, "learning_rate": 5.956444656135059e-06, "loss": 0.7557, "step": 33300 }, { "epoch": 2.644227906884439, "grad_norm": 0.7366840243339539, "learning_rate": 5.943213992749596e-06, "loss": 0.8133, "step": 33310 }, { "epoch": 2.6450217309333386, "grad_norm": 
0.9585718512535095, "learning_rate": 5.929983329364134e-06, "loss": 0.7364, "step": 33320 }, { "epoch": 2.645815554982238, "grad_norm": 0.8997151851654053, "learning_rate": 5.916752665978672e-06, "loss": 0.8047, "step": 33330 }, { "epoch": 2.646609379031138, "grad_norm": 1.0085241794586182, "learning_rate": 5.9035220025932105e-06, "loss": 0.7328, "step": 33340 }, { "epoch": 2.6474032030800374, "grad_norm": 1.000828742980957, "learning_rate": 5.890291339207748e-06, "loss": 0.756, "step": 33350 }, { "epoch": 2.648197027128937, "grad_norm": 0.913764476776123, "learning_rate": 5.877060675822286e-06, "loss": 0.8004, "step": 33360 }, { "epoch": 2.648990851177836, "grad_norm": 0.861119270324707, "learning_rate": 5.863830012436824e-06, "loss": 0.8222, "step": 33370 }, { "epoch": 2.649784675226736, "grad_norm": 0.7925031185150146, "learning_rate": 5.850599349051362e-06, "loss": 0.7273, "step": 33380 }, { "epoch": 2.6505784992756354, "grad_norm": 1.0246551036834717, "learning_rate": 5.8373686856659e-06, "loss": 0.8253, "step": 33390 }, { "epoch": 2.651372323324535, "grad_norm": 0.970195472240448, "learning_rate": 5.824138022280438e-06, "loss": 0.7488, "step": 33400 }, { "epoch": 2.6521661473734346, "grad_norm": 0.955636203289032, "learning_rate": 5.810907358894975e-06, "loss": 0.7855, "step": 33410 }, { "epoch": 2.652959971422334, "grad_norm": 0.837679386138916, "learning_rate": 5.797676695509513e-06, "loss": 0.7337, "step": 33420 }, { "epoch": 2.653753795471234, "grad_norm": 0.9340305328369141, "learning_rate": 5.784446032124051e-06, "loss": 0.7628, "step": 33430 }, { "epoch": 2.6545476195201334, "grad_norm": 0.8416723608970642, "learning_rate": 5.771215368738589e-06, "loss": 0.818, "step": 33440 }, { "epoch": 2.655341443569033, "grad_norm": 0.9004825353622437, "learning_rate": 5.757984705353127e-06, "loss": 0.7861, "step": 33450 }, { "epoch": 2.6561352676179326, "grad_norm": 0.7813234925270081, "learning_rate": 5.744754041967665e-06, "loss": 0.804, "step": 33460 }, { 
"epoch": 2.656929091666832, "grad_norm": 1.1214011907577515, "learning_rate": 5.731523378582202e-06, "loss": 0.7651, "step": 33470 }, { "epoch": 2.6577229157157314, "grad_norm": 0.9582983255386353, "learning_rate": 5.71829271519674e-06, "loss": 0.7695, "step": 33480 }, { "epoch": 2.6585167397646314, "grad_norm": 0.7920512557029724, "learning_rate": 5.705062051811278e-06, "loss": 0.8515, "step": 33490 }, { "epoch": 2.6593105638135306, "grad_norm": 0.904657781124115, "learning_rate": 5.691831388425816e-06, "loss": 0.8063, "step": 33500 }, { "epoch": 2.66010438786243, "grad_norm": 0.9910812377929688, "learning_rate": 5.678600725040354e-06, "loss": 0.7586, "step": 33510 }, { "epoch": 2.66089821191133, "grad_norm": 1.2386099100112915, "learning_rate": 5.665370061654892e-06, "loss": 0.764, "step": 33520 }, { "epoch": 2.6616920359602294, "grad_norm": 0.9596011638641357, "learning_rate": 5.652139398269429e-06, "loss": 0.7142, "step": 33530 }, { "epoch": 2.662485860009129, "grad_norm": 1.1034601926803589, "learning_rate": 5.638908734883967e-06, "loss": 0.8422, "step": 33540 }, { "epoch": 2.6632796840580286, "grad_norm": 0.8817473649978638, "learning_rate": 5.625678071498505e-06, "loss": 0.7953, "step": 33550 }, { "epoch": 2.6640735081069282, "grad_norm": 0.9314897656440735, "learning_rate": 5.612447408113043e-06, "loss": 0.8269, "step": 33560 }, { "epoch": 2.6648673321558274, "grad_norm": 0.8691668510437012, "learning_rate": 5.599216744727581e-06, "loss": 0.7674, "step": 33570 }, { "epoch": 2.6656611562047274, "grad_norm": 0.869050145149231, "learning_rate": 5.5859860813421185e-06, "loss": 0.7608, "step": 33580 }, { "epoch": 2.6664549802536266, "grad_norm": 0.7281513214111328, "learning_rate": 5.5727554179566566e-06, "loss": 0.7388, "step": 33590 }, { "epoch": 2.667248804302526, "grad_norm": 0.9663460850715637, "learning_rate": 5.559524754571195e-06, "loss": 0.7931, "step": 33600 }, { "epoch": 2.668042628351426, "grad_norm": 0.8808895945549011, "learning_rate": 
5.546294091185733e-06, "loss": 0.7915, "step": 33610 }, { "epoch": 2.6688364524003254, "grad_norm": 1.0292035341262817, "learning_rate": 5.53306342780027e-06, "loss": 0.7188, "step": 33620 }, { "epoch": 2.669630276449225, "grad_norm": 0.8974072337150574, "learning_rate": 5.519832764414808e-06, "loss": 0.7602, "step": 33630 }, { "epoch": 2.6704241004981246, "grad_norm": 0.8630101084709167, "learning_rate": 5.506602101029345e-06, "loss": 0.806, "step": 33640 }, { "epoch": 2.6712179245470242, "grad_norm": 0.958050549030304, "learning_rate": 5.493371437643883e-06, "loss": 0.7824, "step": 33650 }, { "epoch": 2.672011748595924, "grad_norm": 0.9174726605415344, "learning_rate": 5.4801407742584214e-06, "loss": 0.848, "step": 33660 }, { "epoch": 2.6728055726448234, "grad_norm": 0.9651468992233276, "learning_rate": 5.4669101108729595e-06, "loss": 0.7625, "step": 33670 }, { "epoch": 2.6735993966937226, "grad_norm": 0.8684685230255127, "learning_rate": 5.453679447487498e-06, "loss": 0.7831, "step": 33680 }, { "epoch": 2.6743932207426226, "grad_norm": 0.9022814631462097, "learning_rate": 5.440448784102036e-06, "loss": 0.7718, "step": 33690 }, { "epoch": 2.675187044791522, "grad_norm": 0.9168913960456848, "learning_rate": 5.427218120716573e-06, "loss": 0.87, "step": 33700 }, { "epoch": 2.6759808688404214, "grad_norm": 1.065454125404358, "learning_rate": 5.413987457331111e-06, "loss": 0.7865, "step": 33710 }, { "epoch": 2.676774692889321, "grad_norm": 1.0886614322662354, "learning_rate": 5.400756793945649e-06, "loss": 0.7529, "step": 33720 }, { "epoch": 2.6775685169382206, "grad_norm": 1.0097647905349731, "learning_rate": 5.387526130560186e-06, "loss": 0.7951, "step": 33730 }, { "epoch": 2.6783623409871202, "grad_norm": 0.7832012176513672, "learning_rate": 5.374295467174724e-06, "loss": 0.7762, "step": 33740 }, { "epoch": 2.67915616503602, "grad_norm": 0.7780231833457947, "learning_rate": 5.3610648037892625e-06, "loss": 0.8242, "step": 33750 }, { "epoch": 2.6799499890849194, 
"grad_norm": 0.9866840243339539, "learning_rate": 5.3478341404038e-06, "loss": 0.7879, "step": 33760 }, { "epoch": 2.680743813133819, "grad_norm": 1.0353221893310547, "learning_rate": 5.334603477018338e-06, "loss": 0.7558, "step": 33770 }, { "epoch": 2.6815376371827186, "grad_norm": 1.043462872505188, "learning_rate": 5.321372813632876e-06, "loss": 0.7572, "step": 33780 }, { "epoch": 2.682331461231618, "grad_norm": 0.7620270848274231, "learning_rate": 5.308142150247414e-06, "loss": 0.8275, "step": 33790 }, { "epoch": 2.6831252852805174, "grad_norm": 0.835926353931427, "learning_rate": 5.294911486861952e-06, "loss": 0.8125, "step": 33800 }, { "epoch": 2.683919109329417, "grad_norm": 1.1094342470169067, "learning_rate": 5.281680823476489e-06, "loss": 0.8078, "step": 33810 }, { "epoch": 2.6847129333783166, "grad_norm": 0.8570294380187988, "learning_rate": 5.268450160091027e-06, "loss": 0.7878, "step": 33820 }, { "epoch": 2.6855067574272162, "grad_norm": 0.7822785377502441, "learning_rate": 5.255219496705565e-06, "loss": 0.7786, "step": 33830 }, { "epoch": 2.686300581476116, "grad_norm": 1.0392347574234009, "learning_rate": 5.241988833320103e-06, "loss": 0.7568, "step": 33840 }, { "epoch": 2.6870944055250154, "grad_norm": 0.9113541841506958, "learning_rate": 5.228758169934641e-06, "loss": 0.82, "step": 33850 }, { "epoch": 2.687888229573915, "grad_norm": 0.928196370601654, "learning_rate": 5.215527506549179e-06, "loss": 0.7763, "step": 33860 }, { "epoch": 2.6886820536228146, "grad_norm": 1.0967717170715332, "learning_rate": 5.202296843163716e-06, "loss": 0.7269, "step": 33870 }, { "epoch": 2.689475877671714, "grad_norm": 1.093342900276184, "learning_rate": 5.189066179778254e-06, "loss": 0.8291, "step": 33880 }, { "epoch": 2.690269701720614, "grad_norm": 1.088734745979309, "learning_rate": 5.175835516392792e-06, "loss": 0.7851, "step": 33890 }, { "epoch": 2.691063525769513, "grad_norm": 0.8813207149505615, "learning_rate": 5.16260485300733e-06, "loss": 0.7603, "step": 
33900 }, { "epoch": 2.6918573498184126, "grad_norm": 0.9411490559577942, "learning_rate": 5.149374189621868e-06, "loss": 0.8174, "step": 33910 }, { "epoch": 2.6926511738673122, "grad_norm": 0.6878808736801147, "learning_rate": 5.1361435262364064e-06, "loss": 0.7783, "step": 33920 }, { "epoch": 2.693444997916212, "grad_norm": 1.0253195762634277, "learning_rate": 5.122912862850944e-06, "loss": 0.8235, "step": 33930 }, { "epoch": 2.6942388219651114, "grad_norm": 1.0071438550949097, "learning_rate": 5.109682199465481e-06, "loss": 0.7548, "step": 33940 }, { "epoch": 2.695032646014011, "grad_norm": 0.876988410949707, "learning_rate": 5.096451536080019e-06, "loss": 0.7692, "step": 33950 }, { "epoch": 2.6958264700629107, "grad_norm": 0.8041113615036011, "learning_rate": 5.083220872694557e-06, "loss": 0.7844, "step": 33960 }, { "epoch": 2.6966202941118103, "grad_norm": 0.8075045943260193, "learning_rate": 5.069990209309095e-06, "loss": 0.7855, "step": 33970 }, { "epoch": 2.69741411816071, "grad_norm": 0.9511701464653015, "learning_rate": 5.056759545923633e-06, "loss": 0.7444, "step": 33980 }, { "epoch": 2.698207942209609, "grad_norm": 0.9707065224647522, "learning_rate": 5.0435288825381705e-06, "loss": 0.757, "step": 33990 }, { "epoch": 2.699001766258509, "grad_norm": 1.0104913711547852, "learning_rate": 5.0302982191527085e-06, "loss": 0.7089, "step": 34000 }, { "epoch": 2.6997955903074082, "grad_norm": 0.9136759042739868, "learning_rate": 5.017067555767247e-06, "loss": 0.7715, "step": 34010 }, { "epoch": 2.700589414356308, "grad_norm": 1.179523229598999, "learning_rate": 5.003836892381785e-06, "loss": 0.7927, "step": 34020 }, { "epoch": 2.7013832384052074, "grad_norm": 0.7838528156280518, "learning_rate": 4.990606228996322e-06, "loss": 0.7484, "step": 34030 }, { "epoch": 2.702177062454107, "grad_norm": 1.034165859222412, "learning_rate": 4.97737556561086e-06, "loss": 0.8256, "step": 34040 }, { "epoch": 2.7029708865030067, "grad_norm": 1.1382567882537842, "learning_rate": 
4.964144902225397e-06, "loss": 0.7398, "step": 34050 }, { "epoch": 2.7037647105519063, "grad_norm": 0.9856430292129517, "learning_rate": 4.950914238839935e-06, "loss": 0.7579, "step": 34060 }, { "epoch": 2.704558534600806, "grad_norm": 0.8700586557388306, "learning_rate": 4.937683575454473e-06, "loss": 0.7864, "step": 34070 }, { "epoch": 2.705352358649705, "grad_norm": 0.9212909936904907, "learning_rate": 4.9244529120690115e-06, "loss": 0.7534, "step": 34080 }, { "epoch": 2.706146182698605, "grad_norm": 0.9201529026031494, "learning_rate": 4.912545315022096e-06, "loss": 0.7437, "step": 34090 }, { "epoch": 2.7069400067475042, "grad_norm": 0.9818313121795654, "learning_rate": 4.899314651636634e-06, "loss": 0.7595, "step": 34100 }, { "epoch": 2.707733830796404, "grad_norm": 0.9335930347442627, "learning_rate": 4.886083988251171e-06, "loss": 0.7705, "step": 34110 }, { "epoch": 2.7085276548453034, "grad_norm": 1.0011483430862427, "learning_rate": 4.872853324865708e-06, "loss": 0.7944, "step": 34120 }, { "epoch": 2.709321478894203, "grad_norm": 0.9489617943763733, "learning_rate": 4.8596226614802465e-06, "loss": 0.7808, "step": 34130 }, { "epoch": 2.7101153029431027, "grad_norm": 1.0148978233337402, "learning_rate": 4.8463919980947845e-06, "loss": 0.7909, "step": 34140 }, { "epoch": 2.7109091269920023, "grad_norm": 0.89469313621521, "learning_rate": 4.833161334709323e-06, "loss": 0.8341, "step": 34150 }, { "epoch": 2.711702951040902, "grad_norm": 1.135300874710083, "learning_rate": 4.819930671323861e-06, "loss": 0.785, "step": 34160 }, { "epoch": 2.7124967750898015, "grad_norm": 0.9394294619560242, "learning_rate": 4.806700007938398e-06, "loss": 0.7499, "step": 34170 }, { "epoch": 2.713290599138701, "grad_norm": 0.9445950984954834, "learning_rate": 4.793469344552936e-06, "loss": 0.7784, "step": 34180 }, { "epoch": 2.7140844231876002, "grad_norm": 0.7616167664527893, "learning_rate": 4.780238681167474e-06, "loss": 0.8297, "step": 34190 }, { "epoch": 2.7148782472365003, 
"grad_norm": 0.9969817399978638, "learning_rate": 4.767008017782012e-06, "loss": 0.771, "step": 34200 }, { "epoch": 2.7156720712853994, "grad_norm": 0.9528368711471558, "learning_rate": 4.75377735439655e-06, "loss": 0.8011, "step": 34210 }, { "epoch": 2.716465895334299, "grad_norm": 0.9359155893325806, "learning_rate": 4.7405466910110875e-06, "loss": 0.7494, "step": 34220 }, { "epoch": 2.7172597193831987, "grad_norm": 0.9772827625274658, "learning_rate": 4.727316027625625e-06, "loss": 0.7775, "step": 34230 }, { "epoch": 2.7180535434320983, "grad_norm": 0.8860666751861572, "learning_rate": 4.714085364240163e-06, "loss": 0.7459, "step": 34240 }, { "epoch": 2.718847367480998, "grad_norm": 0.9986729621887207, "learning_rate": 4.700854700854701e-06, "loss": 0.7979, "step": 34250 }, { "epoch": 2.7196411915298975, "grad_norm": 1.0801081657409668, "learning_rate": 4.687624037469239e-06, "loss": 0.765, "step": 34260 }, { "epoch": 2.720435015578797, "grad_norm": 1.139674186706543, "learning_rate": 4.674393374083777e-06, "loss": 0.7262, "step": 34270 }, { "epoch": 2.7212288396276967, "grad_norm": 1.0824925899505615, "learning_rate": 4.661162710698315e-06, "loss": 0.7435, "step": 34280 }, { "epoch": 2.7220226636765963, "grad_norm": 1.0776777267456055, "learning_rate": 4.647932047312852e-06, "loss": 0.741, "step": 34290 }, { "epoch": 2.7228164877254954, "grad_norm": 0.7067087292671204, "learning_rate": 4.6347013839273904e-06, "loss": 0.7718, "step": 34300 }, { "epoch": 2.723610311774395, "grad_norm": 0.9206405282020569, "learning_rate": 4.6214707205419285e-06, "loss": 0.8099, "step": 34310 }, { "epoch": 2.7244041358232947, "grad_norm": 0.9427090287208557, "learning_rate": 4.608240057156466e-06, "loss": 0.8307, "step": 34320 }, { "epoch": 2.7251979598721943, "grad_norm": 0.9077566862106323, "learning_rate": 4.595009393771004e-06, "loss": 0.7284, "step": 34330 }, { "epoch": 2.725991783921094, "grad_norm": 1.0393189191818237, "learning_rate": 4.581778730385542e-06, "loss": 0.76, 
"step": 34340 }, { "epoch": 2.7267856079699935, "grad_norm": 0.7990016937255859, "learning_rate": 4.568548067000079e-06, "loss": 0.8172, "step": 34350 }, { "epoch": 2.727579432018893, "grad_norm": 0.9199971556663513, "learning_rate": 4.555317403614617e-06, "loss": 0.8014, "step": 34360 }, { "epoch": 2.7283732560677927, "grad_norm": 1.028395175933838, "learning_rate": 4.542086740229155e-06, "loss": 0.7645, "step": 34370 }, { "epoch": 2.7291670801166923, "grad_norm": 0.9020273089408875, "learning_rate": 4.528856076843693e-06, "loss": 0.7711, "step": 34380 }, { "epoch": 2.7299609041655915, "grad_norm": 1.0992895364761353, "learning_rate": 4.5156254134582315e-06, "loss": 0.7861, "step": 34390 }, { "epoch": 2.7307547282144915, "grad_norm": 0.8821068406105042, "learning_rate": 4.502394750072769e-06, "loss": 0.7572, "step": 34400 }, { "epoch": 2.7315485522633907, "grad_norm": 0.8414868712425232, "learning_rate": 4.489164086687307e-06, "loss": 0.8069, "step": 34410 }, { "epoch": 2.7323423763122903, "grad_norm": 1.0444607734680176, "learning_rate": 4.475933423301845e-06, "loss": 0.8252, "step": 34420 }, { "epoch": 2.73313620036119, "grad_norm": 0.7695857882499695, "learning_rate": 4.462702759916382e-06, "loss": 0.8368, "step": 34430 }, { "epoch": 2.7339300244100895, "grad_norm": 0.9509304761886597, "learning_rate": 4.44947209653092e-06, "loss": 0.796, "step": 34440 }, { "epoch": 2.734723848458989, "grad_norm": 0.798905074596405, "learning_rate": 4.436241433145458e-06, "loss": 0.7507, "step": 34450 }, { "epoch": 2.7355176725078887, "grad_norm": 0.9972504377365112, "learning_rate": 4.4230107697599955e-06, "loss": 0.8479, "step": 34460 }, { "epoch": 2.7363114965567883, "grad_norm": 0.9366284012794495, "learning_rate": 4.4097801063745336e-06, "loss": 0.7644, "step": 34470 }, { "epoch": 2.737105320605688, "grad_norm": 1.0526267290115356, "learning_rate": 4.396549442989072e-06, "loss": 0.7578, "step": 34480 }, { "epoch": 2.7378991446545875, "grad_norm": 0.8934128880500793, 
"learning_rate": 4.38331877960361e-06, "loss": 0.8356, "step": 34490 }, { "epoch": 2.7386929687034867, "grad_norm": 1.0690593719482422, "learning_rate": 4.370088116218148e-06, "loss": 0.78, "step": 34500 }, { "epoch": 2.7394867927523863, "grad_norm": 0.9687326550483704, "learning_rate": 4.356857452832686e-06, "loss": 0.7375, "step": 34510 }, { "epoch": 2.740280616801286, "grad_norm": 0.9082489609718323, "learning_rate": 4.343626789447223e-06, "loss": 0.7743, "step": 34520 }, { "epoch": 2.7410744408501855, "grad_norm": 0.951102614402771, "learning_rate": 4.330396126061761e-06, "loss": 0.797, "step": 34530 }, { "epoch": 2.741868264899085, "grad_norm": 1.032860517501831, "learning_rate": 4.3171654626762984e-06, "loss": 0.8604, "step": 34540 }, { "epoch": 2.7426620889479847, "grad_norm": 0.9105594158172607, "learning_rate": 4.3039347992908365e-06, "loss": 0.7811, "step": 34550 }, { "epoch": 2.7434559129968843, "grad_norm": 0.9474354982376099, "learning_rate": 4.290704135905375e-06, "loss": 0.7872, "step": 34560 }, { "epoch": 2.744249737045784, "grad_norm": 0.7172473669052124, "learning_rate": 4.277473472519913e-06, "loss": 0.7295, "step": 34570 }, { "epoch": 2.7450435610946835, "grad_norm": 0.8522198796272278, "learning_rate": 4.26424280913445e-06, "loss": 0.7326, "step": 34580 }, { "epoch": 2.7458373851435827, "grad_norm": 1.0782872438430786, "learning_rate": 4.251012145748988e-06, "loss": 0.7579, "step": 34590 }, { "epoch": 2.7466312091924827, "grad_norm": 1.02603280544281, "learning_rate": 4.237781482363526e-06, "loss": 0.7964, "step": 34600 }, { "epoch": 2.747425033241382, "grad_norm": 0.9840404391288757, "learning_rate": 4.224550818978064e-06, "loss": 0.8247, "step": 34610 }, { "epoch": 2.7482188572902815, "grad_norm": 0.8115825653076172, "learning_rate": 4.211320155592602e-06, "loss": 0.7881, "step": 34620 }, { "epoch": 2.749012681339181, "grad_norm": 1.053634524345398, "learning_rate": 4.1980894922071395e-06, "loss": 0.7765, "step": 34630 }, { "epoch": 
2.7498065053880807, "grad_norm": 0.8972168564796448, "learning_rate": 4.184858828821677e-06, "loss": 0.7956, "step": 34640 }, { "epoch": 2.7506003294369803, "grad_norm": 0.9098021388053894, "learning_rate": 4.171628165436215e-06, "loss": 0.7835, "step": 34650 }, { "epoch": 2.75139415348588, "grad_norm": 0.8237542510032654, "learning_rate": 4.158397502050753e-06, "loss": 0.7629, "step": 34660 }, { "epoch": 2.7521879775347795, "grad_norm": 1.1170654296875, "learning_rate": 4.145166838665291e-06, "loss": 0.7887, "step": 34670 }, { "epoch": 2.752981801583679, "grad_norm": 1.165892243385315, "learning_rate": 4.131936175279829e-06, "loss": 0.7624, "step": 34680 }, { "epoch": 2.7537756256325787, "grad_norm": 0.8696096539497375, "learning_rate": 4.118705511894366e-06, "loss": 0.7976, "step": 34690 }, { "epoch": 2.754569449681478, "grad_norm": 1.0606629848480225, "learning_rate": 4.105474848508904e-06, "loss": 0.7863, "step": 34700 }, { "epoch": 2.755363273730378, "grad_norm": 0.9789169430732727, "learning_rate": 4.092244185123442e-06, "loss": 0.7347, "step": 34710 }, { "epoch": 2.756157097779277, "grad_norm": 0.9391206502914429, "learning_rate": 4.0790135217379805e-06, "loss": 0.8182, "step": 34720 }, { "epoch": 2.7569509218281767, "grad_norm": 0.9895641803741455, "learning_rate": 4.0657828583525186e-06, "loss": 0.7514, "step": 34730 }, { "epoch": 2.7577447458770763, "grad_norm": 0.8437541723251343, "learning_rate": 4.052552194967056e-06, "loss": 0.7866, "step": 34740 }, { "epoch": 2.758538569925976, "grad_norm": 1.0394833087921143, "learning_rate": 4.039321531581593e-06, "loss": 0.8191, "step": 34750 }, { "epoch": 2.7593323939748755, "grad_norm": 0.9118010997772217, "learning_rate": 4.026090868196131e-06, "loss": 0.7347, "step": 34760 }, { "epoch": 2.760126218023775, "grad_norm": 0.8801964521408081, "learning_rate": 4.012860204810669e-06, "loss": 0.7648, "step": 34770 }, { "epoch": 2.7609200420726747, "grad_norm": 0.9062778353691101, "learning_rate": 
3.999629541425207e-06, "loss": 0.7519, "step": 34780 }, { "epoch": 2.761713866121574, "grad_norm": 0.9668712615966797, "learning_rate": 3.986398878039745e-06, "loss": 0.7754, "step": 34790 }, { "epoch": 2.762507690170474, "grad_norm": 0.7536428570747375, "learning_rate": 3.9731682146542834e-06, "loss": 0.8258, "step": 34800 }, { "epoch": 2.763301514219373, "grad_norm": 0.9052528142929077, "learning_rate": 3.959937551268821e-06, "loss": 0.829, "step": 34810 }, { "epoch": 2.7640953382682727, "grad_norm": 0.9865114688873291, "learning_rate": 3.946706887883359e-06, "loss": 0.8256, "step": 34820 }, { "epoch": 2.7648891623171723, "grad_norm": 0.8684812188148499, "learning_rate": 3.933476224497897e-06, "loss": 0.7742, "step": 34830 }, { "epoch": 2.765682986366072, "grad_norm": 1.075271725654602, "learning_rate": 3.920245561112434e-06, "loss": 0.8008, "step": 34840 }, { "epoch": 2.7664768104149715, "grad_norm": 0.9537501335144043, "learning_rate": 3.907014897726972e-06, "loss": 0.8286, "step": 34850 }, { "epoch": 2.767270634463871, "grad_norm": 0.8851303458213806, "learning_rate": 3.89378423434151e-06, "loss": 0.815, "step": 34860 }, { "epoch": 2.7680644585127707, "grad_norm": 0.8173339366912842, "learning_rate": 3.8805535709560474e-06, "loss": 0.8254, "step": 34870 }, { "epoch": 2.7688582825616703, "grad_norm": 1.0855127573013306, "learning_rate": 3.8673229075705855e-06, "loss": 0.7537, "step": 34880 }, { "epoch": 2.76965210661057, "grad_norm": 1.0475999116897583, "learning_rate": 3.854092244185124e-06, "loss": 0.7576, "step": 34890 }, { "epoch": 2.770445930659469, "grad_norm": 0.9665488004684448, "learning_rate": 3.840861580799662e-06, "loss": 0.728, "step": 34900 }, { "epoch": 2.771239754708369, "grad_norm": 0.8024313449859619, "learning_rate": 3.8276309174142e-06, "loss": 0.7808, "step": 34910 }, { "epoch": 2.7720335787572683, "grad_norm": 0.955835223197937, "learning_rate": 3.814400254028737e-06, "loss": 0.7961, "step": 34920 }, { "epoch": 2.772827402806168, 
"grad_norm": 0.996760368347168, "learning_rate": 3.8011695906432747e-06, "loss": 0.7857, "step": 34930 }, { "epoch": 2.7736212268550675, "grad_norm": 0.9988394975662231, "learning_rate": 3.7879389272578127e-06, "loss": 0.7821, "step": 34940 }, { "epoch": 2.774415050903967, "grad_norm": 0.9682359099388123, "learning_rate": 3.774708263872351e-06, "loss": 0.8208, "step": 34950 }, { "epoch": 2.7752088749528667, "grad_norm": 0.8783732652664185, "learning_rate": 3.761477600486889e-06, "loss": 0.8191, "step": 34960 }, { "epoch": 2.7760026990017663, "grad_norm": 1.049475073814392, "learning_rate": 3.7482469371014266e-06, "loss": 0.733, "step": 34970 }, { "epoch": 2.776796523050666, "grad_norm": 0.9063088893890381, "learning_rate": 3.7350162737159638e-06, "loss": 0.8224, "step": 34980 }, { "epoch": 2.7775903470995655, "grad_norm": 0.9383750557899475, "learning_rate": 3.721785610330502e-06, "loss": 0.7338, "step": 34990 }, { "epoch": 2.778384171148465, "grad_norm": 0.849460244178772, "learning_rate": 3.70855494694504e-06, "loss": 0.7359, "step": 35000 }, { "epoch": 2.7791779951973643, "grad_norm": 1.0074882507324219, "learning_rate": 3.695324283559578e-06, "loss": 0.7969, "step": 35010 }, { "epoch": 2.779971819246264, "grad_norm": 0.9304841160774231, "learning_rate": 3.6820936201741157e-06, "loss": 0.7987, "step": 35020 }, { "epoch": 2.7807656432951635, "grad_norm": 1.1180306673049927, "learning_rate": 3.6688629567886538e-06, "loss": 0.7637, "step": 35030 }, { "epoch": 2.781559467344063, "grad_norm": 0.9960020184516907, "learning_rate": 3.655632293403191e-06, "loss": 0.7664, "step": 35040 }, { "epoch": 2.7823532913929627, "grad_norm": 1.1792720556259155, "learning_rate": 3.642401630017729e-06, "loss": 0.7887, "step": 35050 }, { "epoch": 2.7831471154418623, "grad_norm": 0.7898070216178894, "learning_rate": 3.629170966632267e-06, "loss": 0.8356, "step": 35060 }, { "epoch": 2.783940939490762, "grad_norm": 0.7194439172744751, "learning_rate": 3.6159403032468052e-06, "loss": 
0.7888, "step": 35070 }, { "epoch": 2.7847347635396615, "grad_norm": 1.077726125717163, "learning_rate": 3.602709639861343e-06, "loss": 0.8264, "step": 35080 }, { "epoch": 2.785528587588561, "grad_norm": 1.00077486038208, "learning_rate": 3.589478976475881e-06, "loss": 0.7843, "step": 35090 }, { "epoch": 2.7863224116374603, "grad_norm": 1.037901520729065, "learning_rate": 3.576248313090418e-06, "loss": 0.7137, "step": 35100 }, { "epoch": 2.7871162356863604, "grad_norm": 0.8961439728736877, "learning_rate": 3.5630176497049563e-06, "loss": 0.7605, "step": 35110 }, { "epoch": 2.7879100597352595, "grad_norm": 0.9663600325584412, "learning_rate": 3.5497869863194944e-06, "loss": 0.732, "step": 35120 }, { "epoch": 2.788703883784159, "grad_norm": 0.8328473567962646, "learning_rate": 3.536556322934032e-06, "loss": 0.7589, "step": 35130 }, { "epoch": 2.7894977078330587, "grad_norm": 1.0931073427200317, "learning_rate": 3.52332565954857e-06, "loss": 0.8035, "step": 35140 }, { "epoch": 2.7902915318819583, "grad_norm": 0.8528993725776672, "learning_rate": 3.5100949961631073e-06, "loss": 0.818, "step": 35150 }, { "epoch": 2.791085355930858, "grad_norm": 0.9126871228218079, "learning_rate": 3.4968643327776454e-06, "loss": 0.7092, "step": 35160 }, { "epoch": 2.7918791799797575, "grad_norm": 0.8456578850746155, "learning_rate": 3.4836336693921835e-06, "loss": 0.7571, "step": 35170 }, { "epoch": 2.792673004028657, "grad_norm": 1.0316028594970703, "learning_rate": 3.470403006006721e-06, "loss": 0.7413, "step": 35180 }, { "epoch": 2.7934668280775568, "grad_norm": 0.8580575585365295, "learning_rate": 3.4571723426212592e-06, "loss": 0.7685, "step": 35190 }, { "epoch": 2.7942606521264564, "grad_norm": 0.9380501508712769, "learning_rate": 3.4439416792357973e-06, "loss": 0.7756, "step": 35200 }, { "epoch": 2.7950544761753555, "grad_norm": 0.8025434613227844, "learning_rate": 3.4307110158503345e-06, "loss": 0.7646, "step": 35210 }, { "epoch": 2.7958483002242556, "grad_norm": 
0.828920841217041, "learning_rate": 3.4174803524648726e-06, "loss": 0.8187, "step": 35220 }, { "epoch": 2.7966421242731547, "grad_norm": 0.8058600425720215, "learning_rate": 3.4042496890794107e-06, "loss": 0.7068, "step": 35230 }, { "epoch": 2.7974359483220543, "grad_norm": 0.9694280028343201, "learning_rate": 3.3910190256939484e-06, "loss": 0.7607, "step": 35240 }, { "epoch": 2.798229772370954, "grad_norm": 0.9409134984016418, "learning_rate": 3.3777883623084864e-06, "loss": 0.7601, "step": 35250 }, { "epoch": 2.7990235964198535, "grad_norm": 1.1917532682418823, "learning_rate": 3.3645576989230245e-06, "loss": 0.7952, "step": 35260 }, { "epoch": 2.799817420468753, "grad_norm": 0.8570424318313599, "learning_rate": 3.3513270355375618e-06, "loss": 0.8205, "step": 35270 }, { "epoch": 2.8006112445176528, "grad_norm": 1.0113235712051392, "learning_rate": 3.3380963721521e-06, "loss": 0.8328, "step": 35280 }, { "epoch": 2.8014050685665524, "grad_norm": 0.9469982385635376, "learning_rate": 3.3248657087666375e-06, "loss": 0.7525, "step": 35290 }, { "epoch": 2.8021988926154515, "grad_norm": 1.03362238407135, "learning_rate": 3.3116350453811756e-06, "loss": 0.7338, "step": 35300 }, { "epoch": 2.8029927166643516, "grad_norm": 1.1091042757034302, "learning_rate": 3.2984043819957137e-06, "loss": 0.7698, "step": 35310 }, { "epoch": 2.8037865407132507, "grad_norm": 1.1740446090698242, "learning_rate": 3.2851737186102517e-06, "loss": 0.7686, "step": 35320 }, { "epoch": 2.8045803647621503, "grad_norm": 1.0045807361602783, "learning_rate": 3.271943055224789e-06, "loss": 0.7481, "step": 35330 }, { "epoch": 2.80537418881105, "grad_norm": 0.9175392985343933, "learning_rate": 3.258712391839327e-06, "loss": 0.7549, "step": 35340 }, { "epoch": 2.8061680128599495, "grad_norm": 1.221364974975586, "learning_rate": 3.2454817284538647e-06, "loss": 0.7778, "step": 35350 }, { "epoch": 2.806961836908849, "grad_norm": 0.9719924330711365, "learning_rate": 3.2322510650684028e-06, "loss": 0.7635, 
"step": 35360 }, { "epoch": 2.8077556609577488, "grad_norm": 0.9859107732772827, "learning_rate": 3.219020401682941e-06, "loss": 0.8012, "step": 35370 }, { "epoch": 2.8085494850066484, "grad_norm": 1.052498698234558, "learning_rate": 3.2057897382974785e-06, "loss": 0.8148, "step": 35380 }, { "epoch": 2.809343309055548, "grad_norm": 0.7815044522285461, "learning_rate": 3.192559074912016e-06, "loss": 0.7499, "step": 35390 }, { "epoch": 2.8101371331044476, "grad_norm": 1.2309107780456543, "learning_rate": 3.179328411526554e-06, "loss": 0.8063, "step": 35400 }, { "epoch": 2.8109309571533467, "grad_norm": 1.1573877334594727, "learning_rate": 3.166097748141092e-06, "loss": 0.7626, "step": 35410 }, { "epoch": 2.811724781202247, "grad_norm": 1.0697295665740967, "learning_rate": 3.15286708475563e-06, "loss": 0.7613, "step": 35420 }, { "epoch": 2.812518605251146, "grad_norm": 0.9957389235496521, "learning_rate": 3.139636421370168e-06, "loss": 0.7382, "step": 35430 }, { "epoch": 2.8133124293000455, "grad_norm": 0.9990561604499817, "learning_rate": 3.1264057579847053e-06, "loss": 0.7597, "step": 35440 }, { "epoch": 2.814106253348945, "grad_norm": 1.1225290298461914, "learning_rate": 3.1131750945992434e-06, "loss": 0.817, "step": 35450 }, { "epoch": 2.8149000773978448, "grad_norm": 1.0921093225479126, "learning_rate": 3.099944431213781e-06, "loss": 0.7977, "step": 35460 }, { "epoch": 2.8156939014467444, "grad_norm": 0.9584634304046631, "learning_rate": 3.086713767828319e-06, "loss": 0.7256, "step": 35470 }, { "epoch": 2.816487725495644, "grad_norm": 0.8979785442352295, "learning_rate": 3.073483104442857e-06, "loss": 0.8171, "step": 35480 }, { "epoch": 2.8172815495445436, "grad_norm": 0.9863408803939819, "learning_rate": 3.060252441057395e-06, "loss": 0.8187, "step": 35490 }, { "epoch": 2.818075373593443, "grad_norm": 0.9419934153556824, "learning_rate": 3.0470217776719325e-06, "loss": 0.7236, "step": 35500 }, { "epoch": 2.818869197642343, "grad_norm": 0.7588933706283569, 
"learning_rate": 3.0337911142864706e-06, "loss": 0.7996, "step": 35510 }, { "epoch": 2.819663021691242, "grad_norm": 0.9996737837791443, "learning_rate": 3.0205604509010083e-06, "loss": 0.7806, "step": 35520 }, { "epoch": 2.8204568457401415, "grad_norm": 1.1076947450637817, "learning_rate": 3.0073297875155463e-06, "loss": 0.7945, "step": 35530 }, { "epoch": 2.821250669789041, "grad_norm": 0.942015528678894, "learning_rate": 2.9940991241300844e-06, "loss": 0.7777, "step": 35540 }, { "epoch": 2.8220444938379408, "grad_norm": 0.9087643027305603, "learning_rate": 2.9808684607446216e-06, "loss": 0.8318, "step": 35550 }, { "epoch": 2.8228383178868404, "grad_norm": 1.026443600654602, "learning_rate": 2.9676377973591597e-06, "loss": 0.7698, "step": 35560 }, { "epoch": 2.82363214193574, "grad_norm": 0.9382554888725281, "learning_rate": 2.9544071339736974e-06, "loss": 0.7869, "step": 35570 }, { "epoch": 2.8244259659846396, "grad_norm": 0.7953215837478638, "learning_rate": 2.9411764705882355e-06, "loss": 0.7498, "step": 35580 }, { "epoch": 2.825219790033539, "grad_norm": 0.9464923143386841, "learning_rate": 2.9279458072027735e-06, "loss": 0.7229, "step": 35590 }, { "epoch": 2.826013614082439, "grad_norm": 0.9360369443893433, "learning_rate": 2.914715143817311e-06, "loss": 0.7409, "step": 35600 }, { "epoch": 2.826807438131338, "grad_norm": 0.9968975186347961, "learning_rate": 2.901484480431849e-06, "loss": 0.8111, "step": 35610 }, { "epoch": 2.827601262180238, "grad_norm": 0.7860965132713318, "learning_rate": 2.888253817046387e-06, "loss": 0.8074, "step": 35620 }, { "epoch": 2.828395086229137, "grad_norm": 0.8920134902000427, "learning_rate": 2.8750231536609246e-06, "loss": 0.7975, "step": 35630 }, { "epoch": 2.8291889102780368, "grad_norm": 0.802579939365387, "learning_rate": 2.8617924902754627e-06, "loss": 0.7383, "step": 35640 }, { "epoch": 2.8299827343269364, "grad_norm": 0.790934681892395, "learning_rate": 2.8485618268900003e-06, "loss": 0.7811, "step": 35650 }, { 
"epoch": 2.830776558375836, "grad_norm": 0.9820284247398376, "learning_rate": 2.835331163504538e-06, "loss": 0.7466, "step": 35660 }, { "epoch": 2.8315703824247356, "grad_norm": 1.035416841506958, "learning_rate": 2.822100500119076e-06, "loss": 0.7927, "step": 35670 }, { "epoch": 2.832364206473635, "grad_norm": 1.0442098379135132, "learning_rate": 2.808869836733614e-06, "loss": 0.7825, "step": 35680 }, { "epoch": 2.833158030522535, "grad_norm": 1.0599391460418701, "learning_rate": 2.795639173348152e-06, "loss": 0.8229, "step": 35690 }, { "epoch": 2.8339518545714344, "grad_norm": 1.2767279148101807, "learning_rate": 2.78240850996269e-06, "loss": 0.7614, "step": 35700 }, { "epoch": 2.834745678620334, "grad_norm": 1.054215431213379, "learning_rate": 2.7691778465772275e-06, "loss": 0.799, "step": 35710 }, { "epoch": 2.835539502669233, "grad_norm": 0.8956333994865417, "learning_rate": 2.755947183191765e-06, "loss": 0.7311, "step": 35720 }, { "epoch": 2.8363333267181328, "grad_norm": 1.0596696138381958, "learning_rate": 2.7427165198063033e-06, "loss": 0.7634, "step": 35730 }, { "epoch": 2.8371271507670324, "grad_norm": 1.0594090223312378, "learning_rate": 2.7294858564208414e-06, "loss": 0.8017, "step": 35740 }, { "epoch": 2.837920974815932, "grad_norm": 1.278930902481079, "learning_rate": 2.716255193035379e-06, "loss": 0.7555, "step": 35750 }, { "epoch": 2.8387147988648316, "grad_norm": 1.1408640146255493, "learning_rate": 2.7030245296499167e-06, "loss": 0.7985, "step": 35760 }, { "epoch": 2.839508622913731, "grad_norm": 0.8677951693534851, "learning_rate": 2.6897938662644547e-06, "loss": 0.7973, "step": 35770 }, { "epoch": 2.840302446962631, "grad_norm": 0.9274892807006836, "learning_rate": 2.6765632028789924e-06, "loss": 0.81, "step": 35780 }, { "epoch": 2.8410962710115304, "grad_norm": 0.9693270921707153, "learning_rate": 2.6633325394935305e-06, "loss": 0.7442, "step": 35790 }, { "epoch": 2.84189009506043, "grad_norm": 0.9193242192268372, "learning_rate": 
2.6501018761080686e-06, "loss": 0.7411, "step": 35800 }, { "epoch": 2.842683919109329, "grad_norm": 0.9392445087432861, "learning_rate": 2.636871212722606e-06, "loss": 0.7334, "step": 35810 }, { "epoch": 2.843477743158229, "grad_norm": 0.9353786706924438, "learning_rate": 2.623640549337144e-06, "loss": 0.8122, "step": 35820 }, { "epoch": 2.8442715672071284, "grad_norm": 0.7990022301673889, "learning_rate": 2.6104098859516815e-06, "loss": 0.7969, "step": 35830 }, { "epoch": 2.845065391256028, "grad_norm": 0.7755838632583618, "learning_rate": 2.5971792225662196e-06, "loss": 0.8085, "step": 35840 }, { "epoch": 2.8458592153049276, "grad_norm": 0.9300201535224915, "learning_rate": 2.5839485591807577e-06, "loss": 0.8288, "step": 35850 }, { "epoch": 2.846653039353827, "grad_norm": 1.0524238348007202, "learning_rate": 2.5707178957952953e-06, "loss": 0.7261, "step": 35860 }, { "epoch": 2.847446863402727, "grad_norm": 1.0584592819213867, "learning_rate": 2.557487232409833e-06, "loss": 0.779, "step": 35870 }, { "epoch": 2.8482406874516264, "grad_norm": 1.093838095664978, "learning_rate": 2.544256569024371e-06, "loss": 0.8021, "step": 35880 }, { "epoch": 2.849034511500526, "grad_norm": 0.6603565216064453, "learning_rate": 2.5310259056389087e-06, "loss": 0.8322, "step": 35890 }, { "epoch": 2.8498283355494256, "grad_norm": 0.9231084585189819, "learning_rate": 2.517795242253447e-06, "loss": 0.7669, "step": 35900 }, { "epoch": 2.850622159598325, "grad_norm": 0.9503127336502075, "learning_rate": 2.5045645788679845e-06, "loss": 0.7969, "step": 35910 }, { "epoch": 2.8514159836472244, "grad_norm": 0.9831972122192383, "learning_rate": 2.491333915482522e-06, "loss": 0.7686, "step": 35920 }, { "epoch": 2.8522098076961244, "grad_norm": 0.9379409551620483, "learning_rate": 2.4781032520970602e-06, "loss": 0.7717, "step": 35930 }, { "epoch": 2.8530036317450236, "grad_norm": 0.9157143235206604, "learning_rate": 2.4648725887115983e-06, "loss": 0.7746, "step": 35940 }, { "epoch": 
2.853797455793923, "grad_norm": 0.896962583065033, "learning_rate": 2.451641925326136e-06, "loss": 0.7007, "step": 35950 }, { "epoch": 2.854591279842823, "grad_norm": 1.0108494758605957, "learning_rate": 2.438411261940674e-06, "loss": 0.7847, "step": 35960 }, { "epoch": 2.8553851038917224, "grad_norm": 1.0298715829849243, "learning_rate": 2.4251805985552117e-06, "loss": 0.7752, "step": 35970 }, { "epoch": 2.856178927940622, "grad_norm": 1.0451425313949585, "learning_rate": 2.4119499351697493e-06, "loss": 0.7909, "step": 35980 }, { "epoch": 2.8569727519895216, "grad_norm": 0.9338813424110413, "learning_rate": 2.3987192717842874e-06, "loss": 0.7663, "step": 35990 }, { "epoch": 2.857766576038421, "grad_norm": 0.905019998550415, "learning_rate": 2.3854886083988255e-06, "loss": 0.746, "step": 36000 }, { "epoch": 2.8585604000873204, "grad_norm": 0.8356999754905701, "learning_rate": 2.372257945013363e-06, "loss": 0.7736, "step": 36010 }, { "epoch": 2.8593542241362204, "grad_norm": 1.016014575958252, "learning_rate": 2.359027281627901e-06, "loss": 0.8181, "step": 36020 }, { "epoch": 2.8601480481851196, "grad_norm": 0.9665111303329468, "learning_rate": 2.345796618242439e-06, "loss": 0.7668, "step": 36030 }, { "epoch": 2.860941872234019, "grad_norm": 0.8725518584251404, "learning_rate": 2.3325659548569766e-06, "loss": 0.8637, "step": 36040 }, { "epoch": 2.861735696282919, "grad_norm": 1.1010116338729858, "learning_rate": 2.3193352914715146e-06, "loss": 0.7869, "step": 36050 }, { "epoch": 2.8625295203318184, "grad_norm": 1.0355409383773804, "learning_rate": 2.3061046280860527e-06, "loss": 0.7937, "step": 36060 }, { "epoch": 2.863323344380718, "grad_norm": 1.042080283164978, "learning_rate": 2.29287396470059e-06, "loss": 0.7468, "step": 36070 }, { "epoch": 2.8641171684296176, "grad_norm": 0.9209195375442505, "learning_rate": 2.279643301315128e-06, "loss": 0.765, "step": 36080 }, { "epoch": 2.864910992478517, "grad_norm": 0.9951355457305908, "learning_rate": 
2.2664126379296657e-06, "loss": 0.7323, "step": 36090 }, { "epoch": 2.865704816527417, "grad_norm": 0.969089925289154, "learning_rate": 2.2531819745442038e-06, "loss": 0.7319, "step": 36100 }, { "epoch": 2.8664986405763164, "grad_norm": 1.1039822101593018, "learning_rate": 2.239951311158742e-06, "loss": 0.7683, "step": 36110 }, { "epoch": 2.8672924646252156, "grad_norm": 1.0060070753097534, "learning_rate": 2.2267206477732795e-06, "loss": 0.7516, "step": 36120 }, { "epoch": 2.8680862886741156, "grad_norm": 0.8177821636199951, "learning_rate": 2.213489984387817e-06, "loss": 0.8189, "step": 36130 }, { "epoch": 2.868880112723015, "grad_norm": 0.8943607807159424, "learning_rate": 2.2002593210023552e-06, "loss": 0.7652, "step": 36140 }, { "epoch": 2.8696739367719144, "grad_norm": 0.8499715328216553, "learning_rate": 2.187028657616893e-06, "loss": 0.8638, "step": 36150 }, { "epoch": 2.870467760820814, "grad_norm": 0.8518121838569641, "learning_rate": 2.173797994231431e-06, "loss": 0.8079, "step": 36160 }, { "epoch": 2.8712615848697136, "grad_norm": 1.0183836221694946, "learning_rate": 2.1605673308459686e-06, "loss": 0.7418, "step": 36170 }, { "epoch": 2.872055408918613, "grad_norm": 1.092712640762329, "learning_rate": 2.1473366674605063e-06, "loss": 0.7641, "step": 36180 }, { "epoch": 2.872849232967513, "grad_norm": 1.0446547269821167, "learning_rate": 2.1341060040750444e-06, "loss": 0.79, "step": 36190 }, { "epoch": 2.8736430570164124, "grad_norm": 0.9164413213729858, "learning_rate": 2.1208753406895824e-06, "loss": 0.8226, "step": 36200 }, { "epoch": 2.874436881065312, "grad_norm": 0.9824967980384827, "learning_rate": 2.10764467730412e-06, "loss": 0.7573, "step": 36210 }, { "epoch": 2.8752307051142116, "grad_norm": 1.1960909366607666, "learning_rate": 2.094414013918658e-06, "loss": 0.7664, "step": 36220 }, { "epoch": 2.876024529163111, "grad_norm": 1.140021562576294, "learning_rate": 2.081183350533196e-06, "loss": 0.7775, "step": 36230 }, { "epoch": 2.8768183532120104, 
"grad_norm": 1.0924111604690552, "learning_rate": 2.0679526871477335e-06, "loss": 0.7845, "step": 36240 }, { "epoch": 2.87761217726091, "grad_norm": 0.8583391308784485, "learning_rate": 2.0547220237622716e-06, "loss": 0.8183, "step": 36250 }, { "epoch": 2.8784060013098096, "grad_norm": 1.004576563835144, "learning_rate": 2.0414913603768097e-06, "loss": 0.7346, "step": 36260 }, { "epoch": 2.879199825358709, "grad_norm": 0.8908334374427795, "learning_rate": 2.0282606969913473e-06, "loss": 0.86, "step": 36270 }, { "epoch": 2.879993649407609, "grad_norm": 1.0126510858535767, "learning_rate": 2.015030033605885e-06, "loss": 0.8082, "step": 36280 }, { "epoch": 2.8807874734565084, "grad_norm": 1.0101633071899414, "learning_rate": 2.001799370220423e-06, "loss": 0.796, "step": 36290 }, { "epoch": 2.881581297505408, "grad_norm": 0.8697376847267151, "learning_rate": 1.9885687068349607e-06, "loss": 0.7772, "step": 36300 }, { "epoch": 2.8823751215543076, "grad_norm": 1.0588469505310059, "learning_rate": 1.9753380434494988e-06, "loss": 0.7941, "step": 36310 }, { "epoch": 2.883168945603207, "grad_norm": 0.8329793810844421, "learning_rate": 1.962107380064037e-06, "loss": 0.7721, "step": 36320 }, { "epoch": 2.883962769652107, "grad_norm": 1.057317852973938, "learning_rate": 1.948876716678574e-06, "loss": 0.7572, "step": 36330 }, { "epoch": 2.884756593701006, "grad_norm": 0.87096107006073, "learning_rate": 1.935646053293112e-06, "loss": 0.7499, "step": 36340 }, { "epoch": 2.8855504177499056, "grad_norm": 0.9541813135147095, "learning_rate": 1.92241538990765e-06, "loss": 0.7405, "step": 36350 }, { "epoch": 2.886344241798805, "grad_norm": 0.7810908555984497, "learning_rate": 1.909184726522188e-06, "loss": 0.7629, "step": 36360 }, { "epoch": 2.887138065847705, "grad_norm": 0.8539981842041016, "learning_rate": 1.8959540631367258e-06, "loss": 0.7746, "step": 36370 }, { "epoch": 2.8879318898966044, "grad_norm": 1.008750081062317, "learning_rate": 1.8827233997512634e-06, "loss": 0.8307, 
"step": 36380 }, { "epoch": 2.888725713945504, "grad_norm": 0.9009593725204468, "learning_rate": 1.8694927363658015e-06, "loss": 0.7134, "step": 36390 }, { "epoch": 2.8895195379944036, "grad_norm": 1.03337562084198, "learning_rate": 1.8562620729803394e-06, "loss": 0.7966, "step": 36400 }, { "epoch": 2.8903133620433032, "grad_norm": 0.9321558475494385, "learning_rate": 1.843031409594877e-06, "loss": 0.8321, "step": 36410 }, { "epoch": 2.891107186092203, "grad_norm": 0.8705295324325562, "learning_rate": 1.829800746209415e-06, "loss": 0.821, "step": 36420 }, { "epoch": 2.891901010141102, "grad_norm": 1.093181848526001, "learning_rate": 1.816570082823953e-06, "loss": 0.7454, "step": 36430 }, { "epoch": 2.892694834190002, "grad_norm": 1.1398403644561768, "learning_rate": 1.8033394194384906e-06, "loss": 0.8026, "step": 36440 }, { "epoch": 2.8934886582389012, "grad_norm": 0.8322063088417053, "learning_rate": 1.7901087560530285e-06, "loss": 0.8001, "step": 36450 }, { "epoch": 2.894282482287801, "grad_norm": 0.9073172807693481, "learning_rate": 1.7768780926675666e-06, "loss": 0.8225, "step": 36460 }, { "epoch": 2.8950763063367004, "grad_norm": 0.8599694967269897, "learning_rate": 1.7636474292821043e-06, "loss": 0.7355, "step": 36470 }, { "epoch": 2.8958701303856, "grad_norm": 0.9327001571655273, "learning_rate": 1.7504167658966421e-06, "loss": 0.7628, "step": 36480 }, { "epoch": 2.8966639544344996, "grad_norm": 1.1032301187515259, "learning_rate": 1.7371861025111802e-06, "loss": 0.7719, "step": 36490 }, { "epoch": 2.8974577784833992, "grad_norm": 1.0107958316802979, "learning_rate": 1.7239554391257176e-06, "loss": 0.7785, "step": 36500 }, { "epoch": 2.898251602532299, "grad_norm": 0.9559915065765381, "learning_rate": 1.7107247757402557e-06, "loss": 0.7749, "step": 36510 }, { "epoch": 2.899045426581198, "grad_norm": 0.9474267363548279, "learning_rate": 1.6974941123547936e-06, "loss": 0.8143, "step": 36520 }, { "epoch": 2.899839250630098, "grad_norm": 0.7721654772758484, 
"learning_rate": 1.6842634489693313e-06, "loss": 0.7488, "step": 36530 }, { "epoch": 2.9006330746789972, "grad_norm": 0.8206939697265625, "learning_rate": 1.6710327855838693e-06, "loss": 0.7174, "step": 36540 }, { "epoch": 2.901426898727897, "grad_norm": 0.8247297406196594, "learning_rate": 1.6578021221984072e-06, "loss": 0.7827, "step": 36550 }, { "epoch": 2.9022207227767964, "grad_norm": 1.261728286743164, "learning_rate": 1.6445714588129449e-06, "loss": 0.7458, "step": 36560 }, { "epoch": 2.903014546825696, "grad_norm": 0.9776942729949951, "learning_rate": 1.631340795427483e-06, "loss": 0.7707, "step": 36570 }, { "epoch": 2.9038083708745956, "grad_norm": 1.0205057859420776, "learning_rate": 1.6181101320420208e-06, "loss": 0.7757, "step": 36580 }, { "epoch": 2.9046021949234953, "grad_norm": 1.0153295993804932, "learning_rate": 1.6048794686565585e-06, "loss": 0.7932, "step": 36590 }, { "epoch": 2.905396018972395, "grad_norm": 1.073330044746399, "learning_rate": 1.5916488052710963e-06, "loss": 0.742, "step": 36600 }, { "epoch": 2.9061898430212945, "grad_norm": 1.0096873044967651, "learning_rate": 1.578418141885634e-06, "loss": 0.8228, "step": 36610 }, { "epoch": 2.906983667070194, "grad_norm": 1.1333979368209839, "learning_rate": 1.565187478500172e-06, "loss": 0.7219, "step": 36620 }, { "epoch": 2.9077774911190932, "grad_norm": 1.1649788618087769, "learning_rate": 1.55195681511471e-06, "loss": 0.8223, "step": 36630 }, { "epoch": 2.9085713151679933, "grad_norm": 0.8990685939788818, "learning_rate": 1.5387261517292478e-06, "loss": 0.7569, "step": 36640 }, { "epoch": 2.9093651392168924, "grad_norm": 1.097396731376648, "learning_rate": 1.5254954883437857e-06, "loss": 0.7456, "step": 36650 }, { "epoch": 2.910158963265792, "grad_norm": 0.9843643307685852, "learning_rate": 1.5122648249583233e-06, "loss": 0.7238, "step": 36660 }, { "epoch": 2.9109527873146916, "grad_norm": 1.0941895246505737, "learning_rate": 1.4990341615728614e-06, "loss": 0.8031, "step": 36670 }, { 
"epoch": 2.9117466113635913, "grad_norm": 0.9502320289611816, "learning_rate": 1.485803498187399e-06, "loss": 0.8127, "step": 36680 }, { "epoch": 2.912540435412491, "grad_norm": 0.8268551826477051, "learning_rate": 1.472572834801937e-06, "loss": 0.7619, "step": 36690 }, { "epoch": 2.9133342594613905, "grad_norm": 1.1277645826339722, "learning_rate": 1.459342171416475e-06, "loss": 0.8046, "step": 36700 }, { "epoch": 2.91412808351029, "grad_norm": 0.9089832901954651, "learning_rate": 1.4461115080310127e-06, "loss": 0.7497, "step": 36710 }, { "epoch": 2.9149219075591892, "grad_norm": 0.9186846017837524, "learning_rate": 1.4328808446455505e-06, "loss": 0.771, "step": 36720 }, { "epoch": 2.9157157316080893, "grad_norm": 1.0367939472198486, "learning_rate": 1.4196501812600884e-06, "loss": 0.7857, "step": 36730 }, { "epoch": 2.9165095556569884, "grad_norm": 0.9849185347557068, "learning_rate": 1.4064195178746263e-06, "loss": 0.8304, "step": 36740 }, { "epoch": 2.917303379705888, "grad_norm": 1.022588849067688, "learning_rate": 1.3931888544891641e-06, "loss": 0.7862, "step": 36750 }, { "epoch": 2.9180972037547876, "grad_norm": 0.8609658479690552, "learning_rate": 1.379958191103702e-06, "loss": 0.7718, "step": 36760 }, { "epoch": 2.9188910278036873, "grad_norm": 0.9891222715377808, "learning_rate": 1.3667275277182399e-06, "loss": 0.7544, "step": 36770 }, { "epoch": 2.919684851852587, "grad_norm": 1.002481460571289, "learning_rate": 1.3534968643327777e-06, "loss": 0.8537, "step": 36780 }, { "epoch": 2.9204786759014865, "grad_norm": 1.0346823930740356, "learning_rate": 1.3402662009473156e-06, "loss": 0.7605, "step": 36790 }, { "epoch": 2.921272499950386, "grad_norm": 1.154091715812683, "learning_rate": 1.3270355375618535e-06, "loss": 0.7222, "step": 36800 }, { "epoch": 2.9220663239992857, "grad_norm": 1.07093346118927, "learning_rate": 1.3138048741763914e-06, "loss": 0.8214, "step": 36810 }, { "epoch": 2.9228601480481853, "grad_norm": 0.9682141542434692, "learning_rate": 
1.300574210790929e-06, "loss": 0.7921, "step": 36820 }, { "epoch": 2.9236539720970844, "grad_norm": 0.9235316514968872, "learning_rate": 1.287343547405467e-06, "loss": 0.8082, "step": 36830 }, { "epoch": 2.9244477961459845, "grad_norm": 1.0996876955032349, "learning_rate": 1.2741128840200047e-06, "loss": 0.7969, "step": 36840 }, { "epoch": 2.9252416201948837, "grad_norm": 0.8696537017822266, "learning_rate": 1.2608822206345426e-06, "loss": 0.7344, "step": 36850 }, { "epoch": 2.9260354442437833, "grad_norm": 1.0605179071426392, "learning_rate": 1.2476515572490805e-06, "loss": 0.7609, "step": 36860 }, { "epoch": 2.926829268292683, "grad_norm": 1.034128189086914, "learning_rate": 1.2344208938636184e-06, "loss": 0.8119, "step": 36870 }, { "epoch": 2.9276230923415825, "grad_norm": 1.083617091178894, "learning_rate": 1.2211902304781562e-06, "loss": 0.7628, "step": 36880 }, { "epoch": 2.928416916390482, "grad_norm": 1.001076102256775, "learning_rate": 1.207959567092694e-06, "loss": 0.7721, "step": 36890 }, { "epoch": 2.9292107404393817, "grad_norm": 0.8138130903244019, "learning_rate": 1.194728903707232e-06, "loss": 0.7045, "step": 36900 }, { "epoch": 2.9300045644882813, "grad_norm": 0.8781431317329407, "learning_rate": 1.1814982403217698e-06, "loss": 0.8074, "step": 36910 }, { "epoch": 2.930798388537181, "grad_norm": 0.8041318655014038, "learning_rate": 1.1682675769363077e-06, "loss": 0.7763, "step": 36920 }, { "epoch": 2.9315922125860805, "grad_norm": 0.9655971527099609, "learning_rate": 1.1550369135508456e-06, "loss": 0.7511, "step": 36930 }, { "epoch": 2.9323860366349797, "grad_norm": 1.0426075458526611, "learning_rate": 1.1418062501653834e-06, "loss": 0.7663, "step": 36940 }, { "epoch": 2.9331798606838793, "grad_norm": 1.0765414237976074, "learning_rate": 1.128575586779921e-06, "loss": 0.8248, "step": 36950 }, { "epoch": 2.933973684732779, "grad_norm": 0.8941267132759094, "learning_rate": 1.1153449233944592e-06, "loss": 0.8233, "step": 36960 }, { "epoch": 
2.9347675087816785, "grad_norm": 1.1485576629638672, "learning_rate": 1.1021142600089968e-06, "loss": 0.7031, "step": 36970 }, { "epoch": 2.935561332830578, "grad_norm": 1.0902643203735352, "learning_rate": 1.0888835966235347e-06, "loss": 0.7641, "step": 36980 }, { "epoch": 2.9363551568794777, "grad_norm": 0.8667743802070618, "learning_rate": 1.0756529332380728e-06, "loss": 0.7518, "step": 36990 }, { "epoch": 2.9371489809283773, "grad_norm": 1.2408496141433716, "learning_rate": 1.0624222698526104e-06, "loss": 0.8086, "step": 37000 }, { "epoch": 2.937942804977277, "grad_norm": 0.9038211703300476, "learning_rate": 1.0491916064671483e-06, "loss": 0.7384, "step": 37010 }, { "epoch": 2.9387366290261765, "grad_norm": 1.0854371786117554, "learning_rate": 1.0359609430816862e-06, "loss": 0.73, "step": 37020 }, { "epoch": 2.9395304530750757, "grad_norm": 0.986851155757904, "learning_rate": 1.022730279696224e-06, "loss": 0.7916, "step": 37030 }, { "epoch": 2.9403242771239757, "grad_norm": 0.9306720495223999, "learning_rate": 1.009499616310762e-06, "loss": 0.7434, "step": 37040 }, { "epoch": 2.941118101172875, "grad_norm": 1.075705647468567, "learning_rate": 9.962689529252998e-07, "loss": 0.7953, "step": 37050 }, { "epoch": 2.9419119252217745, "grad_norm": 0.9013558626174927, "learning_rate": 9.830382895398376e-07, "loss": 0.7916, "step": 37060 }, { "epoch": 2.942705749270674, "grad_norm": 1.0537384748458862, "learning_rate": 9.698076261543755e-07, "loss": 0.768, "step": 37070 }, { "epoch": 2.9434995733195737, "grad_norm": 0.8394224643707275, "learning_rate": 9.565769627689132e-07, "loss": 0.8091, "step": 37080 }, { "epoch": 2.9442933973684733, "grad_norm": 0.9808106422424316, "learning_rate": 9.433462993834511e-07, "loss": 0.7533, "step": 37090 }, { "epoch": 2.945087221417373, "grad_norm": 0.8724024295806885, "learning_rate": 9.30115635997989e-07, "loss": 0.736, "step": 37100 }, { "epoch": 2.9458810454662725, "grad_norm": 1.0221234560012817, "learning_rate": 
9.168849726125268e-07, "loss": 0.8032, "step": 37110 }, { "epoch": 2.946674869515172, "grad_norm": 0.8556252121925354, "learning_rate": 9.036543092270647e-07, "loss": 0.7819, "step": 37120 }, { "epoch": 2.9474686935640717, "grad_norm": 0.9892777800559998, "learning_rate": 8.904236458416025e-07, "loss": 0.7875, "step": 37130 }, { "epoch": 2.948262517612971, "grad_norm": 0.8527591824531555, "learning_rate": 8.771929824561404e-07, "loss": 0.7543, "step": 37140 }, { "epoch": 2.949056341661871, "grad_norm": 1.1402502059936523, "learning_rate": 8.639623190706783e-07, "loss": 0.8077, "step": 37150 }, { "epoch": 2.94985016571077, "grad_norm": 0.9675154089927673, "learning_rate": 8.507316556852161e-07, "loss": 0.7796, "step": 37160 }, { "epoch": 2.9506439897596697, "grad_norm": 0.8908880352973938, "learning_rate": 8.375009922997539e-07, "loss": 0.7795, "step": 37170 }, { "epoch": 2.9514378138085693, "grad_norm": 0.9992043972015381, "learning_rate": 8.242703289142918e-07, "loss": 0.801, "step": 37180 }, { "epoch": 2.952231637857469, "grad_norm": 0.9588622450828552, "learning_rate": 8.110396655288297e-07, "loss": 0.83, "step": 37190 }, { "epoch": 2.9530254619063685, "grad_norm": 0.9657321572303772, "learning_rate": 7.978090021433675e-07, "loss": 0.8267, "step": 37200 }, { "epoch": 2.953819285955268, "grad_norm": 0.8427868485450745, "learning_rate": 7.845783387579052e-07, "loss": 0.8107, "step": 37210 }, { "epoch": 2.9546131100041677, "grad_norm": 1.1387544870376587, "learning_rate": 7.713476753724432e-07, "loss": 0.7857, "step": 37220 }, { "epoch": 2.955406934053067, "grad_norm": 0.8458208441734314, "learning_rate": 7.581170119869811e-07, "loss": 0.815, "step": 37230 }, { "epoch": 2.956200758101967, "grad_norm": 1.2061816453933716, "learning_rate": 7.44886348601519e-07, "loss": 0.7483, "step": 37240 }, { "epoch": 2.956994582150866, "grad_norm": 0.93003910779953, "learning_rate": 7.316556852160567e-07, "loss": 0.7802, "step": 37250 }, { "epoch": 2.9577884061997657, 
"grad_norm": 0.8982664346694946, "learning_rate": 7.184250218305946e-07, "loss": 0.8041, "step": 37260 }, { "epoch": 2.9585822302486653, "grad_norm": 1.058793306350708, "learning_rate": 7.051943584451326e-07, "loss": 0.7814, "step": 37270 }, { "epoch": 2.959376054297565, "grad_norm": 0.9171466827392578, "learning_rate": 6.919636950596703e-07, "loss": 0.7798, "step": 37280 }, { "epoch": 2.9601698783464645, "grad_norm": 0.8719449043273926, "learning_rate": 6.787330316742082e-07, "loss": 0.8213, "step": 37290 }, { "epoch": 2.960963702395364, "grad_norm": 0.8667718172073364, "learning_rate": 6.655023682887459e-07, "loss": 0.7089, "step": 37300 }, { "epoch": 2.9617575264442637, "grad_norm": 0.8706735968589783, "learning_rate": 6.522717049032839e-07, "loss": 0.7605, "step": 37310 }, { "epoch": 2.9625513504931633, "grad_norm": 0.9747303128242493, "learning_rate": 6.390410415178218e-07, "loss": 0.7329, "step": 37320 }, { "epoch": 2.963345174542063, "grad_norm": 1.0735442638397217, "learning_rate": 6.258103781323596e-07, "loss": 0.784, "step": 37330 }, { "epoch": 2.964138998590962, "grad_norm": 0.8519766330718994, "learning_rate": 6.125797147468974e-07, "loss": 0.7985, "step": 37340 }, { "epoch": 2.964932822639862, "grad_norm": 1.0868384838104248, "learning_rate": 5.993490513614353e-07, "loss": 0.8058, "step": 37350 }, { "epoch": 2.9657266466887613, "grad_norm": 1.1832605600357056, "learning_rate": 5.861183879759732e-07, "loss": 0.7284, "step": 37360 }, { "epoch": 2.966520470737661, "grad_norm": 1.0526885986328125, "learning_rate": 5.72887724590511e-07, "loss": 0.8052, "step": 37370 }, { "epoch": 2.9673142947865605, "grad_norm": 1.137607455253601, "learning_rate": 5.596570612050488e-07, "loss": 0.8004, "step": 37380 }, { "epoch": 2.96810811883546, "grad_norm": 0.8867448568344116, "learning_rate": 5.464263978195867e-07, "loss": 0.7579, "step": 37390 }, { "epoch": 2.9689019428843597, "grad_norm": 0.992607057094574, "learning_rate": 5.331957344341246e-07, "loss": 0.7234, 
"step": 37400 }, { "epoch": 2.9696957669332593, "grad_norm": 0.9389944076538086, "learning_rate": 5.199650710486624e-07, "loss": 0.7204, "step": 37410 }, { "epoch": 2.970489590982159, "grad_norm": 1.1689140796661377, "learning_rate": 5.067344076632003e-07, "loss": 0.7956, "step": 37420 }, { "epoch": 2.9712834150310585, "grad_norm": 1.0063467025756836, "learning_rate": 4.93503744277738e-07, "loss": 0.8114, "step": 37430 }, { "epoch": 2.972077239079958, "grad_norm": 0.9847369194030762, "learning_rate": 4.815961472308222e-07, "loss": 0.7854, "step": 37440 }, { "epoch": 2.9728710631288573, "grad_norm": 0.8891132473945618, "learning_rate": 4.6836548384536e-07, "loss": 0.8131, "step": 37450 }, { "epoch": 2.973664887177757, "grad_norm": 1.1381394863128662, "learning_rate": 4.551348204598979e-07, "loss": 0.7877, "step": 37460 }, { "epoch": 2.9744587112266565, "grad_norm": 1.0094081163406372, "learning_rate": 4.4190415707443577e-07, "loss": 0.7812, "step": 37470 }, { "epoch": 2.975252535275556, "grad_norm": 1.1680759191513062, "learning_rate": 4.286734936889736e-07, "loss": 0.7642, "step": 37480 }, { "epoch": 2.9760463593244557, "grad_norm": 1.0051449537277222, "learning_rate": 4.1544283030351145e-07, "loss": 0.7755, "step": 37490 }, { "epoch": 2.9768401833733553, "grad_norm": 1.1129390001296997, "learning_rate": 4.0221216691804927e-07, "loss": 0.7441, "step": 37500 }, { "epoch": 2.977634007422255, "grad_norm": 0.8162450194358826, "learning_rate": 3.8898150353258714e-07, "loss": 0.7353, "step": 37510 }, { "epoch": 2.9784278314711545, "grad_norm": 0.8343786001205444, "learning_rate": 3.7575084014712495e-07, "loss": 0.8653, "step": 37520 }, { "epoch": 2.979221655520054, "grad_norm": 0.9422026872634888, "learning_rate": 3.625201767616629e-07, "loss": 0.7788, "step": 37530 }, { "epoch": 2.9800154795689533, "grad_norm": 0.7513265013694763, "learning_rate": 3.492895133762007e-07, "loss": 0.7978, "step": 37540 }, { "epoch": 2.9808093036178533, "grad_norm": 1.1932988166809082, 
"learning_rate": 3.3605884999073856e-07, "loss": 0.7766, "step": 37550 }, { "epoch": 2.9816031276667525, "grad_norm": 0.8924500942230225, "learning_rate": 3.228281866052764e-07, "loss": 0.816, "step": 37560 }, { "epoch": 2.982396951715652, "grad_norm": 1.2411748170852661, "learning_rate": 3.095975232198143e-07, "loss": 0.7699, "step": 37570 }, { "epoch": 2.9831907757645517, "grad_norm": 0.9249628186225891, "learning_rate": 2.963668598343521e-07, "loss": 0.8282, "step": 37580 }, { "epoch": 2.9839845998134513, "grad_norm": 0.9491875171661377, "learning_rate": 2.8313619644889e-07, "loss": 0.848, "step": 37590 }, { "epoch": 2.984778423862351, "grad_norm": 1.0644991397857666, "learning_rate": 2.699055330634278e-07, "loss": 0.7872, "step": 37600 }, { "epoch": 2.9855722479112505, "grad_norm": 1.1010690927505493, "learning_rate": 2.5667486967796566e-07, "loss": 0.7806, "step": 37610 }, { "epoch": 2.98636607196015, "grad_norm": 0.8743943572044373, "learning_rate": 2.4344420629250353e-07, "loss": 0.7946, "step": 37620 }, { "epoch": 2.9871598960090497, "grad_norm": 0.909243106842041, "learning_rate": 2.3021354290704137e-07, "loss": 0.8322, "step": 37630 }, { "epoch": 2.9879537200579493, "grad_norm": 0.8433053493499756, "learning_rate": 2.1698287952157921e-07, "loss": 0.8142, "step": 37640 }, { "epoch": 2.9887475441068485, "grad_norm": 0.8749028444290161, "learning_rate": 2.0375221613611706e-07, "loss": 0.7644, "step": 37650 }, { "epoch": 2.989541368155748, "grad_norm": 1.0767362117767334, "learning_rate": 1.9052155275065492e-07, "loss": 0.7983, "step": 37660 }, { "epoch": 2.9903351922046477, "grad_norm": 1.0285117626190186, "learning_rate": 1.772908893651928e-07, "loss": 0.7426, "step": 37670 }, { "epoch": 2.9911290162535473, "grad_norm": 0.986137330532074, "learning_rate": 1.6406022597973063e-07, "loss": 0.7596, "step": 37680 }, { "epoch": 2.991922840302447, "grad_norm": 0.9621859192848206, "learning_rate": 1.508295625942685e-07, "loss": 0.7735, "step": 37690 }, { "epoch": 
2.9927166643513465, "grad_norm": 0.9703050851821899, "learning_rate": 1.3759889920880634e-07, "loss": 0.7648, "step": 37700 }, { "epoch": 2.993510488400246, "grad_norm": 0.7976683378219604, "learning_rate": 1.2436823582334419e-07, "loss": 0.7838, "step": 37710 }, { "epoch": 2.9943043124491457, "grad_norm": 0.8955792188644409, "learning_rate": 1.1113757243788204e-07, "loss": 0.7638, "step": 37720 }, { "epoch": 2.9950981364980453, "grad_norm": 0.8139486312866211, "learning_rate": 9.79069090524199e-08, "loss": 0.7634, "step": 37730 }, { "epoch": 2.9958919605469445, "grad_norm": 1.1136054992675781, "learning_rate": 8.467624566695774e-08, "loss": 0.7807, "step": 37740 }, { "epoch": 2.9966857845958446, "grad_norm": 0.8526073098182678, "learning_rate": 7.144558228149559e-08, "loss": 0.742, "step": 37750 }, { "epoch": 2.9974796086447437, "grad_norm": 0.9740519523620605, "learning_rate": 5.821491889603345e-08, "loss": 0.7485, "step": 37760 }, { "epoch": 2.9982734326936433, "grad_norm": 0.8623798489570618, "learning_rate": 4.4984255510571303e-08, "loss": 0.8157, "step": 37770 }, { "epoch": 2.999067256742543, "grad_norm": 1.0553157329559326, "learning_rate": 3.175359212510916e-08, "loss": 0.7943, "step": 37780 }, { "epoch": 2.9998610807914425, "grad_norm": 0.9887831807136536, "learning_rate": 1.8522928739647007e-08, "loss": 0.7406, "step": 37790 } ], "logging_steps": 10, "max_steps": 37791, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.60495148153779e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }