{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9995218742529284, "eval_steps": 500, "global_step": 8364, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00035859431030360985, "grad_norm": 5.813747406005859, "learning_rate": 1.1947431302270011e-08, "loss": 0.7557, "step": 1 }, { "epoch": 0.0007171886206072197, "grad_norm": 6.137444019317627, "learning_rate": 2.3894862604540023e-08, "loss": 0.7931, "step": 2 }, { "epoch": 0.0010757829309108295, "grad_norm": 5.942802429199219, "learning_rate": 3.5842293906810036e-08, "loss": 0.789, "step": 3 }, { "epoch": 0.0014343772412144394, "grad_norm": 6.085781574249268, "learning_rate": 4.7789725209080046e-08, "loss": 0.7998, "step": 4 }, { "epoch": 0.0017929715515180493, "grad_norm": 5.89831018447876, "learning_rate": 5.973715651135007e-08, "loss": 0.7609, "step": 5 }, { "epoch": 0.002151565861821659, "grad_norm": 6.242189407348633, "learning_rate": 7.168458781362007e-08, "loss": 0.8069, "step": 6 }, { "epoch": 0.002510160172125269, "grad_norm": 5.904728889465332, "learning_rate": 8.363201911589009e-08, "loss": 0.7615, "step": 7 }, { "epoch": 0.002868754482428879, "grad_norm": 5.817963123321533, "learning_rate": 9.557945041816009e-08, "loss": 0.7561, "step": 8 }, { "epoch": 0.0032273487927324887, "grad_norm": 5.802619457244873, "learning_rate": 1.0752688172043012e-07, "loss": 0.7567, "step": 9 }, { "epoch": 0.0035859431030360986, "grad_norm": 6.080618381500244, "learning_rate": 1.1947431302270014e-07, "loss": 0.7826, "step": 10 }, { "epoch": 0.0039445374133397085, "grad_norm": 6.149567604064941, "learning_rate": 1.3142174432497014e-07, "loss": 0.7818, "step": 11 }, { "epoch": 0.004303131723643318, "grad_norm": 6.3897504806518555, "learning_rate": 1.4336917562724014e-07, "loss": 0.8132, "step": 12 }, { "epoch": 0.004661726033946928, "grad_norm": 6.261907577514648, "learning_rate": 1.5531660692951017e-07, "loss": 0.8055, "step": 13 }, { "epoch": 0.005020320344250538, "grad_norm": 6.215187072753906, "learning_rate": 1.6726403823178018e-07, "loss": 0.7999, "step": 14 }, { "epoch": 0.005378914654554148, "grad_norm": 5.934791088104248, "learning_rate": 1.7921146953405018e-07, "loss": 0.775, "step": 15 }, { "epoch": 0.005737508964857758, "grad_norm": 6.008138656616211, "learning_rate": 1.9115890083632018e-07, "loss": 0.7961, "step": 16 }, { "epoch": 0.006096103275161367, "grad_norm": 5.816669464111328, "learning_rate": 2.031063321385902e-07, "loss": 0.7666, "step": 17 }, { "epoch": 0.006454697585464977, "grad_norm": 5.934407711029053, "learning_rate": 2.1505376344086024e-07, "loss": 0.7906, "step": 18 }, { "epoch": 0.006813291895768587, "grad_norm": 5.895085334777832, "learning_rate": 2.2700119474313027e-07, "loss": 0.7783, "step": 19 }, { "epoch": 0.007171886206072197, "grad_norm": 5.743860244750977, "learning_rate": 2.389486260454003e-07, "loss": 0.7762, "step": 20 }, { "epoch": 0.007530480516375807, "grad_norm": 5.963977336883545, "learning_rate": 2.508960573476703e-07, "loss": 0.8013, "step": 21 }, { "epoch": 0.007889074826679417, "grad_norm": 5.890718460083008, "learning_rate": 2.628434886499403e-07, "loss": 0.794, "step": 22 }, { "epoch": 0.008247669136983026, "grad_norm": 5.616881370544434, "learning_rate": 2.747909199522103e-07, "loss": 0.7667, "step": 23 }, { "epoch": 0.008606263447286636, "grad_norm": 5.491384029388428, "learning_rate": 2.867383512544803e-07, "loss": 0.7715, "step": 24 }, { "epoch": 0.008964857757590245, "grad_norm": 5.702642917633057, "learning_rate": 2.9868578255675034e-07, "loss": 0.7805, "step": 25 }, { "epoch": 0.009323452067893857, "grad_norm": 5.592012882232666, "learning_rate": 3.1063321385902035e-07, "loss": 0.7717, "step": 26 }, { "epoch": 0.009682046378197466, "grad_norm": 5.1552042961120605, "learning_rate": 3.2258064516129035e-07, "loss": 0.7375, "step": 27 }, { "epoch": 0.010040640688501076, "grad_norm": 4.930234432220459, "learning_rate": 3.3452807646356035e-07, "loss": 0.7674, "step": 28 }, { "epoch": 0.010399234998804685, "grad_norm": 4.5574870109558105, "learning_rate": 3.4647550776583036e-07, "loss": 0.7229, "step": 29 }, { "epoch": 0.010757829309108296, "grad_norm": 4.551031589508057, "learning_rate": 3.5842293906810036e-07, "loss": 0.7211, "step": 30 }, { "epoch": 0.011116423619411906, "grad_norm": 4.596621513366699, "learning_rate": 3.7037037037037036e-07, "loss": 0.7241, "step": 31 }, { "epoch": 0.011475017929715515, "grad_norm": 4.638003826141357, "learning_rate": 3.8231780167264037e-07, "loss": 0.7333, "step": 32 }, { "epoch": 0.011833612240019125, "grad_norm": 4.641276836395264, "learning_rate": 3.942652329749104e-07, "loss": 0.7411, "step": 33 }, { "epoch": 0.012192206550322734, "grad_norm": 4.565354824066162, "learning_rate": 4.062126642771804e-07, "loss": 0.7248, "step": 34 }, { "epoch": 0.012550800860626345, "grad_norm": 4.349046230316162, "learning_rate": 4.1816009557945043e-07, "loss": 0.7188, "step": 35 }, { "epoch": 0.012909395170929955, "grad_norm": 4.504807472229004, "learning_rate": 4.301075268817205e-07, "loss": 0.742, "step": 36 }, { "epoch": 0.013267989481233564, "grad_norm": 3.6936798095703125, "learning_rate": 4.420549581839905e-07, "loss": 0.6815, "step": 37 }, { "epoch": 0.013626583791537174, "grad_norm": 2.8796613216400146, "learning_rate": 4.5400238948626054e-07, "loss": 0.6706, "step": 38 }, { "epoch": 0.013985178101840785, "grad_norm": 2.587453603744507, "learning_rate": 4.6594982078853055e-07, "loss": 0.6947, "step": 39 }, { "epoch": 0.014343772412144394, "grad_norm": 2.5277955532073975, "learning_rate": 4.778972520908006e-07, "loss": 0.6765, "step": 40 }, { "epoch": 0.014702366722448004, "grad_norm": 2.387789487838745, "learning_rate": 4.898446833930706e-07, "loss": 0.6691, "step": 41 }, { "epoch": 0.015060961032751613, "grad_norm": 2.4974539279937744, "learning_rate": 5.017921146953406e-07, "loss": 0.6838, "step": 42 }, { "epoch": 0.015419555343055223, "grad_norm": 2.5676281452178955, "learning_rate": 5.137395459976106e-07, "loss": 0.6885, "step": 43 }, { "epoch": 0.015778149653358834, "grad_norm": 2.463214635848999, "learning_rate": 5.256869772998806e-07, "loss": 0.6859, "step": 44 }, { "epoch": 0.016136743963662444, "grad_norm": 2.2105417251586914, "learning_rate": 5.376344086021506e-07, "loss": 0.6541, "step": 45 }, { "epoch": 0.016495338273966053, "grad_norm": 2.306584596633911, "learning_rate": 5.495818399044206e-07, "loss": 0.6492, "step": 46 }, { "epoch": 0.016853932584269662, "grad_norm": 2.14253306388855, "learning_rate": 5.615292712066906e-07, "loss": 0.6782, "step": 47 }, { "epoch": 0.017212526894573272, "grad_norm": 2.150322437286377, "learning_rate": 5.734767025089606e-07, "loss": 0.6542, "step": 48 }, { "epoch": 0.01757112120487688, "grad_norm": 2.016105890274048, "learning_rate": 5.854241338112306e-07, "loss": 0.6685, "step": 49 }, { "epoch": 0.01792971551518049, "grad_norm": 1.6633003950119019, "learning_rate": 5.973715651135007e-07, "loss": 0.6441, "step": 50 }, { "epoch": 0.018288309825484104, "grad_norm": 1.5507277250289917, "learning_rate": 6.093189964157707e-07, "loss": 0.6588, "step": 51 }, { "epoch": 0.018646904135787713, "grad_norm": 1.5771547555923462, "learning_rate": 6.212664277180407e-07, "loss": 0.6393, "step": 52 }, { "epoch": 0.019005498446091323, "grad_norm": 1.7296382188796997, "learning_rate": 6.332138590203107e-07, "loss": 0.6161, "step": 53 }, { "epoch": 0.019364092756394932, "grad_norm": 1.7727106809616089, "learning_rate": 6.451612903225807e-07, "loss": 0.6238, "step": 54 }, { "epoch": 0.01972268706669854, "grad_norm": 1.762372612953186, "learning_rate": 6.571087216248508e-07, "loss": 0.6079, "step": 55 }, { "epoch": 0.02008128137700215, "grad_norm": 1.777508020401001, "learning_rate": 6.690561529271207e-07, "loss": 0.6121, "step": 56 }, { "epoch": 0.02043987568730576, "grad_norm": 1.8281099796295166, "learning_rate": 6.810035842293908e-07, "loss": 0.6398, "step": 57 }, { "epoch": 0.02079846999760937, "grad_norm": 1.6171157360076904, "learning_rate": 6.929510155316607e-07, "loss": 0.5741, "step": 58 }, { "epoch": 0.02115706430791298, "grad_norm": 1.65292489528656, "learning_rate": 7.048984468339308e-07, "loss": 0.5953, "step": 59 }, { "epoch": 0.021515658618216593, "grad_norm": 1.3973344564437866, "learning_rate": 7.168458781362007e-07, "loss": 0.5956, "step": 60 }, { "epoch": 0.021874252928520202, "grad_norm": 1.391780138015747, "learning_rate": 7.287933094384708e-07, "loss": 0.6082, "step": 61 }, { "epoch": 0.02223284723882381, "grad_norm": 1.2572957277297974, "learning_rate": 7.407407407407407e-07, "loss": 0.5989, "step": 62 }, { "epoch": 0.02259144154912742, "grad_norm": 1.1930052042007446, "learning_rate": 7.526881720430108e-07, "loss": 0.6022, "step": 63 }, { "epoch": 0.02295003585943103, "grad_norm": 1.1392743587493896, "learning_rate": 7.646356033452807e-07, "loss": 0.5788, "step": 64 }, { "epoch": 0.02330863016973464, "grad_norm": 1.0564833879470825, "learning_rate": 7.765830346475508e-07, "loss": 0.576, "step": 65 }, { "epoch": 0.02366722448003825, "grad_norm": 1.0425097942352295, "learning_rate": 7.885304659498208e-07, "loss": 0.5893, "step": 66 }, { "epoch": 0.02402581879034186, "grad_norm": 0.9327940940856934, "learning_rate": 8.004778972520908e-07, "loss": 0.5655, "step": 67 }, { "epoch": 0.02438441310064547, "grad_norm": 0.9780715107917786, "learning_rate": 8.124253285543609e-07, "loss": 0.5541, "step": 68 }, { "epoch": 0.02474300741094908, "grad_norm": 0.8504688739776611, "learning_rate": 8.243727598566309e-07, "loss": 0.5715, "step": 69 }, { "epoch": 0.02510160172125269, "grad_norm": 0.8125919699668884, "learning_rate": 8.363201911589009e-07, "loss": 0.5623, "step": 70 }, { "epoch": 0.0254601960315563, "grad_norm": 0.8115418553352356, "learning_rate": 8.48267622461171e-07, "loss": 0.5665, "step": 71 }, { "epoch": 0.02581879034185991, "grad_norm": 0.7817463278770447, "learning_rate": 8.60215053763441e-07, "loss": 0.5526, "step": 72 }, { "epoch": 0.02617738465216352, "grad_norm": 0.6981951594352722, "learning_rate": 8.72162485065711e-07, "loss": 0.5458, "step": 73 }, { "epoch": 0.02653597896246713, "grad_norm": 0.6925907135009766, "learning_rate": 8.84109916367981e-07, "loss": 0.513, "step": 74 }, { "epoch": 0.026894573272770738, "grad_norm": 0.6867126822471619, "learning_rate": 8.96057347670251e-07, "loss": 0.5609, "step": 75 }, { "epoch": 0.027253167583074348, "grad_norm": 0.7216516137123108, "learning_rate": 9.080047789725211e-07, "loss": 0.565, "step": 76 }, { "epoch": 0.027611761893377957, "grad_norm": 0.6820318698883057, "learning_rate": 9.19952210274791e-07, "loss": 0.5171, "step": 77 }, { "epoch": 0.02797035620368157, "grad_norm": 0.668279230594635, "learning_rate": 9.318996415770611e-07, "loss": 0.5072, "step": 78 }, { "epoch": 0.02832895051398518, "grad_norm": 0.6304678320884705, "learning_rate": 9.43847072879331e-07, "loss": 0.5302, "step": 79 }, { "epoch": 0.02868754482428879, "grad_norm": 0.5884588956832886, "learning_rate": 9.55794504181601e-07, "loss": 0.5209, "step": 80 }, { "epoch": 0.0290461391345924, "grad_norm": 0.6816221475601196, "learning_rate": 9.67741935483871e-07, "loss": 0.5429, "step": 81 }, { "epoch": 0.029404733444896008, "grad_norm": 0.5852920413017273, "learning_rate": 9.796893667861411e-07, "loss": 0.5362, "step": 82 }, { "epoch": 0.029763327755199617, "grad_norm": 0.5465925335884094, "learning_rate": 9.91636798088411e-07, "loss": 0.5097, "step": 83 }, { "epoch": 0.030121922065503227, "grad_norm": 0.6239656805992126, "learning_rate": 1.0035842293906811e-06, "loss": 0.5376, "step": 84 }, { "epoch": 0.030480516375806836, "grad_norm": 0.5550203323364258, "learning_rate": 1.015531660692951e-06, "loss": 0.5139, "step": 85 }, { "epoch": 0.030839110686110446, "grad_norm": 0.5149758458137512, "learning_rate": 1.0274790919952211e-06, "loss": 0.5096, "step": 86 }, { "epoch": 0.03119770499641406, "grad_norm": 0.5053648352622986, "learning_rate": 1.039426523297491e-06, "loss": 0.515, "step": 87 }, { "epoch": 0.03155629930671767, "grad_norm": 0.5891280174255371, "learning_rate": 1.0513739545997611e-06, "loss": 0.5244, "step": 88 }, { "epoch": 0.031914893617021274, "grad_norm": 0.4834136664867401, "learning_rate": 1.063321385902031e-06, "loss": 0.4974, "step": 89 }, { "epoch": 0.03227348792732489, "grad_norm": 0.4783404767513275, "learning_rate": 1.0752688172043011e-06, "loss": 0.4898, "step": 90 }, { "epoch": 0.03263208223762849, "grad_norm": 0.48157259821891785, "learning_rate": 1.087216248506571e-06, "loss": 0.5233, "step": 91 }, { "epoch": 0.032990676547932106, "grad_norm": 0.511447548866272, "learning_rate": 1.0991636798088411e-06, "loss": 0.492, "step": 92 }, { "epoch": 0.03334927085823572, "grad_norm": 0.45387741923332214, "learning_rate": 1.111111111111111e-06, "loss": 0.4956, "step": 93 }, { "epoch": 0.033707865168539325, "grad_norm": 0.4703842103481293, "learning_rate": 1.1230585424133811e-06, "loss": 0.5237, "step": 94 }, { "epoch": 0.03406645947884294, "grad_norm": 0.4625861942768097, "learning_rate": 1.1350059737156513e-06, "loss": 0.5176, "step": 95 }, { "epoch": 0.034425053789146544, "grad_norm": 0.4769379794597626, "learning_rate": 1.1469534050179212e-06, "loss": 0.487, "step": 96 }, { "epoch": 0.03478364809945016, "grad_norm": 0.5081961154937744, "learning_rate": 1.1589008363201913e-06, "loss": 0.4812, "step": 97 }, { "epoch": 0.03514224240975376, "grad_norm": 0.4488717317581177, "learning_rate": 1.1708482676224612e-06, "loss": 0.4938, "step": 98 }, { "epoch": 0.035500836720057376, "grad_norm": 0.4627521336078644, "learning_rate": 1.1827956989247313e-06, "loss": 0.4805, "step": 99 }, { "epoch": 0.03585943103036098, "grad_norm": 0.46215003728866577, "learning_rate": 1.1947431302270014e-06, "loss": 0.5346, "step": 100 }, { "epoch": 0.036218025340664595, "grad_norm": 0.46619918942451477, "learning_rate": 1.2066905615292713e-06, "loss": 0.4785, "step": 101 }, { "epoch": 0.03657661965096821, "grad_norm": 0.49877965450286865, "learning_rate": 1.2186379928315414e-06, "loss": 0.5416, "step": 102 }, { "epoch": 0.036935213961271814, "grad_norm": 0.46613070368766785, "learning_rate": 1.2305854241338113e-06, "loss": 0.5112, "step": 103 }, { "epoch": 0.03729380827157543, "grad_norm": 0.48237210512161255, "learning_rate": 1.2425328554360814e-06, "loss": 0.4834, "step": 104 }, { "epoch": 0.03765240258187903, "grad_norm": 0.4832072854042053, "learning_rate": 1.2544802867383513e-06, "loss": 0.4958, "step": 105 }, { "epoch": 0.038010996892182645, "grad_norm": 0.4644368290901184, "learning_rate": 1.2664277180406214e-06, "loss": 0.4713, "step": 106 }, { "epoch": 0.03836959120248625, "grad_norm": 0.4670327603816986, "learning_rate": 1.2783751493428915e-06, "loss": 0.4929, "step": 107 }, { "epoch": 0.038728185512789864, "grad_norm": 0.4978494346141815, "learning_rate": 1.2903225806451614e-06, "loss": 0.4989, "step": 108 }, { "epoch": 0.03908677982309347, "grad_norm": 0.48968860507011414, "learning_rate": 1.3022700119474313e-06, "loss": 0.5129, "step": 109 }, { "epoch": 0.03944537413339708, "grad_norm": 0.4298454821109772, "learning_rate": 1.3142174432497016e-06, "loss": 0.478, "step": 110 }, { "epoch": 0.039803968443700696, "grad_norm": 0.4162214398384094, "learning_rate": 1.3261648745519715e-06, "loss": 0.4977, "step": 111 }, { "epoch": 0.0401625627540043, "grad_norm": 0.45692169666290283, "learning_rate": 1.3381123058542414e-06, "loss": 0.5022, "step": 112 }, { "epoch": 0.040521157064307915, "grad_norm": 0.46499913930892944, "learning_rate": 1.3500597371565113e-06, "loss": 0.5024, "step": 113 }, { "epoch": 0.04087975137461152, "grad_norm": 0.4545077681541443, "learning_rate": 1.3620071684587816e-06, "loss": 0.4753, "step": 114 }, { "epoch": 0.041238345684915134, "grad_norm": 0.41209807991981506, "learning_rate": 1.3739545997610515e-06, "loss": 0.4792, "step": 115 }, { "epoch": 0.04159693999521874, "grad_norm": 0.4404059648513794, "learning_rate": 1.3859020310633214e-06, "loss": 0.4732, "step": 116 }, { "epoch": 0.04195553430552235, "grad_norm": 0.45538318157196045, "learning_rate": 1.3978494623655913e-06, "loss": 0.4832, "step": 117 }, { "epoch": 0.04231412861582596, "grad_norm": 0.454279363155365, "learning_rate": 1.4097968936678616e-06, "loss": 0.4943, "step": 118 }, { "epoch": 0.04267272292612957, "grad_norm": 0.45746931433677673, "learning_rate": 1.4217443249701315e-06, "loss": 0.4649, "step": 119 }, { "epoch": 0.043031317236433185, "grad_norm": 0.47124800086021423, "learning_rate": 1.4336917562724014e-06, "loss": 0.4871, "step": 120 }, { "epoch": 0.04338991154673679, "grad_norm": 0.46561646461486816, "learning_rate": 1.4456391875746718e-06, "loss": 0.4437, "step": 121 }, { "epoch": 0.043748505857040404, "grad_norm": 0.46095746755599976, "learning_rate": 1.4575866188769417e-06, "loss": 0.457, "step": 122 }, { "epoch": 0.04410710016734401, "grad_norm": 0.4685754179954529, "learning_rate": 1.4695340501792116e-06, "loss": 0.4837, "step": 123 }, { "epoch": 0.04446569447764762, "grad_norm": 0.4720911979675293, "learning_rate": 1.4814814814814815e-06, "loss": 0.4867, "step": 124 }, { "epoch": 0.04482428878795123, "grad_norm": 0.4405423700809479, "learning_rate": 1.4934289127837518e-06, "loss": 0.4822, "step": 125 }, { "epoch": 0.04518288309825484, "grad_norm": 0.4468826949596405, "learning_rate": 1.5053763440860217e-06, "loss": 0.4626, "step": 126 }, { "epoch": 0.04554147740855845, "grad_norm": 0.42601898312568665, "learning_rate": 1.5173237753882916e-06, "loss": 0.4679, "step": 127 }, { "epoch": 0.04590007171886206, "grad_norm": 0.47886261343955994, "learning_rate": 1.5292712066905615e-06, "loss": 0.4817, "step": 128 }, { "epoch": 0.046258666029165674, "grad_norm": 0.4487503170967102, "learning_rate": 1.5412186379928318e-06, "loss": 0.4884, "step": 129 }, { "epoch": 0.04661726033946928, "grad_norm": 0.4665096402168274, "learning_rate": 1.5531660692951017e-06, "loss": 0.4852, "step": 130 }, { "epoch": 0.04697585464977289, "grad_norm": 0.4710441827774048, "learning_rate": 1.5651135005973716e-06, "loss": 0.4818, "step": 131 }, { "epoch": 0.0473344489600765, "grad_norm": 0.43055543303489685, "learning_rate": 1.5770609318996417e-06, "loss": 0.4513, "step": 132 }, { "epoch": 0.04769304327038011, "grad_norm": 0.4270454943180084, "learning_rate": 1.5890083632019118e-06, "loss": 0.4651, "step": 133 }, { "epoch": 0.04805163758068372, "grad_norm": 0.4024721086025238, "learning_rate": 1.6009557945041817e-06, "loss": 0.4821, "step": 134 }, { "epoch": 0.04841023189098733, "grad_norm": 0.41903844475746155, "learning_rate": 1.6129032258064516e-06, "loss": 0.4607, "step": 135 }, { "epoch": 0.04876882620129094, "grad_norm": 0.41146767139434814, "learning_rate": 1.6248506571087217e-06, "loss": 0.442, "step": 136 }, { "epoch": 0.04912742051159455, "grad_norm": 0.4741232991218567, "learning_rate": 1.6367980884109918e-06, "loss": 0.4613, "step": 137 }, { "epoch": 0.04948601482189816, "grad_norm": 0.42933928966522217, "learning_rate": 1.6487455197132617e-06, "loss": 0.4431, "step": 138 }, { "epoch": 0.04984460913220177, "grad_norm": 0.43163248896598816, "learning_rate": 1.6606929510155318e-06, "loss": 0.4542, "step": 139 }, { "epoch": 0.05020320344250538, "grad_norm": 0.4171218276023865, "learning_rate": 1.6726403823178017e-06, "loss": 0.4533, "step": 140 }, { "epoch": 0.05056179775280899, "grad_norm": 0.477053165435791, "learning_rate": 1.6845878136200718e-06, "loss": 0.4653, "step": 141 }, { "epoch": 0.0509203920631126, "grad_norm": 0.4596562683582306, "learning_rate": 1.696535244922342e-06, "loss": 0.4632, "step": 142 }, { "epoch": 0.051278986373416206, "grad_norm": 0.4249671399593353, "learning_rate": 1.7084826762246118e-06, "loss": 0.4556, "step": 143 }, { "epoch": 0.05163758068371982, "grad_norm": 0.49102407693862915, "learning_rate": 1.720430107526882e-06, "loss": 0.4433, "step": 144 }, { "epoch": 0.051996174994023425, "grad_norm": 0.5081464052200317, "learning_rate": 1.7323775388291518e-06, "loss": 0.438, "step": 145 }, { "epoch": 0.05235476930432704, "grad_norm": 0.4545251727104187, "learning_rate": 1.744324970131422e-06, "loss": 0.468, "step": 146 }, { "epoch": 0.05271336361463065, "grad_norm": 0.4545481503009796, "learning_rate": 1.7562724014336918e-06, "loss": 0.4542, "step": 147 }, { "epoch": 0.05307195792493426, "grad_norm": 0.46549251675605774, "learning_rate": 1.768219832735962e-06, "loss": 0.495, "step": 148 }, { "epoch": 0.05343055223523787, "grad_norm": 0.44655436277389526, "learning_rate": 1.780167264038232e-06, "loss": 0.4734, "step": 149 }, { "epoch": 0.053789146545541476, "grad_norm": 0.4611184895038605, "learning_rate": 1.792114695340502e-06, "loss": 0.4653, "step": 150 }, { "epoch": 0.05414774085584509, "grad_norm": 0.4338730275630951, "learning_rate": 1.8040621266427719e-06, "loss": 0.444, "step": 151 }, { "epoch": 0.054506335166148695, "grad_norm": 0.46660712361335754, "learning_rate": 1.8160095579450422e-06, "loss": 0.4827, "step": 152 }, { "epoch": 0.05486492947645231, "grad_norm": 0.5534628629684448, "learning_rate": 1.827956989247312e-06, "loss": 0.4853, "step": 153 }, { "epoch": 0.055223523786755914, "grad_norm": 0.4604191780090332, "learning_rate": 1.839904420549582e-06, "loss": 0.463, "step": 154 }, { "epoch": 0.05558211809705953, "grad_norm": 0.4226815104484558, "learning_rate": 1.8518518518518519e-06, "loss": 0.4558, "step": 155 }, { "epoch": 0.05594071240736314, "grad_norm": 0.4829096496105194, "learning_rate": 1.8637992831541222e-06, "loss": 0.441, "step": 156 }, { "epoch": 0.056299306717666746, "grad_norm": 0.42170384526252747, "learning_rate": 1.875746714456392e-06, "loss": 0.4578, "step": 157 }, { "epoch": 0.05665790102797036, "grad_norm": 0.44567519426345825, "learning_rate": 1.887694145758662e-06, "loss": 0.4746, "step": 158 }, { "epoch": 0.057016495338273965, "grad_norm": 0.41693976521492004, "learning_rate": 1.8996415770609319e-06, "loss": 0.4653, "step": 159 }, { "epoch": 0.05737508964857758, "grad_norm": 0.4378735423088074, "learning_rate": 1.911589008363202e-06, "loss": 0.4632, "step": 160 }, { "epoch": 0.057733683958881184, "grad_norm": 0.44417476654052734, "learning_rate": 1.923536439665472e-06, "loss": 0.4663, "step": 161 }, { "epoch": 0.0580922782691848, "grad_norm": 0.5166869163513184, "learning_rate": 1.935483870967742e-06, "loss": 0.4463, "step": 162 }, { "epoch": 0.0584508725794884, "grad_norm": 0.43879398703575134, "learning_rate": 1.947431302270012e-06, "loss": 0.4536, "step": 163 }, { "epoch": 0.058809466889792016, "grad_norm": 0.4827614724636078, "learning_rate": 1.9593787335722822e-06, "loss": 0.4562, "step": 164 }, { "epoch": 0.05916806120009563, "grad_norm": 0.39898476004600525, "learning_rate": 1.9713261648745523e-06, "loss": 0.4745, "step": 165 }, { "epoch": 0.059526655510399235, "grad_norm": 0.4788789749145508, "learning_rate": 1.983273596176822e-06, "loss": 0.481, "step": 166 }, { "epoch": 0.05988524982070285, "grad_norm": 0.4558192193508148, "learning_rate": 1.995221027479092e-06, "loss": 0.435, "step": 167 }, { "epoch": 0.06024384413100645, "grad_norm": 0.42313262820243835, "learning_rate": 2.0071684587813622e-06, "loss": 0.4348, "step": 168 }, { "epoch": 0.060602438441310066, "grad_norm": 0.5074014067649841, "learning_rate": 2.0191158900836323e-06, "loss": 0.4554, "step": 169 }, { "epoch": 0.06096103275161367, "grad_norm": 0.39877399802207947, "learning_rate": 2.031063321385902e-06, "loss": 0.4655, "step": 170 }, { "epoch": 0.061319627061917285, "grad_norm": 0.43756380677223206, "learning_rate": 2.043010752688172e-06, "loss": 0.4702, "step": 171 }, { "epoch": 0.06167822137222089, "grad_norm": 0.41741687059402466, "learning_rate": 2.0549581839904422e-06, "loss": 0.4735, "step": 172 }, { "epoch": 0.062036815682524504, "grad_norm": 0.45764362812042236, "learning_rate": 2.0669056152927124e-06, "loss": 0.4557, "step": 173 }, { "epoch": 0.06239540999282812, "grad_norm": 0.38925036787986755, "learning_rate": 2.078853046594982e-06, "loss": 0.462, "step": 174 }, { "epoch": 0.06275400430313173, "grad_norm": 0.42780226469039917, "learning_rate": 2.0908004778972526e-06, "loss": 0.4443, "step": 175 }, { "epoch": 0.06311259861343534, "grad_norm": 0.39879998564720154, "learning_rate": 2.1027479091995223e-06, "loss": 0.4695, "step": 176 }, { "epoch": 0.06347119292373894, "grad_norm": 0.4270436465740204, "learning_rate": 2.1146953405017924e-06, "loss": 0.4619, "step": 177 }, { "epoch": 0.06382978723404255, "grad_norm": 0.4090409278869629, "learning_rate": 2.126642771804062e-06, "loss": 0.4736, "step": 178 }, { "epoch": 0.06418838154434617, "grad_norm": 0.46043404936790466, "learning_rate": 2.1385902031063326e-06, "loss": 0.4314, "step": 179 }, { "epoch": 0.06454697585464977, "grad_norm": 0.46646538376808167, "learning_rate": 2.1505376344086023e-06, "loss": 0.4428, "step": 180 }, { "epoch": 0.06490557016495338, "grad_norm": 0.4322068393230438, "learning_rate": 2.1624850657108724e-06, "loss": 0.4493, "step": 181 }, { "epoch": 0.06526416447525699, "grad_norm": 0.4320879876613617, "learning_rate": 2.174432497013142e-06, "loss": 0.4224, "step": 182 }, { "epoch": 0.0656227587855606, "grad_norm": 0.44721320271492004, "learning_rate": 2.1863799283154126e-06, "loss": 0.4569, "step": 183 }, { "epoch": 0.06598135309586421, "grad_norm": 0.42664921283721924, "learning_rate": 2.1983273596176823e-06, "loss": 0.4433, "step": 184 }, { "epoch": 0.06633994740616782, "grad_norm": 0.4542321264743805, "learning_rate": 2.2102747909199524e-06, "loss": 0.4532, "step": 185 }, { "epoch": 0.06669854171647144, "grad_norm": 0.4822806119918823, "learning_rate": 2.222222222222222e-06, "loss": 0.4237, "step": 186 }, { "epoch": 0.06705713602677504, "grad_norm": 0.43471086025238037, "learning_rate": 2.2341696535244926e-06, "loss": 0.4271, "step": 187 }, { "epoch": 0.06741573033707865, "grad_norm": 0.4078637361526489, "learning_rate": 2.2461170848267623e-06, "loss": 0.4523, "step": 188 }, { "epoch": 0.06777432464738226, "grad_norm": 0.38276827335357666, "learning_rate": 2.2580645161290324e-06, "loss": 0.4414, "step": 189 }, { "epoch": 0.06813291895768588, "grad_norm": 0.45744773745536804, "learning_rate": 2.2700119474313025e-06, "loss": 0.4392, "step": 190 }, { "epoch": 0.06849151326798948, "grad_norm": 0.44423171877861023, "learning_rate": 2.2819593787335726e-06, "loss": 0.4216, "step": 191 }, { "epoch": 0.06885010757829309, "grad_norm": 0.45351332426071167, "learning_rate": 2.2939068100358423e-06, "loss": 0.4539, "step": 192 }, { "epoch": 0.06920870188859671, "grad_norm": 0.42011627554893494, "learning_rate": 2.3058542413381124e-06, "loss": 0.4247, "step": 193 }, { "epoch": 0.06956729619890031, "grad_norm": 0.45623040199279785, "learning_rate": 2.3178016726403825e-06, "loss": 0.4393, "step": 194 }, { "epoch": 0.06992589050920392, "grad_norm": 0.439191073179245, "learning_rate": 2.3297491039426526e-06, "loss": 0.4383, "step": 195 }, { "epoch": 0.07028448481950753, "grad_norm": 0.44982990622520447, "learning_rate": 2.3416965352449223e-06, "loss": 0.4146, "step": 196 }, { "epoch": 0.07064307912981115, "grad_norm": 0.49837490916252136, "learning_rate": 2.3536439665471924e-06, "loss": 0.4328, "step": 197 }, { "epoch": 0.07100167344011475, "grad_norm": 0.46311745047569275, "learning_rate": 2.3655913978494625e-06, "loss": 0.4385, "step": 198 }, { "epoch": 0.07136026775041836, "grad_norm": 0.43602147698402405, "learning_rate": 2.3775388291517326e-06, "loss": 0.4342, "step": 199 }, { "epoch": 0.07171886206072196, "grad_norm": 0.49500641226768494, "learning_rate": 2.3894862604540028e-06, "loss": 0.4777, "step": 200 }, { "epoch": 0.07207745637102558, "grad_norm": 0.4523989260196686, "learning_rate": 2.4014336917562724e-06, "loss": 0.4491, "step": 201 }, { "epoch": 0.07243605068132919, "grad_norm": 0.47953134775161743, "learning_rate": 2.4133811230585425e-06, "loss": 0.4381, "step": 202 }, { "epoch": 0.0727946449916328, "grad_norm": 0.4672757089138031, "learning_rate": 2.4253285543608127e-06, "loss": 0.4556, "step": 203 }, { "epoch": 0.07315323930193642, "grad_norm": 0.4133681058883667, "learning_rate": 2.4372759856630828e-06, "loss": 0.4253, "step": 204 }, { "epoch": 0.07351183361224002, "grad_norm": 0.46363624930381775, "learning_rate": 2.4492234169653525e-06, "loss": 0.4463, "step": 205 }, { "epoch": 0.07387042792254363, "grad_norm": 0.5357568860054016, "learning_rate": 2.4611708482676226e-06, "loss": 0.464, "step": 206 }, { "epoch": 0.07422902223284723, "grad_norm": 0.46366146206855774, "learning_rate": 2.4731182795698927e-06, "loss": 0.4536, "step": 207 }, { "epoch": 0.07458761654315085, "grad_norm": 0.48073410987854004, "learning_rate": 2.4850657108721628e-06, "loss": 0.4481, "step": 208 }, { "epoch": 0.07494621085345446, "grad_norm": 0.4992479681968689, "learning_rate": 2.4970131421744325e-06, "loss": 0.4502, "step": 209 }, { "epoch": 0.07530480516375807, "grad_norm": 0.4073732793331146, "learning_rate": 2.5089605734767026e-06, "loss": 0.4141, "step": 210 }, { "epoch": 0.07566339947406168, "grad_norm": 0.5078056454658508, "learning_rate": 2.5209080047789723e-06, "loss": 0.4447, "step": 211 }, { "epoch": 0.07602199378436529, "grad_norm": 0.44773274660110474, "learning_rate": 2.532855436081243e-06, "loss": 0.4349, "step": 212 }, { "epoch": 0.0763805880946689, "grad_norm": 0.4367981255054474, "learning_rate": 2.544802867383513e-06, "loss": 0.432, "step": 213 }, { "epoch": 0.0767391824049725, "grad_norm": 0.4475233852863312, "learning_rate": 2.556750298685783e-06, "loss": 0.4179, "step": 214 }, { "epoch": 0.07709777671527612, "grad_norm": 0.47192108631134033, "learning_rate": 2.5686977299880527e-06, "loss": 0.4227, "step": 215 }, { "epoch": 0.07745637102557973, "grad_norm": 0.37443971633911133, "learning_rate": 2.580645161290323e-06, "loss": 0.4361, "step": 216 }, { "epoch": 0.07781496533588333, "grad_norm": 0.46042904257774353, "learning_rate": 2.5925925925925925e-06, "loss": 0.464, "step": 217 }, { "epoch": 0.07817355964618694, "grad_norm": 0.44923046231269836, "learning_rate": 2.6045400238948626e-06, "loss": 0.4397, "step": 218 }, { "epoch": 0.07853215395649056, "grad_norm": 0.4807237684726715, "learning_rate": 2.616487455197133e-06, "loss": 0.4478, "step": 219 }, { "epoch": 0.07889074826679417, "grad_norm": 0.42120349407196045, "learning_rate": 2.6284348864994032e-06, "loss": 0.4362, "step": 220 }, { "epoch": 0.07924934257709777, "grad_norm": 0.41625019907951355, "learning_rate": 2.640382317801673e-06, "loss": 0.4311, "step": 221 }, { "epoch": 0.07960793688740139, "grad_norm": 0.4667854607105255, "learning_rate": 2.652329749103943e-06, "loss": 0.4306, "step": 222 }, { "epoch": 0.079966531197705, "grad_norm": 0.4239513874053955, "learning_rate": 2.6642771804062127e-06, "loss": 0.4279, "step": 223 }, { "epoch": 0.0803251255080086, "grad_norm": 0.4838809669017792, "learning_rate": 2.676224611708483e-06, "loss": 0.4521, "step": 224 }, { "epoch": 0.08068371981831221, "grad_norm": 0.41140133142471313, "learning_rate": 2.688172043010753e-06, "loss": 0.4303, "step": 225 }, { "epoch": 0.08104231412861583, "grad_norm": 0.42571136355400085, "learning_rate": 2.7001194743130226e-06, "loss": 0.4469, "step": 226 }, { "epoch": 0.08140090843891944, "grad_norm": 0.4722057580947876, "learning_rate": 2.712066905615293e-06, "loss": 0.4565, "step": 227 }, { "epoch": 0.08175950274922304, "grad_norm": 0.4227771461009979, "learning_rate": 2.7240143369175633e-06, "loss": 0.4305, "step": 228 }, { "epoch": 0.08211809705952666, "grad_norm": 0.48728030920028687, "learning_rate": 2.735961768219833e-06, "loss": 0.463, "step": 229 }, { "epoch": 0.08247669136983027, "grad_norm": 0.47822996973991394, "learning_rate": 2.747909199522103e-06, "loss": 0.4584, "step": 230 }, { "epoch": 0.08283528568013387, "grad_norm": 0.44649040699005127, "learning_rate": 2.7598566308243727e-06, "loss": 0.4287, "step": 231 }, { "epoch": 0.08319387999043748, "grad_norm": 0.4551233947277069, "learning_rate": 2.771804062126643e-06, "loss": 0.4651, "step": 232 }, { "epoch": 0.0835524743007411, "grad_norm": 0.4662633240222931, "learning_rate": 2.783751493428913e-06, "loss": 0.4371, "step": 233 }, { "epoch": 0.0839110686110447, "grad_norm": 0.42524224519729614, "learning_rate": 2.7956989247311827e-06, "loss": 0.4427, "step": 234 }, { "epoch": 0.08426966292134831, "grad_norm": 0.4232719838619232, "learning_rate": 2.807646356033453e-06, "loss": 0.4286, "step": 235 }, { "epoch": 0.08462825723165192, "grad_norm": 0.44517698884010315, "learning_rate": 2.8195937873357233e-06, "loss": 0.4313, "step": 236 }, { "epoch": 0.08498685154195554, "grad_norm": 0.432350218296051, "learning_rate": 2.831541218637993e-06, "loss": 0.4148, "step": 237 }, { "epoch": 0.08534544585225914, "grad_norm": 0.4551621079444885, "learning_rate": 2.843488649940263e-06, "loss": 0.4128, "step": 238 }, { "epoch": 0.08570404016256275, "grad_norm": 0.4656342566013336, "learning_rate": 2.855436081242533e-06, "loss": 0.4616, "step": 239 }, { "epoch": 0.08606263447286637, "grad_norm": 0.4390260577201843, "learning_rate": 2.867383512544803e-06, "loss": 0.4309, "step": 240 }, { "epoch": 0.08642122878316998, "grad_norm": 0.4781060218811035, "learning_rate": 2.879330943847073e-06, "loss": 0.4171, "step": 241 }, { "epoch": 0.08677982309347358, "grad_norm": 0.4425780475139618, "learning_rate": 2.8912783751493435e-06, "loss": 0.4246, "step": 242 }, { "epoch": 0.08713841740377719, "grad_norm": 0.4800470471382141, "learning_rate": 2.903225806451613e-06, "loss": 0.4194, "step": 243 }, { "epoch": 0.08749701171408081, "grad_norm": 0.44708219170570374, "learning_rate": 2.9151732377538833e-06, "loss": 0.4376, "step": 244 }, { "epoch": 0.08785560602438441, "grad_norm": 0.49553558230400085, "learning_rate": 2.9271206690561534e-06, "loss": 0.4542, "step": 245 }, { "epoch": 0.08821420033468802, "grad_norm": 0.4415110647678375, "learning_rate": 2.939068100358423e-06, "loss": 0.4303, "step": 246 }, { "epoch": 0.08857279464499163, "grad_norm": 0.49096545577049255, "learning_rate": 2.9510155316606932e-06, "loss": 0.439, "step": 247 }, { "epoch": 0.08893138895529525, "grad_norm": 0.47814106941223145, "learning_rate": 2.962962962962963e-06, "loss": 0.408, "step": 248 }, { "epoch": 0.08928998326559885, "grad_norm": 0.4967195689678192, "learning_rate": 2.974910394265233e-06, "loss": 0.4598, "step": 249 }, { "epoch": 0.08964857757590246, "grad_norm": 0.4531094431877136, "learning_rate": 2.9868578255675035e-06, "loss": 0.4115, "step": 250 }, { "epoch": 0.09000717188620608, "grad_norm": 0.4831942021846771, "learning_rate": 2.9988052568697732e-06, "loss": 0.452, "step": 251 }, { "epoch": 0.09036576619650968, "grad_norm": 0.4613558351993561, "learning_rate": 3.0107526881720433e-06, "loss": 0.4189, "step": 252 }, { "epoch": 0.09072436050681329, "grad_norm": 0.46022093296051025, "learning_rate": 3.0227001194743135e-06, "loss": 0.4495, "step": 253 }, { "epoch": 0.0910829548171169, "grad_norm": 0.420345664024353, "learning_rate": 3.034647550776583e-06, "loss": 0.4199, "step": 254 }, { "epoch": 0.09144154912742052, "grad_norm": 0.42419618368148804, "learning_rate": 3.0465949820788532e-06, "loss": 0.4368, "step": 255 }, { "epoch": 0.09180014343772412, "grad_norm": 0.464143306016922, "learning_rate": 3.058542413381123e-06, "loss": 0.4375, "step": 256 }, { "epoch": 0.09215873774802773, "grad_norm": 0.4900236427783966, "learning_rate": 3.070489844683393e-06, "loss": 0.4497, "step": 257 }, { "epoch": 0.09251733205833135, "grad_norm": 0.45043498277664185, "learning_rate": 3.0824372759856636e-06, "loss": 0.4156, "step": 258 }, { "epoch": 0.09287592636863495, "grad_norm": 0.43874049186706543, "learning_rate": 3.0943847072879337e-06, "loss": 0.4258, "step": 259 }, { "epoch": 0.09323452067893856, "grad_norm": 0.48382794857025146, "learning_rate": 3.1063321385902034e-06, "loss": 0.4244, "step": 260 }, { "epoch": 0.09359311498924217, "grad_norm": 0.44900384545326233, "learning_rate": 3.1182795698924735e-06, "loss": 0.4233, "step": 261 }, { "epoch": 0.09395170929954579, "grad_norm": 0.4669889807701111, "learning_rate": 3.130227001194743e-06, "loss": 0.4344, "step": 262 }, { "epoch": 0.09431030360984939, "grad_norm": 0.45236778259277344, "learning_rate": 3.1421744324970133e-06, "loss": 0.4295, "step": 263 }, { "epoch": 0.094668897920153, "grad_norm": 0.4849781394004822, "learning_rate": 3.1541218637992834e-06, "loss": 0.4021, "step": 264 }, { "epoch": 0.0950274922304566, "grad_norm": 0.46734780073165894, "learning_rate": 3.1660692951015535e-06, "loss": 0.44, "step": 265 }, { "epoch": 0.09538608654076022, "grad_norm": 0.4730963408946991, "learning_rate": 3.1780167264038236e-06, "loss": 0.4661, "step": 266 }, { "epoch": 0.09574468085106383, "grad_norm": 0.4545542597770691, "learning_rate": 3.1899641577060937e-06, "loss": 0.4283, "step": 267 }, { "epoch": 0.09610327516136744, "grad_norm": 0.42188286781311035, "learning_rate": 3.2019115890083634e-06, "loss": 0.4212, "step": 268 }, { "epoch": 0.09646186947167106, "grad_norm": 0.4499861001968384, "learning_rate": 3.2138590203106335e-06, "loss": 0.4308, "step": 269 }, { "epoch": 0.09682046378197466, "grad_norm": 0.4316302239894867, "learning_rate": 3.225806451612903e-06, "loss": 0.4417, "step": 270 }, { "epoch": 0.09717905809227827, "grad_norm": 0.42928311228752136, "learning_rate": 3.2377538829151733e-06, "loss": 0.429, "step": 271 }, { "epoch": 0.09753765240258187, "grad_norm": 0.5126577019691467, "learning_rate": 3.2497013142174434e-06, "loss": 0.4184, "step": 272 }, { "epoch": 0.09789624671288549, "grad_norm": 0.4300714135169983, "learning_rate": 3.261648745519714e-06, "loss": 0.4376, "step": 273 }, { "epoch": 0.0982548410231891, "grad_norm": 0.4440878629684448, "learning_rate": 3.2735961768219836e-06, "loss": 0.4357, "step": 274 }, { "epoch": 0.0986134353334927, "grad_norm": 0.41550126671791077, "learning_rate": 3.2855436081242537e-06, "loss": 0.4227, "step": 275 }, { "epoch": 0.09897202964379632, "grad_norm": 0.4118005335330963, "learning_rate": 3.2974910394265234e-06, "loss": 0.4398, "step": 276 }, { "epoch": 0.09933062395409993, "grad_norm": 0.4354921579360962, "learning_rate": 3.3094384707287935e-06, "loss": 0.4246, "step": 277 }, { "epoch": 0.09968921826440354, "grad_norm": 0.42197689414024353, "learning_rate": 3.3213859020310636e-06, "loss": 0.4415, "step": 278 }, { "epoch": 0.10004781257470714, "grad_norm": 0.4519917964935303, "learning_rate": 3.3333333333333333e-06, "loss": 0.4338, "step": 279 }, { "epoch": 0.10040640688501076, "grad_norm": 0.47145670652389526, "learning_rate": 3.3452807646356034e-06, "loss": 0.4219, "step": 280 }, { "epoch": 0.10076500119531437, "grad_norm": 0.4341362714767456, "learning_rate": 3.357228195937874e-06, "loss": 0.4226, "step": 281 }, { "epoch": 0.10112359550561797, "grad_norm": 0.42016154527664185, "learning_rate": 3.3691756272401437e-06, "loss": 0.3994, "step": 282 }, { "epoch": 0.10148218981592158, "grad_norm": 0.4355105757713318, "learning_rate": 3.3811230585424138e-06, "loss": 0.4159, "step": 283 }, { "epoch": 0.1018407841262252, "grad_norm": 0.5150867104530334, "learning_rate": 3.393070489844684e-06, "loss": 0.4165, "step": 284 }, { "epoch": 0.1021993784365288, "grad_norm": 0.4744488298892975, "learning_rate": 3.4050179211469536e-06, "loss": 0.4027, "step": 285 }, { "epoch": 0.10255797274683241, "grad_norm": 0.4081594944000244, "learning_rate": 3.4169653524492237e-06, "loss": 0.406, "step": 286 }, { "epoch": 0.10291656705713603, "grad_norm": 0.45914798974990845, "learning_rate": 3.4289127837514933e-06, "loss": 0.4379, "step": 287 }, { "epoch": 0.10327516136743964, "grad_norm": 0.5100423693656921, "learning_rate": 3.440860215053764e-06, "loss": 0.4539, "step": 288 }, { "epoch": 0.10363375567774324, "grad_norm": 0.43948543071746826, "learning_rate": 3.452807646356034e-06, "loss": 0.4065, "step": 289 }, { "epoch": 0.10399234998804685, "grad_norm": 0.4446389675140381, "learning_rate": 3.4647550776583037e-06, "loss": 0.4482, "step": 290 }, { "epoch": 0.10435094429835047, "grad_norm": 0.4767173230648041, "learning_rate": 3.4767025089605738e-06, "loss": 0.4302, "step": 291 }, { "epoch": 0.10470953860865408, "grad_norm": 0.4299291670322418, "learning_rate": 3.488649940262844e-06, "loss": 0.4259, "step": 292 }, { "epoch": 0.10506813291895768, "grad_norm": 0.5040909647941589, "learning_rate": 3.5005973715651136e-06, "loss": 0.421, "step": 293 }, { "epoch": 0.1054267272292613, "grad_norm": 0.4795377254486084, "learning_rate": 3.5125448028673837e-06, "loss": 0.4602, "step": 294 }, { "epoch": 0.10578532153956491, "grad_norm": 0.4598230719566345, "learning_rate": 3.5244922341696534e-06, "loss": 0.4302, "step": 295 }, { "epoch": 0.10614391584986851, "grad_norm": 0.4708668291568756, "learning_rate": 3.536439665471924e-06, "loss": 0.4134, "step": 296 }, { "epoch": 0.10650251016017212, "grad_norm": 0.4188295304775238, "learning_rate": 3.548387096774194e-06, "loss": 0.4003, "step": 297 }, { "epoch": 0.10686110447047574, "grad_norm": 0.49161994457244873, "learning_rate": 3.560334528076464e-06, "loss": 0.4156, "step": 298 }, { "epoch": 0.10721969878077935, "grad_norm": 0.5641816258430481, "learning_rate": 3.572281959378734e-06, "loss": 0.4137, "step": 299 }, { "epoch": 0.10757829309108295, "grad_norm": 0.4321749210357666, "learning_rate": 3.584229390681004e-06, "loss": 0.4429, "step": 300 }, { "epoch": 0.10793688740138656, "grad_norm": 0.4726661145687103, "learning_rate": 3.5961768219832736e-06, "loss": 0.4506, "step": 301 }, { "epoch": 0.10829548171169018, "grad_norm": 0.5574744343757629, "learning_rate": 3.6081242532855437e-06, "loss": 0.4467, "step": 302 }, { "epoch": 0.10865407602199378, "grad_norm": 0.41390398144721985, "learning_rate": 3.620071684587814e-06, "loss": 0.4172, "step": 303 }, { "epoch": 0.10901267033229739, "grad_norm": 0.4741494357585907, "learning_rate": 3.6320191158900844e-06, "loss": 0.4238, "step": 304 }, { "epoch": 0.10937126464260101, "grad_norm": 0.3964839577674866, "learning_rate": 3.643966547192354e-06, "loss": 0.4132, "step": 305 }, { "epoch": 0.10972985895290462, "grad_norm": 0.45965060591697693, "learning_rate": 3.655913978494624e-06, "loss": 0.424, "step": 306 }, { "epoch": 0.11008845326320822, "grad_norm": 0.469982385635376, "learning_rate": 3.667861409796894e-06, "loss": 0.4279, "step": 307 }, { "epoch": 0.11044704757351183, "grad_norm": 0.4569823443889618, "learning_rate": 3.679808841099164e-06, "loss": 0.418, "step": 308 }, { "epoch": 0.11080564188381545, "grad_norm": 0.4452831745147705, "learning_rate": 3.691756272401434e-06, "loss": 0.4331, "step": 309 }, { "epoch": 0.11116423619411905, "grad_norm": 0.4360690116882324, "learning_rate": 3.7037037037037037e-06, "loss": 0.4441, "step": 310 }, { "epoch": 0.11152283050442266, "grad_norm": 0.45781147480010986, "learning_rate": 3.7156511350059743e-06, "loss": 0.4161, "step": 311 }, { "epoch": 0.11188142481472628, "grad_norm": 0.5019540190696716, "learning_rate": 3.7275985663082444e-06, "loss": 0.4252, "step": 312 }, { "epoch": 0.11224001912502989, "grad_norm": 0.4655226767063141, "learning_rate": 3.739545997610514e-06, "loss": 0.4229, "step": 313 }, { "epoch": 0.11259861343533349, "grad_norm": 0.4232858419418335, "learning_rate": 3.751493428912784e-06, "loss": 0.3914, "step": 314 }, { "epoch": 0.1129572077456371, "grad_norm": 0.4276549518108368, "learning_rate": 3.763440860215054e-06, "loss": 0.4279, "step": 315 }, { "epoch": 0.11331580205594072, "grad_norm": 0.4654027819633484, "learning_rate": 3.775388291517324e-06, "loss": 0.4257, "step": 316 }, { "epoch": 0.11367439636624432, "grad_norm": 0.534253716468811, "learning_rate": 3.787335722819594e-06, "loss": 0.4123, "step": 317 }, { "epoch": 0.11403299067654793, "grad_norm": 0.48361146450042725, "learning_rate": 3.7992831541218638e-06, "loss": 0.4084, "step": 318 }, { "epoch": 0.11439158498685154, "grad_norm": 0.5128506422042847, "learning_rate": 3.8112305854241343e-06, "loss": 0.3912, "step": 319 }, { "epoch": 0.11475017929715516, "grad_norm": 0.43937358260154724, "learning_rate": 3.823178016726404e-06, "loss": 0.4028, "step": 320 }, { "epoch": 0.11510877360745876, "grad_norm": 0.5064302086830139, "learning_rate": 3.8351254480286745e-06, "loss": 0.4251, "step": 321 }, { "epoch": 0.11546736791776237, "grad_norm": 0.5159521698951721, "learning_rate": 3.847072879330944e-06, "loss": 0.4284, "step": 322 }, { "epoch": 0.11582596222806599, "grad_norm": 0.47985216975212097, "learning_rate": 3.859020310633214e-06, "loss": 0.3997, "step": 323 }, { "epoch": 0.1161845565383696, "grad_norm": 0.5051913261413574, "learning_rate": 3.870967741935484e-06, "loss": 0.391, "step": 324 }, { "epoch": 0.1165431508486732, "grad_norm": 0.4569436311721802, "learning_rate": 3.882915173237754e-06, "loss": 0.3982, "step": 325 }, { "epoch": 0.1169017451589768, "grad_norm": 0.4527565538883209, "learning_rate": 3.894862604540024e-06, "loss": 0.4005, "step": 326 }, { "epoch": 0.11726033946928043, "grad_norm": 0.42843908071517944, "learning_rate": 3.906810035842294e-06, "loss": 0.4261, "step": 327 }, { "epoch": 0.11761893377958403, "grad_norm": 0.5522409081459045, "learning_rate": 3.9187574671445644e-06, "loss": 0.4187, "step": 328 }, { "epoch": 0.11797752808988764, "grad_norm": 0.48151281476020813, "learning_rate": 3.9307048984468345e-06, "loss": 0.4202, "step": 329 }, { "epoch": 0.11833612240019126, "grad_norm": 0.4350339472293854, "learning_rate": 3.942652329749105e-06, "loss": 0.4055, "step": 330 }, { "epoch": 0.11869471671049486, "grad_norm": 0.4706771671772003, "learning_rate": 3.954599761051374e-06, "loss": 0.4288, "step": 331 }, { "epoch": 0.11905331102079847, "grad_norm": 0.4983105957508087, "learning_rate": 3.966547192353644e-06, "loss": 0.3985, "step": 332 }, { "epoch": 0.11941190533110208, "grad_norm": 0.4616604745388031, "learning_rate": 3.978494623655914e-06, "loss": 0.4154, "step": 333 }, { "epoch": 0.1197704996414057, "grad_norm": 0.4133656322956085, "learning_rate": 3.990442054958184e-06, "loss": 0.4117, "step": 334 }, { "epoch": 0.1201290939517093, "grad_norm": 0.4387432336807251, "learning_rate": 4.002389486260454e-06, "loss": 0.4137, "step": 335 }, { "epoch": 0.1204876882620129, "grad_norm": 0.468796968460083, "learning_rate": 4.0143369175627245e-06, "loss": 0.4253, "step": 336 }, { "epoch": 0.12084628257231651, "grad_norm": 0.4582894444465637, "learning_rate": 4.0262843488649946e-06, "loss": 0.3922, "step": 337 }, { "epoch": 0.12120487688262013, "grad_norm": 0.4600510001182556, "learning_rate": 4.038231780167265e-06, "loss": 0.4277, "step": 338 }, { "epoch": 0.12156347119292374, "grad_norm": 0.4769425392150879, "learning_rate": 4.050179211469534e-06, "loss": 0.4186, "step": 339 }, { "epoch": 0.12192206550322734, "grad_norm": 0.4200136065483093, "learning_rate": 4.062126642771804e-06, "loss": 0.4416, "step": 340 }, { "epoch": 0.12228065981353096, "grad_norm": 0.3954818844795227, "learning_rate": 4.074074074074074e-06, "loss": 0.4198, "step": 341 }, { "epoch": 0.12263925412383457, "grad_norm": 0.4832349121570587, "learning_rate": 4.086021505376344e-06, "loss": 0.3927, "step": 342 }, { "epoch": 0.12299784843413818, "grad_norm": 0.40870988368988037, "learning_rate": 4.097968936678614e-06, "loss": 0.4307, "step": 343 }, { "epoch": 0.12335644274444178, "grad_norm": 0.46725401282310486, "learning_rate": 4.1099163679808845e-06, "loss": 0.4339, "step": 344 }, { "epoch": 0.1237150370547454, "grad_norm": 0.40695059299468994, "learning_rate": 4.121863799283155e-06, "loss": 0.3939, "step": 345 }, { "epoch": 0.12407363136504901, "grad_norm": 0.407865971326828, "learning_rate": 4.133811230585425e-06, "loss": 0.4152, "step": 346 }, { "epoch": 0.12443222567535261, "grad_norm": 0.45993244647979736, "learning_rate": 4.145758661887694e-06, "loss": 0.4455, "step": 347 }, { "epoch": 0.12479081998565623, "grad_norm": 0.4351024925708771, "learning_rate": 4.157706093189964e-06, "loss": 0.4091, "step": 348 }, { "epoch": 0.12514941429595983, "grad_norm": 0.4329681694507599, "learning_rate": 4.169653524492234e-06, "loss": 0.4054, "step": 349 }, { "epoch": 0.12550800860626346, "grad_norm": 0.43498513102531433, "learning_rate": 4.181600955794505e-06, "loss": 0.4488, "step": 350 }, { "epoch": 0.12586660291656707, "grad_norm": 0.40836501121520996, "learning_rate": 4.193548387096774e-06, "loss": 0.4268, "step": 351 }, { "epoch": 0.12622519722687067, "grad_norm": 0.4931747019290924, "learning_rate": 4.2054958183990445e-06, "loss": 0.4164, "step": 352 }, { "epoch": 0.12658379153717428, "grad_norm": 0.4233679473400116, "learning_rate": 4.217443249701315e-06, "loss": 0.4298, "step": 353 }, { "epoch": 0.12694238584747788, "grad_norm": 0.42361119389533997, "learning_rate": 4.229390681003585e-06, "loss": 0.4359, "step": 354 }, { "epoch": 0.1273009801577815, "grad_norm": 0.47112739086151123, "learning_rate": 4.241338112305855e-06, "loss": 0.4134, "step": 355 }, { "epoch": 0.1276595744680851, "grad_norm": 0.4081611931324005, "learning_rate": 4.253285543608124e-06, "loss": 0.4168, "step": 356 }, { "epoch": 0.12801816877838873, "grad_norm": 0.42900654673576355, "learning_rate": 4.265232974910394e-06, "loss": 0.4167, "step": 357 }, { "epoch": 0.12837676308869234, "grad_norm": 0.49911803007125854, "learning_rate": 4.277180406212665e-06, "loss": 0.4009, "step": 358 }, { "epoch": 0.12873535739899594, "grad_norm": 0.4675311744213104, "learning_rate": 4.2891278375149344e-06, "loss": 0.4312, "step": 359 }, { "epoch": 0.12909395170929955, "grad_norm": 0.4150729477405548, "learning_rate": 4.3010752688172045e-06, "loss": 0.3924, "step": 360 }, { "epoch": 0.12945254601960315, "grad_norm": 0.4550046920776367, "learning_rate": 4.313022700119475e-06, "loss": 0.4158, "step": 361 }, { "epoch": 0.12981114032990676, "grad_norm": 0.47866836190223694, "learning_rate": 4.324970131421745e-06, "loss": 0.4067, "step": 362 }, { "epoch": 0.13016973464021037, "grad_norm": 0.48211902379989624, "learning_rate": 4.336917562724015e-06, "loss": 0.4094, "step": 363 }, { "epoch": 0.13052832895051397, "grad_norm": 0.4760768413543701, "learning_rate": 4.348864994026284e-06, "loss": 0.4079, "step": 364 }, { "epoch": 0.1308869232608176, "grad_norm": 0.4411994516849518, "learning_rate": 4.360812425328555e-06, "loss": 0.4297, "step": 365 }, { "epoch": 0.1312455175711212, "grad_norm": 0.4508727490901947, "learning_rate": 4.372759856630825e-06, "loss": 0.3772, "step": 366 }, { "epoch": 0.13160411188142482, "grad_norm": 0.46549275517463684, "learning_rate": 4.3847072879330945e-06, "loss": 0.4017, "step": 367 }, { "epoch": 0.13196270619172842, "grad_norm": 0.4669816792011261, "learning_rate": 4.3966547192353646e-06, "loss": 0.4019, "step": 368 }, { "epoch": 0.13232130050203203, "grad_norm": 0.4564000964164734, "learning_rate": 4.408602150537635e-06, "loss": 0.4169, "step": 369 }, { "epoch": 0.13267989481233564, "grad_norm": 0.46386364102363586, "learning_rate": 4.420549581839905e-06, "loss": 0.3796, "step": 370 }, { "epoch": 0.13303848912263924, "grad_norm": 0.48532411456108093, "learning_rate": 4.432497013142175e-06, "loss": 0.4213, "step": 371 }, { "epoch": 0.13339708343294288, "grad_norm": 0.43314632773399353, "learning_rate": 4.444444444444444e-06, "loss": 0.4093, "step": 372 }, { "epoch": 0.13375567774324648, "grad_norm": 0.5016446709632874, "learning_rate": 4.456391875746715e-06, "loss": 0.4181, "step": 373 }, { "epoch": 0.1341142720535501, "grad_norm": 0.50053471326828, "learning_rate": 4.468339307048985e-06, "loss": 0.4337, "step": 374 }, { "epoch": 0.1344728663638537, "grad_norm": 0.4567839205265045, "learning_rate": 4.480286738351255e-06, "loss": 0.3958, "step": 375 }, { "epoch": 0.1348314606741573, "grad_norm": 0.4560275971889496, "learning_rate": 4.492234169653525e-06, "loss": 0.3939, "step": 376 }, { "epoch": 0.1351900549844609, "grad_norm": 0.4924687147140503, "learning_rate": 4.504181600955795e-06, "loss": 0.4258, "step": 377 }, { "epoch": 0.1355486492947645, "grad_norm": 0.5159792900085449, "learning_rate": 4.516129032258065e-06, "loss": 0.4292, "step": 378 }, { "epoch": 0.13590724360506815, "grad_norm": 0.5652803182601929, "learning_rate": 4.528076463560335e-06, "loss": 0.4188, "step": 379 }, { "epoch": 0.13626583791537175, "grad_norm": 0.48609235882759094, "learning_rate": 4.540023894862605e-06, "loss": 0.4027, "step": 380 }, { "epoch": 0.13662443222567536, "grad_norm": 0.5172960162162781, "learning_rate": 4.551971326164875e-06, "loss": 0.4028, "step": 381 }, { "epoch": 0.13698302653597896, "grad_norm": 0.6238587498664856, "learning_rate": 4.563918757467145e-06, "loss": 0.4121, "step": 382 }, { "epoch": 0.13734162084628257, "grad_norm": 0.5132438540458679, "learning_rate": 4.575866188769415e-06, "loss": 0.4325, "step": 383 }, { "epoch": 0.13770021515658618, "grad_norm": 0.4661480784416199, "learning_rate": 4.587813620071685e-06, "loss": 0.4084, "step": 384 }, { "epoch": 0.13805880946688978, "grad_norm": 0.5311444401741028, "learning_rate": 4.599761051373955e-06, "loss": 0.4265, "step": 385 }, { "epoch": 0.13841740377719342, "grad_norm": 0.43878626823425293, "learning_rate": 4.611708482676225e-06, "loss": 0.3944, "step": 386 }, { "epoch": 0.13877599808749702, "grad_norm": 0.5396472215652466, "learning_rate": 4.623655913978495e-06, "loss": 0.4336, "step": 387 }, { "epoch": 0.13913459239780063, "grad_norm": 0.45337140560150146, "learning_rate": 4.635603345280765e-06, "loss": 0.4176, "step": 388 }, { "epoch": 0.13949318670810423, "grad_norm": 0.4791487157344818, "learning_rate": 4.647550776583035e-06, "loss": 0.3992, "step": 389 }, { "epoch": 0.13985178101840784, "grad_norm": 0.4655843675136566, "learning_rate": 4.659498207885305e-06, "loss": 0.4176, "step": 390 }, { "epoch": 0.14021037532871145, "grad_norm": 0.4870719015598297, "learning_rate": 4.671445639187575e-06, "loss": 0.4223, "step": 391 }, { "epoch": 0.14056896963901505, "grad_norm": 0.4123729467391968, "learning_rate": 4.683393070489845e-06, "loss": 0.4111, "step": 392 }, { "epoch": 0.14092756394931866, "grad_norm": 0.44620513916015625, "learning_rate": 4.695340501792115e-06, "loss": 0.4117, "step": 393 }, { "epoch": 0.1412861582596223, "grad_norm": 0.46829867362976074, "learning_rate": 4.707287933094385e-06, "loss": 0.4051, "step": 394 }, { "epoch": 0.1416447525699259, "grad_norm": 0.4789651334285736, "learning_rate": 4.719235364396655e-06, "loss": 0.4017, "step": 395 }, { "epoch": 0.1420033468802295, "grad_norm": 0.48044994473457336, "learning_rate": 4.731182795698925e-06, "loss": 0.391, "step": 396 }, { "epoch": 0.1423619411905331, "grad_norm": 0.47641265392303467, "learning_rate": 4.743130227001195e-06, "loss": 0.3865, "step": 397 }, { "epoch": 0.14272053550083671, "grad_norm": 0.4889044761657715, "learning_rate": 4.755077658303465e-06, "loss": 0.4055, "step": 398 }, { "epoch": 0.14307912981114032, "grad_norm": 0.47970882058143616, "learning_rate": 4.767025089605735e-06, "loss": 0.4258, "step": 399 }, { "epoch": 0.14343772412144393, "grad_norm": 0.4490072429180145, "learning_rate": 4.7789725209080055e-06, "loss": 0.4056, "step": 400 }, { "epoch": 0.14379631843174756, "grad_norm": 0.48312029242515564, "learning_rate": 4.790919952210275e-06, "loss": 0.4032, "step": 401 }, { "epoch": 0.14415491274205117, "grad_norm": 0.4564647972583771, "learning_rate": 4.802867383512545e-06, "loss": 0.4004, "step": 402 }, { "epoch": 0.14451350705235477, "grad_norm": 0.4821085035800934, "learning_rate": 4.814814814814815e-06, "loss": 0.4098, "step": 403 }, { "epoch": 0.14487210136265838, "grad_norm": 0.45707646012306213, "learning_rate": 4.826762246117085e-06, "loss": 0.4079, "step": 404 }, { "epoch": 0.14523069567296198, "grad_norm": 0.49015548825263977, "learning_rate": 4.838709677419355e-06, "loss": 0.3965, "step": 405 }, { "epoch": 0.1455892899832656, "grad_norm": 0.43123766779899597, "learning_rate": 4.850657108721625e-06, "loss": 0.4105, "step": 406 }, { "epoch": 0.1459478842935692, "grad_norm": 0.49732089042663574, "learning_rate": 4.8626045400238954e-06, "loss": 0.3962, "step": 407 }, { "epoch": 0.14630647860387283, "grad_norm": 0.466040700674057, "learning_rate": 4.8745519713261655e-06, "loss": 0.4127, "step": 408 }, { "epoch": 0.14666507291417644, "grad_norm": 0.4689634442329407, "learning_rate": 4.886499402628435e-06, "loss": 0.3998, "step": 409 }, { "epoch": 0.14702366722448004, "grad_norm": 0.4752364158630371, "learning_rate": 4.898446833930705e-06, "loss": 0.4093, "step": 410 }, { "epoch": 0.14738226153478365, "grad_norm": 0.4989449083805084, "learning_rate": 4.910394265232976e-06, "loss": 0.422, "step": 411 }, { "epoch": 0.14774085584508725, "grad_norm": 0.5042423009872437, "learning_rate": 4.922341696535245e-06, "loss": 0.4072, "step": 412 }, { "epoch": 0.14809945015539086, "grad_norm": 0.49850499629974365, "learning_rate": 4.934289127837515e-06, "loss": 0.4196, "step": 413 }, { "epoch": 0.14845804446569447, "grad_norm": 0.5196467041969299, "learning_rate": 4.946236559139785e-06, "loss": 0.3744, "step": 414 }, { "epoch": 0.1488166387759981, "grad_norm": 0.5216379165649414, "learning_rate": 4.9581839904420555e-06, "loss": 0.4146, "step": 415 }, { "epoch": 0.1491752330863017, "grad_norm": 0.5360924005508423, "learning_rate": 4.9701314217443256e-06, "loss": 0.4054, "step": 416 }, { "epoch": 0.1495338273966053, "grad_norm": 0.47251689434051514, "learning_rate": 4.982078853046595e-06, "loss": 0.3952, "step": 417 }, { "epoch": 0.14989242170690892, "grad_norm": 0.4307398796081543, "learning_rate": 4.994026284348865e-06, "loss": 0.4061, "step": 418 }, { "epoch": 0.15025101601721252, "grad_norm": 0.47331857681274414, "learning_rate": 5.005973715651136e-06, "loss": 0.4272, "step": 419 }, { "epoch": 0.15060961032751613, "grad_norm": 0.48072749376296997, "learning_rate": 5.017921146953405e-06, "loss": 0.4124, "step": 420 }, { "epoch": 0.15096820463781974, "grad_norm": 0.489924818277359, "learning_rate": 5.029868578255675e-06, "loss": 0.3894, "step": 421 }, { "epoch": 0.15132679894812337, "grad_norm": 0.4969591200351715, "learning_rate": 5.0418160095579445e-06, "loss": 0.4138, "step": 422 }, { "epoch": 0.15168539325842698, "grad_norm": 0.47553256154060364, "learning_rate": 5.0537634408602155e-06, "loss": 0.375, "step": 423 }, { "epoch": 0.15204398756873058, "grad_norm": 0.5273871421813965, "learning_rate": 5.065710872162486e-06, "loss": 0.4219, "step": 424 }, { "epoch": 0.1524025818790342, "grad_norm": 0.47017964720726013, "learning_rate": 5.077658303464756e-06, "loss": 0.395, "step": 425 }, { "epoch": 0.1527611761893378, "grad_norm": 0.510373055934906, "learning_rate": 5.089605734767026e-06, "loss": 0.4268, "step": 426 }, { "epoch": 0.1531197704996414, "grad_norm": 0.49943843483924866, "learning_rate": 5.101553166069295e-06, "loss": 0.4285, "step": 427 }, { "epoch": 0.153478364809945, "grad_norm": 0.5286436676979065, "learning_rate": 5.113500597371566e-06, "loss": 0.4015, "step": 428 }, { "epoch": 0.1538369591202486, "grad_norm": 0.5137655138969421, "learning_rate": 5.125448028673835e-06, "loss": 0.4504, "step": 429 }, { "epoch": 0.15419555343055225, "grad_norm": 0.47832366824150085, "learning_rate": 5.137395459976105e-06, "loss": 0.4256, "step": 430 }, { "epoch": 0.15455414774085585, "grad_norm": 0.5342938899993896, "learning_rate": 5.149342891278376e-06, "loss": 0.3961, "step": 431 }, { "epoch": 0.15491274205115946, "grad_norm": 0.49771854281425476, "learning_rate": 5.161290322580646e-06, "loss": 0.4101, "step": 432 }, { "epoch": 0.15527133636146306, "grad_norm": 0.5007303953170776, "learning_rate": 5.173237753882916e-06, "loss": 0.4344, "step": 433 }, { "epoch": 0.15562993067176667, "grad_norm": 0.5008660554885864, "learning_rate": 5.185185185185185e-06, "loss": 0.3892, "step": 434 }, { "epoch": 0.15598852498207028, "grad_norm": 0.525950014591217, "learning_rate": 5.197132616487456e-06, "loss": 0.4126, "step": 435 }, { "epoch": 0.15634711929237388, "grad_norm": 0.4985194802284241, "learning_rate": 5.209080047789725e-06, "loss": 0.3979, "step": 436 }, { "epoch": 0.15670571360267752, "grad_norm": 0.5512293577194214, "learning_rate": 5.221027479091995e-06, "loss": 0.4215, "step": 437 }, { "epoch": 0.15706430791298112, "grad_norm": 0.470608651638031, "learning_rate": 5.232974910394266e-06, "loss": 0.4049, "step": 438 }, { "epoch": 0.15742290222328473, "grad_norm": 0.4503597319126129, "learning_rate": 5.2449223416965355e-06, "loss": 0.4087, "step": 439 }, { "epoch": 0.15778149653358833, "grad_norm": 0.47264793515205383, "learning_rate": 5.2568697729988065e-06, "loss": 0.3884, "step": 440 }, { "epoch": 0.15814009084389194, "grad_norm": 0.54245525598526, "learning_rate": 5.268817204301076e-06, "loss": 0.4078, "step": 441 }, { "epoch": 0.15849868515419555, "grad_norm": 0.5028846263885498, "learning_rate": 5.280764635603346e-06, "loss": 0.4319, "step": 442 }, { "epoch": 0.15885727946449915, "grad_norm": 0.47455456852912903, "learning_rate": 5.292712066905615e-06, "loss": 0.4114, "step": 443 }, { "epoch": 0.15921587377480279, "grad_norm": 0.445149689912796, "learning_rate": 5.304659498207886e-06, "loss": 0.3954, "step": 444 }, { "epoch": 0.1595744680851064, "grad_norm": 0.5123813152313232, "learning_rate": 5.316606929510155e-06, "loss": 0.3802, "step": 445 }, { "epoch": 0.15993306239541, "grad_norm": 0.4960103929042816, "learning_rate": 5.3285543608124254e-06, "loss": 0.4032, "step": 446 }, { "epoch": 0.1602916567057136, "grad_norm": 0.44163453578948975, "learning_rate": 5.340501792114696e-06, "loss": 0.3989, "step": 447 }, { "epoch": 0.1606502510160172, "grad_norm": 0.523522675037384, "learning_rate": 5.352449223416966e-06, "loss": 0.4276, "step": 448 }, { "epoch": 0.16100884532632082, "grad_norm": 0.4399987757205963, "learning_rate": 5.364396654719236e-06, "loss": 0.4114, "step": 449 }, { "epoch": 0.16136743963662442, "grad_norm": 0.48488882184028625, "learning_rate": 5.376344086021506e-06, "loss": 0.4264, "step": 450 }, { "epoch": 0.16172603394692806, "grad_norm": 0.45255032181739807, "learning_rate": 5.388291517323776e-06, "loss": 0.3771, "step": 451 }, { "epoch": 0.16208462825723166, "grad_norm": 0.41369837522506714, "learning_rate": 5.400238948626045e-06, "loss": 0.4068, "step": 452 }, { "epoch": 0.16244322256753527, "grad_norm": 0.4342319071292877, "learning_rate": 5.412186379928316e-06, "loss": 0.4263, "step": 453 }, { "epoch": 0.16280181687783887, "grad_norm": 0.4615875780582428, "learning_rate": 5.424133811230586e-06, "loss": 0.4308, "step": 454 }, { "epoch": 0.16316041118814248, "grad_norm": 0.48974621295928955, "learning_rate": 5.436081242532856e-06, "loss": 0.3804, "step": 455 }, { "epoch": 0.16351900549844609, "grad_norm": 0.4120875597000122, "learning_rate": 5.4480286738351265e-06, "loss": 0.3973, "step": 456 }, { "epoch": 0.1638775998087497, "grad_norm": 0.44808584451675415, "learning_rate": 5.459976105137396e-06, "loss": 0.4076, "step": 457 }, { "epoch": 0.16423619411905332, "grad_norm": 0.44389763474464417, "learning_rate": 5.471923536439666e-06, "loss": 0.3839, "step": 458 }, { "epoch": 0.16459478842935693, "grad_norm": 0.4771408140659332, "learning_rate": 5.483870967741935e-06, "loss": 0.393, "step": 459 }, { "epoch": 0.16495338273966054, "grad_norm": 0.44366106390953064, "learning_rate": 5.495818399044206e-06, "loss": 0.4209, "step": 460 }, { "epoch": 0.16531197704996414, "grad_norm": 0.4985314607620239, "learning_rate": 5.507765830346476e-06, "loss": 0.4044, "step": 461 }, { "epoch": 0.16567057136026775, "grad_norm": 0.46818777918815613, "learning_rate": 5.5197132616487455e-06, "loss": 0.4105, "step": 462 }, { "epoch": 0.16602916567057135, "grad_norm": 0.4167502224445343, "learning_rate": 5.5316606929510165e-06, "loss": 0.4028, "step": 463 }, { "epoch": 0.16638775998087496, "grad_norm": 0.47375890612602234, "learning_rate": 5.543608124253286e-06, "loss": 0.4063, "step": 464 }, { "epoch": 0.16674635429117857, "grad_norm": 0.5073155760765076, "learning_rate": 5.555555555555557e-06, "loss": 0.4096, "step": 465 }, { "epoch": 0.1671049486014822, "grad_norm": 0.5154780149459839, "learning_rate": 5.567502986857826e-06, "loss": 0.4087, "step": 466 }, { "epoch": 0.1674635429117858, "grad_norm": 0.4367597699165344, "learning_rate": 5.579450418160096e-06, "loss": 0.3769, "step": 467 }, { "epoch": 0.1678221372220894, "grad_norm": 0.528298020362854, "learning_rate": 5.591397849462365e-06, "loss": 0.3957, "step": 468 }, { "epoch": 0.16818073153239302, "grad_norm": 0.5119720697402954, "learning_rate": 5.603345280764636e-06, "loss": 0.4215, "step": 469 }, { "epoch": 0.16853932584269662, "grad_norm": 0.46755367517471313, "learning_rate": 5.615292712066906e-06, "loss": 0.3842, "step": 470 }, { "epoch": 0.16889792015300023, "grad_norm": 0.4926641285419464, "learning_rate": 5.627240143369176e-06, "loss": 0.3884, "step": 471 }, { "epoch": 0.16925651446330384, "grad_norm": 0.519961953163147, "learning_rate": 5.639187574671447e-06, "loss": 0.4068, "step": 472 }, { "epoch": 0.16961510877360747, "grad_norm": 0.4763745963573456, "learning_rate": 5.651135005973716e-06, "loss": 0.408, "step": 473 }, { "epoch": 0.16997370308391108, "grad_norm": 0.5620372891426086, "learning_rate": 5.663082437275986e-06, "loss": 0.4092, "step": 474 }, { "epoch": 0.17033229739421468, "grad_norm": 0.4842250645160675, "learning_rate": 5.675029868578256e-06, "loss": 0.3915, "step": 475 }, { "epoch": 0.1706908917045183, "grad_norm": 0.5357539653778076, "learning_rate": 5.686977299880526e-06, "loss": 0.4054, "step": 476 }, { "epoch": 0.1710494860148219, "grad_norm": 0.4987579882144928, "learning_rate": 5.698924731182796e-06, "loss": 0.4109, "step": 477 }, { "epoch": 0.1714080803251255, "grad_norm": 0.466442734003067, "learning_rate": 5.710872162485066e-06, "loss": 0.3981, "step": 478 }, { "epoch": 0.1717666746354291, "grad_norm": 0.4783289134502411, "learning_rate": 5.7228195937873365e-06, "loss": 0.3792, "step": 479 }, { "epoch": 0.17212526894573274, "grad_norm": 0.5725646615028381, "learning_rate": 5.734767025089606e-06, "loss": 0.3884, "step": 480 }, { "epoch": 0.17248386325603635, "grad_norm": 0.5062675476074219, "learning_rate": 5.746714456391877e-06, "loss": 0.4163, "step": 481 }, { "epoch": 0.17284245756633995, "grad_norm": 0.4564410448074341, "learning_rate": 5.758661887694146e-06, "loss": 0.3845, "step": 482 }, { "epoch": 0.17320105187664356, "grad_norm": 0.528139591217041, "learning_rate": 5.770609318996416e-06, "loss": 0.3936, "step": 483 }, { "epoch": 0.17355964618694716, "grad_norm": 0.5238683223724365, "learning_rate": 5.782556750298687e-06, "loss": 0.3993, "step": 484 }, { "epoch": 0.17391824049725077, "grad_norm": 0.5208744406700134, "learning_rate": 5.794504181600956e-06, "loss": 0.3989, "step": 485 }, { "epoch": 0.17427683480755438, "grad_norm": 0.4848564863204956, "learning_rate": 5.806451612903226e-06, "loss": 0.4201, "step": 486 }, { "epoch": 0.174635429117858, "grad_norm": 0.44612419605255127, "learning_rate": 5.818399044205496e-06, "loss": 0.3895, "step": 487 }, { "epoch": 0.17499402342816162, "grad_norm": 0.48482319712638855, "learning_rate": 5.830346475507767e-06, "loss": 0.416, "step": 488 }, { "epoch": 0.17535261773846522, "grad_norm": 0.5114999413490295, "learning_rate": 5.842293906810036e-06, "loss": 0.3985, "step": 489 }, { "epoch": 0.17571121204876883, "grad_norm": 0.4707900583744049, "learning_rate": 5.854241338112307e-06, "loss": 0.3841, "step": 490 }, { "epoch": 0.17606980635907243, "grad_norm": 0.4636889696121216, "learning_rate": 5.866188769414576e-06, "loss": 0.4173, "step": 491 }, { "epoch": 0.17642840066937604, "grad_norm": 0.4619494676589966, "learning_rate": 5.878136200716846e-06, "loss": 0.3666, "step": 492 }, { "epoch": 0.17678699497967965, "grad_norm": 0.4353981912136078, "learning_rate": 5.890083632019117e-06, "loss": 0.3804, "step": 493 }, { "epoch": 0.17714558928998325, "grad_norm": 0.4704913794994354, "learning_rate": 5.9020310633213864e-06, "loss": 0.4056, "step": 494 }, { "epoch": 0.17750418360028689, "grad_norm": 0.463922381401062, "learning_rate": 5.9139784946236566e-06, "loss": 0.4144, "step": 495 }, { "epoch": 0.1778627779105905, "grad_norm": 0.5296831727027893, "learning_rate": 5.925925925925926e-06, "loss": 0.3987, "step": 496 }, { "epoch": 0.1782213722208941, "grad_norm": 0.48521700501441956, "learning_rate": 5.937873357228197e-06, "loss": 0.4099, "step": 497 }, { "epoch": 0.1785799665311977, "grad_norm": 0.4415605664253235, "learning_rate": 5.949820788530466e-06, "loss": 0.4008, "step": 498 }, { "epoch": 0.1789385608415013, "grad_norm": 0.4724884033203125, "learning_rate": 5.961768219832736e-06, "loss": 0.4173, "step": 499 }, { "epoch": 0.17929715515180492, "grad_norm": 0.4622402489185333, "learning_rate": 5.973715651135007e-06, "loss": 0.3798, "step": 500 }, { "epoch": 0.17965574946210852, "grad_norm": 0.48325562477111816, "learning_rate": 5.985663082437276e-06, "loss": 0.4114, "step": 501 }, { "epoch": 0.18001434377241216, "grad_norm": 0.4283542335033417, "learning_rate": 5.9976105137395465e-06, "loss": 0.401, "step": 502 }, { "epoch": 0.18037293808271576, "grad_norm": 0.4434736967086792, "learning_rate": 6.009557945041817e-06, "loss": 0.3986, "step": 503 }, { "epoch": 0.18073153239301937, "grad_norm": 0.48133277893066406, "learning_rate": 6.021505376344087e-06, "loss": 0.388, "step": 504 }, { "epoch": 0.18109012670332297, "grad_norm": 0.5381149053573608, "learning_rate": 6.033452807646356e-06, "loss": 0.4249, "step": 505 }, { "epoch": 0.18144872101362658, "grad_norm": 0.4846644997596741, "learning_rate": 6.045400238948627e-06, "loss": 0.3697, "step": 506 }, { "epoch": 0.18180731532393019, "grad_norm": 0.4784573018550873, "learning_rate": 6.057347670250897e-06, "loss": 0.4004, "step": 507 }, { "epoch": 0.1821659096342338, "grad_norm": 0.5343933701515198, "learning_rate": 6.069295101553166e-06, "loss": 0.4246, "step": 508 }, { "epoch": 0.18252450394453743, "grad_norm": 0.5129504799842834, "learning_rate": 6.081242532855437e-06, "loss": 0.3962, "step": 509 }, { "epoch": 0.18288309825484103, "grad_norm": 0.5532310605049133, "learning_rate": 6.0931899641577065e-06, "loss": 0.4143, "step": 510 }, { "epoch": 0.18324169256514464, "grad_norm": 0.5119930505752563, "learning_rate": 6.105137395459977e-06, "loss": 0.3667, "step": 511 }, { "epoch": 0.18360028687544824, "grad_norm": 0.49164390563964844, "learning_rate": 6.117084826762246e-06, "loss": 0.3928, "step": 512 }, { "epoch": 0.18395888118575185, "grad_norm": 0.4821913242340088, "learning_rate": 6.129032258064517e-06, "loss": 0.4077, "step": 513 }, { "epoch": 0.18431747549605546, "grad_norm": 0.45569178462028503, "learning_rate": 6.140979689366786e-06, "loss": 0.389, "step": 514 }, { "epoch": 0.18467606980635906, "grad_norm": 0.47082850337028503, "learning_rate": 6.152927120669057e-06, "loss": 0.3815, "step": 515 }, { "epoch": 0.1850346641166627, "grad_norm": 0.4344950318336487, "learning_rate": 6.164874551971327e-06, "loss": 0.3558, "step": 516 }, { "epoch": 0.1853932584269663, "grad_norm": 0.451945424079895, "learning_rate": 6.176821983273596e-06, "loss": 0.4111, "step": 517 }, { "epoch": 0.1857518527372699, "grad_norm": 0.4379846155643463, "learning_rate": 6.188769414575867e-06, "loss": 0.4253, "step": 518 }, { "epoch": 0.1861104470475735, "grad_norm": 0.4544430375099182, "learning_rate": 6.200716845878137e-06, "loss": 0.4019, "step": 519 }, { "epoch": 0.18646904135787712, "grad_norm": 0.4509560763835907, "learning_rate": 6.212664277180407e-06, "loss": 0.4088, "step": 520 }, { "epoch": 0.18682763566818072, "grad_norm": 0.4233449399471283, "learning_rate": 6.224611708482676e-06, "loss": 0.4024, "step": 521 }, { "epoch": 0.18718622997848433, "grad_norm": 0.4731083810329437, "learning_rate": 6.236559139784947e-06, "loss": 0.3956, "step": 522 }, { "epoch": 0.18754482428878796, "grad_norm": 0.49143949151039124, "learning_rate": 6.248506571087217e-06, "loss": 0.4103, "step": 523 }, { "epoch": 0.18790341859909157, "grad_norm": 0.503290593624115, "learning_rate": 6.260454002389486e-06, "loss": 0.3881, "step": 524 }, { "epoch": 0.18826201290939518, "grad_norm": 0.47964543104171753, "learning_rate": 6.272401433691757e-06, "loss": 0.4048, "step": 525 }, { "epoch": 0.18862060721969878, "grad_norm": 0.4981192350387573, "learning_rate": 6.2843488649940265e-06, "loss": 0.4056, "step": 526 }, { "epoch": 0.1889792015300024, "grad_norm": 0.4368390142917633, "learning_rate": 6.296296296296297e-06, "loss": 0.3719, "step": 527 }, { "epoch": 0.189337795840306, "grad_norm": 0.4651443064212799, "learning_rate": 6.308243727598567e-06, "loss": 0.3627, "step": 528 }, { "epoch": 0.1896963901506096, "grad_norm": 0.4651325047016144, "learning_rate": 6.320191158900837e-06, "loss": 0.4107, "step": 529 }, { "epoch": 0.1900549844609132, "grad_norm": 0.44491153955459595, "learning_rate": 6.332138590203107e-06, "loss": 0.3952, "step": 530 }, { "epoch": 0.19041357877121684, "grad_norm": 0.5050843954086304, "learning_rate": 6.344086021505377e-06, "loss": 0.3985, "step": 531 }, { "epoch": 0.19077217308152045, "grad_norm": 0.4923928380012512, "learning_rate": 6.356033452807647e-06, "loss": 0.388, "step": 532 }, { "epoch": 0.19113076739182405, "grad_norm": 0.46229827404022217, "learning_rate": 6.3679808841099165e-06, "loss": 0.3949, "step": 533 }, { "epoch": 0.19148936170212766, "grad_norm": 0.47825583815574646, "learning_rate": 6.379928315412187e-06, "loss": 0.405, "step": 534 }, { "epoch": 0.19184795601243126, "grad_norm": 0.5222941637039185, "learning_rate": 6.391875746714457e-06, "loss": 0.3989, "step": 535 }, { "epoch": 0.19220655032273487, "grad_norm": 0.519848108291626, "learning_rate": 6.403823178016727e-06, "loss": 0.4169, "step": 536 }, { "epoch": 0.19256514463303848, "grad_norm": 0.5215396285057068, "learning_rate": 6.415770609318996e-06, "loss": 0.3936, "step": 537 }, { "epoch": 0.1929237389433421, "grad_norm": 0.5572916865348816, "learning_rate": 6.427718040621267e-06, "loss": 0.4213, "step": 538 }, { "epoch": 0.19328233325364572, "grad_norm": 0.4607081115245819, "learning_rate": 6.439665471923537e-06, "loss": 0.4005, "step": 539 }, { "epoch": 0.19364092756394932, "grad_norm": 0.5272471308708191, "learning_rate": 6.451612903225806e-06, "loss": 0.3985, "step": 540 }, { "epoch": 0.19399952187425293, "grad_norm": 0.5063799619674683, "learning_rate": 6.463560334528077e-06, "loss": 0.4087, "step": 541 }, { "epoch": 0.19435811618455653, "grad_norm": 0.43912652134895325, "learning_rate": 6.475507765830347e-06, "loss": 0.405, "step": 542 }, { "epoch": 0.19471671049486014, "grad_norm": 0.551730215549469, "learning_rate": 6.4874551971326176e-06, "loss": 0.3905, "step": 543 }, { "epoch": 0.19507530480516375, "grad_norm": 0.517902135848999, "learning_rate": 6.499402628434887e-06, "loss": 0.4043, "step": 544 }, { "epoch": 0.19543389911546738, "grad_norm": 0.4465118944644928, "learning_rate": 6.511350059737157e-06, "loss": 0.3778, "step": 545 }, { "epoch": 0.19579249342577099, "grad_norm": 0.4488280117511749, "learning_rate": 6.523297491039428e-06, "loss": 0.3925, "step": 546 }, { "epoch": 0.1961510877360746, "grad_norm": 0.4733932316303253, "learning_rate": 6.535244922341697e-06, "loss": 0.4042, "step": 547 }, { "epoch": 0.1965096820463782, "grad_norm": 0.47780826687812805, "learning_rate": 6.547192353643967e-06, "loss": 0.4165, "step": 548 }, { "epoch": 0.1968682763566818, "grad_norm": 0.4688851833343506, "learning_rate": 6.5591397849462365e-06, "loss": 0.3891, "step": 549 }, { "epoch": 0.1972268706669854, "grad_norm": 0.45017462968826294, "learning_rate": 6.5710872162485075e-06, "loss": 0.3991, "step": 550 }, { "epoch": 0.19758546497728902, "grad_norm": 0.4886326193809509, "learning_rate": 6.583034647550777e-06, "loss": 0.393, "step": 551 }, { "epoch": 0.19794405928759265, "grad_norm": 0.48198920488357544, "learning_rate": 6.594982078853047e-06, "loss": 0.3881, "step": 552 }, { "epoch": 0.19830265359789626, "grad_norm": 0.4290037751197815, "learning_rate": 6.606929510155318e-06, "loss": 0.4075, "step": 553 }, { "epoch": 0.19866124790819986, "grad_norm": 0.504683792591095, "learning_rate": 6.618876941457587e-06, "loss": 0.3893, "step": 554 }, { "epoch": 0.19901984221850347, "grad_norm": 0.4648478031158447, "learning_rate": 6.630824372759857e-06, "loss": 0.3952, "step": 555 }, { "epoch": 0.19937843652880707, "grad_norm": 0.46784868836402893, "learning_rate": 6.642771804062127e-06, "loss": 0.4141, "step": 556 }, { "epoch": 0.19973703083911068, "grad_norm": 0.4692215323448181, "learning_rate": 6.654719235364397e-06, "loss": 0.4193, "step": 557 }, { "epoch": 0.20009562514941429, "grad_norm": 0.4910225570201874, "learning_rate": 6.666666666666667e-06, "loss": 0.4233, "step": 558 }, { "epoch": 0.20045421945971792, "grad_norm": 0.5319250822067261, "learning_rate": 6.678614097968938e-06, "loss": 0.4203, "step": 559 }, { "epoch": 0.20081281377002153, "grad_norm": 0.46914440393447876, "learning_rate": 6.690561529271207e-06, "loss": 0.4182, "step": 560 }, { "epoch": 0.20117140808032513, "grad_norm": 0.46411892771720886, "learning_rate": 6.702508960573477e-06, "loss": 0.3766, "step": 561 }, { "epoch": 0.20153000239062874, "grad_norm": 0.48260554671287537, "learning_rate": 6.714456391875748e-06, "loss": 0.382, "step": 562 }, { "epoch": 0.20188859670093234, "grad_norm": 0.48425039649009705, "learning_rate": 6.726403823178017e-06, "loss": 0.4056, "step": 563 }, { "epoch": 0.20224719101123595, "grad_norm": 0.511290431022644, "learning_rate": 6.738351254480287e-06, "loss": 0.4025, "step": 564 }, { "epoch": 0.20260578532153956, "grad_norm": 0.45453551411628723, "learning_rate": 6.7502986857825566e-06, "loss": 0.3895, "step": 565 }, { "epoch": 0.20296437963184316, "grad_norm": 0.5212664604187012, "learning_rate": 6.7622461170848275e-06, "loss": 0.3894, "step": 566 }, { "epoch": 0.2033229739421468, "grad_norm": 0.426395982503891, "learning_rate": 6.774193548387097e-06, "loss": 0.3755, "step": 567 }, { "epoch": 0.2036815682524504, "grad_norm": 0.5430606007575989, "learning_rate": 6.786140979689368e-06, "loss": 0.3573, "step": 568 }, { "epoch": 0.204040162562754, "grad_norm": 0.5327696204185486, "learning_rate": 6.798088410991638e-06, "loss": 0.4281, "step": 569 }, { "epoch": 0.2043987568730576, "grad_norm": 0.4554588198661804, "learning_rate": 6.810035842293907e-06, "loss": 0.3869, "step": 570 }, { "epoch": 0.20475735118336122, "grad_norm": 0.5796671509742737, "learning_rate": 6.821983273596178e-06, "loss": 0.393, "step": 571 }, { "epoch": 0.20511594549366483, "grad_norm": 0.45197275280952454, "learning_rate": 6.833930704898447e-06, "loss": 0.3889, "step": 572 }, { "epoch": 0.20547453980396843, "grad_norm": 0.45874449610710144, "learning_rate": 6.8458781362007174e-06, "loss": 0.379, "step": 573 }, { "epoch": 0.20583313411427206, "grad_norm": 0.49823427200317383, "learning_rate": 6.857825567502987e-06, "loss": 0.4089, "step": 574 }, { "epoch": 0.20619172842457567, "grad_norm": 0.43053093552589417, "learning_rate": 6.869772998805258e-06, "loss": 0.3851, "step": 575 }, { "epoch": 0.20655032273487928, "grad_norm": 0.40929970145225525, "learning_rate": 6.881720430107528e-06, "loss": 0.3833, "step": 576 }, { "epoch": 0.20690891704518288, "grad_norm": 0.5031723976135254, "learning_rate": 6.893667861409797e-06, "loss": 0.3794, "step": 577 }, { "epoch": 0.2072675113554865, "grad_norm": 0.517949640750885, "learning_rate": 6.905615292712068e-06, "loss": 0.3975, "step": 578 }, { "epoch": 0.2076261056657901, "grad_norm": 0.47281935811042786, "learning_rate": 6.917562724014337e-06, "loss": 0.3968, "step": 579 }, { "epoch": 0.2079846999760937, "grad_norm": 0.4719352126121521, "learning_rate": 6.929510155316607e-06, "loss": 0.4074, "step": 580 }, { "epoch": 0.20834329428639733, "grad_norm": 0.509178638458252, "learning_rate": 6.9414575866188775e-06, "loss": 0.4082, "step": 581 }, { "epoch": 0.20870188859670094, "grad_norm": 0.4124467670917511, "learning_rate": 6.9534050179211476e-06, "loss": 0.3669, "step": 582 }, { "epoch": 0.20906048290700455, "grad_norm": 0.4686502516269684, "learning_rate": 6.965352449223417e-06, "loss": 0.3768, "step": 583 }, { "epoch": 0.20941907721730815, "grad_norm": 0.4813520610332489, "learning_rate": 6.977299880525688e-06, "loss": 0.3929, "step": 584 }, { "epoch": 0.20977767152761176, "grad_norm": 0.42136937379837036, "learning_rate": 6.989247311827958e-06, "loss": 0.3857, "step": 585 }, { "epoch": 0.21013626583791536, "grad_norm": 0.42666861414909363, "learning_rate": 7.001194743130227e-06, "loss": 0.394, "step": 586 }, { "epoch": 0.21049486014821897, "grad_norm": 0.5123594403266907, "learning_rate": 7.013142174432498e-06, "loss": 0.385, "step": 587 }, { "epoch": 0.2108534544585226, "grad_norm": 0.46554991602897644, "learning_rate": 7.025089605734767e-06, "loss": 0.3996, "step": 588 }, { "epoch": 0.2112120487688262, "grad_norm": 0.4773072898387909, "learning_rate": 7.0370370370370375e-06, "loss": 0.379, "step": 589 }, { "epoch": 0.21157064307912982, "grad_norm": 0.5412073731422424, "learning_rate": 7.048984468339307e-06, "loss": 0.3955, "step": 590 }, { "epoch": 0.21192923738943342, "grad_norm": 0.4397923946380615, "learning_rate": 7.060931899641578e-06, "loss": 0.3983, "step": 591 }, { "epoch": 0.21228783169973703, "grad_norm": 0.48601940274238586, "learning_rate": 7.072879330943848e-06, "loss": 0.3811, "step": 592 }, { "epoch": 0.21264642601004063, "grad_norm": 0.4290758967399597, "learning_rate": 7.084826762246118e-06, "loss": 0.4108, "step": 593 }, { "epoch": 0.21300502032034424, "grad_norm": 0.45370006561279297, "learning_rate": 7.096774193548388e-06, "loss": 0.3651, "step": 594 }, { "epoch": 0.21336361463064787, "grad_norm": 0.4481194019317627, "learning_rate": 7.108721624850657e-06, "loss": 0.3872, "step": 595 }, { "epoch": 0.21372220894095148, "grad_norm": 0.45926257967948914, "learning_rate": 7.120669056152928e-06, "loss": 0.4162, "step": 596 }, { "epoch": 0.2140808032512551, "grad_norm": 0.4622972309589386, "learning_rate": 7.1326164874551975e-06, "loss": 0.4024, "step": 597 }, { "epoch": 0.2144393975615587, "grad_norm": 0.4608667492866516, "learning_rate": 7.144563918757468e-06, "loss": 0.398, "step": 598 }, { "epoch": 0.2147979918718623, "grad_norm": 0.4645674526691437, "learning_rate": 7.156511350059739e-06, "loss": 0.3819, "step": 599 }, { "epoch": 0.2151565861821659, "grad_norm": 0.46989938616752625, "learning_rate": 7.168458781362008e-06, "loss": 0.3898, "step": 600 }, { "epoch": 0.2155151804924695, "grad_norm": 0.48512014746665955, "learning_rate": 7.180406212664278e-06, "loss": 0.3832, "step": 601 }, { "epoch": 0.21587377480277312, "grad_norm": 0.46613094210624695, "learning_rate": 7.192353643966547e-06, "loss": 0.3861, "step": 602 }, { "epoch": 0.21623236911307675, "grad_norm": 0.5066441893577576, "learning_rate": 7.204301075268818e-06, "loss": 0.3848, "step": 603 }, { "epoch": 0.21659096342338036, "grad_norm": 0.4781426191329956, "learning_rate": 7.2162485065710874e-06, "loss": 0.381, "step": 604 }, { "epoch": 0.21694955773368396, "grad_norm": 0.5609757304191589, "learning_rate": 7.2281959378733575e-06, "loss": 0.3827, "step": 605 }, { "epoch": 0.21730815204398757, "grad_norm": 0.46893075108528137, "learning_rate": 7.240143369175628e-06, "loss": 0.3939, "step": 606 }, { "epoch": 0.21766674635429117, "grad_norm": 0.45587462186813354, "learning_rate": 7.252090800477898e-06, "loss": 0.38, "step": 607 }, { "epoch": 0.21802534066459478, "grad_norm": 0.5003952980041504, "learning_rate": 7.264038231780169e-06, "loss": 0.4056, "step": 608 }, { "epoch": 0.21838393497489839, "grad_norm": 0.46948251128196716, "learning_rate": 7.275985663082438e-06, "loss": 0.4018, "step": 609 }, { "epoch": 0.21874252928520202, "grad_norm": 0.5179306268692017, "learning_rate": 7.287933094384708e-06, "loss": 0.4318, "step": 610 }, { "epoch": 0.21910112359550563, "grad_norm": 0.43379729986190796, "learning_rate": 7.299880525686977e-06, "loss": 0.3775, "step": 611 }, { "epoch": 0.21945971790580923, "grad_norm": 0.5352974534034729, "learning_rate": 7.311827956989248e-06, "loss": 0.3724, "step": 612 }, { "epoch": 0.21981831221611284, "grad_norm": 0.4571295976638794, "learning_rate": 7.3237753882915176e-06, "loss": 0.3846, "step": 613 }, { "epoch": 0.22017690652641644, "grad_norm": 0.42357704043388367, "learning_rate": 7.335722819593788e-06, "loss": 0.4032, "step": 614 }, { "epoch": 0.22053550083672005, "grad_norm": 0.44756099581718445, "learning_rate": 7.347670250896059e-06, "loss": 0.3851, "step": 615 }, { "epoch": 0.22089409514702366, "grad_norm": 0.46630120277404785, "learning_rate": 7.359617682198328e-06, "loss": 0.379, "step": 616 }, { "epoch": 0.2212526894573273, "grad_norm": 0.4867898225784302, "learning_rate": 7.371565113500598e-06, "loss": 0.3878, "step": 617 }, { "epoch": 0.2216112837676309, "grad_norm": 0.47168588638305664, "learning_rate": 7.383512544802868e-06, "loss": 0.3875, "step": 618 }, { "epoch": 0.2219698780779345, "grad_norm": 0.39512279629707336, "learning_rate": 7.395459976105138e-06, "loss": 0.3865, "step": 619 }, { "epoch": 0.2223284723882381, "grad_norm": 0.45712777972221375, "learning_rate": 7.4074074074074075e-06, "loss": 0.4032, "step": 620 }, { "epoch": 0.2226870666985417, "grad_norm": 0.5029059648513794, "learning_rate": 7.4193548387096784e-06, "loss": 0.4024, "step": 621 }, { "epoch": 0.22304566100884532, "grad_norm": 0.44883763790130615, "learning_rate": 7.4313022700119485e-06, "loss": 0.4057, "step": 622 }, { "epoch": 0.22340425531914893, "grad_norm": 0.46412113308906555, "learning_rate": 7.443249701314218e-06, "loss": 0.382, "step": 623 }, { "epoch": 0.22376284962945256, "grad_norm": 0.5820634365081787, "learning_rate": 7.455197132616489e-06, "loss": 0.3835, "step": 624 }, { "epoch": 0.22412144393975617, "grad_norm": 0.4479105472564697, "learning_rate": 7.467144563918758e-06, "loss": 0.3906, "step": 625 }, { "epoch": 0.22448003825005977, "grad_norm": 0.5865285992622375, "learning_rate": 7.479091995221028e-06, "loss": 0.4086, "step": 626 }, { "epoch": 0.22483863256036338, "grad_norm": 0.5259613990783691, "learning_rate": 7.491039426523297e-06, "loss": 0.405, "step": 627 }, { "epoch": 0.22519722687066698, "grad_norm": 0.5104691386222839, "learning_rate": 7.502986857825568e-06, "loss": 0.3939, "step": 628 }, { "epoch": 0.2255558211809706, "grad_norm": 0.5837239027023315, "learning_rate": 7.514934289127838e-06, "loss": 0.3902, "step": 629 }, { "epoch": 0.2259144154912742, "grad_norm": 0.5220528244972229, "learning_rate": 7.526881720430108e-06, "loss": 0.4059, "step": 630 }, { "epoch": 0.2262730098015778, "grad_norm": 0.4882347285747528, "learning_rate": 7.538829151732379e-06, "loss": 0.3785, "step": 631 }, { "epoch": 0.22663160411188144, "grad_norm": 0.48700669407844543, "learning_rate": 7.550776583034648e-06, "loss": 0.4113, "step": 632 }, { "epoch": 0.22699019842218504, "grad_norm": 0.5017825961112976, "learning_rate": 7.562724014336919e-06, "loss": 0.3938, "step": 633 }, { "epoch": 0.22734879273248865, "grad_norm": 0.5926743745803833, "learning_rate": 7.574671445639188e-06, "loss": 0.3879, "step": 634 }, { "epoch": 0.22770738704279225, "grad_norm": 0.4891039729118347, "learning_rate": 7.586618876941458e-06, "loss": 0.3987, "step": 635 }, { "epoch": 0.22806598135309586, "grad_norm": 0.598374605178833, "learning_rate": 7.5985663082437275e-06, "loss": 0.3989, "step": 636 }, { "epoch": 0.22842457566339947, "grad_norm": 0.4944741427898407, "learning_rate": 7.6105137395459985e-06, "loss": 0.3868, "step": 637 }, { "epoch": 0.22878316997370307, "grad_norm": 0.5403999090194702, "learning_rate": 7.622461170848269e-06, "loss": 0.3821, "step": 638 }, { "epoch": 0.2291417642840067, "grad_norm": 0.44009387493133545, "learning_rate": 7.634408602150538e-06, "loss": 0.3689, "step": 639 }, { "epoch": 0.2295003585943103, "grad_norm": 0.4918476343154907, "learning_rate": 7.646356033452809e-06, "loss": 0.4004, "step": 640 }, { "epoch": 0.22985895290461392, "grad_norm": 0.453818142414093, "learning_rate": 7.658303464755078e-06, "loss": 0.3745, "step": 641 }, { "epoch": 0.23021754721491752, "grad_norm": 0.44781020283699036, "learning_rate": 7.670250896057349e-06, "loss": 0.4153, "step": 642 }, { "epoch": 0.23057614152522113, "grad_norm": 0.4944031536579132, "learning_rate": 7.682198327359618e-06, "loss": 0.4117, "step": 643 }, { "epoch": 0.23093473583552473, "grad_norm": 0.5161592364311218, "learning_rate": 7.694145758661888e-06, "loss": 0.427, "step": 644 }, { "epoch": 0.23129333014582834, "grad_norm": 0.46554994583129883, "learning_rate": 7.706093189964159e-06, "loss": 0.3972, "step": 645 }, { "epoch": 0.23165192445613197, "grad_norm": 0.5006209015846252, "learning_rate": 7.718040621266428e-06, "loss": 0.4011, "step": 646 }, { "epoch": 0.23201051876643558, "grad_norm": 0.47263363003730774, "learning_rate": 7.729988052568699e-06, "loss": 0.3922, "step": 647 }, { "epoch": 0.2323691130767392, "grad_norm": 0.5390496850013733, "learning_rate": 7.741935483870968e-06, "loss": 0.4035, "step": 648 }, { "epoch": 0.2327277073870428, "grad_norm": 0.5005566477775574, "learning_rate": 7.753882915173239e-06, "loss": 0.4075, "step": 649 }, { "epoch": 0.2330863016973464, "grad_norm": 0.5843583941459656, "learning_rate": 7.765830346475508e-06, "loss": 0.3998, "step": 650 }, { "epoch": 0.23344489600765, "grad_norm": 0.525547444820404, "learning_rate": 7.77777777777778e-06, "loss": 0.39, "step": 651 }, { "epoch": 0.2338034903179536, "grad_norm": 0.507908821105957, "learning_rate": 7.789725209080048e-06, "loss": 0.3992, "step": 652 }, { "epoch": 0.23416208462825724, "grad_norm": 0.47272494435310364, "learning_rate": 7.801672640382318e-06, "loss": 0.3975, "step": 653 }, { "epoch": 0.23452067893856085, "grad_norm": 0.5821009278297424, "learning_rate": 7.813620071684589e-06, "loss": 0.38, "step": 654 }, { "epoch": 0.23487927324886446, "grad_norm": 0.5661270022392273, "learning_rate": 7.825567502986858e-06, "loss": 0.3768, "step": 655 }, { "epoch": 0.23523786755916806, "grad_norm": 0.49092385172843933, "learning_rate": 7.837514934289129e-06, "loss": 0.3926, "step": 656 }, { "epoch": 0.23559646186947167, "grad_norm": 0.5851085782051086, "learning_rate": 7.849462365591398e-06, "loss": 0.3957, "step": 657 }, { "epoch": 0.23595505617977527, "grad_norm": 0.521277666091919, "learning_rate": 7.861409796893669e-06, "loss": 0.3846, "step": 658 }, { "epoch": 0.23631365049007888, "grad_norm": 0.47766050696372986, "learning_rate": 7.873357228195938e-06, "loss": 0.3965, "step": 659 }, { "epoch": 0.23667224480038251, "grad_norm": 0.5344738364219666, "learning_rate": 7.88530465949821e-06, "loss": 0.4062, "step": 660 }, { "epoch": 0.23703083911068612, "grad_norm": 0.5142890214920044, "learning_rate": 7.897252090800479e-06, "loss": 0.3818, "step": 661 }, { "epoch": 0.23738943342098973, "grad_norm": 0.5957561731338501, "learning_rate": 7.909199522102748e-06, "loss": 0.3964, "step": 662 }, { "epoch": 0.23774802773129333, "grad_norm": 0.4383288025856018, "learning_rate": 7.921146953405019e-06, "loss": 0.3912, "step": 663 }, { "epoch": 0.23810662204159694, "grad_norm": 0.6080900430679321, "learning_rate": 7.933094384707288e-06, "loss": 0.4136, "step": 664 }, { "epoch": 0.23846521635190054, "grad_norm": 0.5169405341148376, "learning_rate": 7.945041816009559e-06, "loss": 0.3868, "step": 665 }, { "epoch": 0.23882381066220415, "grad_norm": 0.4689905047416687, "learning_rate": 7.956989247311828e-06, "loss": 0.3693, "step": 666 }, { "epoch": 0.23918240497250776, "grad_norm": 0.5555201172828674, "learning_rate": 7.9689366786141e-06, "loss": 0.3904, "step": 667 }, { "epoch": 0.2395409992828114, "grad_norm": 0.4640530049800873, "learning_rate": 7.980884109916368e-06, "loss": 0.3728, "step": 668 }, { "epoch": 0.239899593593115, "grad_norm": 0.47273001074790955, "learning_rate": 7.992831541218638e-06, "loss": 0.3777, "step": 669 }, { "epoch": 0.2402581879034186, "grad_norm": 0.4770370125770569, "learning_rate": 8.004778972520909e-06, "loss": 0.3779, "step": 670 }, { "epoch": 0.2406167822137222, "grad_norm": 0.48450401425361633, "learning_rate": 8.016726403823178e-06, "loss": 0.3987, "step": 671 }, { "epoch": 0.2409753765240258, "grad_norm": 0.4481484293937683, "learning_rate": 8.028673835125449e-06, "loss": 0.382, "step": 672 }, { "epoch": 0.24133397083432942, "grad_norm": 0.5058109760284424, "learning_rate": 8.040621266427718e-06, "loss": 0.394, "step": 673 }, { "epoch": 0.24169256514463303, "grad_norm": 0.43163934350013733, "learning_rate": 8.052568697729989e-06, "loss": 0.3857, "step": 674 }, { "epoch": 0.24205115945493666, "grad_norm": 0.4693927466869354, "learning_rate": 8.064516129032258e-06, "loss": 0.4143, "step": 675 }, { "epoch": 0.24240975376524027, "grad_norm": 0.44455644488334656, "learning_rate": 8.07646356033453e-06, "loss": 0.3852, "step": 676 }, { "epoch": 0.24276834807554387, "grad_norm": 0.43430259823799133, "learning_rate": 8.088410991636799e-06, "loss": 0.3914, "step": 677 }, { "epoch": 0.24312694238584748, "grad_norm": 0.47194597125053406, "learning_rate": 8.100358422939068e-06, "loss": 0.3869, "step": 678 }, { "epoch": 0.24348553669615108, "grad_norm": 0.5073704123497009, "learning_rate": 8.112305854241339e-06, "loss": 0.3935, "step": 679 }, { "epoch": 0.2438441310064547, "grad_norm": 0.4767199456691742, "learning_rate": 8.124253285543608e-06, "loss": 0.4053, "step": 680 }, { "epoch": 0.2442027253167583, "grad_norm": 0.4816272556781769, "learning_rate": 8.136200716845879e-06, "loss": 0.3792, "step": 681 }, { "epoch": 0.24456131962706193, "grad_norm": 0.5568973422050476, "learning_rate": 8.148148148148148e-06, "loss": 0.3861, "step": 682 }, { "epoch": 0.24491991393736554, "grad_norm": 0.47738754749298096, "learning_rate": 8.16009557945042e-06, "loss": 0.3879, "step": 683 }, { "epoch": 0.24527850824766914, "grad_norm": 0.5255285501480103, "learning_rate": 8.172043010752689e-06, "loss": 0.3811, "step": 684 }, { "epoch": 0.24563710255797275, "grad_norm": 0.511052668094635, "learning_rate": 8.18399044205496e-06, "loss": 0.3816, "step": 685 }, { "epoch": 0.24599569686827635, "grad_norm": 0.5308071374893188, "learning_rate": 8.195937873357229e-06, "loss": 0.3757, "step": 686 }, { "epoch": 0.24635429117857996, "grad_norm": 0.5344949960708618, "learning_rate": 8.207885304659498e-06, "loss": 0.3972, "step": 687 }, { "epoch": 0.24671288548888357, "grad_norm": 0.5560814142227173, "learning_rate": 8.219832735961769e-06, "loss": 0.39, "step": 688 }, { "epoch": 0.2470714797991872, "grad_norm": 0.4216794967651367, "learning_rate": 8.231780167264038e-06, "loss": 0.3884, "step": 689 }, { "epoch": 0.2474300741094908, "grad_norm": 0.5654876232147217, "learning_rate": 8.24372759856631e-06, "loss": 0.3976, "step": 690 }, { "epoch": 0.2477886684197944, "grad_norm": 0.5228829383850098, "learning_rate": 8.255675029868578e-06, "loss": 0.4007, "step": 691 }, { "epoch": 0.24814726273009802, "grad_norm": 0.6125988960266113, "learning_rate": 8.26762246117085e-06, "loss": 0.3907, "step": 692 }, { "epoch": 0.24850585704040162, "grad_norm": 0.4796591103076935, "learning_rate": 8.279569892473119e-06, "loss": 0.3698, "step": 693 }, { "epoch": 0.24886445135070523, "grad_norm": 0.5348652601242065, "learning_rate": 8.291517323775388e-06, "loss": 0.406, "step": 694 }, { "epoch": 0.24922304566100884, "grad_norm": 0.5076994895935059, "learning_rate": 8.303464755077659e-06, "loss": 0.3756, "step": 695 }, { "epoch": 0.24958163997131247, "grad_norm": 0.5610140562057495, "learning_rate": 8.315412186379928e-06, "loss": 0.3724, "step": 696 }, { "epoch": 0.24994023428161607, "grad_norm": 0.5784087181091309, "learning_rate": 8.327359617682199e-06, "loss": 0.4015, "step": 697 }, { "epoch": 0.25029882859191965, "grad_norm": 0.4458978474140167, "learning_rate": 8.339307048984468e-06, "loss": 0.3676, "step": 698 }, { "epoch": 0.25065742290222326, "grad_norm": 0.5053375959396362, "learning_rate": 8.35125448028674e-06, "loss": 0.3692, "step": 699 }, { "epoch": 0.2510160172125269, "grad_norm": 0.5538926124572754, "learning_rate": 8.36320191158901e-06, "loss": 0.3777, "step": 700 }, { "epoch": 0.2513746115228305, "grad_norm": 0.48962169885635376, "learning_rate": 8.37514934289128e-06, "loss": 0.387, "step": 701 }, { "epoch": 0.25173320583313413, "grad_norm": 0.47645559906959534, "learning_rate": 8.387096774193549e-06, "loss": 0.4032, "step": 702 }, { "epoch": 0.25209180014343774, "grad_norm": 0.6630553007125854, "learning_rate": 8.399044205495818e-06, "loss": 0.4122, "step": 703 }, { "epoch": 0.25245039445374134, "grad_norm": 0.501896321773529, "learning_rate": 8.410991636798089e-06, "loss": 0.3695, "step": 704 }, { "epoch": 0.25280898876404495, "grad_norm": 0.5550310611724854, "learning_rate": 8.422939068100358e-06, "loss": 0.3752, "step": 705 }, { "epoch": 0.25316758307434856, "grad_norm": 0.5129506587982178, "learning_rate": 8.43488649940263e-06, "loss": 0.3944, "step": 706 }, { "epoch": 0.25352617738465216, "grad_norm": 0.4436836838722229, "learning_rate": 8.4468339307049e-06, "loss": 0.3873, "step": 707 }, { "epoch": 0.25388477169495577, "grad_norm": 0.5029718279838562, "learning_rate": 8.45878136200717e-06, "loss": 0.3742, "step": 708 }, { "epoch": 0.2542433660052594, "grad_norm": 0.5677691102027893, "learning_rate": 8.470728793309439e-06, "loss": 0.382, "step": 709 }, { "epoch": 0.254601960315563, "grad_norm": 0.5410685539245605, "learning_rate": 8.48267622461171e-06, "loss": 0.3822, "step": 710 }, { "epoch": 0.2549605546258666, "grad_norm": 0.46200165152549744, "learning_rate": 8.494623655913979e-06, "loss": 0.3917, "step": 711 }, { "epoch": 0.2553191489361702, "grad_norm": 0.5269785523414612, "learning_rate": 8.506571087216248e-06, "loss": 0.3828, "step": 712 }, { "epoch": 0.2556777432464738, "grad_norm": 0.4996577799320221, "learning_rate": 8.518518518518519e-06, "loss": 0.412, "step": 713 }, { "epoch": 0.25603633755677746, "grad_norm": 0.5329709053039551, "learning_rate": 8.530465949820788e-06, "loss": 0.3809, "step": 714 }, { "epoch": 0.25639493186708107, "grad_norm": 0.4959821105003357, "learning_rate": 8.54241338112306e-06, "loss": 0.3826, "step": 715 }, { "epoch": 0.25675352617738467, "grad_norm": 0.5066509246826172, "learning_rate": 8.55436081242533e-06, "loss": 0.3754, "step": 716 }, { "epoch": 0.2571121204876883, "grad_norm": 0.5543563365936279, "learning_rate": 8.5663082437276e-06, "loss": 0.3915, "step": 717 }, { "epoch": 0.2574707147979919, "grad_norm": 0.44262292981147766, "learning_rate": 8.578255675029869e-06, "loss": 0.3838, "step": 718 }, { "epoch": 0.2578293091082955, "grad_norm": 0.5615265369415283, "learning_rate": 8.590203106332138e-06, "loss": 0.3936, "step": 719 }, { "epoch": 0.2581879034185991, "grad_norm": 0.544588029384613, "learning_rate": 8.602150537634409e-06, "loss": 0.3856, "step": 720 }, { "epoch": 0.2585464977289027, "grad_norm": 0.4977324903011322, "learning_rate": 8.614097968936678e-06, "loss": 0.3909, "step": 721 }, { "epoch": 0.2589050920392063, "grad_norm": 0.5414032340049744, "learning_rate": 8.62604540023895e-06, "loss": 0.3695, "step": 722 }, { "epoch": 0.2592636863495099, "grad_norm": 0.5742842555046082, "learning_rate": 8.63799283154122e-06, "loss": 0.3986, "step": 723 }, { "epoch": 0.2596222806598135, "grad_norm": 0.4867381453514099, "learning_rate": 8.64994026284349e-06, "loss": 0.3912, "step": 724 }, { "epoch": 0.2599808749701171, "grad_norm": 0.5892418622970581, "learning_rate": 8.66188769414576e-06, "loss": 0.3922, "step": 725 }, { "epoch": 0.26033946928042073, "grad_norm": 0.48612019419670105, "learning_rate": 8.67383512544803e-06, "loss": 0.3936, "step": 726 }, { "epoch": 0.26069806359072434, "grad_norm": 0.5159116387367249, "learning_rate": 8.685782556750299e-06, "loss": 0.3605, "step": 727 }, { "epoch": 0.26105665790102794, "grad_norm": 0.5188771486282349, "learning_rate": 8.697729988052568e-06, "loss": 0.3917, "step": 728 }, { "epoch": 0.2614152522113316, "grad_norm": 0.4884946644306183, "learning_rate": 8.70967741935484e-06, "loss": 0.4013, "step": 729 }, { "epoch": 0.2617738465216352, "grad_norm": 0.4490492343902588, "learning_rate": 8.72162485065711e-06, "loss": 0.387, "step": 730 }, { "epoch": 0.2621324408319388, "grad_norm": 0.5006316304206848, "learning_rate": 8.73357228195938e-06, "loss": 0.3736, "step": 731 }, { "epoch": 0.2624910351422424, "grad_norm": 0.4661957621574402, "learning_rate": 8.74551971326165e-06, "loss": 0.4004, "step": 732 }, { "epoch": 0.26284962945254603, "grad_norm": 0.447486013174057, "learning_rate": 8.75746714456392e-06, "loss": 0.3947, "step": 733 }, { "epoch": 0.26320822376284964, "grad_norm": 0.5142832398414612, "learning_rate": 8.769414575866189e-06, "loss": 0.3705, "step": 734 }, { "epoch": 0.26356681807315324, "grad_norm": 0.5152048468589783, "learning_rate": 8.78136200716846e-06, "loss": 0.3832, "step": 735 }, { "epoch": 0.26392541238345685, "grad_norm": 0.5081390142440796, "learning_rate": 8.793309438470729e-06, "loss": 0.3871, "step": 736 }, { "epoch": 0.26428400669376045, "grad_norm": 0.4656601548194885, "learning_rate": 8.805256869772998e-06, "loss": 0.3541, "step": 737 }, { "epoch": 0.26464260100406406, "grad_norm": 0.5806810259819031, "learning_rate": 8.81720430107527e-06, "loss": 0.3911, "step": 738 }, { "epoch": 0.26500119531436767, "grad_norm": 0.512076735496521, "learning_rate": 8.82915173237754e-06, "loss": 0.3643, "step": 739 }, { "epoch": 0.26535978962467127, "grad_norm": 0.5281196236610413, "learning_rate": 8.84109916367981e-06, "loss": 0.401, "step": 740 }, { "epoch": 0.2657183839349749, "grad_norm": 0.4784122407436371, "learning_rate": 8.85304659498208e-06, "loss": 0.3813, "step": 741 }, { "epoch": 0.2660769782452785, "grad_norm": 0.5050911903381348, "learning_rate": 8.86499402628435e-06, "loss": 0.3946, "step": 742 }, { "epoch": 0.26643557255558215, "grad_norm": 0.49442070722579956, "learning_rate": 8.876941457586619e-06, "loss": 0.3802, "step": 743 }, { "epoch": 0.26679416686588575, "grad_norm": 0.497566282749176, "learning_rate": 8.888888888888888e-06, "loss": 0.4324, "step": 744 }, { "epoch": 0.26715276117618936, "grad_norm": 0.5290309190750122, "learning_rate": 8.90083632019116e-06, "loss": 0.4133, "step": 745 }, { "epoch": 0.26751135548649296, "grad_norm": 0.44927436113357544, "learning_rate": 8.91278375149343e-06, "loss": 0.3893, "step": 746 }, { "epoch": 0.26786994979679657, "grad_norm": 0.5873064398765564, "learning_rate": 8.9247311827957e-06, "loss": 0.3909, "step": 747 }, { "epoch": 0.2682285441071002, "grad_norm": 0.4599419832229614, "learning_rate": 8.93667861409797e-06, "loss": 0.3576, "step": 748 }, { "epoch": 0.2685871384174038, "grad_norm": 0.44493812322616577, "learning_rate": 8.94862604540024e-06, "loss": 0.3906, "step": 749 }, { "epoch": 0.2689457327277074, "grad_norm": 0.528063952922821, "learning_rate": 8.96057347670251e-06, "loss": 0.3886, "step": 750 }, { "epoch": 0.269304327038011, "grad_norm": 0.46077585220336914, "learning_rate": 8.97252090800478e-06, "loss": 0.3521, "step": 751 }, { "epoch": 0.2696629213483146, "grad_norm": 0.4205012321472168, "learning_rate": 8.98446833930705e-06, "loss": 0.3881, "step": 752 }, { "epoch": 0.2700215156586182, "grad_norm": 0.4875257611274719, "learning_rate": 8.99641577060932e-06, "loss": 0.3717, "step": 753 }, { "epoch": 0.2703801099689218, "grad_norm": 0.45052048563957214, "learning_rate": 9.00836320191159e-06, "loss": 0.4102, "step": 754 }, { "epoch": 0.2707387042792254, "grad_norm": 0.5154467225074768, "learning_rate": 9.02031063321386e-06, "loss": 0.391, "step": 755 }, { "epoch": 0.271097298589529, "grad_norm": 0.5086910724639893, "learning_rate": 9.03225806451613e-06, "loss": 0.3741, "step": 756 }, { "epoch": 0.27145589289983263, "grad_norm": 0.4885992109775543, "learning_rate": 9.0442054958184e-06, "loss": 0.3841, "step": 757 }, { "epoch": 0.2718144872101363, "grad_norm": 0.491050124168396, "learning_rate": 9.05615292712067e-06, "loss": 0.4116, "step": 758 }, { "epoch": 0.2721730815204399, "grad_norm": 0.39030689001083374, "learning_rate": 9.068100358422939e-06, "loss": 0.3893, "step": 759 }, { "epoch": 0.2725316758307435, "grad_norm": 0.5221425294876099, "learning_rate": 9.08004778972521e-06, "loss": 0.372, "step": 760 }, { "epoch": 0.2728902701410471, "grad_norm": 0.418911874294281, "learning_rate": 9.09199522102748e-06, "loss": 0.3961, "step": 761 }, { "epoch": 0.2732488644513507, "grad_norm": 0.4517577886581421, "learning_rate": 9.10394265232975e-06, "loss": 0.3792, "step": 762 }, { "epoch": 0.2736074587616543, "grad_norm": 0.4465749263763428, "learning_rate": 9.11589008363202e-06, "loss": 0.3824, "step": 763 }, { "epoch": 0.2739660530719579, "grad_norm": 0.4136614501476288, "learning_rate": 9.12783751493429e-06, "loss": 0.4127, "step": 764 }, { "epoch": 0.27432464738226153, "grad_norm": 0.4393359124660492, "learning_rate": 9.13978494623656e-06, "loss": 0.3957, "step": 765 }, { "epoch": 0.27468324169256514, "grad_norm": 0.5275862216949463, "learning_rate": 9.15173237753883e-06, "loss": 0.3774, "step": 766 }, { "epoch": 0.27504183600286874, "grad_norm": 0.4385104179382324, "learning_rate": 9.1636798088411e-06, "loss": 0.3627, "step": 767 }, { "epoch": 0.27540043031317235, "grad_norm": 0.4870828092098236, "learning_rate": 9.17562724014337e-06, "loss": 0.351, "step": 768 }, { "epoch": 0.27575902462347596, "grad_norm": 0.523923397064209, "learning_rate": 9.18757467144564e-06, "loss": 0.3677, "step": 769 }, { "epoch": 0.27611761893377956, "grad_norm": 0.4668755531311035, "learning_rate": 9.19952210274791e-06, "loss": 0.3857, "step": 770 }, { "epoch": 0.27647621324408317, "grad_norm": 0.5371063947677612, "learning_rate": 9.21146953405018e-06, "loss": 0.3979, "step": 771 }, { "epoch": 0.27683480755438683, "grad_norm": 0.4777204096317291, "learning_rate": 9.22341696535245e-06, "loss": 0.3815, "step": 772 }, { "epoch": 0.27719340186469044, "grad_norm": 0.48473331332206726, "learning_rate": 9.23536439665472e-06, "loss": 0.387, "step": 773 }, { "epoch": 0.27755199617499404, "grad_norm": 0.5202040076255798, "learning_rate": 9.24731182795699e-06, "loss": 0.4123, "step": 774 }, { "epoch": 0.27791059048529765, "grad_norm": 0.4456934630870819, "learning_rate": 9.25925925925926e-06, "loss": 0.3858, "step": 775 }, { "epoch": 0.27826918479560125, "grad_norm": 0.4226578176021576, "learning_rate": 9.27120669056153e-06, "loss": 0.3743, "step": 776 }, { "epoch": 0.27862777910590486, "grad_norm": 0.4562804400920868, "learning_rate": 9.2831541218638e-06, "loss": 0.3911, "step": 777 }, { "epoch": 0.27898637341620847, "grad_norm": 0.48570406436920166, "learning_rate": 9.29510155316607e-06, "loss": 0.3681, "step": 778 }, { "epoch": 0.2793449677265121, "grad_norm": 0.5414186120033264, "learning_rate": 9.30704898446834e-06, "loss": 0.4149, "step": 779 }, { "epoch": 0.2797035620368157, "grad_norm": 0.44129636883735657, "learning_rate": 9.31899641577061e-06, "loss": 0.3765, "step": 780 }, { "epoch": 0.2800621563471193, "grad_norm": 0.5112062692642212, "learning_rate": 9.33094384707288e-06, "loss": 0.3844, "step": 781 }, { "epoch": 0.2804207506574229, "grad_norm": 0.5425307750701904, "learning_rate": 9.34289127837515e-06, "loss": 0.3717, "step": 782 }, { "epoch": 0.2807793449677265, "grad_norm": 0.43767938017845154, "learning_rate": 9.35483870967742e-06, "loss": 0.3662, "step": 783 }, { "epoch": 0.2811379392780301, "grad_norm": 0.5027325749397278, "learning_rate": 9.36678614097969e-06, "loss": 0.3808, "step": 784 }, { "epoch": 0.2814965335883337, "grad_norm": 0.4419451653957367, "learning_rate": 9.37873357228196e-06, "loss": 0.3927, "step": 785 }, { "epoch": 0.2818551278986373, "grad_norm": 0.4879298508167267, "learning_rate": 9.39068100358423e-06, "loss": 0.3916, "step": 786 }, { "epoch": 0.282213722208941, "grad_norm": 0.4473230242729187, "learning_rate": 9.4026284348865e-06, "loss": 0.3937, "step": 787 }, { "epoch": 0.2825723165192446, "grad_norm": 0.45601531863212585, "learning_rate": 9.41457586618877e-06, "loss": 0.3685, "step": 788 }, { "epoch": 0.2829309108295482, "grad_norm": 0.4400896430015564, "learning_rate": 9.42652329749104e-06, "loss": 0.3719, "step": 789 }, { "epoch": 0.2832895051398518, "grad_norm": 0.4495096206665039, "learning_rate": 9.43847072879331e-06, "loss": 0.3833, "step": 790 }, { "epoch": 0.2836480994501554, "grad_norm": 0.4538611173629761, "learning_rate": 9.450418160095581e-06, "loss": 0.3601, "step": 791 }, { "epoch": 0.284006693760459, "grad_norm": 0.4652613401412964, "learning_rate": 9.46236559139785e-06, "loss": 0.3741, "step": 792 }, { "epoch": 0.2843652880707626, "grad_norm": 0.4683339595794678, "learning_rate": 9.47431302270012e-06, "loss": 0.3941, "step": 793 }, { "epoch": 0.2847238823810662, "grad_norm": 0.4588361084461212, "learning_rate": 9.48626045400239e-06, "loss": 0.4002, "step": 794 }, { "epoch": 0.2850824766913698, "grad_norm": 0.45115190744400024, "learning_rate": 9.49820788530466e-06, "loss": 0.3967, "step": 795 }, { "epoch": 0.28544107100167343, "grad_norm": 0.46005797386169434, "learning_rate": 9.51015531660693e-06, "loss": 0.3687, "step": 796 }, { "epoch": 0.28579966531197704, "grad_norm": 0.4679110050201416, "learning_rate": 9.5221027479092e-06, "loss": 0.3689, "step": 797 }, { "epoch": 0.28615825962228064, "grad_norm": 0.5255662202835083, "learning_rate": 9.53405017921147e-06, "loss": 0.3859, "step": 798 }, { "epoch": 0.28651685393258425, "grad_norm": 0.554934024810791, "learning_rate": 9.54599761051374e-06, "loss": 0.3861, "step": 799 }, { "epoch": 0.28687544824288785, "grad_norm": 0.4583991765975952, "learning_rate": 9.557945041816011e-06, "loss": 0.3689, "step": 800 }, { "epoch": 0.2872340425531915, "grad_norm": 0.6212143898010254, "learning_rate": 9.56989247311828e-06, "loss": 0.4124, "step": 801 }, { "epoch": 0.2875926368634951, "grad_norm": 0.486269474029541, "learning_rate": 9.58183990442055e-06, "loss": 0.3813, "step": 802 }, { "epoch": 0.2879512311737987, "grad_norm": 0.5667541027069092, "learning_rate": 9.59378733572282e-06, "loss": 0.3999, "step": 803 }, { "epoch": 0.28830982548410233, "grad_norm": 0.5717812180519104, "learning_rate": 9.60573476702509e-06, "loss": 0.3744, "step": 804 }, { "epoch": 0.28866841979440594, "grad_norm": 0.4555979371070862, "learning_rate": 9.61768219832736e-06, "loss": 0.4011, "step": 805 }, { "epoch": 0.28902701410470955, "grad_norm": 0.5175149440765381, "learning_rate": 9.62962962962963e-06, "loss": 0.3661, "step": 806 }, { "epoch": 0.28938560841501315, "grad_norm": 0.4981291890144348, "learning_rate": 9.641577060931901e-06, "loss": 0.3953, "step": 807 }, { "epoch": 0.28974420272531676, "grad_norm": 0.5998547077178955, "learning_rate": 9.65352449223417e-06, "loss": 0.4021, "step": 808 }, { "epoch": 0.29010279703562036, "grad_norm": 0.5044562816619873, "learning_rate": 9.66547192353644e-06, "loss": 0.3561, "step": 809 }, { "epoch": 0.29046139134592397, "grad_norm": 0.5133641362190247, "learning_rate": 9.67741935483871e-06, "loss": 0.3768, "step": 810 }, { "epoch": 0.2908199856562276, "grad_norm": 0.6294229030609131, "learning_rate": 9.68936678614098e-06, "loss": 0.3935, "step": 811 }, { "epoch": 0.2911785799665312, "grad_norm": 0.47036460041999817, "learning_rate": 9.70131421744325e-06, "loss": 0.3687, "step": 812 }, { "epoch": 0.2915371742768348, "grad_norm": 0.5573934316635132, "learning_rate": 9.71326164874552e-06, "loss": 0.3829, "step": 813 }, { "epoch": 0.2918957685871384, "grad_norm": 0.5072863698005676, "learning_rate": 9.725209080047791e-06, "loss": 0.3824, "step": 814 }, { "epoch": 0.29225436289744205, "grad_norm": 0.4971676766872406, "learning_rate": 9.737156511350062e-06, "loss": 0.3959, "step": 815 }, { "epoch": 0.29261295720774566, "grad_norm": 0.6154597997665405, "learning_rate": 9.749103942652331e-06, "loss": 0.3632, "step": 816 }, { "epoch": 0.29297155151804927, "grad_norm": 0.4802889823913574, "learning_rate": 9.7610513739546e-06, "loss": 0.3789, "step": 817 }, { "epoch": 0.2933301458283529, "grad_norm": 0.6538023948669434, "learning_rate": 9.77299880525687e-06, "loss": 0.3932, "step": 818 }, { "epoch": 0.2936887401386565, "grad_norm": 0.4658275246620178, "learning_rate": 9.78494623655914e-06, "loss": 0.3696, "step": 819 }, { "epoch": 0.2940473344489601, "grad_norm": 0.6398813724517822, "learning_rate": 9.79689366786141e-06, "loss": 0.4136, "step": 820 }, { "epoch": 0.2944059287592637, "grad_norm": 0.5205370187759399, "learning_rate": 9.80884109916368e-06, "loss": 0.3873, "step": 821 }, { "epoch": 0.2947645230695673, "grad_norm": 0.4855462610721588, "learning_rate": 9.820788530465952e-06, "loss": 0.3762, "step": 822 }, { "epoch": 0.2951231173798709, "grad_norm": 0.5582971572875977, "learning_rate": 9.832735961768221e-06, "loss": 0.3746, "step": 823 }, { "epoch": 0.2954817116901745, "grad_norm": 0.5271080732345581, "learning_rate": 9.84468339307049e-06, "loss": 0.3932, "step": 824 }, { "epoch": 0.2958403060004781, "grad_norm": 0.5884802341461182, "learning_rate": 9.856630824372761e-06, "loss": 0.4149, "step": 825 }, { "epoch": 0.2961989003107817, "grad_norm": 0.5215122103691101, "learning_rate": 9.86857825567503e-06, "loss": 0.3748, "step": 826 }, { "epoch": 0.2965574946210853, "grad_norm": 0.5172651410102844, "learning_rate": 9.8805256869773e-06, "loss": 0.3978, "step": 827 }, { "epoch": 0.29691608893138893, "grad_norm": 0.49479159712791443, "learning_rate": 9.89247311827957e-06, "loss": 0.4045, "step": 828 }, { "epoch": 0.29727468324169254, "grad_norm": 0.5102479457855225, "learning_rate": 9.90442054958184e-06, "loss": 0.3881, "step": 829 }, { "epoch": 0.2976332775519962, "grad_norm": 0.5007883906364441, "learning_rate": 9.916367980884111e-06, "loss": 0.368, "step": 830 }, { "epoch": 0.2979918718622998, "grad_norm": 0.5163625478744507, "learning_rate": 9.928315412186382e-06, "loss": 0.3787, "step": 831 }, { "epoch": 0.2983504661726034, "grad_norm": 0.4900919497013092, "learning_rate": 9.940262843488651e-06, "loss": 0.3869, "step": 832 }, { "epoch": 0.298709060482907, "grad_norm": 0.5503488779067993, "learning_rate": 9.95221027479092e-06, "loss": 0.3825, "step": 833 }, { "epoch": 0.2990676547932106, "grad_norm": 0.528723955154419, "learning_rate": 9.96415770609319e-06, "loss": 0.3863, "step": 834 }, { "epoch": 0.29942624910351423, "grad_norm": 0.4574635624885559, "learning_rate": 9.97610513739546e-06, "loss": 0.3921, "step": 835 }, { "epoch": 0.29978484341381784, "grad_norm": 0.4423650801181793, "learning_rate": 9.98805256869773e-06, "loss": 0.372, "step": 836 }, { "epoch": 0.30014343772412144, "grad_norm": 0.5507116317749023, "learning_rate": 1e-05, "loss": 0.377, "step": 837 }, { "epoch": 0.30050203203442505, "grad_norm": 0.4745589792728424, "learning_rate": 9.999999564492222e-06, "loss": 0.3884, "step": 838 }, { "epoch": 0.30086062634472865, "grad_norm": 0.4709901511669159, "learning_rate": 9.999998257968965e-06, "loss": 0.3786, "step": 839 }, { "epoch": 0.30121922065503226, "grad_norm": 0.5344143509864807, "learning_rate": 9.999996080430454e-06, "loss": 0.3866, "step": 840 }, { "epoch": 0.30157781496533587, "grad_norm": 0.5257929563522339, "learning_rate": 9.99999303187707e-06, "loss": 0.3925, "step": 841 }, { "epoch": 0.3019364092756395, "grad_norm": 0.5237584710121155, "learning_rate": 9.999989112309344e-06, "loss": 0.3838, "step": 842 }, { "epoch": 0.3022950035859431, "grad_norm": 0.4750411808490753, "learning_rate": 9.99998432172796e-06, "loss": 0.3736, "step": 843 }, { "epoch": 0.30265359789624674, "grad_norm": 0.5024147033691406, "learning_rate": 9.99997866013375e-06, "loss": 0.384, "step": 844 }, { "epoch": 0.30301219220655035, "grad_norm": 0.4348618686199188, "learning_rate": 9.999972127527704e-06, "loss": 0.3901, "step": 845 }, { "epoch": 0.30337078651685395, "grad_norm": 0.5085117220878601, "learning_rate": 9.999964723910956e-06, "loss": 0.3989, "step": 846 }, { "epoch": 0.30372938082715756, "grad_norm": 0.49803850054740906, "learning_rate": 9.999956449284797e-06, "loss": 0.3949, "step": 847 }, { "epoch": 0.30408797513746116, "grad_norm": 0.5258169174194336, "learning_rate": 9.999947303650669e-06, "loss": 0.3718, "step": 848 }, { "epoch": 0.30444656944776477, "grad_norm": 0.6079532504081726, "learning_rate": 9.999937287010164e-06, "loss": 0.3721, "step": 849 }, { "epoch": 0.3048051637580684, "grad_norm": 0.4827178716659546, "learning_rate": 9.99992639936503e-06, "loss": 0.4136, "step": 850 }, { "epoch": 0.305163758068372, "grad_norm": 0.5285632610321045, "learning_rate": 9.999914640717159e-06, "loss": 0.3895, "step": 851 }, { "epoch": 0.3055223523786756, "grad_norm": 0.5023762583732605, "learning_rate": 9.999902011068604e-06, "loss": 0.3609, "step": 852 }, { "epoch": 0.3058809466889792, "grad_norm": 0.5103974342346191, "learning_rate": 9.999888510421562e-06, "loss": 0.3872, "step": 853 }, { "epoch": 0.3062395409992828, "grad_norm": 0.4926343262195587, "learning_rate": 9.999874138778387e-06, "loss": 0.4264, "step": 854 }, { "epoch": 0.3065981353095864, "grad_norm": 0.4606998562812805, "learning_rate": 9.99985889614158e-06, "loss": 0.4004, "step": 855 }, { "epoch": 0.30695672961989, "grad_norm": 0.461781769990921, "learning_rate": 9.9998427825138e-06, "loss": 0.3715, "step": 856 }, { "epoch": 0.3073153239301936, "grad_norm": 0.5363285541534424, "learning_rate": 9.99982579789785e-06, "loss": 0.4055, "step": 857 }, { "epoch": 0.3076739182404972, "grad_norm": 0.4763014614582062, "learning_rate": 9.99980794229669e-06, "loss": 0.4087, "step": 858 }, { "epoch": 0.3080325125508009, "grad_norm": 0.5317734479904175, "learning_rate": 9.999789215713434e-06, "loss": 0.3728, "step": 859 }, { "epoch": 0.3083911068611045, "grad_norm": 0.4557059407234192, "learning_rate": 9.999769618151339e-06, "loss": 0.3834, "step": 860 }, { "epoch": 0.3087497011714081, "grad_norm": 0.47982126474380493, "learning_rate": 9.999749149613822e-06, "loss": 0.4202, "step": 861 }, { "epoch": 0.3091082954817117, "grad_norm": 0.4835798740386963, "learning_rate": 9.99972781010445e-06, "loss": 0.3949, "step": 862 }, { "epoch": 0.3094668897920153, "grad_norm": 0.44796010851860046, "learning_rate": 9.999705599626935e-06, "loss": 0.3888, "step": 863 }, { "epoch": 0.3098254841023189, "grad_norm": 0.4704638719558716, "learning_rate": 9.999682518185153e-06, "loss": 0.4162, "step": 864 }, { "epoch": 0.3101840784126225, "grad_norm": 0.48783597350120544, "learning_rate": 9.99965856578312e-06, "loss": 0.3884, "step": 865 }, { "epoch": 0.3105426727229261, "grad_norm": 0.4220445454120636, "learning_rate": 9.999633742425009e-06, "loss": 0.3831, "step": 866 }, { "epoch": 0.31090126703322973, "grad_norm": 0.49152037501335144, "learning_rate": 9.999608048115146e-06, "loss": 0.3705, "step": 867 }, { "epoch": 0.31125986134353334, "grad_norm": 0.4928252398967743, "learning_rate": 9.999581482858006e-06, "loss": 0.3598, "step": 868 }, { "epoch": 0.31161845565383695, "grad_norm": 0.419975608587265, "learning_rate": 9.999554046658218e-06, "loss": 0.3709, "step": 869 }, { "epoch": 0.31197704996414055, "grad_norm": 0.46087533235549927, "learning_rate": 9.99952573952056e-06, "loss": 0.3721, "step": 870 }, { "epoch": 0.31233564427444416, "grad_norm": 0.42997559905052185, "learning_rate": 9.999496561449964e-06, "loss": 0.3744, "step": 871 }, { "epoch": 0.31269423858474776, "grad_norm": 0.49220365285873413, "learning_rate": 9.999466512451511e-06, "loss": 0.3678, "step": 872 }, { "epoch": 0.3130528328950514, "grad_norm": 0.4728860557079315, "learning_rate": 9.99943559253044e-06, "loss": 0.3826, "step": 873 }, { "epoch": 0.31341142720535503, "grad_norm": 0.42297494411468506, "learning_rate": 9.999403801692132e-06, "loss": 0.379, "step": 874 }, { "epoch": 0.31377002151565864, "grad_norm": 0.45019567012786865, "learning_rate": 9.999371139942128e-06, "loss": 0.411, "step": 875 }, { "epoch": 0.31412861582596224, "grad_norm": 0.4667013883590698, "learning_rate": 9.99933760728612e-06, "loss": 0.3622, "step": 876 }, { "epoch": 0.31448721013626585, "grad_norm": 0.5045577883720398, "learning_rate": 9.999303203729946e-06, "loss": 0.3834, "step": 877 }, { "epoch": 0.31484580444656945, "grad_norm": 0.4845924973487854, "learning_rate": 9.9992679292796e-06, "loss": 0.4045, "step": 878 }, { "epoch": 0.31520439875687306, "grad_norm": 0.5124425292015076, "learning_rate": 9.999231783941226e-06, "loss": 0.3838, "step": 879 }, { "epoch": 0.31556299306717667, "grad_norm": 0.5290742516517639, "learning_rate": 9.999194767721123e-06, "loss": 0.3841, "step": 880 }, { "epoch": 0.3159215873774803, "grad_norm": 0.43215256929397583, "learning_rate": 9.999156880625737e-06, "loss": 0.3646, "step": 881 }, { "epoch": 0.3162801816877839, "grad_norm": 0.5009775757789612, "learning_rate": 9.99911812266167e-06, "loss": 0.3819, "step": 882 }, { "epoch": 0.3166387759980875, "grad_norm": 0.5509739518165588, "learning_rate": 9.999078493835674e-06, "loss": 0.3563, "step": 883 }, { "epoch": 0.3169973703083911, "grad_norm": 0.41038450598716736, "learning_rate": 9.99903799415465e-06, "loss": 0.3748, "step": 884 }, { "epoch": 0.3173559646186947, "grad_norm": 0.53842693567276, "learning_rate": 9.998996623625654e-06, "loss": 0.4054, "step": 885 }, { "epoch": 0.3177145589289983, "grad_norm": 0.5194488763809204, "learning_rate": 9.998954382255894e-06, "loss": 0.4164, "step": 886 }, { "epoch": 0.3180731532393019, "grad_norm": 0.4532516598701477, "learning_rate": 9.998911270052728e-06, "loss": 0.3872, "step": 887 }, { "epoch": 0.31843174754960557, "grad_norm": 0.5295155644416809, "learning_rate": 9.998867287023667e-06, "loss": 0.4027, "step": 888 }, { "epoch": 0.3187903418599092, "grad_norm": 0.5318670868873596, "learning_rate": 9.998822433176371e-06, "loss": 0.3762, "step": 889 }, { "epoch": 0.3191489361702128, "grad_norm": 0.4868699908256531, "learning_rate": 9.998776708518657e-06, "loss": 0.387, "step": 890 }, { "epoch": 0.3195075304805164, "grad_norm": 0.5342751145362854, "learning_rate": 9.998730113058485e-06, "loss": 0.3815, "step": 891 }, { "epoch": 0.31986612479082, "grad_norm": 0.4541134834289551, "learning_rate": 9.998682646803978e-06, "loss": 0.3759, "step": 892 }, { "epoch": 0.3202247191011236, "grad_norm": 0.4407695233821869, "learning_rate": 9.9986343097634e-06, "loss": 0.3896, "step": 893 }, { "epoch": 0.3205833134114272, "grad_norm": 0.4407765567302704, "learning_rate": 9.998585101945176e-06, "loss": 0.3787, "step": 894 }, { "epoch": 0.3209419077217308, "grad_norm": 0.5572770833969116, "learning_rate": 9.998535023357874e-06, "loss": 0.3942, "step": 895 }, { "epoch": 0.3213005020320344, "grad_norm": 0.4095647931098938, "learning_rate": 9.99848407401022e-06, "loss": 0.3844, "step": 896 }, { "epoch": 0.321659096342338, "grad_norm": 0.5304213166236877, "learning_rate": 9.99843225391109e-06, "loss": 0.399, "step": 897 }, { "epoch": 0.32201769065264163, "grad_norm": 0.5048721432685852, "learning_rate": 9.998379563069509e-06, "loss": 0.4063, "step": 898 }, { "epoch": 0.32237628496294524, "grad_norm": 0.4551934599876404, "learning_rate": 9.998326001494657e-06, "loss": 0.3906, "step": 899 }, { "epoch": 0.32273487927324884, "grad_norm": 0.41378071904182434, "learning_rate": 9.998271569195867e-06, "loss": 0.3756, "step": 900 }, { "epoch": 0.32309347358355245, "grad_norm": 0.488162100315094, "learning_rate": 9.998216266182618e-06, "loss": 0.3746, "step": 901 }, { "epoch": 0.3234520678938561, "grad_norm": 0.4850342869758606, "learning_rate": 9.998160092464547e-06, "loss": 0.365, "step": 902 }, { "epoch": 0.3238106622041597, "grad_norm": 0.4402473270893097, "learning_rate": 9.998103048051435e-06, "loss": 0.3871, "step": 903 }, { "epoch": 0.3241692565144633, "grad_norm": 0.4778062105178833, "learning_rate": 9.998045132953222e-06, "loss": 0.3787, "step": 904 }, { "epoch": 0.32452785082476693, "grad_norm": 0.445992112159729, "learning_rate": 9.997986347179997e-06, "loss": 0.3933, "step": 905 }, { "epoch": 0.32488644513507053, "grad_norm": 0.45843127369880676, "learning_rate": 9.997926690742003e-06, "loss": 0.3913, "step": 906 }, { "epoch": 0.32524503944537414, "grad_norm": 0.4881919026374817, "learning_rate": 9.997866163649628e-06, "loss": 0.3618, "step": 907 }, { "epoch": 0.32560363375567775, "grad_norm": 0.5140910148620605, "learning_rate": 9.997804765913419e-06, "loss": 0.3912, "step": 908 }, { "epoch": 0.32596222806598135, "grad_norm": 0.4937053322792053, "learning_rate": 9.99774249754407e-06, "loss": 0.3612, "step": 909 }, { "epoch": 0.32632082237628496, "grad_norm": 0.5481410026550293, "learning_rate": 9.99767935855243e-06, "loss": 0.4068, "step": 910 }, { "epoch": 0.32667941668658856, "grad_norm": 0.5239419341087341, "learning_rate": 9.997615348949497e-06, "loss": 0.3935, "step": 911 }, { "epoch": 0.32703801099689217, "grad_norm": 0.5032150149345398, "learning_rate": 9.99755046874642e-06, "loss": 0.351, "step": 912 }, { "epoch": 0.3273966053071958, "grad_norm": 0.6174289584159851, "learning_rate": 9.997484717954506e-06, "loss": 0.3718, "step": 913 }, { "epoch": 0.3277551996174994, "grad_norm": 0.5043581128120422, "learning_rate": 9.997418096585203e-06, "loss": 0.407, "step": 914 }, { "epoch": 0.328113793927803, "grad_norm": 0.4462878406047821, "learning_rate": 9.997350604650123e-06, "loss": 0.386, "step": 915 }, { "epoch": 0.32847238823810665, "grad_norm": 0.49653497338294983, "learning_rate": 9.997282242161016e-06, "loss": 0.3789, "step": 916 }, { "epoch": 0.32883098254841026, "grad_norm": 0.4964295029640198, "learning_rate": 9.997213009129797e-06, "loss": 0.3826, "step": 917 }, { "epoch": 0.32918957685871386, "grad_norm": 0.4635046422481537, "learning_rate": 9.997142905568527e-06, "loss": 0.3779, "step": 918 }, { "epoch": 0.32954817116901747, "grad_norm": 0.4857913851737976, "learning_rate": 9.997071931489413e-06, "loss": 0.3715, "step": 919 }, { "epoch": 0.3299067654793211, "grad_norm": 0.4820643961429596, "learning_rate": 9.997000086904822e-06, "loss": 0.3534, "step": 920 }, { "epoch": 0.3302653597896247, "grad_norm": 0.5319174528121948, "learning_rate": 9.99692737182727e-06, "loss": 0.3776, "step": 921 }, { "epoch": 0.3306239540999283, "grad_norm": 0.4493691623210907, "learning_rate": 9.996853786269423e-06, "loss": 0.3719, "step": 922 }, { "epoch": 0.3309825484102319, "grad_norm": 0.5131881237030029, "learning_rate": 9.9967793302441e-06, "loss": 0.3983, "step": 923 }, { "epoch": 0.3313411427205355, "grad_norm": 0.47991615533828735, "learning_rate": 9.996704003764273e-06, "loss": 0.3613, "step": 924 }, { "epoch": 0.3316997370308391, "grad_norm": 0.4297630190849304, "learning_rate": 9.996627806843062e-06, "loss": 0.364, "step": 925 }, { "epoch": 0.3320583313411427, "grad_norm": 0.5240213871002197, "learning_rate": 9.996550739493743e-06, "loss": 0.3922, "step": 926 }, { "epoch": 0.3324169256514463, "grad_norm": 0.49942511320114136, "learning_rate": 9.996472801729737e-06, "loss": 0.3876, "step": 927 }, { "epoch": 0.3327755199617499, "grad_norm": 0.45805495977401733, "learning_rate": 9.996393993564626e-06, "loss": 0.3942, "step": 928 }, { "epoch": 0.3331341142720535, "grad_norm": 0.4673439860343933, "learning_rate": 9.996314315012137e-06, "loss": 0.3781, "step": 929 }, { "epoch": 0.33349270858235713, "grad_norm": 0.4655293822288513, "learning_rate": 9.99623376608615e-06, "loss": 0.3675, "step": 930 }, { "epoch": 0.3338513028926608, "grad_norm": 0.46567055583000183, "learning_rate": 9.996152346800696e-06, "loss": 0.3793, "step": 931 }, { "epoch": 0.3342098972029644, "grad_norm": 0.4175040125846863, "learning_rate": 9.99607005716996e-06, "loss": 0.3611, "step": 932 }, { "epoch": 0.334568491513268, "grad_norm": 0.4869968891143799, "learning_rate": 9.995986897208276e-06, "loss": 0.3584, "step": 933 }, { "epoch": 0.3349270858235716, "grad_norm": 0.4401181936264038, "learning_rate": 9.99590286693013e-06, "loss": 0.3906, "step": 934 }, { "epoch": 0.3352856801338752, "grad_norm": 0.4535777270793915, "learning_rate": 9.995817966350164e-06, "loss": 0.4092, "step": 935 }, { "epoch": 0.3356442744441788, "grad_norm": 0.5068574547767639, "learning_rate": 9.995732195483164e-06, "loss": 0.3708, "step": 936 }, { "epoch": 0.33600286875448243, "grad_norm": 0.426138699054718, "learning_rate": 9.995645554344071e-06, "loss": 0.3604, "step": 937 }, { "epoch": 0.33636146306478604, "grad_norm": 0.48631176352500916, "learning_rate": 9.995558042947982e-06, "loss": 0.3987, "step": 938 }, { "epoch": 0.33672005737508964, "grad_norm": 0.5068015456199646, "learning_rate": 9.99546966131014e-06, "loss": 0.3713, "step": 939 }, { "epoch": 0.33707865168539325, "grad_norm": 0.47908729314804077, "learning_rate": 9.995380409445941e-06, "loss": 0.361, "step": 940 }, { "epoch": 0.33743724599569686, "grad_norm": 0.4678844213485718, "learning_rate": 9.995290287370933e-06, "loss": 0.4128, "step": 941 }, { "epoch": 0.33779584030600046, "grad_norm": 0.5017681121826172, "learning_rate": 9.995199295100817e-06, "loss": 0.4019, "step": 942 }, { "epoch": 0.33815443461630407, "grad_norm": 0.45364469289779663, "learning_rate": 9.99510743265144e-06, "loss": 0.3783, "step": 943 }, { "epoch": 0.3385130289266077, "grad_norm": 0.46694180369377136, "learning_rate": 9.99501470003881e-06, "loss": 0.3935, "step": 944 }, { "epoch": 0.33887162323691133, "grad_norm": 0.5166188478469849, "learning_rate": 9.994921097279078e-06, "loss": 0.369, "step": 945 }, { "epoch": 0.33923021754721494, "grad_norm": 0.4641836881637573, "learning_rate": 9.99482662438855e-06, "loss": 0.3904, "step": 946 }, { "epoch": 0.33958881185751855, "grad_norm": 0.47197046875953674, "learning_rate": 9.994731281383685e-06, "loss": 0.3659, "step": 947 }, { "epoch": 0.33994740616782215, "grad_norm": 0.47789815068244934, "learning_rate": 9.994635068281093e-06, "loss": 0.3624, "step": 948 }, { "epoch": 0.34030600047812576, "grad_norm": 0.41871917247772217, "learning_rate": 9.99453798509753e-06, "loss": 0.3832, "step": 949 }, { "epoch": 0.34066459478842936, "grad_norm": 0.45990052819252014, "learning_rate": 9.994440031849912e-06, "loss": 0.3781, "step": 950 }, { "epoch": 0.34102318909873297, "grad_norm": 0.46753111481666565, "learning_rate": 9.994341208555303e-06, "loss": 0.3875, "step": 951 }, { "epoch": 0.3413817834090366, "grad_norm": 0.44899338483810425, "learning_rate": 9.994241515230916e-06, "loss": 0.3535, "step": 952 }, { "epoch": 0.3417403777193402, "grad_norm": 0.4474872350692749, "learning_rate": 9.994140951894121e-06, "loss": 0.3999, "step": 953 }, { "epoch": 0.3420989720296438, "grad_norm": 0.4466594159603119, "learning_rate": 9.994039518562433e-06, "loss": 0.3823, "step": 954 }, { "epoch": 0.3424575663399474, "grad_norm": 0.418952614068985, "learning_rate": 9.993937215253522e-06, "loss": 0.3563, "step": 955 }, { "epoch": 0.342816160650251, "grad_norm": 0.47373148798942566, "learning_rate": 9.993834041985213e-06, "loss": 0.3663, "step": 956 }, { "epoch": 0.3431747549605546, "grad_norm": 0.44300681352615356, "learning_rate": 9.993729998775478e-06, "loss": 0.3761, "step": 957 }, { "epoch": 0.3435333492708582, "grad_norm": 0.43218109011650085, "learning_rate": 9.99362508564244e-06, "loss": 0.415, "step": 958 }, { "epoch": 0.3438919435811618, "grad_norm": 0.5012217164039612, "learning_rate": 9.993519302604376e-06, "loss": 0.3659, "step": 959 }, { "epoch": 0.3442505378914655, "grad_norm": 0.4995867609977722, "learning_rate": 9.993412649679713e-06, "loss": 0.3805, "step": 960 }, { "epoch": 0.3446091322017691, "grad_norm": 0.4466436803340912, "learning_rate": 9.993305126887031e-06, "loss": 0.3948, "step": 961 }, { "epoch": 0.3449677265120727, "grad_norm": 0.5009437799453735, "learning_rate": 9.99319673424506e-06, "loss": 0.371, "step": 962 }, { "epoch": 0.3453263208223763, "grad_norm": 0.46899327635765076, "learning_rate": 9.993087471772686e-06, "loss": 0.3793, "step": 963 }, { "epoch": 0.3456849151326799, "grad_norm": 0.45319223403930664, "learning_rate": 9.992977339488938e-06, "loss": 0.392, "step": 964 }, { "epoch": 0.3460435094429835, "grad_norm": 0.507129430770874, "learning_rate": 9.992866337413005e-06, "loss": 0.39, "step": 965 }, { "epoch": 0.3464021037532871, "grad_norm": 0.46145614981651306, "learning_rate": 9.99275446556422e-06, "loss": 0.3771, "step": 966 }, { "epoch": 0.3467606980635907, "grad_norm": 0.4867977499961853, "learning_rate": 9.992641723962076e-06, "loss": 0.3761, "step": 967 }, { "epoch": 0.34711929237389433, "grad_norm": 0.535530149936676, "learning_rate": 9.99252811262621e-06, "loss": 0.3699, "step": 968 }, { "epoch": 0.34747788668419793, "grad_norm": 0.4253098666667938, "learning_rate": 9.992413631576414e-06, "loss": 0.3883, "step": 969 }, { "epoch": 0.34783648099450154, "grad_norm": 0.47741734981536865, "learning_rate": 9.99229828083263e-06, "loss": 0.3588, "step": 970 }, { "epoch": 0.34819507530480515, "grad_norm": 0.4719032645225525, "learning_rate": 9.992182060414955e-06, "loss": 0.3751, "step": 971 }, { "epoch": 0.34855366961510875, "grad_norm": 0.4230865240097046, "learning_rate": 9.992064970343633e-06, "loss": 0.4031, "step": 972 }, { "epoch": 0.34891226392541236, "grad_norm": 0.4650398790836334, "learning_rate": 9.991947010639063e-06, "loss": 0.3668, "step": 973 }, { "epoch": 0.349270858235716, "grad_norm": 0.4749981462955475, "learning_rate": 9.991828181321791e-06, "loss": 0.3688, "step": 974 }, { "epoch": 0.3496294525460196, "grad_norm": 0.4302942156791687, "learning_rate": 9.99170848241252e-06, "loss": 0.3729, "step": 975 }, { "epoch": 0.34998804685632323, "grad_norm": 0.492363840341568, "learning_rate": 9.991587913932101e-06, "loss": 0.3774, "step": 976 }, { "epoch": 0.35034664116662684, "grad_norm": 0.4757852554321289, "learning_rate": 9.991466475901539e-06, "loss": 0.3727, "step": 977 }, { "epoch": 0.35070523547693044, "grad_norm": 0.481538861989975, "learning_rate": 9.991344168341988e-06, "loss": 0.3822, "step": 978 }, { "epoch": 0.35106382978723405, "grad_norm": 0.4964795708656311, "learning_rate": 9.991220991274752e-06, "loss": 0.3748, "step": 979 }, { "epoch": 0.35142242409753766, "grad_norm": 0.5004386901855469, "learning_rate": 9.991096944721292e-06, "loss": 0.3744, "step": 980 }, { "epoch": 0.35178101840784126, "grad_norm": 0.4719811975955963, "learning_rate": 9.990972028703216e-06, "loss": 0.3768, "step": 981 }, { "epoch": 0.35213961271814487, "grad_norm": 0.43266743421554565, "learning_rate": 9.990846243242282e-06, "loss": 0.3636, "step": 982 }, { "epoch": 0.3524982070284485, "grad_norm": 0.42431342601776123, "learning_rate": 9.99071958836041e-06, "loss": 0.3534, "step": 983 }, { "epoch": 0.3528568013387521, "grad_norm": 0.38050082325935364, "learning_rate": 9.990592064079656e-06, "loss": 0.3636, "step": 984 }, { "epoch": 0.3532153956490557, "grad_norm": 0.4621942341327667, "learning_rate": 9.990463670422237e-06, "loss": 0.4018, "step": 985 }, { "epoch": 0.3535739899593593, "grad_norm": 0.4136214554309845, "learning_rate": 9.99033440741052e-06, "loss": 0.352, "step": 986 }, { "epoch": 0.3539325842696629, "grad_norm": 0.4603418707847595, "learning_rate": 9.990204275067027e-06, "loss": 0.4026, "step": 987 }, { "epoch": 0.3542911785799665, "grad_norm": 0.4832649827003479, "learning_rate": 9.99007327341442e-06, "loss": 0.3781, "step": 988 }, { "epoch": 0.35464977289027017, "grad_norm": 0.4210643470287323, "learning_rate": 9.989941402475524e-06, "loss": 0.3686, "step": 989 }, { "epoch": 0.35500836720057377, "grad_norm": 0.4237230718135834, "learning_rate": 9.989808662273314e-06, "loss": 0.3834, "step": 990 }, { "epoch": 0.3553669615108774, "grad_norm": 0.4624187648296356, "learning_rate": 9.989675052830909e-06, "loss": 0.371, "step": 991 }, { "epoch": 0.355725555821181, "grad_norm": 0.40104061365127563, "learning_rate": 9.989540574171586e-06, "loss": 0.3639, "step": 992 }, { "epoch": 0.3560841501314846, "grad_norm": 0.43916448950767517, "learning_rate": 9.989405226318772e-06, "loss": 0.3735, "step": 993 }, { "epoch": 0.3564427444417882, "grad_norm": 0.48060745000839233, "learning_rate": 9.989269009296046e-06, "loss": 0.3713, "step": 994 }, { "epoch": 0.3568013387520918, "grad_norm": 0.4011283814907074, "learning_rate": 9.989131923127133e-06, "loss": 0.3796, "step": 995 }, { "epoch": 0.3571599330623954, "grad_norm": 0.4489310085773468, "learning_rate": 9.98899396783592e-06, "loss": 0.3828, "step": 996 }, { "epoch": 0.357518527372699, "grad_norm": 0.48968490958213806, "learning_rate": 9.988855143446434e-06, "loss": 0.3922, "step": 997 }, { "epoch": 0.3578771216830026, "grad_norm": 0.46876537799835205, "learning_rate": 9.988715449982863e-06, "loss": 0.3553, "step": 998 }, { "epoch": 0.3582357159933062, "grad_norm": 0.4295397996902466, "learning_rate": 9.988574887469538e-06, "loss": 0.4043, "step": 999 }, { "epoch": 0.35859431030360983, "grad_norm": 0.5537243485450745, "learning_rate": 9.988433455930947e-06, "loss": 0.3721, "step": 1000 }, { "epoch": 0.35895290461391344, "grad_norm": 0.4709452688694, "learning_rate": 9.988291155391729e-06, "loss": 0.3512, "step": 1001 }, { "epoch": 0.35931149892421704, "grad_norm": 0.47161757946014404, "learning_rate": 9.98814798587667e-06, "loss": 0.3836, "step": 1002 }, { "epoch": 0.3596700932345207, "grad_norm": 0.4798890948295593, "learning_rate": 9.988003947410715e-06, "loss": 0.3648, "step": 1003 }, { "epoch": 0.3600286875448243, "grad_norm": 0.569473922252655, "learning_rate": 9.987859040018955e-06, "loss": 0.3727, "step": 1004 }, { "epoch": 0.3603872818551279, "grad_norm": 0.46990519762039185, "learning_rate": 9.98771326372663e-06, "loss": 0.3745, "step": 1005 }, { "epoch": 0.3607458761654315, "grad_norm": 0.5066249370574951, "learning_rate": 9.987566618559138e-06, "loss": 0.3742, "step": 1006 }, { "epoch": 0.36110447047573513, "grad_norm": 0.519008457660675, "learning_rate": 9.987419104542023e-06, "loss": 0.3766, "step": 1007 }, { "epoch": 0.36146306478603873, "grad_norm": 0.4508252739906311, "learning_rate": 9.987270721700983e-06, "loss": 0.3683, "step": 1008 }, { "epoch": 0.36182165909634234, "grad_norm": 0.42654088139533997, "learning_rate": 9.98712147006187e-06, "loss": 0.3891, "step": 1009 }, { "epoch": 0.36218025340664595, "grad_norm": 0.4988425672054291, "learning_rate": 9.986971349650678e-06, "loss": 0.392, "step": 1010 }, { "epoch": 0.36253884771694955, "grad_norm": 0.5619475245475769, "learning_rate": 9.98682036049356e-06, "loss": 0.3995, "step": 1011 }, { "epoch": 0.36289744202725316, "grad_norm": 0.4391295313835144, "learning_rate": 9.986668502616825e-06, "loss": 0.3846, "step": 1012 }, { "epoch": 0.36325603633755676, "grad_norm": 0.4417448043823242, "learning_rate": 9.986515776046918e-06, "loss": 0.3707, "step": 1013 }, { "epoch": 0.36361463064786037, "grad_norm": 0.6563976407051086, "learning_rate": 9.986362180810452e-06, "loss": 0.3945, "step": 1014 }, { "epoch": 0.363973224958164, "grad_norm": 0.4956895112991333, "learning_rate": 9.98620771693418e-06, "loss": 0.3821, "step": 1015 }, { "epoch": 0.3643318192684676, "grad_norm": 0.5643879175186157, "learning_rate": 9.98605238444501e-06, "loss": 0.4025, "step": 1016 }, { "epoch": 0.36469041357877124, "grad_norm": 0.4877167046070099, "learning_rate": 9.985896183370003e-06, "loss": 0.367, "step": 1017 }, { "epoch": 0.36504900788907485, "grad_norm": 0.563888430595398, "learning_rate": 9.985739113736369e-06, "loss": 0.373, "step": 1018 }, { "epoch": 0.36540760219937846, "grad_norm": 0.4957871735095978, "learning_rate": 9.985581175571467e-06, "loss": 0.3706, "step": 1019 }, { "epoch": 0.36576619650968206, "grad_norm": 0.4788435101509094, "learning_rate": 9.985422368902816e-06, "loss": 0.3637, "step": 1020 }, { "epoch": 0.36612479081998567, "grad_norm": 0.5648254752159119, "learning_rate": 9.985262693758078e-06, "loss": 0.372, "step": 1021 }, { "epoch": 0.3664833851302893, "grad_norm": 0.4753447473049164, "learning_rate": 9.985102150165066e-06, "loss": 0.3922, "step": 1022 }, { "epoch": 0.3668419794405929, "grad_norm": 0.522750735282898, "learning_rate": 9.984940738151754e-06, "loss": 0.3768, "step": 1023 }, { "epoch": 0.3672005737508965, "grad_norm": 0.5114741325378418, "learning_rate": 9.984778457746254e-06, "loss": 0.3886, "step": 1024 }, { "epoch": 0.3675591680612001, "grad_norm": 0.5067566633224487, "learning_rate": 9.984615308976837e-06, "loss": 0.3653, "step": 1025 }, { "epoch": 0.3679177623715037, "grad_norm": 0.537928581237793, "learning_rate": 9.984451291871926e-06, "loss": 0.3781, "step": 1026 }, { "epoch": 0.3682763566818073, "grad_norm": 0.4866792559623718, "learning_rate": 9.984286406460093e-06, "loss": 0.3749, "step": 1027 }, { "epoch": 0.3686349509921109, "grad_norm": 0.5020841956138611, "learning_rate": 9.984120652770062e-06, "loss": 0.3821, "step": 1028 }, { "epoch": 0.3689935453024145, "grad_norm": 0.42982080578804016, "learning_rate": 9.983954030830705e-06, "loss": 0.3667, "step": 1029 }, { "epoch": 0.3693521396127181, "grad_norm": 0.509392261505127, "learning_rate": 9.983786540671052e-06, "loss": 0.3899, "step": 1030 }, { "epoch": 0.36971073392302173, "grad_norm": 0.4558204114437103, "learning_rate": 9.983618182320276e-06, "loss": 0.3709, "step": 1031 }, { "epoch": 0.3700693282333254, "grad_norm": 0.42982032895088196, "learning_rate": 9.983448955807708e-06, "loss": 0.3694, "step": 1032 }, { "epoch": 0.370427922543629, "grad_norm": 0.5066699981689453, "learning_rate": 9.983278861162829e-06, "loss": 0.3748, "step": 1033 }, { "epoch": 0.3707865168539326, "grad_norm": 0.44951483607292175, "learning_rate": 9.983107898415267e-06, "loss": 0.384, "step": 1034 }, { "epoch": 0.3711451111642362, "grad_norm": 0.48062780499458313, "learning_rate": 9.982936067594805e-06, "loss": 0.372, "step": 1035 }, { "epoch": 0.3715037054745398, "grad_norm": 0.45979130268096924, "learning_rate": 9.982763368731378e-06, "loss": 0.3989, "step": 1036 }, { "epoch": 0.3718622997848434, "grad_norm": 0.4161667227745056, "learning_rate": 9.982589801855071e-06, "loss": 0.3807, "step": 1037 }, { "epoch": 0.372220894095147, "grad_norm": 0.45196184515953064, "learning_rate": 9.982415366996119e-06, "loss": 0.3599, "step": 1038 }, { "epoch": 0.37257948840545063, "grad_norm": 0.4778638780117035, "learning_rate": 9.982240064184908e-06, "loss": 0.3769, "step": 1039 }, { "epoch": 0.37293808271575424, "grad_norm": 0.4208891689777374, "learning_rate": 9.982063893451977e-06, "loss": 0.3745, "step": 1040 }, { "epoch": 0.37329667702605784, "grad_norm": 0.44857996702194214, "learning_rate": 9.981886854828015e-06, "loss": 0.3864, "step": 1041 }, { "epoch": 0.37365527133636145, "grad_norm": 0.49721720814704895, "learning_rate": 9.981708948343864e-06, "loss": 0.3692, "step": 1042 }, { "epoch": 0.37401386564666506, "grad_norm": 0.4034333825111389, "learning_rate": 9.981530174030517e-06, "loss": 0.3722, "step": 1043 }, { "epoch": 0.37437245995696866, "grad_norm": 0.4958404302597046, "learning_rate": 9.981350531919112e-06, "loss": 0.3748, "step": 1044 }, { "epoch": 0.37473105426727227, "grad_norm": 0.5483433604240417, "learning_rate": 9.981170022040949e-06, "loss": 0.3744, "step": 1045 }, { "epoch": 0.37508964857757593, "grad_norm": 0.39734476804733276, "learning_rate": 9.980988644427471e-06, "loss": 0.3539, "step": 1046 }, { "epoch": 0.37544824288787954, "grad_norm": 0.5372884273529053, "learning_rate": 9.980806399110272e-06, "loss": 0.3778, "step": 1047 }, { "epoch": 0.37580683719818314, "grad_norm": 0.45349380373954773, "learning_rate": 9.980623286121104e-06, "loss": 0.3727, "step": 1048 }, { "epoch": 0.37616543150848675, "grad_norm": 0.5004265904426575, "learning_rate": 9.980439305491865e-06, "loss": 0.3809, "step": 1049 }, { "epoch": 0.37652402581879035, "grad_norm": 0.46904677152633667, "learning_rate": 9.980254457254601e-06, "loss": 0.3928, "step": 1050 }, { "epoch": 0.37688262012909396, "grad_norm": 0.4241349399089813, "learning_rate": 9.980068741441517e-06, "loss": 0.3866, "step": 1051 }, { "epoch": 0.37724121443939757, "grad_norm": 0.4256013035774231, "learning_rate": 9.979882158084966e-06, "loss": 0.3642, "step": 1052 }, { "epoch": 0.37759980874970117, "grad_norm": 0.5750049948692322, "learning_rate": 9.97969470721745e-06, "loss": 0.3985, "step": 1053 }, { "epoch": 0.3779584030600048, "grad_norm": 0.5213136076927185, "learning_rate": 9.97950638887162e-06, "loss": 0.3957, "step": 1054 }, { "epoch": 0.3783169973703084, "grad_norm": 0.4807332158088684, "learning_rate": 9.979317203080287e-06, "loss": 0.3777, "step": 1055 }, { "epoch": 0.378675591680612, "grad_norm": 0.4802398979663849, "learning_rate": 9.979127149876406e-06, "loss": 0.3725, "step": 1056 }, { "epoch": 0.3790341859909156, "grad_norm": 0.4262247383594513, "learning_rate": 9.978936229293083e-06, "loss": 0.3717, "step": 1057 }, { "epoch": 0.3793927803012192, "grad_norm": 0.4459189474582672, "learning_rate": 9.978744441363582e-06, "loss": 0.3682, "step": 1058 }, { "epoch": 0.3797513746115228, "grad_norm": 0.4279021620750427, "learning_rate": 9.978551786121306e-06, "loss": 0.3651, "step": 1059 }, { "epoch": 0.3801099689218264, "grad_norm": 0.501977264881134, "learning_rate": 9.97835826359982e-06, "loss": 0.3987, "step": 1060 }, { "epoch": 0.3804685632321301, "grad_norm": 0.4529384672641754, "learning_rate": 9.978163873832839e-06, "loss": 0.3652, "step": 1061 }, { "epoch": 0.3808271575424337, "grad_norm": 0.45688191056251526, "learning_rate": 9.97796861685422e-06, "loss": 0.4008, "step": 1062 }, { "epoch": 0.3811857518527373, "grad_norm": 0.44001123309135437, "learning_rate": 9.97777249269798e-06, "loss": 0.3813, "step": 1063 }, { "epoch": 0.3815443461630409, "grad_norm": 0.48194852471351624, "learning_rate": 9.97757550139829e-06, "loss": 0.3642, "step": 1064 }, { "epoch": 0.3819029404733445, "grad_norm": 0.4235965311527252, "learning_rate": 9.977377642989457e-06, "loss": 0.4038, "step": 1065 }, { "epoch": 0.3822615347836481, "grad_norm": 0.41385623812675476, "learning_rate": 9.977178917505954e-06, "loss": 0.3903, "step": 1066 }, { "epoch": 0.3826201290939517, "grad_norm": 0.3866206407546997, "learning_rate": 9.976979324982398e-06, "loss": 0.3571, "step": 1067 }, { "epoch": 0.3829787234042553, "grad_norm": 0.4352245628833771, "learning_rate": 9.976778865453561e-06, "loss": 0.3558, "step": 1068 }, { "epoch": 0.3833373177145589, "grad_norm": 0.43549349904060364, "learning_rate": 9.976577538954361e-06, "loss": 0.377, "step": 1069 }, { "epoch": 0.38369591202486253, "grad_norm": 0.4354706108570099, "learning_rate": 9.976375345519872e-06, "loss": 0.3693, "step": 1070 }, { "epoch": 0.38405450633516613, "grad_norm": 0.4380011558532715, "learning_rate": 9.976172285185315e-06, "loss": 0.3852, "step": 1071 }, { "epoch": 0.38441310064546974, "grad_norm": 0.4150690734386444, "learning_rate": 9.975968357986063e-06, "loss": 0.372, "step": 1072 }, { "epoch": 0.38477169495577335, "grad_norm": 0.4085177481174469, "learning_rate": 9.975763563957644e-06, "loss": 0.3994, "step": 1073 }, { "epoch": 0.38513028926607695, "grad_norm": 0.42620769143104553, "learning_rate": 9.975557903135728e-06, "loss": 0.3898, "step": 1074 }, { "epoch": 0.3854888835763806, "grad_norm": 0.39463135600090027, "learning_rate": 9.975351375556149e-06, "loss": 0.3505, "step": 1075 }, { "epoch": 0.3858474778866842, "grad_norm": 0.4326515197753906, "learning_rate": 9.97514398125488e-06, "loss": 0.3838, "step": 1076 }, { "epoch": 0.3862060721969878, "grad_norm": 0.43396952748298645, "learning_rate": 9.97493572026805e-06, "loss": 0.3683, "step": 1077 }, { "epoch": 0.38656466650729143, "grad_norm": 0.43469464778900146, "learning_rate": 9.97472659263194e-06, "loss": 0.3715, "step": 1078 }, { "epoch": 0.38692326081759504, "grad_norm": 0.4355434775352478, "learning_rate": 9.974516598382981e-06, "loss": 0.3781, "step": 1079 }, { "epoch": 0.38728185512789864, "grad_norm": 0.44275861978530884, "learning_rate": 9.974305737557755e-06, "loss": 0.3783, "step": 1080 }, { "epoch": 0.38764044943820225, "grad_norm": 0.46009716391563416, "learning_rate": 9.974094010192991e-06, "loss": 0.3607, "step": 1081 }, { "epoch": 0.38799904374850586, "grad_norm": 0.45369952917099, "learning_rate": 9.973881416325576e-06, "loss": 0.3642, "step": 1082 }, { "epoch": 0.38835763805880946, "grad_norm": 0.5057269930839539, "learning_rate": 9.973667955992544e-06, "loss": 0.3698, "step": 1083 }, { "epoch": 0.38871623236911307, "grad_norm": 0.4848526120185852, "learning_rate": 9.97345362923108e-06, "loss": 0.3724, "step": 1084 }, { "epoch": 0.3890748266794167, "grad_norm": 0.39461958408355713, "learning_rate": 9.973238436078521e-06, "loss": 0.3698, "step": 1085 }, { "epoch": 0.3894334209897203, "grad_norm": 0.4810941517353058, "learning_rate": 9.973022376572354e-06, "loss": 0.3803, "step": 1086 }, { "epoch": 0.3897920153000239, "grad_norm": 0.5022849440574646, "learning_rate": 9.972805450750215e-06, "loss": 0.3722, "step": 1087 }, { "epoch": 0.3901506096103275, "grad_norm": 0.4065227806568146, "learning_rate": 9.972587658649897e-06, "loss": 0.3649, "step": 1088 }, { "epoch": 0.3905092039206311, "grad_norm": 0.4605667293071747, "learning_rate": 9.972369000309338e-06, "loss": 0.3928, "step": 1089 }, { "epoch": 0.39086779823093476, "grad_norm": 0.5148940086364746, "learning_rate": 9.97214947576663e-06, "loss": 0.38, "step": 1090 }, { "epoch": 0.39122639254123837, "grad_norm": 0.45212164521217346, "learning_rate": 9.971929085060012e-06, "loss": 0.3825, "step": 1091 }, { "epoch": 0.39158498685154197, "grad_norm": 0.4404318928718567, "learning_rate": 9.971707828227881e-06, "loss": 0.3546, "step": 1092 }, { "epoch": 0.3919435811618456, "grad_norm": 0.4827144742012024, "learning_rate": 9.971485705308779e-06, "loss": 0.3603, "step": 1093 }, { "epoch": 0.3923021754721492, "grad_norm": 0.45629289746284485, "learning_rate": 9.971262716341396e-06, "loss": 0.3748, "step": 1094 }, { "epoch": 0.3926607697824528, "grad_norm": 0.4107043147087097, "learning_rate": 9.971038861364584e-06, "loss": 0.3558, "step": 1095 }, { "epoch": 0.3930193640927564, "grad_norm": 0.45441579818725586, "learning_rate": 9.970814140417337e-06, "loss": 0.3765, "step": 1096 }, { "epoch": 0.39337795840306, "grad_norm": 0.469310462474823, "learning_rate": 9.970588553538802e-06, "loss": 0.3875, "step": 1097 }, { "epoch": 0.3937365527133636, "grad_norm": 0.47191122174263, "learning_rate": 9.970362100768275e-06, "loss": 0.3865, "step": 1098 }, { "epoch": 0.3940951470236672, "grad_norm": 0.45105934143066406, "learning_rate": 9.970134782145207e-06, "loss": 0.3751, "step": 1099 }, { "epoch": 0.3944537413339708, "grad_norm": 0.41319647431373596, "learning_rate": 9.969906597709196e-06, "loss": 0.3696, "step": 1100 }, { "epoch": 0.3948123356442744, "grad_norm": 0.42072996497154236, "learning_rate": 9.969677547499995e-06, "loss": 0.343, "step": 1101 }, { "epoch": 0.39517092995457803, "grad_norm": 0.4425003230571747, "learning_rate": 9.969447631557503e-06, "loss": 0.3794, "step": 1102 }, { "epoch": 0.39552952426488164, "grad_norm": 0.4165128767490387, "learning_rate": 9.969216849921772e-06, "loss": 0.3622, "step": 1103 }, { "epoch": 0.3958881185751853, "grad_norm": 0.4632200002670288, "learning_rate": 9.968985202633008e-06, "loss": 0.3678, "step": 1104 }, { "epoch": 0.3962467128854889, "grad_norm": 0.44251352548599243, "learning_rate": 9.96875268973156e-06, "loss": 0.358, "step": 1105 }, { "epoch": 0.3966053071957925, "grad_norm": 0.3925987184047699, "learning_rate": 9.968519311257937e-06, "loss": 0.3628, "step": 1106 }, { "epoch": 0.3969639015060961, "grad_norm": 0.5545730590820312, "learning_rate": 9.96828506725279e-06, "loss": 0.4102, "step": 1107 }, { "epoch": 0.3973224958163997, "grad_norm": 0.39753562211990356, "learning_rate": 9.968049957756927e-06, "loss": 0.3552, "step": 1108 }, { "epoch": 0.39768109012670333, "grad_norm": 0.44371479749679565, "learning_rate": 9.967813982811306e-06, "loss": 0.3703, "step": 1109 }, { "epoch": 0.39803968443700694, "grad_norm": 0.45592936873435974, "learning_rate": 9.967577142457031e-06, "loss": 0.3744, "step": 1110 }, { "epoch": 0.39839827874731054, "grad_norm": 0.4495241940021515, "learning_rate": 9.967339436735367e-06, "loss": 0.3569, "step": 1111 }, { "epoch": 0.39875687305761415, "grad_norm": 0.4726768434047699, "learning_rate": 9.967100865687716e-06, "loss": 0.3934, "step": 1112 }, { "epoch": 0.39911546736791775, "grad_norm": 0.47118619084358215, "learning_rate": 9.96686142935564e-06, "loss": 0.3827, "step": 1113 }, { "epoch": 0.39947406167822136, "grad_norm": 0.5324505567550659, "learning_rate": 9.966621127780854e-06, "loss": 0.3867, "step": 1114 }, { "epoch": 0.39983265598852497, "grad_norm": 0.4683953821659088, "learning_rate": 9.966379961005212e-06, "loss": 0.3587, "step": 1115 }, { "epoch": 0.40019125029882857, "grad_norm": 0.5181872248649597, "learning_rate": 9.96613792907073e-06, "loss": 0.3643, "step": 1116 }, { "epoch": 0.4005498446091322, "grad_norm": 0.47643741965293884, "learning_rate": 9.96589503201957e-06, "loss": 0.3679, "step": 1117 }, { "epoch": 0.40090843891943584, "grad_norm": 0.447622686624527, "learning_rate": 9.965651269894048e-06, "loss": 0.369, "step": 1118 }, { "epoch": 0.40126703322973944, "grad_norm": 0.565997302532196, "learning_rate": 9.965406642736624e-06, "loss": 0.3573, "step": 1119 }, { "epoch": 0.40162562754004305, "grad_norm": 0.43090108036994934, "learning_rate": 9.965161150589914e-06, "loss": 0.3836, "step": 1120 }, { "epoch": 0.40198422185034666, "grad_norm": 0.4767793118953705, "learning_rate": 9.964914793496686e-06, "loss": 0.3739, "step": 1121 }, { "epoch": 0.40234281616065026, "grad_norm": 0.4854324460029602, "learning_rate": 9.964667571499851e-06, "loss": 0.3866, "step": 1122 }, { "epoch": 0.40270141047095387, "grad_norm": 0.45661067962646484, "learning_rate": 9.964419484642482e-06, "loss": 0.3691, "step": 1123 }, { "epoch": 0.4030600047812575, "grad_norm": 0.4145902693271637, "learning_rate": 9.964170532967792e-06, "loss": 0.3639, "step": 1124 }, { "epoch": 0.4034185990915611, "grad_norm": 0.3910544812679291, "learning_rate": 9.96392071651915e-06, "loss": 0.3815, "step": 1125 }, { "epoch": 0.4037771934018647, "grad_norm": 0.45321959257125854, "learning_rate": 9.963670035340078e-06, "loss": 0.3551, "step": 1126 }, { "epoch": 0.4041357877121683, "grad_norm": 0.4187335669994354, "learning_rate": 9.963418489474243e-06, "loss": 0.3739, "step": 1127 }, { "epoch": 0.4044943820224719, "grad_norm": 0.49557945132255554, "learning_rate": 9.963166078965463e-06, "loss": 0.4001, "step": 1128 }, { "epoch": 0.4048529763327755, "grad_norm": 0.419308066368103, "learning_rate": 9.962912803857711e-06, "loss": 0.3592, "step": 1129 }, { "epoch": 0.4052115706430791, "grad_norm": 0.4820651113986969, "learning_rate": 9.962658664195111e-06, "loss": 0.3809, "step": 1130 }, { "epoch": 0.4055701649533827, "grad_norm": 0.5146684050559998, "learning_rate": 9.96240366002193e-06, "loss": 0.3833, "step": 1131 }, { "epoch": 0.4059287592636863, "grad_norm": 0.44925400614738464, "learning_rate": 9.962147791382592e-06, "loss": 0.3758, "step": 1132 }, { "epoch": 0.40628735357399, "grad_norm": 0.45327791571617126, "learning_rate": 9.961891058321672e-06, "loss": 0.35, "step": 1133 }, { "epoch": 0.4066459478842936, "grad_norm": 0.48434141278266907, "learning_rate": 9.96163346088389e-06, "loss": 0.3728, "step": 1134 }, { "epoch": 0.4070045421945972, "grad_norm": 0.4480656087398529, "learning_rate": 9.961374999114125e-06, "loss": 0.3795, "step": 1135 }, { "epoch": 0.4073631365049008, "grad_norm": 0.4313006103038788, "learning_rate": 9.961115673057397e-06, "loss": 0.3736, "step": 1136 }, { "epoch": 0.4077217308152044, "grad_norm": 0.49622321128845215, "learning_rate": 9.960855482758885e-06, "loss": 0.384, "step": 1137 }, { "epoch": 0.408080325125508, "grad_norm": 0.4254327118396759, "learning_rate": 9.960594428263915e-06, "loss": 0.3684, "step": 1138 }, { "epoch": 0.4084389194358116, "grad_norm": 0.4227578639984131, "learning_rate": 9.96033250961796e-06, "loss": 0.396, "step": 1139 }, { "epoch": 0.4087975137461152, "grad_norm": 0.4363792836666107, "learning_rate": 9.96006972686665e-06, "loss": 0.3921, "step": 1140 }, { "epoch": 0.40915610805641883, "grad_norm": 0.3853004574775696, "learning_rate": 9.959806080055763e-06, "loss": 0.3558, "step": 1141 }, { "epoch": 0.40951470236672244, "grad_norm": 0.4773699641227722, "learning_rate": 9.959541569231225e-06, "loss": 0.3843, "step": 1142 }, { "epoch": 0.40987329667702604, "grad_norm": 0.4574517607688904, "learning_rate": 9.959276194439116e-06, "loss": 0.3692, "step": 1143 }, { "epoch": 0.41023189098732965, "grad_norm": 0.4637545049190521, "learning_rate": 9.959009955725665e-06, "loss": 0.3763, "step": 1144 }, { "epoch": 0.41059048529763326, "grad_norm": 0.48912423849105835, "learning_rate": 9.958742853137251e-06, "loss": 0.3976, "step": 1145 }, { "epoch": 0.41094907960793686, "grad_norm": 0.4528949558734894, "learning_rate": 9.958474886720407e-06, "loss": 0.3546, "step": 1146 }, { "epoch": 0.4113076739182405, "grad_norm": 0.46691396832466125, "learning_rate": 9.958206056521808e-06, "loss": 0.3872, "step": 1147 }, { "epoch": 0.41166626822854413, "grad_norm": 0.4247022569179535, "learning_rate": 9.957936362588288e-06, "loss": 0.4008, "step": 1148 }, { "epoch": 0.41202486253884774, "grad_norm": 0.45573148131370544, "learning_rate": 9.95766580496683e-06, "loss": 0.3904, "step": 1149 }, { "epoch": 0.41238345684915134, "grad_norm": 0.39623597264289856, "learning_rate": 9.957394383704564e-06, "loss": 0.3477, "step": 1150 }, { "epoch": 0.41274205115945495, "grad_norm": 0.404106080532074, "learning_rate": 9.957122098848772e-06, "loss": 0.3559, "step": 1151 }, { "epoch": 0.41310064546975855, "grad_norm": 0.43082302808761597, "learning_rate": 9.956848950446889e-06, "loss": 0.371, "step": 1152 }, { "epoch": 0.41345923978006216, "grad_norm": 0.39885812997817993, "learning_rate": 9.956574938546496e-06, "loss": 0.376, "step": 1153 }, { "epoch": 0.41381783409036577, "grad_norm": 0.4376083016395569, "learning_rate": 9.95630006319533e-06, "loss": 0.3785, "step": 1154 }, { "epoch": 0.41417642840066937, "grad_norm": 0.3825874626636505, "learning_rate": 9.956024324441274e-06, "loss": 0.3596, "step": 1155 }, { "epoch": 0.414535022710973, "grad_norm": 0.4038248360157013, "learning_rate": 9.955747722332359e-06, "loss": 0.3458, "step": 1156 }, { "epoch": 0.4148936170212766, "grad_norm": 0.3895895481109619, "learning_rate": 9.955470256916773e-06, "loss": 0.3628, "step": 1157 }, { "epoch": 0.4152522113315802, "grad_norm": 0.42631465196609497, "learning_rate": 9.95519192824285e-06, "loss": 0.3701, "step": 1158 }, { "epoch": 0.4156108056418838, "grad_norm": 0.4032100737094879, "learning_rate": 9.95491273635908e-06, "loss": 0.3731, "step": 1159 }, { "epoch": 0.4159693999521874, "grad_norm": 0.4091488718986511, "learning_rate": 9.954632681314092e-06, "loss": 0.3456, "step": 1160 }, { "epoch": 0.416327994262491, "grad_norm": 0.4085223078727722, "learning_rate": 9.954351763156679e-06, "loss": 0.3598, "step": 1161 }, { "epoch": 0.41668658857279467, "grad_norm": 0.41451552510261536, "learning_rate": 9.954069981935774e-06, "loss": 0.3869, "step": 1162 }, { "epoch": 0.4170451828830983, "grad_norm": 0.4514804184436798, "learning_rate": 9.953787337700464e-06, "loss": 0.3859, "step": 1163 }, { "epoch": 0.4174037771934019, "grad_norm": 0.4556461572647095, "learning_rate": 9.95350383049999e-06, "loss": 0.3947, "step": 1164 }, { "epoch": 0.4177623715037055, "grad_norm": 0.38606172800064087, "learning_rate": 9.953219460383734e-06, "loss": 0.3683, "step": 1165 }, { "epoch": 0.4181209658140091, "grad_norm": 0.46607089042663574, "learning_rate": 9.95293422740124e-06, "loss": 0.365, "step": 1166 }, { "epoch": 0.4184795601243127, "grad_norm": 0.4571899175643921, "learning_rate": 9.952648131602194e-06, "loss": 0.3743, "step": 1167 }, { "epoch": 0.4188381544346163, "grad_norm": 0.4284628927707672, "learning_rate": 9.952361173036435e-06, "loss": 0.3697, "step": 1168 }, { "epoch": 0.4191967487449199, "grad_norm": 0.4539240300655365, "learning_rate": 9.952073351753952e-06, "loss": 0.373, "step": 1169 }, { "epoch": 0.4195553430552235, "grad_norm": 0.46578124165534973, "learning_rate": 9.951784667804885e-06, "loss": 0.3617, "step": 1170 }, { "epoch": 0.4199139373655271, "grad_norm": 0.45951971411705017, "learning_rate": 9.951495121239521e-06, "loss": 0.3584, "step": 1171 }, { "epoch": 0.42027253167583073, "grad_norm": 0.41766372323036194, "learning_rate": 9.951204712108301e-06, "loss": 0.379, "step": 1172 }, { "epoch": 0.42063112598613434, "grad_norm": 0.47041985392570496, "learning_rate": 9.95091344046182e-06, "loss": 0.3848, "step": 1173 }, { "epoch": 0.42098972029643794, "grad_norm": 0.4919836223125458, "learning_rate": 9.950621306350812e-06, "loss": 0.3744, "step": 1174 }, { "epoch": 0.42134831460674155, "grad_norm": 0.41682201623916626, "learning_rate": 9.950328309826172e-06, "loss": 0.3664, "step": 1175 }, { "epoch": 0.4217069089170452, "grad_norm": 0.49943575263023376, "learning_rate": 9.950034450938938e-06, "loss": 0.3709, "step": 1176 }, { "epoch": 0.4220655032273488, "grad_norm": 0.4620751738548279, "learning_rate": 9.949739729740301e-06, "loss": 0.371, "step": 1177 }, { "epoch": 0.4224240975376524, "grad_norm": 0.46605271100997925, "learning_rate": 9.949444146281604e-06, "loss": 0.3869, "step": 1178 }, { "epoch": 0.422782691847956, "grad_norm": 0.40382876992225647, "learning_rate": 9.949147700614339e-06, "loss": 0.3824, "step": 1179 }, { "epoch": 0.42314128615825963, "grad_norm": 0.44636040925979614, "learning_rate": 9.948850392790147e-06, "loss": 0.3621, "step": 1180 }, { "epoch": 0.42349988046856324, "grad_norm": 0.46253278851509094, "learning_rate": 9.94855222286082e-06, "loss": 0.376, "step": 1181 }, { "epoch": 0.42385847477886684, "grad_norm": 0.40314167737960815, "learning_rate": 9.9482531908783e-06, "loss": 0.4077, "step": 1182 }, { "epoch": 0.42421706908917045, "grad_norm": 0.45167529582977295, "learning_rate": 9.947953296894677e-06, "loss": 0.3751, "step": 1183 }, { "epoch": 0.42457566339947406, "grad_norm": 0.4584077298641205, "learning_rate": 9.947652540962198e-06, "loss": 0.3756, "step": 1184 }, { "epoch": 0.42493425770977766, "grad_norm": 0.406084805727005, "learning_rate": 9.947350923133252e-06, "loss": 0.3629, "step": 1185 }, { "epoch": 0.42529285202008127, "grad_norm": 0.5483905076980591, "learning_rate": 9.947048443460384e-06, "loss": 0.3833, "step": 1186 }, { "epoch": 0.4256514463303849, "grad_norm": 0.45353490114212036, "learning_rate": 9.946745101996286e-06, "loss": 0.371, "step": 1187 }, { "epoch": 0.4260100406406885, "grad_norm": 0.44731655716896057, "learning_rate": 9.9464408987938e-06, "loss": 0.3706, "step": 1188 }, { "epoch": 0.4263686349509921, "grad_norm": 0.4879469871520996, "learning_rate": 9.94613583390592e-06, "loss": 0.3861, "step": 1189 }, { "epoch": 0.42672722926129575, "grad_norm": 0.5142694711685181, "learning_rate": 9.945829907385793e-06, "loss": 0.3858, "step": 1190 }, { "epoch": 0.42708582357159935, "grad_norm": 0.5091421604156494, "learning_rate": 9.945523119286706e-06, "loss": 0.4098, "step": 1191 }, { "epoch": 0.42744441788190296, "grad_norm": 0.5032779574394226, "learning_rate": 9.945215469662104e-06, "loss": 0.3909, "step": 1192 }, { "epoch": 0.42780301219220657, "grad_norm": 0.4513814151287079, "learning_rate": 9.944906958565584e-06, "loss": 0.3719, "step": 1193 }, { "epoch": 0.4281616065025102, "grad_norm": 0.5384894013404846, "learning_rate": 9.944597586050886e-06, "loss": 0.389, "step": 1194 }, { "epoch": 0.4285202008128138, "grad_norm": 0.48849791288375854, "learning_rate": 9.944287352171907e-06, "loss": 0.3826, "step": 1195 }, { "epoch": 0.4288787951231174, "grad_norm": 0.42993447184562683, "learning_rate": 9.943976256982686e-06, "loss": 0.3537, "step": 1196 }, { "epoch": 0.429237389433421, "grad_norm": 0.42335692048072815, "learning_rate": 9.943664300537421e-06, "loss": 0.3867, "step": 1197 }, { "epoch": 0.4295959837437246, "grad_norm": 0.5243844985961914, "learning_rate": 9.943351482890454e-06, "loss": 0.3979, "step": 1198 }, { "epoch": 0.4299545780540282, "grad_norm": 0.5162110924720764, "learning_rate": 9.943037804096279e-06, "loss": 0.3904, "step": 1199 }, { "epoch": 0.4303131723643318, "grad_norm": 0.4228641986846924, "learning_rate": 9.94272326420954e-06, "loss": 0.3682, "step": 1200 }, { "epoch": 0.4306717666746354, "grad_norm": 0.4546799659729004, "learning_rate": 9.94240786328503e-06, "loss": 0.3658, "step": 1201 }, { "epoch": 0.431030360984939, "grad_norm": 0.5063191056251526, "learning_rate": 9.942091601377693e-06, "loss": 0.3804, "step": 1202 }, { "epoch": 0.4313889552952426, "grad_norm": 0.45018333196640015, "learning_rate": 9.941774478542625e-06, "loss": 0.3633, "step": 1203 }, { "epoch": 0.43174754960554623, "grad_norm": 0.5452163815498352, "learning_rate": 9.941456494835066e-06, "loss": 0.364, "step": 1204 }, { "epoch": 0.4321061439158499, "grad_norm": 0.4825517237186432, "learning_rate": 9.941137650310414e-06, "loss": 0.342, "step": 1205 }, { "epoch": 0.4324647382261535, "grad_norm": 0.43904000520706177, "learning_rate": 9.94081794502421e-06, "loss": 0.3693, "step": 1206 }, { "epoch": 0.4328233325364571, "grad_norm": 0.5359645485877991, "learning_rate": 9.940497379032147e-06, "loss": 0.3903, "step": 1207 }, { "epoch": 0.4331819268467607, "grad_norm": 0.4793299734592438, "learning_rate": 9.940175952390072e-06, "loss": 0.3875, "step": 1208 }, { "epoch": 0.4335405211570643, "grad_norm": 0.5256228446960449, "learning_rate": 9.939853665153976e-06, "loss": 0.3624, "step": 1209 }, { "epoch": 0.4338991154673679, "grad_norm": 0.4640861451625824, "learning_rate": 9.93953051738e-06, "loss": 0.3831, "step": 1210 }, { "epoch": 0.43425770977767153, "grad_norm": 0.47120723128318787, "learning_rate": 9.939206509124445e-06, "loss": 0.3663, "step": 1211 }, { "epoch": 0.43461630408797514, "grad_norm": 0.47549498081207275, "learning_rate": 9.938881640443745e-06, "loss": 0.383, "step": 1212 }, { "epoch": 0.43497489839827874, "grad_norm": 0.45298007130622864, "learning_rate": 9.938555911394499e-06, "loss": 0.3719, "step": 1213 }, { "epoch": 0.43533349270858235, "grad_norm": 0.5348756909370422, "learning_rate": 9.938229322033448e-06, "loss": 0.3695, "step": 1214 }, { "epoch": 0.43569208701888595, "grad_norm": 0.5105109810829163, "learning_rate": 9.937901872417486e-06, "loss": 0.3472, "step": 1215 }, { "epoch": 0.43605068132918956, "grad_norm": 0.44322872161865234, "learning_rate": 9.937573562603655e-06, "loss": 0.3827, "step": 1216 }, { "epoch": 0.43640927563949317, "grad_norm": 0.4995171129703522, "learning_rate": 9.937244392649149e-06, "loss": 0.3828, "step": 1217 }, { "epoch": 0.43676786994979677, "grad_norm": 0.44661593437194824, "learning_rate": 9.936914362611306e-06, "loss": 0.3722, "step": 1218 }, { "epoch": 0.43712646426010043, "grad_norm": 0.502136766910553, "learning_rate": 9.936583472547622e-06, "loss": 0.3766, "step": 1219 }, { "epoch": 0.43748505857040404, "grad_norm": 0.4385530352592468, "learning_rate": 9.936251722515742e-06, "loss": 0.364, "step": 1220 }, { "epoch": 0.43784365288070765, "grad_norm": 0.4215499460697174, "learning_rate": 9.935919112573451e-06, "loss": 0.371, "step": 1221 }, { "epoch": 0.43820224719101125, "grad_norm": 0.46433648467063904, "learning_rate": 9.935585642778696e-06, "loss": 0.3665, "step": 1222 }, { "epoch": 0.43856084150131486, "grad_norm": 0.41540655493736267, "learning_rate": 9.935251313189564e-06, "loss": 0.3384, "step": 1223 }, { "epoch": 0.43891943581161846, "grad_norm": 0.4155145287513733, "learning_rate": 9.934916123864302e-06, "loss": 0.3789, "step": 1224 }, { "epoch": 0.43927803012192207, "grad_norm": 0.4440002143383026, "learning_rate": 9.934580074861295e-06, "loss": 0.3727, "step": 1225 }, { "epoch": 0.4396366244322257, "grad_norm": 0.49071204662323, "learning_rate": 9.93424316623909e-06, "loss": 0.3652, "step": 1226 }, { "epoch": 0.4399952187425293, "grad_norm": 0.4041130542755127, "learning_rate": 9.933905398056371e-06, "loss": 0.3696, "step": 1227 }, { "epoch": 0.4403538130528329, "grad_norm": 0.44333869218826294, "learning_rate": 9.933566770371983e-06, "loss": 0.358, "step": 1228 }, { "epoch": 0.4407124073631365, "grad_norm": 0.4534135162830353, "learning_rate": 9.933227283244913e-06, "loss": 0.3693, "step": 1229 }, { "epoch": 0.4410710016734401, "grad_norm": 0.46314653754234314, "learning_rate": 9.932886936734303e-06, "loss": 0.3932, "step": 1230 }, { "epoch": 0.4414295959837437, "grad_norm": 0.4089769124984741, "learning_rate": 9.932545730899443e-06, "loss": 0.362, "step": 1231 }, { "epoch": 0.4417881902940473, "grad_norm": 0.5635547041893005, "learning_rate": 9.932203665799768e-06, "loss": 0.3844, "step": 1232 }, { "epoch": 0.4421467846043509, "grad_norm": 0.4172903597354889, "learning_rate": 9.931860741494871e-06, "loss": 0.3528, "step": 1233 }, { "epoch": 0.4425053789146546, "grad_norm": 0.44729775190353394, "learning_rate": 9.931516958044489e-06, "loss": 0.3694, "step": 1234 }, { "epoch": 0.4428639732249582, "grad_norm": 0.46076610684394836, "learning_rate": 9.93117231550851e-06, "loss": 0.3652, "step": 1235 }, { "epoch": 0.4432225675352618, "grad_norm": 0.46297913789749146, "learning_rate": 9.93082681394697e-06, "loss": 0.3598, "step": 1236 }, { "epoch": 0.4435811618455654, "grad_norm": 0.47769635915756226, "learning_rate": 9.930480453420062e-06, "loss": 0.3816, "step": 1237 }, { "epoch": 0.443939756155869, "grad_norm": 0.39438197016716003, "learning_rate": 9.930133233988117e-06, "loss": 0.3698, "step": 1238 }, { "epoch": 0.4442983504661726, "grad_norm": 0.4597742259502411, "learning_rate": 9.929785155711624e-06, "loss": 0.3832, "step": 1239 }, { "epoch": 0.4446569447764762, "grad_norm": 0.4520537555217743, "learning_rate": 9.92943621865122e-06, "loss": 0.348, "step": 1240 }, { "epoch": 0.4450155390867798, "grad_norm": 0.38863545656204224, "learning_rate": 9.92908642286769e-06, "loss": 0.3558, "step": 1241 }, { "epoch": 0.4453741333970834, "grad_norm": 0.4267067015171051, "learning_rate": 9.92873576842197e-06, "loss": 0.405, "step": 1242 }, { "epoch": 0.44573272770738703, "grad_norm": 0.4399453401565552, "learning_rate": 9.928384255375146e-06, "loss": 0.3466, "step": 1243 }, { "epoch": 0.44609132201769064, "grad_norm": 0.45623600482940674, "learning_rate": 9.92803188378845e-06, "loss": 0.3709, "step": 1244 }, { "epoch": 0.44644991632799425, "grad_norm": 0.3936661183834076, "learning_rate": 9.927678653723269e-06, "loss": 0.366, "step": 1245 }, { "epoch": 0.44680851063829785, "grad_norm": 0.4289155900478363, "learning_rate": 9.927324565241135e-06, "loss": 0.3764, "step": 1246 }, { "epoch": 0.44716710494860146, "grad_norm": 0.4883957505226135, "learning_rate": 9.926969618403732e-06, "loss": 0.3605, "step": 1247 }, { "epoch": 0.4475256992589051, "grad_norm": 0.44394564628601074, "learning_rate": 9.926613813272894e-06, "loss": 0.3639, "step": 1248 }, { "epoch": 0.4478842935692087, "grad_norm": 0.3904780149459839, "learning_rate": 9.9262571499106e-06, "loss": 0.3674, "step": 1249 }, { "epoch": 0.44824288787951233, "grad_norm": 0.40086057782173157, "learning_rate": 9.925899628378986e-06, "loss": 0.3534, "step": 1250 }, { "epoch": 0.44860148218981594, "grad_norm": 0.4542239010334015, "learning_rate": 9.92554124874033e-06, "loss": 0.4106, "step": 1251 }, { "epoch": 0.44896007650011954, "grad_norm": 0.3847843110561371, "learning_rate": 9.925182011057063e-06, "loss": 0.3588, "step": 1252 }, { "epoch": 0.44931867081042315, "grad_norm": 0.46983757615089417, "learning_rate": 9.92482191539177e-06, "loss": 0.3816, "step": 1253 }, { "epoch": 0.44967726512072675, "grad_norm": 0.4191640615463257, "learning_rate": 9.924460961807176e-06, "loss": 0.3329, "step": 1254 }, { "epoch": 0.45003585943103036, "grad_norm": 0.42779913544654846, "learning_rate": 9.92409915036616e-06, "loss": 0.3643, "step": 1255 }, { "epoch": 0.45039445374133397, "grad_norm": 0.42559516429901123, "learning_rate": 9.923736481131754e-06, "loss": 0.3606, "step": 1256 }, { "epoch": 0.4507530480516376, "grad_norm": 0.4810108244419098, "learning_rate": 9.923372954167134e-06, "loss": 0.3601, "step": 1257 }, { "epoch": 0.4511116423619412, "grad_norm": 0.4323420822620392, "learning_rate": 9.923008569535628e-06, "loss": 0.3776, "step": 1258 }, { "epoch": 0.4514702366722448, "grad_norm": 0.4227525591850281, "learning_rate": 9.922643327300713e-06, "loss": 0.3747, "step": 1259 }, { "epoch": 0.4518288309825484, "grad_norm": 0.3994510769844055, "learning_rate": 9.922277227526015e-06, "loss": 0.3511, "step": 1260 }, { "epoch": 0.452187425292852, "grad_norm": 0.5129123330116272, "learning_rate": 9.92191027027531e-06, "loss": 0.3822, "step": 1261 }, { "epoch": 0.4525460196031556, "grad_norm": 0.4487043619155884, "learning_rate": 9.921542455612524e-06, "loss": 0.3888, "step": 1262 }, { "epoch": 0.45290461391345926, "grad_norm": 0.44513583183288574, "learning_rate": 9.921173783601728e-06, "loss": 0.3901, "step": 1263 }, { "epoch": 0.45326320822376287, "grad_norm": 0.4423873722553253, "learning_rate": 9.920804254307149e-06, "loss": 0.3709, "step": 1264 }, { "epoch": 0.4536218025340665, "grad_norm": 0.4496631622314453, "learning_rate": 9.92043386779316e-06, "loss": 0.3752, "step": 1265 }, { "epoch": 0.4539803968443701, "grad_norm": 0.47561442852020264, "learning_rate": 9.920062624124282e-06, "loss": 0.3749, "step": 1266 }, { "epoch": 0.4543389911546737, "grad_norm": 0.43072310090065, "learning_rate": 9.919690523365189e-06, "loss": 0.3593, "step": 1267 }, { "epoch": 0.4546975854649773, "grad_norm": 0.4552958607673645, "learning_rate": 9.919317565580699e-06, "loss": 0.3612, "step": 1268 }, { "epoch": 0.4550561797752809, "grad_norm": 0.4796408414840698, "learning_rate": 9.918943750835785e-06, "loss": 0.3821, "step": 1269 }, { "epoch": 0.4554147740855845, "grad_norm": 0.39593836665153503, "learning_rate": 9.918569079195568e-06, "loss": 0.3669, "step": 1270 }, { "epoch": 0.4557733683958881, "grad_norm": 0.40156978368759155, "learning_rate": 9.918193550725312e-06, "loss": 0.379, "step": 1271 }, { "epoch": 0.4561319627061917, "grad_norm": 0.4516018033027649, "learning_rate": 9.91781716549044e-06, "loss": 0.4041, "step": 1272 }, { "epoch": 0.4564905570164953, "grad_norm": 0.43385136127471924, "learning_rate": 9.917439923556517e-06, "loss": 0.3453, "step": 1273 }, { "epoch": 0.45684915132679893, "grad_norm": 0.4150177836418152, "learning_rate": 9.91706182498926e-06, "loss": 0.3609, "step": 1274 }, { "epoch": 0.45720774563710254, "grad_norm": 0.4107837378978729, "learning_rate": 9.916682869854534e-06, "loss": 0.3767, "step": 1275 }, { "epoch": 0.45756633994740614, "grad_norm": 0.4121682345867157, "learning_rate": 9.916303058218357e-06, "loss": 0.3752, "step": 1276 }, { "epoch": 0.4579249342577098, "grad_norm": 0.45327499508857727, "learning_rate": 9.915922390146892e-06, "loss": 0.3648, "step": 1277 }, { "epoch": 0.4582835285680134, "grad_norm": 0.4051954448223114, "learning_rate": 9.915540865706452e-06, "loss": 0.3583, "step": 1278 }, { "epoch": 0.458642122878317, "grad_norm": 0.4326026141643524, "learning_rate": 9.915158484963501e-06, "loss": 0.3699, "step": 1279 }, { "epoch": 0.4590007171886206, "grad_norm": 0.3669991195201874, "learning_rate": 9.914775247984649e-06, "loss": 0.3493, "step": 1280 }, { "epoch": 0.4593593114989242, "grad_norm": 0.422541081905365, "learning_rate": 9.914391154836657e-06, "loss": 0.3781, "step": 1281 }, { "epoch": 0.45971790580922783, "grad_norm": 0.4159057140350342, "learning_rate": 9.914006205586438e-06, "loss": 0.3482, "step": 1282 }, { "epoch": 0.46007650011953144, "grad_norm": 0.4279913902282715, "learning_rate": 9.913620400301048e-06, "loss": 0.3832, "step": 1283 }, { "epoch": 0.46043509442983505, "grad_norm": 0.3943678140640259, "learning_rate": 9.913233739047699e-06, "loss": 0.3622, "step": 1284 }, { "epoch": 0.46079368874013865, "grad_norm": 0.40617117285728455, "learning_rate": 9.912846221893745e-06, "loss": 0.3727, "step": 1285 }, { "epoch": 0.46115228305044226, "grad_norm": 0.42698365449905396, "learning_rate": 9.912457848906695e-06, "loss": 0.3815, "step": 1286 }, { "epoch": 0.46151087736074586, "grad_norm": 0.42713284492492676, "learning_rate": 9.912068620154205e-06, "loss": 0.3549, "step": 1287 }, { "epoch": 0.46186947167104947, "grad_norm": 0.4095936715602875, "learning_rate": 9.911678535704078e-06, "loss": 0.3733, "step": 1288 }, { "epoch": 0.4622280659813531, "grad_norm": 0.4683229327201843, "learning_rate": 9.911287595624269e-06, "loss": 0.3642, "step": 1289 }, { "epoch": 0.4625866602916567, "grad_norm": 0.3581908345222473, "learning_rate": 9.910895799982881e-06, "loss": 0.3609, "step": 1290 }, { "epoch": 0.46294525460196034, "grad_norm": 0.406105101108551, "learning_rate": 9.910503148848167e-06, "loss": 0.3729, "step": 1291 }, { "epoch": 0.46330384891226395, "grad_norm": 0.512775182723999, "learning_rate": 9.910109642288526e-06, "loss": 0.3822, "step": 1292 }, { "epoch": 0.46366244322256756, "grad_norm": 0.44486764073371887, "learning_rate": 9.909715280372509e-06, "loss": 0.3671, "step": 1293 }, { "epoch": 0.46402103753287116, "grad_norm": 0.467559278011322, "learning_rate": 9.909320063168817e-06, "loss": 0.3764, "step": 1294 }, { "epoch": 0.46437963184317477, "grad_norm": 0.5293225049972534, "learning_rate": 9.908923990746294e-06, "loss": 0.3839, "step": 1295 }, { "epoch": 0.4647382261534784, "grad_norm": 0.44161269068717957, "learning_rate": 9.90852706317394e-06, "loss": 0.3644, "step": 1296 }, { "epoch": 0.465096820463782, "grad_norm": 0.45479097962379456, "learning_rate": 9.9081292805209e-06, "loss": 0.3761, "step": 1297 }, { "epoch": 0.4654554147740856, "grad_norm": 0.4229014217853546, "learning_rate": 9.90773064285647e-06, "loss": 0.3525, "step": 1298 }, { "epoch": 0.4658140090843892, "grad_norm": 0.41326963901519775, "learning_rate": 9.907331150250093e-06, "loss": 0.3549, "step": 1299 }, { "epoch": 0.4661726033946928, "grad_norm": 0.4376573860645294, "learning_rate": 9.906930802771361e-06, "loss": 0.3857, "step": 1300 }, { "epoch": 0.4665311977049964, "grad_norm": 0.4295831620693207, "learning_rate": 9.906529600490018e-06, "loss": 0.3754, "step": 1301 }, { "epoch": 0.4668897920153, "grad_norm": 0.4219476878643036, "learning_rate": 9.906127543475953e-06, "loss": 0.3544, "step": 1302 }, { "epoch": 0.4672483863256036, "grad_norm": 0.4431656002998352, "learning_rate": 9.905724631799207e-06, "loss": 0.3462, "step": 1303 }, { "epoch": 0.4676069806359072, "grad_norm": 0.4339430630207062, "learning_rate": 9.905320865529968e-06, "loss": 0.375, "step": 1304 }, { "epoch": 0.4679655749462108, "grad_norm": 0.41951388120651245, "learning_rate": 9.904916244738572e-06, "loss": 0.3678, "step": 1305 }, { "epoch": 0.4683241692565145, "grad_norm": 0.4174754321575165, "learning_rate": 9.904510769495506e-06, "loss": 0.3797, "step": 1306 }, { "epoch": 0.4686827635668181, "grad_norm": 0.4065185785293579, "learning_rate": 9.904104439871404e-06, "loss": 0.3596, "step": 1307 }, { "epoch": 0.4690413578771217, "grad_norm": 0.40791136026382446, "learning_rate": 9.903697255937053e-06, "loss": 0.3544, "step": 1308 }, { "epoch": 0.4693999521874253, "grad_norm": 0.43667706847190857, "learning_rate": 9.903289217763382e-06, "loss": 0.349, "step": 1309 }, { "epoch": 0.4697585464977289, "grad_norm": 0.4328427016735077, "learning_rate": 9.902880325421476e-06, "loss": 0.378, "step": 1310 }, { "epoch": 0.4701171408080325, "grad_norm": 0.45928239822387695, "learning_rate": 9.902470578982561e-06, "loss": 0.3856, "step": 1311 }, { "epoch": 0.4704757351183361, "grad_norm": 0.3986533582210541, "learning_rate": 9.902059978518022e-06, "loss": 0.3853, "step": 1312 }, { "epoch": 0.47083432942863973, "grad_norm": 0.46560657024383545, "learning_rate": 9.901648524099381e-06, "loss": 0.3718, "step": 1313 }, { "epoch": 0.47119292373894334, "grad_norm": 0.4969557225704193, "learning_rate": 9.901236215798317e-06, "loss": 0.3452, "step": 1314 }, { "epoch": 0.47155151804924694, "grad_norm": 0.44811564683914185, "learning_rate": 9.900823053686657e-06, "loss": 0.3537, "step": 1315 }, { "epoch": 0.47191011235955055, "grad_norm": 0.4974655508995056, "learning_rate": 9.900409037836374e-06, "loss": 0.3648, "step": 1316 }, { "epoch": 0.47226870666985415, "grad_norm": 0.49452418088912964, "learning_rate": 9.89999416831959e-06, "loss": 0.3572, "step": 1317 }, { "epoch": 0.47262730098015776, "grad_norm": 0.49674829840660095, "learning_rate": 9.899578445208578e-06, "loss": 0.3566, "step": 1318 }, { "epoch": 0.47298589529046137, "grad_norm": 0.4200642704963684, "learning_rate": 9.899161868575756e-06, "loss": 0.3588, "step": 1319 }, { "epoch": 0.47334448960076503, "grad_norm": 0.4071858823299408, "learning_rate": 9.898744438493694e-06, "loss": 0.3604, "step": 1320 }, { "epoch": 0.47370308391106863, "grad_norm": 0.4428824782371521, "learning_rate": 9.898326155035111e-06, "loss": 0.3597, "step": 1321 }, { "epoch": 0.47406167822137224, "grad_norm": 0.4386223256587982, "learning_rate": 9.897907018272874e-06, "loss": 0.3621, "step": 1322 }, { "epoch": 0.47442027253167585, "grad_norm": 0.38120707869529724, "learning_rate": 9.897487028279993e-06, "loss": 0.3568, "step": 1323 }, { "epoch": 0.47477886684197945, "grad_norm": 0.38705387711524963, "learning_rate": 9.897066185129636e-06, "loss": 0.3699, "step": 1324 }, { "epoch": 0.47513746115228306, "grad_norm": 0.4669357240200043, "learning_rate": 9.896644488895115e-06, "loss": 0.3722, "step": 1325 }, { "epoch": 0.47549605546258666, "grad_norm": 0.3878997266292572, "learning_rate": 9.896221939649889e-06, "loss": 0.3616, "step": 1326 }, { "epoch": 0.47585464977289027, "grad_norm": 0.45149335265159607, "learning_rate": 9.895798537467568e-06, "loss": 0.3672, "step": 1327 }, { "epoch": 0.4762132440831939, "grad_norm": 0.4870775043964386, "learning_rate": 9.895374282421909e-06, "loss": 0.358, "step": 1328 }, { "epoch": 0.4765718383934975, "grad_norm": 0.43494245409965515, "learning_rate": 9.894949174586819e-06, "loss": 0.3934, "step": 1329 }, { "epoch": 0.4769304327038011, "grad_norm": 0.5071903467178345, "learning_rate": 9.894523214036355e-06, "loss": 0.3591, "step": 1330 }, { "epoch": 0.4772890270141047, "grad_norm": 0.4131316542625427, "learning_rate": 9.89409640084472e-06, "loss": 0.3633, "step": 1331 }, { "epoch": 0.4776476213244083, "grad_norm": 0.46809113025665283, "learning_rate": 9.893668735086265e-06, "loss": 0.3704, "step": 1332 }, { "epoch": 0.4780062156347119, "grad_norm": 0.4712592661380768, "learning_rate": 9.893240216835491e-06, "loss": 0.3782, "step": 1333 }, { "epoch": 0.4783648099450155, "grad_norm": 0.46253132820129395, "learning_rate": 9.892810846167048e-06, "loss": 0.3883, "step": 1334 }, { "epoch": 0.4787234042553192, "grad_norm": 0.4298301041126251, "learning_rate": 9.892380623155733e-06, "loss": 0.348, "step": 1335 }, { "epoch": 0.4790819985656228, "grad_norm": 0.46029382944107056, "learning_rate": 9.891949547876492e-06, "loss": 0.3776, "step": 1336 }, { "epoch": 0.4794405928759264, "grad_norm": 0.4624936282634735, "learning_rate": 9.89151762040442e-06, "loss": 0.3646, "step": 1337 }, { "epoch": 0.47979918718623, "grad_norm": 0.46129658818244934, "learning_rate": 9.891084840814762e-06, "loss": 0.358, "step": 1338 }, { "epoch": 0.4801577814965336, "grad_norm": 0.41358527541160583, "learning_rate": 9.890651209182907e-06, "loss": 0.377, "step": 1339 }, { "epoch": 0.4805163758068372, "grad_norm": 0.48685240745544434, "learning_rate": 9.890216725584394e-06, "loss": 0.3721, "step": 1340 }, { "epoch": 0.4808749701171408, "grad_norm": 0.5089372396469116, "learning_rate": 9.889781390094914e-06, "loss": 0.3879, "step": 1341 }, { "epoch": 0.4812335644274444, "grad_norm": 0.4236043393611908, "learning_rate": 9.889345202790304e-06, "loss": 0.3817, "step": 1342 }, { "epoch": 0.481592158737748, "grad_norm": 0.5292632579803467, "learning_rate": 9.888908163746546e-06, "loss": 0.3771, "step": 1343 }, { "epoch": 0.4819507530480516, "grad_norm": 0.49395596981048584, "learning_rate": 9.888470273039776e-06, "loss": 0.3698, "step": 1344 }, { "epoch": 0.48230934735835523, "grad_norm": 0.4215496778488159, "learning_rate": 9.888031530746277e-06, "loss": 0.36, "step": 1345 }, { "epoch": 0.48266794166865884, "grad_norm": 0.419282466173172, "learning_rate": 9.887591936942476e-06, "loss": 0.3614, "step": 1346 }, { "epoch": 0.48302653597896245, "grad_norm": 0.5213088393211365, "learning_rate": 9.887151491704955e-06, "loss": 0.3642, "step": 1347 }, { "epoch": 0.48338513028926605, "grad_norm": 0.43132686614990234, "learning_rate": 9.886710195110438e-06, "loss": 0.3624, "step": 1348 }, { "epoch": 0.4837437245995697, "grad_norm": 0.39004164934158325, "learning_rate": 9.886268047235803e-06, "loss": 0.3611, "step": 1349 }, { "epoch": 0.4841023189098733, "grad_norm": 0.47801822423934937, "learning_rate": 9.885825048158071e-06, "loss": 0.3711, "step": 1350 }, { "epoch": 0.4844609132201769, "grad_norm": 0.4343005120754242, "learning_rate": 9.885381197954415e-06, "loss": 0.3604, "step": 1351 }, { "epoch": 0.48481950753048053, "grad_norm": 0.42404404282569885, "learning_rate": 9.884936496702156e-06, "loss": 0.3666, "step": 1352 }, { "epoch": 0.48517810184078414, "grad_norm": 0.4159984588623047, "learning_rate": 9.884490944478761e-06, "loss": 0.3512, "step": 1353 }, { "epoch": 0.48553669615108774, "grad_norm": 0.47903865575790405, "learning_rate": 9.884044541361846e-06, "loss": 0.3658, "step": 1354 }, { "epoch": 0.48589529046139135, "grad_norm": 0.4246938228607178, "learning_rate": 9.883597287429179e-06, "loss": 0.3822, "step": 1355 }, { "epoch": 0.48625388477169496, "grad_norm": 0.39843451976776123, "learning_rate": 9.883149182758667e-06, "loss": 0.3767, "step": 1356 }, { "epoch": 0.48661247908199856, "grad_norm": 0.39727213978767395, "learning_rate": 9.88270022742838e-06, "loss": 0.3956, "step": 1357 }, { "epoch": 0.48697107339230217, "grad_norm": 0.4241868555545807, "learning_rate": 9.882250421516519e-06, "loss": 0.3744, "step": 1358 }, { "epoch": 0.4873296677026058, "grad_norm": 0.433908611536026, "learning_rate": 9.881799765101446e-06, "loss": 0.3868, "step": 1359 }, { "epoch": 0.4876882620129094, "grad_norm": 0.37736546993255615, "learning_rate": 9.881348258261665e-06, "loss": 0.3692, "step": 1360 }, { "epoch": 0.488046856323213, "grad_norm": 0.41012293100357056, "learning_rate": 9.880895901075831e-06, "loss": 0.3846, "step": 1361 }, { "epoch": 0.4884054506335166, "grad_norm": 0.4700547158718109, "learning_rate": 9.880442693622745e-06, "loss": 0.3664, "step": 1362 }, { "epoch": 0.4887640449438202, "grad_norm": 0.4448428153991699, "learning_rate": 9.879988635981359e-06, "loss": 0.3614, "step": 1363 }, { "epoch": 0.48912263925412386, "grad_norm": 0.4141594469547272, "learning_rate": 9.87953372823077e-06, "loss": 0.3701, "step": 1364 }, { "epoch": 0.48948123356442746, "grad_norm": 0.5318564772605896, "learning_rate": 9.879077970450224e-06, "loss": 0.37, "step": 1365 }, { "epoch": 0.48983982787473107, "grad_norm": 0.4508623480796814, "learning_rate": 9.878621362719117e-06, "loss": 0.3802, "step": 1366 }, { "epoch": 0.4901984221850347, "grad_norm": 0.42358890175819397, "learning_rate": 9.878163905116988e-06, "loss": 0.3742, "step": 1367 }, { "epoch": 0.4905570164953383, "grad_norm": 0.43598657846450806, "learning_rate": 9.877705597723533e-06, "loss": 0.3635, "step": 1368 }, { "epoch": 0.4909156108056419, "grad_norm": 0.48217692971229553, "learning_rate": 9.877246440618586e-06, "loss": 0.3599, "step": 1369 }, { "epoch": 0.4912742051159455, "grad_norm": 0.4090609550476074, "learning_rate": 9.876786433882134e-06, "loss": 0.3678, "step": 1370 }, { "epoch": 0.4916327994262491, "grad_norm": 0.4408690929412842, "learning_rate": 9.876325577594315e-06, "loss": 0.3554, "step": 1371 }, { "epoch": 0.4919913937365527, "grad_norm": 0.4406038224697113, "learning_rate": 9.875863871835407e-06, "loss": 0.3827, "step": 1372 }, { "epoch": 0.4923499880468563, "grad_norm": 0.3897648751735687, "learning_rate": 9.875401316685844e-06, "loss": 0.3631, "step": 1373 }, { "epoch": 0.4927085823571599, "grad_norm": 0.4239162504673004, "learning_rate": 9.874937912226204e-06, "loss": 0.3848, "step": 1374 }, { "epoch": 0.4930671766674635, "grad_norm": 0.513043999671936, "learning_rate": 9.874473658537211e-06, "loss": 0.3615, "step": 1375 }, { "epoch": 0.49342577097776713, "grad_norm": 0.4356820583343506, "learning_rate": 9.874008555699742e-06, "loss": 0.3834, "step": 1376 }, { "epoch": 0.49378436528807074, "grad_norm": 0.42227602005004883, "learning_rate": 9.873542603794819e-06, "loss": 0.3729, "step": 1377 }, { "epoch": 0.4941429595983744, "grad_norm": 0.4744589328765869, "learning_rate": 9.873075802903612e-06, "loss": 0.3637, "step": 1378 }, { "epoch": 0.494501553908678, "grad_norm": 0.42020928859710693, "learning_rate": 9.872608153107439e-06, "loss": 0.3482, "step": 1379 }, { "epoch": 0.4948601482189816, "grad_norm": 0.3958687484264374, "learning_rate": 9.872139654487767e-06, "loss": 0.3766, "step": 1380 }, { "epoch": 0.4952187425292852, "grad_norm": 0.43744999170303345, "learning_rate": 9.871670307126209e-06, "loss": 0.3507, "step": 1381 }, { "epoch": 0.4955773368395888, "grad_norm": 0.4590883255004883, "learning_rate": 9.871200111104527e-06, "loss": 0.3661, "step": 1382 }, { "epoch": 0.49593593114989243, "grad_norm": 0.3947252035140991, "learning_rate": 9.870729066504629e-06, "loss": 0.3522, "step": 1383 }, { "epoch": 0.49629452546019603, "grad_norm": 0.4592130780220032, "learning_rate": 9.870257173408575e-06, "loss": 0.3725, "step": 1384 }, { "epoch": 0.49665311977049964, "grad_norm": 0.437844842672348, "learning_rate": 9.869784431898569e-06, "loss": 0.3925, "step": 1385 }, { "epoch": 0.49701171408080325, "grad_norm": 0.4060682952404022, "learning_rate": 9.869310842056962e-06, "loss": 0.3622, "step": 1386 }, { "epoch": 0.49737030839110685, "grad_norm": 0.39358240365982056, "learning_rate": 9.86883640396626e-06, "loss": 0.38, "step": 1387 }, { "epoch": 0.49772890270141046, "grad_norm": 0.4782959520816803, "learning_rate": 9.868361117709108e-06, "loss": 0.3672, "step": 1388 }, { "epoch": 0.49808749701171406, "grad_norm": 0.42594021558761597, "learning_rate": 9.867884983368303e-06, "loss": 0.3895, "step": 1389 }, { "epoch": 0.49844609132201767, "grad_norm": 0.38971471786499023, "learning_rate": 9.867408001026789e-06, "loss": 0.3662, "step": 1390 }, { "epoch": 0.4988046856323213, "grad_norm": 0.4048916697502136, "learning_rate": 9.866930170767657e-06, "loss": 0.3582, "step": 1391 }, { "epoch": 0.49916327994262494, "grad_norm": 0.38356518745422363, "learning_rate": 9.866451492674148e-06, "loss": 0.365, "step": 1392 }, { "epoch": 0.49952187425292854, "grad_norm": 0.4579867422580719, "learning_rate": 9.865971966829648e-06, "loss": 0.3815, "step": 1393 }, { "epoch": 0.49988046856323215, "grad_norm": 0.40056243538856506, "learning_rate": 9.865491593317693e-06, "loss": 0.3549, "step": 1394 }, { "epoch": 0.5002390628735357, "grad_norm": 0.396756112575531, "learning_rate": 9.865010372221966e-06, "loss": 0.3648, "step": 1395 }, { "epoch": 0.5005976571838393, "grad_norm": 0.45713064074516296, "learning_rate": 9.864528303626295e-06, "loss": 0.3805, "step": 1396 }, { "epoch": 0.5009562514941429, "grad_norm": 0.414523720741272, "learning_rate": 9.864045387614659e-06, "loss": 0.3609, "step": 1397 }, { "epoch": 0.5013148458044465, "grad_norm": 0.41731932759284973, "learning_rate": 9.863561624271185e-06, "loss": 0.3592, "step": 1398 }, { "epoch": 0.5016734401147501, "grad_norm": 0.4458618760108948, "learning_rate": 9.863077013680142e-06, "loss": 0.3727, "step": 1399 }, { "epoch": 0.5020320344250538, "grad_norm": 0.45942991971969604, "learning_rate": 9.862591555925956e-06, "loss": 0.3692, "step": 1400 }, { "epoch": 0.5023906287353574, "grad_norm": 0.4398845136165619, "learning_rate": 9.86210525109319e-06, "loss": 0.3408, "step": 1401 }, { "epoch": 0.502749223045661, "grad_norm": 0.3942681550979614, "learning_rate": 9.861618099266566e-06, "loss": 0.3799, "step": 1402 }, { "epoch": 0.5031078173559647, "grad_norm": 0.43537309765815735, "learning_rate": 9.86113010053094e-06, "loss": 0.3669, "step": 1403 }, { "epoch": 0.5034664116662683, "grad_norm": 0.4119764566421509, "learning_rate": 9.860641254971327e-06, "loss": 0.358, "step": 1404 }, { "epoch": 0.5038250059765719, "grad_norm": 0.43931856751441956, "learning_rate": 9.860151562672888e-06, "loss": 0.378, "step": 1405 }, { "epoch": 0.5041836002868755, "grad_norm": 0.353824257850647, "learning_rate": 9.859661023720924e-06, "loss": 0.3771, "step": 1406 }, { "epoch": 0.5045421945971791, "grad_norm": 0.3720281720161438, "learning_rate": 9.859169638200891e-06, "loss": 0.3405, "step": 1407 }, { "epoch": 0.5049007889074827, "grad_norm": 0.4810190200805664, "learning_rate": 9.858677406198388e-06, "loss": 0.375, "step": 1408 }, { "epoch": 0.5052593832177863, "grad_norm": 0.4141077399253845, "learning_rate": 9.858184327799167e-06, "loss": 0.3739, "step": 1409 }, { "epoch": 0.5056179775280899, "grad_norm": 0.3793151378631592, "learning_rate": 9.85769040308912e-06, "loss": 0.3494, "step": 1410 }, { "epoch": 0.5059765718383935, "grad_norm": 0.478630006313324, "learning_rate": 9.857195632154291e-06, "loss": 0.3552, "step": 1411 }, { "epoch": 0.5063351661486971, "grad_norm": 0.42126575112342834, "learning_rate": 9.856700015080873e-06, "loss": 0.3749, "step": 1412 }, { "epoch": 0.5066937604590007, "grad_norm": 0.39594003558158875, "learning_rate": 9.856203551955202e-06, "loss": 0.3733, "step": 1413 }, { "epoch": 0.5070523547693043, "grad_norm": 0.39093083143234253, "learning_rate": 9.855706242863763e-06, "loss": 0.3501, "step": 1414 }, { "epoch": 0.5074109490796079, "grad_norm": 0.3778493106365204, "learning_rate": 9.855208087893189e-06, "loss": 0.3481, "step": 1415 }, { "epoch": 0.5077695433899115, "grad_norm": 0.45516642928123474, "learning_rate": 9.854709087130261e-06, "loss": 0.3766, "step": 1416 }, { "epoch": 0.5081281377002151, "grad_norm": 0.396719753742218, "learning_rate": 9.854209240661907e-06, "loss": 0.3753, "step": 1417 }, { "epoch": 0.5084867320105187, "grad_norm": 0.43107351660728455, "learning_rate": 9.8537085485752e-06, "loss": 0.3902, "step": 1418 }, { "epoch": 0.5088453263208224, "grad_norm": 0.39439335465431213, "learning_rate": 9.853207010957364e-06, "loss": 0.3694, "step": 1419 }, { "epoch": 0.509203920631126, "grad_norm": 0.4115743637084961, "learning_rate": 9.852704627895767e-06, "loss": 0.3631, "step": 1420 }, { "epoch": 0.5095625149414296, "grad_norm": 0.41768309473991394, "learning_rate": 9.852201399477926e-06, "loss": 0.3629, "step": 1421 }, { "epoch": 0.5099211092517332, "grad_norm": 0.3830670714378357, "learning_rate": 9.851697325791505e-06, "loss": 0.3882, "step": 1422 }, { "epoch": 0.5102797035620368, "grad_norm": 0.4122150242328644, "learning_rate": 9.851192406924316e-06, "loss": 0.3392, "step": 1423 }, { "epoch": 0.5106382978723404, "grad_norm": 0.3991565704345703, "learning_rate": 9.850686642964315e-06, "loss": 0.3602, "step": 1424 }, { "epoch": 0.510996892182644, "grad_norm": 0.37454283237457275, "learning_rate": 9.850180033999612e-06, "loss": 0.3724, "step": 1425 }, { "epoch": 0.5113554864929476, "grad_norm": 0.4161570370197296, "learning_rate": 9.849672580118458e-06, "loss": 0.3465, "step": 1426 }, { "epoch": 0.5117140808032512, "grad_norm": 0.40562790632247925, "learning_rate": 9.84916428140925e-06, "loss": 0.3549, "step": 1427 }, { "epoch": 0.5120726751135549, "grad_norm": 0.3764801025390625, "learning_rate": 9.848655137960537e-06, "loss": 0.3645, "step": 1428 }, { "epoch": 0.5124312694238585, "grad_norm": 0.5235685706138611, "learning_rate": 9.848145149861014e-06, "loss": 0.3659, "step": 1429 }, { "epoch": 0.5127898637341621, "grad_norm": 0.4195060431957245, "learning_rate": 9.847634317199524e-06, "loss": 0.3861, "step": 1430 }, { "epoch": 0.5131484580444657, "grad_norm": 0.38214078545570374, "learning_rate": 9.847122640065054e-06, "loss": 0.3578, "step": 1431 }, { "epoch": 0.5135070523547693, "grad_norm": 0.4475308954715729, "learning_rate": 9.84661011854674e-06, "loss": 0.39, "step": 1432 }, { "epoch": 0.513865646665073, "grad_norm": 0.46674826741218567, "learning_rate": 9.846096752733865e-06, "loss": 0.3816, "step": 1433 }, { "epoch": 0.5142242409753766, "grad_norm": 0.4218215346336365, "learning_rate": 9.845582542715856e-06, "loss": 0.3798, "step": 1434 }, { "epoch": 0.5145828352856802, "grad_norm": 0.3961579203605652, "learning_rate": 9.845067488582297e-06, "loss": 0.3432, "step": 1435 }, { "epoch": 0.5149414295959838, "grad_norm": 0.3921550214290619, "learning_rate": 9.844551590422905e-06, "loss": 0.3698, "step": 1436 }, { "epoch": 0.5153000239062874, "grad_norm": 0.3790408670902252, "learning_rate": 9.844034848327553e-06, "loss": 0.3501, "step": 1437 }, { "epoch": 0.515658618216591, "grad_norm": 0.37862807512283325, "learning_rate": 9.84351726238626e-06, "loss": 0.3558, "step": 1438 }, { "epoch": 0.5160172125268946, "grad_norm": 0.3761593997478485, "learning_rate": 9.842998832689191e-06, "loss": 0.3583, "step": 1439 }, { "epoch": 0.5163758068371982, "grad_norm": 0.4076906442642212, "learning_rate": 9.842479559326659e-06, "loss": 0.3631, "step": 1440 }, { "epoch": 0.5167344011475018, "grad_norm": 0.42649704217910767, "learning_rate": 9.841959442389121e-06, "loss": 0.3927, "step": 1441 }, { "epoch": 0.5170929954578054, "grad_norm": 0.35444384813308716, "learning_rate": 9.841438481967184e-06, "loss": 0.3819, "step": 1442 }, { "epoch": 0.517451589768109, "grad_norm": 0.422076940536499, "learning_rate": 9.840916678151601e-06, "loss": 0.369, "step": 1443 }, { "epoch": 0.5178101840784126, "grad_norm": 0.42279160022735596, "learning_rate": 9.840394031033271e-06, "loss": 0.3566, "step": 1444 }, { "epoch": 0.5181687783887162, "grad_norm": 0.4549050033092499, "learning_rate": 9.839870540703243e-06, "loss": 0.3643, "step": 1445 }, { "epoch": 0.5185273726990198, "grad_norm": 0.44090601801872253, "learning_rate": 9.839346207252708e-06, "loss": 0.3441, "step": 1446 }, { "epoch": 0.5188859670093234, "grad_norm": 0.36956432461738586, "learning_rate": 9.838821030773007e-06, "loss": 0.3803, "step": 1447 }, { "epoch": 0.519244561319627, "grad_norm": 0.3929886817932129, "learning_rate": 9.83829501135563e-06, "loss": 0.3543, "step": 1448 }, { "epoch": 0.5196031556299306, "grad_norm": 0.40775439143180847, "learning_rate": 9.837768149092208e-06, "loss": 0.3516, "step": 1449 }, { "epoch": 0.5199617499402343, "grad_norm": 0.42091983556747437, "learning_rate": 9.837240444074525e-06, "loss": 0.3712, "step": 1450 }, { "epoch": 0.5203203442505379, "grad_norm": 0.3666721284389496, "learning_rate": 9.836711896394504e-06, "loss": 0.3477, "step": 1451 }, { "epoch": 0.5206789385608415, "grad_norm": 0.43458521366119385, "learning_rate": 9.836182506144226e-06, "loss": 0.3512, "step": 1452 }, { "epoch": 0.5210375328711451, "grad_norm": 0.4390840530395508, "learning_rate": 9.835652273415906e-06, "loss": 0.3571, "step": 1453 }, { "epoch": 0.5213961271814487, "grad_norm": 0.4033825695514679, "learning_rate": 9.835121198301918e-06, "loss": 0.3361, "step": 1454 }, { "epoch": 0.5217547214917523, "grad_norm": 0.3836355209350586, "learning_rate": 9.834589280894772e-06, "loss": 0.363, "step": 1455 }, { "epoch": 0.5221133158020559, "grad_norm": 0.40280458331108093, "learning_rate": 9.834056521287135e-06, "loss": 0.3479, "step": 1456 }, { "epoch": 0.5224719101123596, "grad_norm": 0.46325716376304626, "learning_rate": 9.833522919571809e-06, "loss": 0.3784, "step": 1457 }, { "epoch": 0.5228305044226632, "grad_norm": 0.3720915913581848, "learning_rate": 9.832988475841755e-06, "loss": 0.3595, "step": 1458 }, { "epoch": 0.5231890987329668, "grad_norm": 0.4300890564918518, "learning_rate": 9.832453190190072e-06, "loss": 0.36, "step": 1459 }, { "epoch": 0.5235476930432704, "grad_norm": 0.4491024911403656, "learning_rate": 9.831917062710008e-06, "loss": 0.3537, "step": 1460 }, { "epoch": 0.523906287353574, "grad_norm": 0.4504033923149109, "learning_rate": 9.831380093494957e-06, "loss": 0.3673, "step": 1461 }, { "epoch": 0.5242648816638776, "grad_norm": 0.4600393772125244, "learning_rate": 9.830842282638464e-06, "loss": 0.3717, "step": 1462 }, { "epoch": 0.5246234759741812, "grad_norm": 0.45336902141571045, "learning_rate": 9.830303630234217e-06, "loss": 0.3706, "step": 1463 }, { "epoch": 0.5249820702844848, "grad_norm": 0.4274975061416626, "learning_rate": 9.82976413637605e-06, "loss": 0.3395, "step": 1464 }, { "epoch": 0.5253406645947885, "grad_norm": 0.44499456882476807, "learning_rate": 9.829223801157943e-06, "loss": 0.3688, "step": 1465 }, { "epoch": 0.5256992589050921, "grad_norm": 0.4678840935230255, "learning_rate": 9.828682624674024e-06, "loss": 0.3961, "step": 1466 }, { "epoch": 0.5260578532153957, "grad_norm": 0.4653649628162384, "learning_rate": 9.82814060701857e-06, "loss": 0.3655, "step": 1467 }, { "epoch": 0.5264164475256993, "grad_norm": 0.39377662539482117, "learning_rate": 9.827597748286001e-06, "loss": 0.3766, "step": 1468 }, { "epoch": 0.5267750418360029, "grad_norm": 0.4491354525089264, "learning_rate": 9.827054048570885e-06, "loss": 0.3759, "step": 1469 }, { "epoch": 0.5271336361463065, "grad_norm": 0.458484947681427, "learning_rate": 9.826509507967936e-06, "loss": 0.365, "step": 1470 }, { "epoch": 0.5274922304566101, "grad_norm": 0.44947248697280884, "learning_rate": 9.825964126572013e-06, "loss": 0.3667, "step": 1471 }, { "epoch": 0.5278508247669137, "grad_norm": 0.41283780336380005, "learning_rate": 9.825417904478128e-06, "loss": 0.3843, "step": 1472 }, { "epoch": 0.5282094190772173, "grad_norm": 0.4669884443283081, "learning_rate": 9.824870841781429e-06, "loss": 0.345, "step": 1473 }, { "epoch": 0.5285680133875209, "grad_norm": 0.4913058876991272, "learning_rate": 9.824322938577217e-06, "loss": 0.356, "step": 1474 }, { "epoch": 0.5289266076978245, "grad_norm": 0.441648930311203, "learning_rate": 9.823774194960943e-06, "loss": 0.3653, "step": 1475 }, { "epoch": 0.5292852020081281, "grad_norm": 0.43232429027557373, "learning_rate": 9.823224611028194e-06, "loss": 0.3608, "step": 1476 }, { "epoch": 0.5296437963184317, "grad_norm": 0.4573530852794647, "learning_rate": 9.822674186874715e-06, "loss": 0.3732, "step": 1477 }, { "epoch": 0.5300023906287353, "grad_norm": 0.4990096986293793, "learning_rate": 9.822122922596384e-06, "loss": 0.3907, "step": 1478 }, { "epoch": 0.5303609849390389, "grad_norm": 0.3825890123844147, "learning_rate": 9.82157081828924e-06, "loss": 0.3747, "step": 1479 }, { "epoch": 0.5307195792493425, "grad_norm": 0.4754282534122467, "learning_rate": 9.821017874049459e-06, "loss": 0.3641, "step": 1480 }, { "epoch": 0.5310781735596461, "grad_norm": 0.3799813687801361, "learning_rate": 9.820464089973364e-06, "loss": 0.3751, "step": 1481 }, { "epoch": 0.5314367678699498, "grad_norm": 0.4206717312335968, "learning_rate": 9.819909466157429e-06, "loss": 0.363, "step": 1482 }, { "epoch": 0.5317953621802534, "grad_norm": 0.4123644232749939, "learning_rate": 9.819354002698268e-06, "loss": 0.3747, "step": 1483 }, { "epoch": 0.532153956490557, "grad_norm": 0.38122403621673584, "learning_rate": 9.818797699692645e-06, "loss": 0.3565, "step": 1484 }, { "epoch": 0.5325125508008606, "grad_norm": 0.45154356956481934, "learning_rate": 9.818240557237473e-06, "loss": 0.3722, "step": 1485 }, { "epoch": 0.5328711451111643, "grad_norm": 0.4330032169818878, "learning_rate": 9.817682575429805e-06, "loss": 0.3366, "step": 1486 }, { "epoch": 0.5332297394214679, "grad_norm": 0.4225082993507385, "learning_rate": 9.817123754366842e-06, "loss": 0.3536, "step": 1487 }, { "epoch": 0.5335883337317715, "grad_norm": 0.388285756111145, "learning_rate": 9.816564094145937e-06, "loss": 0.3792, "step": 1488 }, { "epoch": 0.5339469280420751, "grad_norm": 0.4120311141014099, "learning_rate": 9.816003594864578e-06, "loss": 0.355, "step": 1489 }, { "epoch": 0.5343055223523787, "grad_norm": 0.4197534918785095, "learning_rate": 9.815442256620411e-06, "loss": 0.3871, "step": 1490 }, { "epoch": 0.5346641166626823, "grad_norm": 0.39315858483314514, "learning_rate": 9.814880079511222e-06, "loss": 0.3868, "step": 1491 }, { "epoch": 0.5350227109729859, "grad_norm": 0.40677380561828613, "learning_rate": 9.814317063634943e-06, "loss": 0.3745, "step": 1492 }, { "epoch": 0.5353813052832895, "grad_norm": 0.3582475185394287, "learning_rate": 9.813753209089653e-06, "loss": 0.3655, "step": 1493 }, { "epoch": 0.5357398995935931, "grad_norm": 0.3971039652824402, "learning_rate": 9.813188515973577e-06, "loss": 0.3753, "step": 1494 }, { "epoch": 0.5360984939038967, "grad_norm": 0.3827597200870514, "learning_rate": 9.812622984385088e-06, "loss": 0.3722, "step": 1495 }, { "epoch": 0.5364570882142004, "grad_norm": 0.39289259910583496, "learning_rate": 9.8120566144227e-06, "loss": 0.3794, "step": 1496 }, { "epoch": 0.536815682524504, "grad_norm": 0.46331363916397095, "learning_rate": 9.811489406185079e-06, "loss": 0.364, "step": 1497 }, { "epoch": 0.5371742768348076, "grad_norm": 0.4077833890914917, "learning_rate": 9.810921359771036e-06, "loss": 0.3828, "step": 1498 }, { "epoch": 0.5375328711451112, "grad_norm": 0.40494367480278015, "learning_rate": 9.810352475279523e-06, "loss": 0.3746, "step": 1499 }, { "epoch": 0.5378914654554148, "grad_norm": 0.4443601071834564, "learning_rate": 9.809782752809644e-06, "loss": 0.3638, "step": 1500 }, { "epoch": 0.5382500597657184, "grad_norm": 0.4215395748615265, "learning_rate": 9.809212192460645e-06, "loss": 0.3568, "step": 1501 }, { "epoch": 0.538608654076022, "grad_norm": 0.48375412821769714, "learning_rate": 9.80864079433192e-06, "loss": 0.3932, "step": 1502 }, { "epoch": 0.5389672483863256, "grad_norm": 0.4175979197025299, "learning_rate": 9.808068558523008e-06, "loss": 0.3716, "step": 1503 }, { "epoch": 0.5393258426966292, "grad_norm": 0.393451064825058, "learning_rate": 9.807495485133594e-06, "loss": 0.3579, "step": 1504 }, { "epoch": 0.5396844370069328, "grad_norm": 0.4309934079647064, "learning_rate": 9.80692157426351e-06, "loss": 0.3399, "step": 1505 }, { "epoch": 0.5400430313172364, "grad_norm": 0.4168650209903717, "learning_rate": 9.80634682601273e-06, "loss": 0.3806, "step": 1506 }, { "epoch": 0.54040162562754, "grad_norm": 0.3835715651512146, "learning_rate": 9.805771240481382e-06, "loss": 0.3636, "step": 1507 }, { "epoch": 0.5407602199378436, "grad_norm": 0.42721226811408997, "learning_rate": 9.805194817769732e-06, "loss": 0.3752, "step": 1508 }, { "epoch": 0.5411188142481472, "grad_norm": 0.4379332363605499, "learning_rate": 9.804617557978196e-06, "loss": 0.357, "step": 1509 }, { "epoch": 0.5414774085584508, "grad_norm": 0.444464772939682, "learning_rate": 9.804039461207332e-06, "loss": 0.355, "step": 1510 }, { "epoch": 0.5418360028687544, "grad_norm": 0.44964948296546936, "learning_rate": 9.803460527557848e-06, "loss": 0.3654, "step": 1511 }, { "epoch": 0.542194597179058, "grad_norm": 0.4721531569957733, "learning_rate": 9.802880757130596e-06, "loss": 0.3704, "step": 1512 }, { "epoch": 0.5425531914893617, "grad_norm": 0.49020734429359436, "learning_rate": 9.802300150026573e-06, "loss": 0.3685, "step": 1513 }, { "epoch": 0.5429117857996653, "grad_norm": 0.4139340817928314, "learning_rate": 9.801718706346923e-06, "loss": 0.3798, "step": 1514 }, { "epoch": 0.543270380109969, "grad_norm": 0.49844875931739807, "learning_rate": 9.801136426192935e-06, "loss": 0.352, "step": 1515 }, { "epoch": 0.5436289744202726, "grad_norm": 0.4135315716266632, "learning_rate": 9.800553309666045e-06, "loss": 0.3611, "step": 1516 }, { "epoch": 0.5439875687305762, "grad_norm": 0.46527841687202454, "learning_rate": 9.799969356867833e-06, "loss": 0.3564, "step": 1517 }, { "epoch": 0.5443461630408798, "grad_norm": 0.49542051553726196, "learning_rate": 9.799384567900025e-06, "loss": 0.3766, "step": 1518 }, { "epoch": 0.5447047573511834, "grad_norm": 0.4185848534107208, "learning_rate": 9.798798942864494e-06, "loss": 0.341, "step": 1519 }, { "epoch": 0.545063351661487, "grad_norm": 0.5042614340782166, "learning_rate": 9.798212481863259e-06, "loss": 0.374, "step": 1520 }, { "epoch": 0.5454219459717906, "grad_norm": 0.43371641635894775, "learning_rate": 9.797625184998478e-06, "loss": 0.3583, "step": 1521 }, { "epoch": 0.5457805402820942, "grad_norm": 0.4423272907733917, "learning_rate": 9.797037052372466e-06, "loss": 0.345, "step": 1522 }, { "epoch": 0.5461391345923978, "grad_norm": 0.4322085976600647, "learning_rate": 9.796448084087674e-06, "loss": 0.3393, "step": 1523 }, { "epoch": 0.5464977289027014, "grad_norm": 0.4601684808731079, "learning_rate": 9.795858280246704e-06, "loss": 0.3697, "step": 1524 }, { "epoch": 0.546856323213005, "grad_norm": 0.43112829327583313, "learning_rate": 9.7952676409523e-06, "loss": 0.3825, "step": 1525 }, { "epoch": 0.5472149175233086, "grad_norm": 0.39354029297828674, "learning_rate": 9.794676166307354e-06, "loss": 0.3671, "step": 1526 }, { "epoch": 0.5475735118336122, "grad_norm": 0.43906939029693604, "learning_rate": 9.794083856414903e-06, "loss": 0.3518, "step": 1527 }, { "epoch": 0.5479321061439159, "grad_norm": 0.43044739961624146, "learning_rate": 9.793490711378128e-06, "loss": 0.3554, "step": 1528 }, { "epoch": 0.5482907004542195, "grad_norm": 0.43417611718177795, "learning_rate": 9.79289673130036e-06, "loss": 0.3539, "step": 1529 }, { "epoch": 0.5486492947645231, "grad_norm": 0.5163275003433228, "learning_rate": 9.792301916285069e-06, "loss": 0.3834, "step": 1530 }, { "epoch": 0.5490078890748267, "grad_norm": 0.4413203001022339, "learning_rate": 9.791706266435875e-06, "loss": 0.3745, "step": 1531 }, { "epoch": 0.5493664833851303, "grad_norm": 0.38342154026031494, "learning_rate": 9.79110978185654e-06, "loss": 0.3377, "step": 1532 }, { "epoch": 0.5497250776954339, "grad_norm": 0.4225137233734131, "learning_rate": 9.790512462650975e-06, "loss": 0.3715, "step": 1533 }, { "epoch": 0.5500836720057375, "grad_norm": 0.3778180480003357, "learning_rate": 9.789914308923237e-06, "loss": 0.3557, "step": 1534 }, { "epoch": 0.5504422663160411, "grad_norm": 0.42662155628204346, "learning_rate": 9.789315320777522e-06, "loss": 0.3551, "step": 1535 }, { "epoch": 0.5508008606263447, "grad_norm": 0.44229885935783386, "learning_rate": 9.788715498318178e-06, "loss": 0.3713, "step": 1536 }, { "epoch": 0.5511594549366483, "grad_norm": 0.44481879472732544, "learning_rate": 9.788114841649696e-06, "loss": 0.3547, "step": 1537 }, { "epoch": 0.5515180492469519, "grad_norm": 0.4137446880340576, "learning_rate": 9.787513350876712e-06, "loss": 0.3741, "step": 1538 }, { "epoch": 0.5518766435572555, "grad_norm": 0.45066583156585693, "learning_rate": 9.786911026104007e-06, "loss": 0.377, "step": 1539 }, { "epoch": 0.5522352378675591, "grad_norm": 0.44643253087997437, "learning_rate": 9.786307867436508e-06, "loss": 0.3798, "step": 1540 }, { "epoch": 0.5525938321778627, "grad_norm": 0.41589096188545227, "learning_rate": 9.785703874979288e-06, "loss": 0.3562, "step": 1541 }, { "epoch": 0.5529524264881663, "grad_norm": 0.4532563090324402, "learning_rate": 9.785099048837564e-06, "loss": 0.3578, "step": 1542 }, { "epoch": 0.5533110207984699, "grad_norm": 0.4233599007129669, "learning_rate": 9.784493389116699e-06, "loss": 0.36, "step": 1543 }, { "epoch": 0.5536696151087737, "grad_norm": 0.4391278624534607, "learning_rate": 9.7838868959222e-06, "loss": 0.3719, "step": 1544 }, { "epoch": 0.5540282094190773, "grad_norm": 0.391533762216568, "learning_rate": 9.783279569359719e-06, "loss": 0.3744, "step": 1545 }, { "epoch": 0.5543868037293809, "grad_norm": 0.4269729256629944, "learning_rate": 9.782671409535056e-06, "loss": 0.3777, "step": 1546 }, { "epoch": 0.5547453980396845, "grad_norm": 0.4304582476615906, "learning_rate": 9.782062416554154e-06, "loss": 0.3706, "step": 1547 }, { "epoch": 0.5551039923499881, "grad_norm": 0.4227413237094879, "learning_rate": 9.7814525905231e-06, "loss": 0.3594, "step": 1548 }, { "epoch": 0.5554625866602917, "grad_norm": 0.440503865480423, "learning_rate": 9.780841931548131e-06, "loss": 0.3539, "step": 1549 }, { "epoch": 0.5558211809705953, "grad_norm": 0.41182661056518555, "learning_rate": 9.780230439735622e-06, "loss": 0.3665, "step": 1550 }, { "epoch": 0.5561797752808989, "grad_norm": 0.4233601689338684, "learning_rate": 9.779618115192098e-06, "loss": 0.3377, "step": 1551 }, { "epoch": 0.5565383695912025, "grad_norm": 0.38599756360054016, "learning_rate": 9.77900495802423e-06, "loss": 0.3584, "step": 1552 }, { "epoch": 0.5568969639015061, "grad_norm": 0.47455915808677673, "learning_rate": 9.778390968338828e-06, "loss": 0.3663, "step": 1553 }, { "epoch": 0.5572555582118097, "grad_norm": 0.43423205614089966, "learning_rate": 9.777776146242853e-06, "loss": 0.3561, "step": 1554 }, { "epoch": 0.5576141525221133, "grad_norm": 0.4515976011753082, "learning_rate": 9.777160491843409e-06, "loss": 0.3754, "step": 1555 }, { "epoch": 0.5579727468324169, "grad_norm": 0.45920348167419434, "learning_rate": 9.776544005247746e-06, "loss": 0.3532, "step": 1556 }, { "epoch": 0.5583313411427205, "grad_norm": 0.45271265506744385, "learning_rate": 9.775926686563255e-06, "loss": 0.3635, "step": 1557 }, { "epoch": 0.5586899354530241, "grad_norm": 0.44560012221336365, "learning_rate": 9.775308535897478e-06, "loss": 0.3578, "step": 1558 }, { "epoch": 0.5590485297633278, "grad_norm": 0.4736953675746918, "learning_rate": 9.774689553358096e-06, "loss": 0.3773, "step": 1559 }, { "epoch": 0.5594071240736314, "grad_norm": 0.5378643870353699, "learning_rate": 9.774069739052938e-06, "loss": 0.3535, "step": 1560 }, { "epoch": 0.559765718383935, "grad_norm": 0.42757415771484375, "learning_rate": 9.77344909308998e-06, "loss": 0.3684, "step": 1561 }, { "epoch": 0.5601243126942386, "grad_norm": 0.5261363387107849, "learning_rate": 9.77282761557734e-06, "loss": 0.3871, "step": 1562 }, { "epoch": 0.5604829070045422, "grad_norm": 0.4396798014640808, "learning_rate": 9.772205306623279e-06, "loss": 0.3701, "step": 1563 }, { "epoch": 0.5608415013148458, "grad_norm": 0.4707452356815338, "learning_rate": 9.771582166336206e-06, "loss": 0.3869, "step": 1564 }, { "epoch": 0.5612000956251494, "grad_norm": 0.48842090368270874, "learning_rate": 9.770958194824673e-06, "loss": 0.373, "step": 1565 }, { "epoch": 0.561558689935453, "grad_norm": 0.44037434458732605, "learning_rate": 9.770333392197382e-06, "loss": 0.377, "step": 1566 }, { "epoch": 0.5619172842457566, "grad_norm": 0.487152636051178, "learning_rate": 9.769707758563171e-06, "loss": 0.363, "step": 1567 }, { "epoch": 0.5622758785560602, "grad_norm": 0.4871939718723297, "learning_rate": 9.76908129403103e-06, "loss": 0.3403, "step": 1568 }, { "epoch": 0.5626344728663638, "grad_norm": 0.42016398906707764, "learning_rate": 9.76845399871009e-06, "loss": 0.3476, "step": 1569 }, { "epoch": 0.5629930671766674, "grad_norm": 0.4500119686126709, "learning_rate": 9.767825872709628e-06, "loss": 0.3652, "step": 1570 }, { "epoch": 0.563351661486971, "grad_norm": 0.560946524143219, "learning_rate": 9.767196916139066e-06, "loss": 0.3824, "step": 1571 }, { "epoch": 0.5637102557972746, "grad_norm": 0.45912864804267883, "learning_rate": 9.766567129107967e-06, "loss": 0.3806, "step": 1572 }, { "epoch": 0.5640688501075783, "grad_norm": 0.40015482902526855, "learning_rate": 9.765936511726047e-06, "loss": 0.3656, "step": 1573 }, { "epoch": 0.564427444417882, "grad_norm": 0.40471377968788147, "learning_rate": 9.765305064103159e-06, "loss": 0.3702, "step": 1574 }, { "epoch": 0.5647860387281856, "grad_norm": 0.410534143447876, "learning_rate": 9.764672786349301e-06, "loss": 0.3686, "step": 1575 }, { "epoch": 0.5651446330384892, "grad_norm": 0.43315279483795166, "learning_rate": 9.764039678574621e-06, "loss": 0.3589, "step": 1576 }, { "epoch": 0.5655032273487928, "grad_norm": 0.4531698524951935, "learning_rate": 9.763405740889408e-06, "loss": 0.3549, "step": 1577 }, { "epoch": 0.5658618216590964, "grad_norm": 0.3940357565879822, "learning_rate": 9.762770973404094e-06, "loss": 0.37, "step": 1578 }, { "epoch": 0.5662204159694, "grad_norm": 0.4112902581691742, "learning_rate": 9.76213537622926e-06, "loss": 0.359, "step": 1579 }, { "epoch": 0.5665790102797036, "grad_norm": 0.4270588755607605, "learning_rate": 9.761498949475627e-06, "loss": 0.3703, "step": 1580 }, { "epoch": 0.5669376045900072, "grad_norm": 0.4091397523880005, "learning_rate": 9.760861693254061e-06, "loss": 0.3635, "step": 1581 }, { "epoch": 0.5672961989003108, "grad_norm": 0.40518826246261597, "learning_rate": 9.76022360767558e-06, "loss": 0.3567, "step": 1582 }, { "epoch": 0.5676547932106144, "grad_norm": 0.41191980242729187, "learning_rate": 9.759584692851333e-06, "loss": 0.3467, "step": 1583 }, { "epoch": 0.568013387520918, "grad_norm": 0.39843153953552246, "learning_rate": 9.758944948892628e-06, "loss": 0.3645, "step": 1584 }, { "epoch": 0.5683719818312216, "grad_norm": 0.3879227340221405, "learning_rate": 9.758304375910905e-06, "loss": 0.3566, "step": 1585 }, { "epoch": 0.5687305761415252, "grad_norm": 0.449474573135376, "learning_rate": 9.757662974017756e-06, "loss": 0.3769, "step": 1586 }, { "epoch": 0.5690891704518288, "grad_norm": 0.4092496633529663, "learning_rate": 9.757020743324915e-06, "loss": 0.3565, "step": 1587 }, { "epoch": 0.5694477647621324, "grad_norm": 0.3985067903995514, "learning_rate": 9.75637768394426e-06, "loss": 0.3439, "step": 1588 }, { "epoch": 0.569806359072436, "grad_norm": 0.42710080742836, "learning_rate": 9.755733795987817e-06, "loss": 0.3522, "step": 1589 }, { "epoch": 0.5701649533827396, "grad_norm": 0.4606521427631378, "learning_rate": 9.755089079567748e-06, "loss": 0.3732, "step": 1590 }, { "epoch": 0.5705235476930433, "grad_norm": 0.3893314301967621, "learning_rate": 9.754443534796368e-06, "loss": 0.3514, "step": 1591 }, { "epoch": 0.5708821420033469, "grad_norm": 0.4805208742618561, "learning_rate": 9.753797161786132e-06, "loss": 0.3542, "step": 1592 }, { "epoch": 0.5712407363136505, "grad_norm": 0.38363802433013916, "learning_rate": 9.753149960649641e-06, "loss": 0.3569, "step": 1593 }, { "epoch": 0.5715993306239541, "grad_norm": 0.40211209654808044, "learning_rate": 9.75250193149964e-06, "loss": 0.3805, "step": 1594 }, { "epoch": 0.5719579249342577, "grad_norm": 0.4507061839103699, "learning_rate": 9.751853074449016e-06, "loss": 0.364, "step": 1595 }, { "epoch": 0.5723165192445613, "grad_norm": 0.3780527710914612, "learning_rate": 9.7512033896108e-06, "loss": 0.3585, "step": 1596 }, { "epoch": 0.5726751135548649, "grad_norm": 0.3933740258216858, "learning_rate": 9.750552877098177e-06, "loss": 0.3727, "step": 1597 }, { "epoch": 0.5730337078651685, "grad_norm": 0.4082034230232239, "learning_rate": 9.74990153702446e-06, "loss": 0.3342, "step": 1598 }, { "epoch": 0.5733923021754721, "grad_norm": 0.3830973207950592, "learning_rate": 9.749249369503118e-06, "loss": 0.351, "step": 1599 }, { "epoch": 0.5737508964857757, "grad_norm": 0.3740543723106384, "learning_rate": 9.748596374647759e-06, "loss": 0.343, "step": 1600 }, { "epoch": 0.5741094907960793, "grad_norm": 0.45566901564598083, "learning_rate": 9.747942552572138e-06, "loss": 0.3389, "step": 1601 }, { "epoch": 0.574468085106383, "grad_norm": 0.42512375116348267, "learning_rate": 9.747287903390154e-06, "loss": 0.3676, "step": 1602 }, { "epoch": 0.5748266794166866, "grad_norm": 0.42359113693237305, "learning_rate": 9.746632427215846e-06, "loss": 0.3631, "step": 1603 }, { "epoch": 0.5751852737269902, "grad_norm": 0.4145870506763458, "learning_rate": 9.745976124163403e-06, "loss": 0.3707, "step": 1604 }, { "epoch": 0.5755438680372938, "grad_norm": 0.4016396403312683, "learning_rate": 9.745318994347153e-06, "loss": 0.3605, "step": 1605 }, { "epoch": 0.5759024623475975, "grad_norm": 0.4272564947605133, "learning_rate": 9.744661037881568e-06, "loss": 0.3568, "step": 1606 }, { "epoch": 0.5762610566579011, "grad_norm": 0.4079071581363678, "learning_rate": 9.744002254881273e-06, "loss": 0.368, "step": 1607 }, { "epoch": 0.5766196509682047, "grad_norm": 0.3968932330608368, "learning_rate": 9.743342645461024e-06, "loss": 0.3529, "step": 1608 }, { "epoch": 0.5769782452785083, "grad_norm": 0.4127860963344574, "learning_rate": 9.742682209735727e-06, "loss": 0.3658, "step": 1609 }, { "epoch": 0.5773368395888119, "grad_norm": 0.4160705804824829, "learning_rate": 9.742020947820436e-06, "loss": 0.3818, "step": 1610 }, { "epoch": 0.5776954338991155, "grad_norm": 0.38072940707206726, "learning_rate": 9.741358859830343e-06, "loss": 0.36, "step": 1611 }, { "epoch": 0.5780540282094191, "grad_norm": 0.42426249384880066, "learning_rate": 9.740695945880785e-06, "loss": 0.3695, "step": 1612 }, { "epoch": 0.5784126225197227, "grad_norm": 0.40452370047569275, "learning_rate": 9.740032206087244e-06, "loss": 0.3494, "step": 1613 }, { "epoch": 0.5787712168300263, "grad_norm": 0.4254333972930908, "learning_rate": 9.739367640565344e-06, "loss": 0.3765, "step": 1614 }, { "epoch": 0.5791298111403299, "grad_norm": 0.43565577268600464, "learning_rate": 9.738702249430858e-06, "loss": 0.3597, "step": 1615 }, { "epoch": 0.5794884054506335, "grad_norm": 0.42877396941185, "learning_rate": 9.738036032799697e-06, "loss": 0.3664, "step": 1616 }, { "epoch": 0.5798469997609371, "grad_norm": 0.47411295771598816, "learning_rate": 9.737368990787917e-06, "loss": 0.3636, "step": 1617 }, { "epoch": 0.5802055940712407, "grad_norm": 0.48081842064857483, "learning_rate": 9.73670112351172e-06, "loss": 0.3881, "step": 1618 }, { "epoch": 0.5805641883815443, "grad_norm": 0.4710833728313446, "learning_rate": 9.736032431087453e-06, "loss": 0.3653, "step": 1619 }, { "epoch": 0.5809227826918479, "grad_norm": 0.47810012102127075, "learning_rate": 9.735362913631598e-06, "loss": 0.3586, "step": 1620 }, { "epoch": 0.5812813770021515, "grad_norm": 0.36631104350090027, "learning_rate": 9.734692571260793e-06, "loss": 0.3406, "step": 1621 }, { "epoch": 0.5816399713124552, "grad_norm": 0.48525726795196533, "learning_rate": 9.734021404091812e-06, "loss": 0.3689, "step": 1622 }, { "epoch": 0.5819985656227588, "grad_norm": 0.4329427182674408, "learning_rate": 9.733349412241574e-06, "loss": 0.3515, "step": 1623 }, { "epoch": 0.5823571599330624, "grad_norm": 0.42741158604621887, "learning_rate": 9.732676595827141e-06, "loss": 0.3511, "step": 1624 }, { "epoch": 0.582715754243366, "grad_norm": 0.3917195498943329, "learning_rate": 9.732002954965722e-06, "loss": 0.359, "step": 1625 }, { "epoch": 0.5830743485536696, "grad_norm": 0.4640631377696991, "learning_rate": 9.731328489774666e-06, "loss": 0.3454, "step": 1626 }, { "epoch": 0.5834329428639732, "grad_norm": 0.4385175108909607, "learning_rate": 9.730653200371467e-06, "loss": 0.3798, "step": 1627 }, { "epoch": 0.5837915371742768, "grad_norm": 0.41435232758522034, "learning_rate": 9.729977086873763e-06, "loss": 0.3721, "step": 1628 }, { "epoch": 0.5841501314845804, "grad_norm": 0.43610715866088867, "learning_rate": 9.729300149399333e-06, "loss": 0.3497, "step": 1629 }, { "epoch": 0.5845087257948841, "grad_norm": 0.41384658217430115, "learning_rate": 9.728622388066104e-06, "loss": 0.3455, "step": 1630 }, { "epoch": 0.5848673201051877, "grad_norm": 0.4070773124694824, "learning_rate": 9.727943802992143e-06, "loss": 0.3553, "step": 1631 }, { "epoch": 0.5852259144154913, "grad_norm": 0.4373546540737152, "learning_rate": 9.727264394295664e-06, "loss": 0.3537, "step": 1632 }, { "epoch": 0.5855845087257949, "grad_norm": 0.3708569407463074, "learning_rate": 9.726584162095019e-06, "loss": 0.3842, "step": 1633 }, { "epoch": 0.5859431030360985, "grad_norm": 0.40316465497016907, "learning_rate": 9.725903106508707e-06, "loss": 0.3632, "step": 1634 }, { "epoch": 0.5863016973464021, "grad_norm": 0.4084901809692383, "learning_rate": 9.725221227655372e-06, "loss": 0.3535, "step": 1635 }, { "epoch": 0.5866602916567057, "grad_norm": 0.4307299554347992, "learning_rate": 9.724538525653797e-06, "loss": 0.3776, "step": 1636 }, { "epoch": 0.5870188859670094, "grad_norm": 0.44743892550468445, "learning_rate": 9.723855000622912e-06, "loss": 0.3583, "step": 1637 }, { "epoch": 0.587377480277313, "grad_norm": 0.41368862986564636, "learning_rate": 9.72317065268179e-06, "loss": 0.3382, "step": 1638 }, { "epoch": 0.5877360745876166, "grad_norm": 0.39086219668388367, "learning_rate": 9.722485481949646e-06, "loss": 0.3588, "step": 1639 }, { "epoch": 0.5880946688979202, "grad_norm": 0.43353715538978577, "learning_rate": 9.721799488545837e-06, "loss": 0.3679, "step": 1640 }, { "epoch": 0.5884532632082238, "grad_norm": 0.45198482275009155, "learning_rate": 9.721112672589868e-06, "loss": 0.3677, "step": 1641 }, { "epoch": 0.5888118575185274, "grad_norm": 0.36826497316360474, "learning_rate": 9.720425034201384e-06, "loss": 0.3541, "step": 1642 }, { "epoch": 0.589170451828831, "grad_norm": 0.44651785492897034, "learning_rate": 9.719736573500172e-06, "loss": 0.3504, "step": 1643 }, { "epoch": 0.5895290461391346, "grad_norm": 0.4104197323322296, "learning_rate": 9.719047290606164e-06, "loss": 0.3373, "step": 1644 }, { "epoch": 0.5898876404494382, "grad_norm": 0.4214363992214203, "learning_rate": 9.718357185639438e-06, "loss": 0.371, "step": 1645 }, { "epoch": 0.5902462347597418, "grad_norm": 0.4123625159263611, "learning_rate": 9.717666258720209e-06, "loss": 0.3623, "step": 1646 }, { "epoch": 0.5906048290700454, "grad_norm": 0.4359494745731354, "learning_rate": 9.71697450996884e-06, "loss": 0.3405, "step": 1647 }, { "epoch": 0.590963423380349, "grad_norm": 0.4488864541053772, "learning_rate": 9.716281939505836e-06, "loss": 0.3697, "step": 1648 }, { "epoch": 0.5913220176906526, "grad_norm": 0.38457632064819336, "learning_rate": 9.715588547451847e-06, "loss": 0.3493, "step": 1649 }, { "epoch": 0.5916806120009562, "grad_norm": 0.40127596259117126, "learning_rate": 9.71489433392766e-06, "loss": 0.3864, "step": 1650 }, { "epoch": 0.5920392063112598, "grad_norm": 0.4607175290584564, "learning_rate": 9.714199299054213e-06, "loss": 0.3606, "step": 1651 }, { "epoch": 0.5923978006215634, "grad_norm": 0.4267405867576599, "learning_rate": 9.713503442952579e-06, "loss": 0.3569, "step": 1652 }, { "epoch": 0.592756394931867, "grad_norm": 0.4145529270172119, "learning_rate": 9.712806765743982e-06, "loss": 0.3623, "step": 1653 }, { "epoch": 0.5931149892421707, "grad_norm": 0.3833918869495392, "learning_rate": 9.712109267549786e-06, "loss": 0.341, "step": 1654 }, { "epoch": 0.5934735835524743, "grad_norm": 0.5390113592147827, "learning_rate": 9.711410948491493e-06, "loss": 0.3714, "step": 1655 }, { "epoch": 0.5938321778627779, "grad_norm": 0.46366021037101746, "learning_rate": 9.710711808690754e-06, "loss": 0.3944, "step": 1656 }, { "epoch": 0.5941907721730815, "grad_norm": 0.3933699429035187, "learning_rate": 9.710011848269363e-06, "loss": 0.3825, "step": 1657 }, { "epoch": 0.5945493664833851, "grad_norm": 0.5139600038528442, "learning_rate": 9.709311067349255e-06, "loss": 0.3661, "step": 1658 }, { "epoch": 0.5949079607936888, "grad_norm": 0.4193617105484009, "learning_rate": 9.708609466052508e-06, "loss": 0.3725, "step": 1659 }, { "epoch": 0.5952665551039924, "grad_norm": 0.445298969745636, "learning_rate": 9.70790704450134e-06, "loss": 0.3824, "step": 1660 }, { "epoch": 0.595625149414296, "grad_norm": 0.4493594765663147, "learning_rate": 9.707203802818121e-06, "loss": 0.3464, "step": 1661 }, { "epoch": 0.5959837437245996, "grad_norm": 0.44573941826820374, "learning_rate": 9.706499741125353e-06, "loss": 0.3653, "step": 1662 }, { "epoch": 0.5963423380349032, "grad_norm": 0.39806556701660156, "learning_rate": 9.705794859545687e-06, "loss": 0.3621, "step": 1663 }, { "epoch": 0.5967009323452068, "grad_norm": 0.47104981541633606, "learning_rate": 9.705089158201917e-06, "loss": 0.3774, "step": 1664 }, { "epoch": 0.5970595266555104, "grad_norm": 0.4442268908023834, "learning_rate": 9.704382637216976e-06, "loss": 0.3601, "step": 1665 }, { "epoch": 0.597418120965814, "grad_norm": 0.3973439931869507, "learning_rate": 9.703675296713944e-06, "loss": 0.3399, "step": 1666 }, { "epoch": 0.5977767152761176, "grad_norm": 0.4184305965900421, "learning_rate": 9.702967136816041e-06, "loss": 0.3778, "step": 1667 }, { "epoch": 0.5981353095864212, "grad_norm": 0.4092012047767639, "learning_rate": 9.702258157646632e-06, "loss": 0.3793, "step": 1668 }, { "epoch": 0.5984939038967249, "grad_norm": 0.43235233426094055, "learning_rate": 9.701548359329223e-06, "loss": 0.3518, "step": 1669 }, { "epoch": 0.5988524982070285, "grad_norm": 0.38557493686676025, "learning_rate": 9.700837741987462e-06, "loss": 0.3366, "step": 1670 }, { "epoch": 0.5992110925173321, "grad_norm": 0.3737736940383911, "learning_rate": 9.70012630574514e-06, "loss": 0.3588, "step": 1671 }, { "epoch": 0.5995696868276357, "grad_norm": 0.42323461174964905, "learning_rate": 9.699414050726193e-06, "loss": 0.3583, "step": 1672 }, { "epoch": 0.5999282811379393, "grad_norm": 0.39072802662849426, "learning_rate": 9.698700977054698e-06, "loss": 0.366, "step": 1673 }, { "epoch": 0.6002868754482429, "grad_norm": 0.41784918308258057, "learning_rate": 9.697987084854873e-06, "loss": 0.3541, "step": 1674 }, { "epoch": 0.6006454697585465, "grad_norm": 0.37708595395088196, "learning_rate": 9.697272374251084e-06, "loss": 0.3893, "step": 1675 }, { "epoch": 0.6010040640688501, "grad_norm": 0.41813987493515015, "learning_rate": 9.69655684536783e-06, "loss": 0.3788, "step": 1676 }, { "epoch": 0.6013626583791537, "grad_norm": 0.3786706328392029, "learning_rate": 9.695840498329765e-06, "loss": 0.3682, "step": 1677 }, { "epoch": 0.6017212526894573, "grad_norm": 0.37725430727005005, "learning_rate": 9.695123333261673e-06, "loss": 0.3446, "step": 1678 }, { "epoch": 0.6020798469997609, "grad_norm": 0.38900575041770935, "learning_rate": 9.694405350288492e-06, "loss": 0.372, "step": 1679 }, { "epoch": 0.6024384413100645, "grad_norm": 0.38217833638191223, "learning_rate": 9.693686549535291e-06, "loss": 0.364, "step": 1680 }, { "epoch": 0.6027970356203681, "grad_norm": 0.400821328163147, "learning_rate": 9.692966931127292e-06, "loss": 0.3405, "step": 1681 }, { "epoch": 0.6031556299306717, "grad_norm": 0.41634100675582886, "learning_rate": 9.692246495189851e-06, "loss": 0.3599, "step": 1682 }, { "epoch": 0.6035142242409753, "grad_norm": 0.36107856035232544, "learning_rate": 9.691525241848474e-06, "loss": 0.361, "step": 1683 }, { "epoch": 0.603872818551279, "grad_norm": 0.42827799916267395, "learning_rate": 9.690803171228803e-06, "loss": 0.3749, "step": 1684 }, { "epoch": 0.6042314128615826, "grad_norm": 0.38965362310409546, "learning_rate": 9.690080283456627e-06, "loss": 0.3689, "step": 1685 }, { "epoch": 0.6045900071718862, "grad_norm": 0.4108273684978485, "learning_rate": 9.689356578657872e-06, "loss": 0.3501, "step": 1686 }, { "epoch": 0.6049486014821898, "grad_norm": 0.45168277621269226, "learning_rate": 9.688632056958612e-06, "loss": 0.3592, "step": 1687 }, { "epoch": 0.6053071957924935, "grad_norm": 0.3805676996707916, "learning_rate": 9.687906718485061e-06, "loss": 0.3732, "step": 1688 }, { "epoch": 0.6056657901027971, "grad_norm": 0.3883194923400879, "learning_rate": 9.687180563363575e-06, "loss": 0.3409, "step": 1689 }, { "epoch": 0.6060243844131007, "grad_norm": 0.4982130527496338, "learning_rate": 9.68645359172065e-06, "loss": 0.3611, "step": 1690 }, { "epoch": 0.6063829787234043, "grad_norm": 0.4118868112564087, "learning_rate": 9.685725803682932e-06, "loss": 0.3592, "step": 1691 }, { "epoch": 0.6067415730337079, "grad_norm": 0.3774496912956238, "learning_rate": 9.6849971993772e-06, "loss": 0.3473, "step": 1692 }, { "epoch": 0.6071001673440115, "grad_norm": 0.4063337445259094, "learning_rate": 9.684267778930378e-06, "loss": 0.3654, "step": 1693 }, { "epoch": 0.6074587616543151, "grad_norm": 0.40799474716186523, "learning_rate": 9.683537542469538e-06, "loss": 0.3537, "step": 1694 }, { "epoch": 0.6078173559646187, "grad_norm": 0.3978276252746582, "learning_rate": 9.682806490121886e-06, "loss": 0.3685, "step": 1695 }, { "epoch": 0.6081759502749223, "grad_norm": 0.4038204848766327, "learning_rate": 9.682074622014773e-06, "loss": 0.3728, "step": 1696 }, { "epoch": 0.6085345445852259, "grad_norm": 0.4081498682498932, "learning_rate": 9.681341938275694e-06, "loss": 0.3598, "step": 1697 }, { "epoch": 0.6088931388955295, "grad_norm": 0.40014591813087463, "learning_rate": 9.680608439032288e-06, "loss": 0.3779, "step": 1698 }, { "epoch": 0.6092517332058331, "grad_norm": 0.362062007188797, "learning_rate": 9.679874124412327e-06, "loss": 0.3485, "step": 1699 }, { "epoch": 0.6096103275161368, "grad_norm": 0.39876437187194824, "learning_rate": 9.679138994543734e-06, "loss": 0.3689, "step": 1700 }, { "epoch": 0.6099689218264404, "grad_norm": 0.3957021236419678, "learning_rate": 9.678403049554572e-06, "loss": 0.3696, "step": 1701 }, { "epoch": 0.610327516136744, "grad_norm": 0.4024095833301544, "learning_rate": 9.67766628957304e-06, "loss": 0.3866, "step": 1702 }, { "epoch": 0.6106861104470476, "grad_norm": 0.36445945501327515, "learning_rate": 9.676928714727492e-06, "loss": 0.3712, "step": 1703 }, { "epoch": 0.6110447047573512, "grad_norm": 0.40441814064979553, "learning_rate": 9.676190325146409e-06, "loss": 0.3625, "step": 1704 }, { "epoch": 0.6114032990676548, "grad_norm": 0.44511109590530396, "learning_rate": 9.67545112095842e-06, "loss": 0.3426, "step": 1705 }, { "epoch": 0.6117618933779584, "grad_norm": 0.42196500301361084, "learning_rate": 9.674711102292304e-06, "loss": 0.3516, "step": 1706 }, { "epoch": 0.612120487688262, "grad_norm": 0.41235172748565674, "learning_rate": 9.673970269276968e-06, "loss": 0.3559, "step": 1707 }, { "epoch": 0.6124790819985656, "grad_norm": 0.4762116074562073, "learning_rate": 9.67322862204147e-06, "loss": 0.3716, "step": 1708 }, { "epoch": 0.6128376763088692, "grad_norm": 0.39596259593963623, "learning_rate": 9.672486160715006e-06, "loss": 0.3725, "step": 1709 }, { "epoch": 0.6131962706191728, "grad_norm": 0.47436773777008057, "learning_rate": 9.671742885426917e-06, "loss": 0.41, "step": 1710 }, { "epoch": 0.6135548649294764, "grad_norm": 0.4436465799808502, "learning_rate": 9.670998796306682e-06, "loss": 0.3743, "step": 1711 }, { "epoch": 0.61391345923978, "grad_norm": 0.43738535046577454, "learning_rate": 9.670253893483925e-06, "loss": 0.3543, "step": 1712 }, { "epoch": 0.6142720535500836, "grad_norm": 0.4426473081111908, "learning_rate": 9.669508177088409e-06, "loss": 0.365, "step": 1713 }, { "epoch": 0.6146306478603872, "grad_norm": 0.4068325459957123, "learning_rate": 9.668761647250042e-06, "loss": 0.3592, "step": 1714 }, { "epoch": 0.6149892421706908, "grad_norm": 0.3982062041759491, "learning_rate": 9.66801430409887e-06, "loss": 0.3549, "step": 1715 }, { "epoch": 0.6153478364809944, "grad_norm": 0.41091907024383545, "learning_rate": 9.667266147765082e-06, "loss": 0.3621, "step": 1716 }, { "epoch": 0.6157064307912982, "grad_norm": 0.3539070188999176, "learning_rate": 9.66651717837901e-06, "loss": 0.3525, "step": 1717 }, { "epoch": 0.6160650251016018, "grad_norm": 0.38323846459388733, "learning_rate": 9.66576739607113e-06, "loss": 0.357, "step": 1718 }, { "epoch": 0.6164236194119054, "grad_norm": 0.42919227480888367, "learning_rate": 9.665016800972053e-06, "loss": 0.3714, "step": 1719 }, { "epoch": 0.616782213722209, "grad_norm": 0.4062871038913727, "learning_rate": 9.664265393212536e-06, "loss": 0.3582, "step": 1720 }, { "epoch": 0.6171408080325126, "grad_norm": 0.3598921298980713, "learning_rate": 9.663513172923472e-06, "loss": 0.3609, "step": 1721 }, { "epoch": 0.6174994023428162, "grad_norm": 0.3709285855293274, "learning_rate": 9.662760140235909e-06, "loss": 0.3507, "step": 1722 }, { "epoch": 0.6178579966531198, "grad_norm": 0.4612500071525574, "learning_rate": 9.662006295281021e-06, "loss": 0.3569, "step": 1723 }, { "epoch": 0.6182165909634234, "grad_norm": 0.3969232439994812, "learning_rate": 9.661251638190133e-06, "loss": 0.3761, "step": 1724 }, { "epoch": 0.618575185273727, "grad_norm": 0.40773141384124756, "learning_rate": 9.660496169094706e-06, "loss": 0.36, "step": 1725 }, { "epoch": 0.6189337795840306, "grad_norm": 0.4412006437778473, "learning_rate": 9.659739888126348e-06, "loss": 0.3512, "step": 1726 }, { "epoch": 0.6192923738943342, "grad_norm": 0.38575711846351624, "learning_rate": 9.658982795416803e-06, "loss": 0.3553, "step": 1727 }, { "epoch": 0.6196509682046378, "grad_norm": 0.37772902846336365, "learning_rate": 9.658224891097962e-06, "loss": 0.3612, "step": 1728 }, { "epoch": 0.6200095625149414, "grad_norm": 0.3817663788795471, "learning_rate": 9.657466175301851e-06, "loss": 0.3365, "step": 1729 }, { "epoch": 0.620368156825245, "grad_norm": 0.4262857437133789, "learning_rate": 9.656706648160643e-06, "loss": 0.3724, "step": 1730 }, { "epoch": 0.6207267511355486, "grad_norm": 0.4063206613063812, "learning_rate": 9.655946309806647e-06, "loss": 0.3374, "step": 1731 }, { "epoch": 0.6210853454458523, "grad_norm": 0.36222752928733826, "learning_rate": 9.65518516037232e-06, "loss": 0.3614, "step": 1732 }, { "epoch": 0.6214439397561559, "grad_norm": 0.4122057557106018, "learning_rate": 9.654423199990255e-06, "loss": 0.3588, "step": 1733 }, { "epoch": 0.6218025340664595, "grad_norm": 0.45107272267341614, "learning_rate": 9.653660428793188e-06, "loss": 0.3868, "step": 1734 }, { "epoch": 0.6221611283767631, "grad_norm": 0.40952780842781067, "learning_rate": 9.652896846913994e-06, "loss": 0.3485, "step": 1735 }, { "epoch": 0.6225197226870667, "grad_norm": 0.4122322201728821, "learning_rate": 9.652132454485697e-06, "loss": 0.3519, "step": 1736 }, { "epoch": 0.6228783169973703, "grad_norm": 0.43018922209739685, "learning_rate": 9.65136725164145e-06, "loss": 0.3774, "step": 1737 }, { "epoch": 0.6232369113076739, "grad_norm": 0.4789820909500122, "learning_rate": 9.650601238514556e-06, "loss": 0.3708, "step": 1738 }, { "epoch": 0.6235955056179775, "grad_norm": 0.41256779432296753, "learning_rate": 9.64983441523846e-06, "loss": 0.3467, "step": 1739 }, { "epoch": 0.6239540999282811, "grad_norm": 0.45431458950042725, "learning_rate": 9.649066781946742e-06, "loss": 0.3529, "step": 1740 }, { "epoch": 0.6243126942385847, "grad_norm": 0.367472380399704, "learning_rate": 9.648298338773124e-06, "loss": 0.3755, "step": 1741 }, { "epoch": 0.6246712885488883, "grad_norm": 0.4552571773529053, "learning_rate": 9.647529085851475e-06, "loss": 0.376, "step": 1742 }, { "epoch": 0.6250298828591919, "grad_norm": 0.417837917804718, "learning_rate": 9.6467590233158e-06, "loss": 0.3678, "step": 1743 }, { "epoch": 0.6253884771694955, "grad_norm": 0.39516621828079224, "learning_rate": 9.645988151300248e-06, "loss": 0.3566, "step": 1744 }, { "epoch": 0.6257470714797991, "grad_norm": 0.38553670048713684, "learning_rate": 9.645216469939103e-06, "loss": 0.337, "step": 1745 }, { "epoch": 0.6261056657901028, "grad_norm": 0.39378055930137634, "learning_rate": 9.644443979366798e-06, "loss": 0.3824, "step": 1746 }, { "epoch": 0.6264642601004065, "grad_norm": 0.42129096388816833, "learning_rate": 9.6436706797179e-06, "loss": 0.3705, "step": 1747 }, { "epoch": 0.6268228544107101, "grad_norm": 0.41485217213630676, "learning_rate": 9.642896571127124e-06, "loss": 0.3639, "step": 1748 }, { "epoch": 0.6271814487210137, "grad_norm": 0.38020578026771545, "learning_rate": 9.642121653729322e-06, "loss": 0.3733, "step": 1749 }, { "epoch": 0.6275400430313173, "grad_norm": 0.4671955704689026, "learning_rate": 9.641345927659484e-06, "loss": 0.3505, "step": 1750 }, { "epoch": 0.6278986373416209, "grad_norm": 0.4148557782173157, "learning_rate": 9.640569393052746e-06, "loss": 0.3625, "step": 1751 }, { "epoch": 0.6282572316519245, "grad_norm": 0.413845419883728, "learning_rate": 9.63979205004438e-06, "loss": 0.3608, "step": 1752 }, { "epoch": 0.6286158259622281, "grad_norm": 0.4633233845233917, "learning_rate": 9.639013898769807e-06, "loss": 0.3691, "step": 1753 }, { "epoch": 0.6289744202725317, "grad_norm": 0.3721400201320648, "learning_rate": 9.638234939364578e-06, "loss": 0.3734, "step": 1754 }, { "epoch": 0.6293330145828353, "grad_norm": 0.4501839578151703, "learning_rate": 9.63745517196439e-06, "loss": 0.3872, "step": 1755 }, { "epoch": 0.6296916088931389, "grad_norm": 0.482159823179245, "learning_rate": 9.636674596705088e-06, "loss": 0.3683, "step": 1756 }, { "epoch": 0.6300502032034425, "grad_norm": 0.3639512062072754, "learning_rate": 9.635893213722643e-06, "loss": 0.364, "step": 1757 }, { "epoch": 0.6304087975137461, "grad_norm": 0.38837817311286926, "learning_rate": 9.635111023153177e-06, "loss": 0.3729, "step": 1758 }, { "epoch": 0.6307673918240497, "grad_norm": 0.4676460027694702, "learning_rate": 9.63432802513295e-06, "loss": 0.3749, "step": 1759 }, { "epoch": 0.6311259861343533, "grad_norm": 0.4462924301624298, "learning_rate": 9.633544219798364e-06, "loss": 0.345, "step": 1760 }, { "epoch": 0.6314845804446569, "grad_norm": 0.35154569149017334, "learning_rate": 9.632759607285958e-06, "loss": 0.3584, "step": 1761 }, { "epoch": 0.6318431747549605, "grad_norm": 0.4038558900356293, "learning_rate": 9.631974187732416e-06, "loss": 0.3885, "step": 1762 }, { "epoch": 0.6322017690652642, "grad_norm": 0.5036410093307495, "learning_rate": 9.631187961274559e-06, "loss": 0.3599, "step": 1763 }, { "epoch": 0.6325603633755678, "grad_norm": 0.4095449447631836, "learning_rate": 9.63040092804935e-06, "loss": 0.3599, "step": 1764 }, { "epoch": 0.6329189576858714, "grad_norm": 0.38213473558425903, "learning_rate": 9.629613088193894e-06, "loss": 0.3668, "step": 1765 }, { "epoch": 0.633277551996175, "grad_norm": 0.4511027932167053, "learning_rate": 9.628824441845436e-06, "loss": 0.3482, "step": 1766 }, { "epoch": 0.6336361463064786, "grad_norm": 0.48917898535728455, "learning_rate": 9.628034989141357e-06, "loss": 0.3787, "step": 1767 }, { "epoch": 0.6339947406167822, "grad_norm": 0.3832378089427948, "learning_rate": 9.627244730219184e-06, "loss": 0.3527, "step": 1768 }, { "epoch": 0.6343533349270858, "grad_norm": 0.3873436152935028, "learning_rate": 9.626453665216585e-06, "loss": 0.3601, "step": 1769 }, { "epoch": 0.6347119292373894, "grad_norm": 0.38192108273506165, "learning_rate": 9.625661794271363e-06, "loss": 0.3557, "step": 1770 }, { "epoch": 0.635070523547693, "grad_norm": 0.41364800930023193, "learning_rate": 9.624869117521464e-06, "loss": 0.3656, "step": 1771 }, { "epoch": 0.6354291178579966, "grad_norm": 0.4040485918521881, "learning_rate": 9.624075635104977e-06, "loss": 0.3701, "step": 1772 }, { "epoch": 0.6357877121683002, "grad_norm": 0.34734007716178894, "learning_rate": 9.623281347160129e-06, "loss": 0.3472, "step": 1773 }, { "epoch": 0.6361463064786038, "grad_norm": 0.4005207419395447, "learning_rate": 9.622486253825284e-06, "loss": 0.3458, "step": 1774 }, { "epoch": 0.6365049007889075, "grad_norm": 0.4005671441555023, "learning_rate": 9.621690355238954e-06, "loss": 0.3396, "step": 1775 }, { "epoch": 0.6368634950992111, "grad_norm": 0.38694900274276733, "learning_rate": 9.620893651539785e-06, "loss": 0.3482, "step": 1776 }, { "epoch": 0.6372220894095147, "grad_norm": 0.41891053318977356, "learning_rate": 9.620096142866566e-06, "loss": 0.3425, "step": 1777 }, { "epoch": 0.6375806837198184, "grad_norm": 0.41846203804016113, "learning_rate": 9.619297829358222e-06, "loss": 0.358, "step": 1778 }, { "epoch": 0.637939278030122, "grad_norm": 0.42379170656204224, "learning_rate": 9.618498711153827e-06, "loss": 0.3531, "step": 1779 }, { "epoch": 0.6382978723404256, "grad_norm": 0.3656086325645447, "learning_rate": 9.61769878839259e-06, "loss": 0.3651, "step": 1780 }, { "epoch": 0.6386564666507292, "grad_norm": 0.35463637113571167, "learning_rate": 9.616898061213854e-06, "loss": 0.354, "step": 1781 }, { "epoch": 0.6390150609610328, "grad_norm": 0.37269896268844604, "learning_rate": 9.616096529757113e-06, "loss": 0.3747, "step": 1782 }, { "epoch": 0.6393736552713364, "grad_norm": 0.4060685932636261, "learning_rate": 9.615294194161994e-06, "loss": 0.3499, "step": 1783 }, { "epoch": 0.63973224958164, "grad_norm": 0.4087463319301605, "learning_rate": 9.614491054568268e-06, "loss": 0.3669, "step": 1784 }, { "epoch": 0.6400908438919436, "grad_norm": 0.38782989978790283, "learning_rate": 9.613687111115844e-06, "loss": 0.3713, "step": 1785 }, { "epoch": 0.6404494382022472, "grad_norm": 0.39965447783470154, "learning_rate": 9.612882363944772e-06, "loss": 0.3696, "step": 1786 }, { "epoch": 0.6408080325125508, "grad_norm": 0.3696741461753845, "learning_rate": 9.61207681319524e-06, "loss": 0.3487, "step": 1787 }, { "epoch": 0.6411666268228544, "grad_norm": 0.42639634013175964, "learning_rate": 9.611270459007576e-06, "loss": 0.3725, "step": 1788 }, { "epoch": 0.641525221133158, "grad_norm": 0.38872402906417847, "learning_rate": 9.610463301522255e-06, "loss": 0.3645, "step": 1789 }, { "epoch": 0.6418838154434616, "grad_norm": 0.4196288585662842, "learning_rate": 9.60965534087988e-06, "loss": 0.3826, "step": 1790 }, { "epoch": 0.6422424097537652, "grad_norm": 0.37170854210853577, "learning_rate": 9.608846577221204e-06, "loss": 0.3784, "step": 1791 }, { "epoch": 0.6426010040640688, "grad_norm": 0.3202935755252838, "learning_rate": 9.608037010687116e-06, "loss": 0.3535, "step": 1792 }, { "epoch": 0.6429595983743724, "grad_norm": 0.40428611636161804, "learning_rate": 9.607226641418645e-06, "loss": 0.3701, "step": 1793 }, { "epoch": 0.643318192684676, "grad_norm": 0.40514621138572693, "learning_rate": 9.60641546955696e-06, "loss": 0.3765, "step": 1794 }, { "epoch": 0.6436767869949797, "grad_norm": 0.3871460556983948, "learning_rate": 9.605603495243366e-06, "loss": 0.3627, "step": 1795 }, { "epoch": 0.6440353813052833, "grad_norm": 0.4206547141075134, "learning_rate": 9.604790718619316e-06, "loss": 0.3492, "step": 1796 }, { "epoch": 0.6443939756155869, "grad_norm": 0.41868093609809875, "learning_rate": 9.603977139826397e-06, "loss": 0.3684, "step": 1797 }, { "epoch": 0.6447525699258905, "grad_norm": 0.39804837107658386, "learning_rate": 9.603162759006335e-06, "loss": 0.3426, "step": 1798 }, { "epoch": 0.6451111642361941, "grad_norm": 0.39753562211990356, "learning_rate": 9.602347576300998e-06, "loss": 0.3442, "step": 1799 }, { "epoch": 0.6454697585464977, "grad_norm": 0.4119725227355957, "learning_rate": 9.601531591852397e-06, "loss": 0.3826, "step": 1800 }, { "epoch": 0.6458283528568013, "grad_norm": 0.4301588833332062, "learning_rate": 9.600714805802676e-06, "loss": 0.3543, "step": 1801 }, { "epoch": 0.6461869471671049, "grad_norm": 0.4058964252471924, "learning_rate": 9.599897218294122e-06, "loss": 0.3593, "step": 1802 }, { "epoch": 0.6465455414774085, "grad_norm": 0.3984706997871399, "learning_rate": 9.599078829469163e-06, "loss": 0.358, "step": 1803 }, { "epoch": 0.6469041357877122, "grad_norm": 0.4158039093017578, "learning_rate": 9.59825963947036e-06, "loss": 0.3743, "step": 1804 }, { "epoch": 0.6472627300980158, "grad_norm": 0.39465850591659546, "learning_rate": 9.597439648440424e-06, "loss": 0.3634, "step": 1805 }, { "epoch": 0.6476213244083194, "grad_norm": 0.41195863485336304, "learning_rate": 9.596618856522196e-06, "loss": 0.3669, "step": 1806 }, { "epoch": 0.647979918718623, "grad_norm": 0.37315675616264343, "learning_rate": 9.595797263858663e-06, "loss": 0.3717, "step": 1807 }, { "epoch": 0.6483385130289266, "grad_norm": 0.37598463892936707, "learning_rate": 9.59497487059295e-06, "loss": 0.366, "step": 1808 }, { "epoch": 0.6486971073392303, "grad_norm": 0.40473830699920654, "learning_rate": 9.594151676868317e-06, "loss": 0.3588, "step": 1809 }, { "epoch": 0.6490557016495339, "grad_norm": 0.37061285972595215, "learning_rate": 9.593327682828169e-06, "loss": 0.3395, "step": 1810 }, { "epoch": 0.6494142959598375, "grad_norm": 0.4126228988170624, "learning_rate": 9.592502888616048e-06, "loss": 0.3501, "step": 1811 }, { "epoch": 0.6497728902701411, "grad_norm": 0.3906841278076172, "learning_rate": 9.591677294375637e-06, "loss": 0.3824, "step": 1812 }, { "epoch": 0.6501314845804447, "grad_norm": 0.3956695795059204, "learning_rate": 9.590850900250755e-06, "loss": 0.3894, "step": 1813 }, { "epoch": 0.6504900788907483, "grad_norm": 0.3733072876930237, "learning_rate": 9.590023706385363e-06, "loss": 0.3546, "step": 1814 }, { "epoch": 0.6508486732010519, "grad_norm": 0.39578962326049805, "learning_rate": 9.58919571292356e-06, "loss": 0.3727, "step": 1815 }, { "epoch": 0.6512072675113555, "grad_norm": 0.38478192687034607, "learning_rate": 9.588366920009589e-06, "loss": 0.3823, "step": 1816 }, { "epoch": 0.6515658618216591, "grad_norm": 0.35758644342422485, "learning_rate": 9.58753732778782e-06, "loss": 0.3552, "step": 1817 }, { "epoch": 0.6519244561319627, "grad_norm": 0.40094149112701416, "learning_rate": 9.586706936402781e-06, "loss": 0.3811, "step": 1818 }, { "epoch": 0.6522830504422663, "grad_norm": 0.3678371012210846, "learning_rate": 9.585875745999122e-06, "loss": 0.3418, "step": 1819 }, { "epoch": 0.6526416447525699, "grad_norm": 0.41707396507263184, "learning_rate": 9.58504375672164e-06, "loss": 0.3552, "step": 1820 }, { "epoch": 0.6530002390628735, "grad_norm": 0.41297441720962524, "learning_rate": 9.58421096871527e-06, "loss": 0.3348, "step": 1821 }, { "epoch": 0.6533588333731771, "grad_norm": 0.3346267640590668, "learning_rate": 9.58337738212509e-06, "loss": 0.3471, "step": 1822 }, { "epoch": 0.6537174276834807, "grad_norm": 0.3709922134876251, "learning_rate": 9.582542997096308e-06, "loss": 0.3389, "step": 1823 }, { "epoch": 0.6540760219937843, "grad_norm": 0.3876500129699707, "learning_rate": 9.581707813774279e-06, "loss": 0.3664, "step": 1824 }, { "epoch": 0.654434616304088, "grad_norm": 0.43135011196136475, "learning_rate": 9.580871832304495e-06, "loss": 0.3893, "step": 1825 }, { "epoch": 0.6547932106143916, "grad_norm": 0.3679516911506653, "learning_rate": 9.580035052832585e-06, "loss": 0.3519, "step": 1826 }, { "epoch": 0.6551518049246952, "grad_norm": 0.36492493748664856, "learning_rate": 9.57919747550432e-06, "loss": 0.3512, "step": 1827 }, { "epoch": 0.6555103992349988, "grad_norm": 0.4053068459033966, "learning_rate": 9.578359100465608e-06, "loss": 0.3444, "step": 1828 }, { "epoch": 0.6558689935453024, "grad_norm": 0.36578094959259033, "learning_rate": 9.577519927862496e-06, "loss": 0.3453, "step": 1829 }, { "epoch": 0.656227587855606, "grad_norm": 0.3741416931152344, "learning_rate": 9.576679957841173e-06, "loss": 0.3422, "step": 1830 }, { "epoch": 0.6565861821659096, "grad_norm": 0.41673269867897034, "learning_rate": 9.57583919054796e-06, "loss": 0.3599, "step": 1831 }, { "epoch": 0.6569447764762133, "grad_norm": 0.3809753656387329, "learning_rate": 9.574997626129325e-06, "loss": 0.3561, "step": 1832 }, { "epoch": 0.6573033707865169, "grad_norm": 0.40083205699920654, "learning_rate": 9.57415526473187e-06, "loss": 0.3756, "step": 1833 }, { "epoch": 0.6576619650968205, "grad_norm": 0.3797861337661743, "learning_rate": 9.573312106502338e-06, "loss": 0.3595, "step": 1834 }, { "epoch": 0.6580205594071241, "grad_norm": 0.392580509185791, "learning_rate": 9.572468151587606e-06, "loss": 0.3685, "step": 1835 }, { "epoch": 0.6583791537174277, "grad_norm": 0.4202907383441925, "learning_rate": 9.571623400134698e-06, "loss": 0.3649, "step": 1836 }, { "epoch": 0.6587377480277313, "grad_norm": 0.37294214963912964, "learning_rate": 9.570777852290769e-06, "loss": 0.3366, "step": 1837 }, { "epoch": 0.6590963423380349, "grad_norm": 0.4056069254875183, "learning_rate": 9.569931508203119e-06, "loss": 0.3664, "step": 1838 }, { "epoch": 0.6594549366483385, "grad_norm": 0.37191838026046753, "learning_rate": 9.569084368019182e-06, "loss": 0.3719, "step": 1839 }, { "epoch": 0.6598135309586421, "grad_norm": 0.41509804129600525, "learning_rate": 9.568236431886532e-06, "loss": 0.3565, "step": 1840 }, { "epoch": 0.6601721252689458, "grad_norm": 0.39488843083381653, "learning_rate": 9.567387699952886e-06, "loss": 0.3567, "step": 1841 }, { "epoch": 0.6605307195792494, "grad_norm": 0.4043178856372833, "learning_rate": 9.566538172366092e-06, "loss": 0.3794, "step": 1842 }, { "epoch": 0.660889313889553, "grad_norm": 0.3717840909957886, "learning_rate": 9.565687849274139e-06, "loss": 0.3698, "step": 1843 }, { "epoch": 0.6612479081998566, "grad_norm": 0.37067562341690063, "learning_rate": 9.564836730825157e-06, "loss": 0.3672, "step": 1844 }, { "epoch": 0.6616065025101602, "grad_norm": 0.39635220170021057, "learning_rate": 9.563984817167417e-06, "loss": 0.3718, "step": 1845 }, { "epoch": 0.6619650968204638, "grad_norm": 0.41219455003738403, "learning_rate": 9.563132108449322e-06, "loss": 0.3712, "step": 1846 }, { "epoch": 0.6623236911307674, "grad_norm": 0.3787180185317993, "learning_rate": 9.562278604819417e-06, "loss": 0.3517, "step": 1847 }, { "epoch": 0.662682285441071, "grad_norm": 0.4235913157463074, "learning_rate": 9.561424306426385e-06, "loss": 0.3491, "step": 1848 }, { "epoch": 0.6630408797513746, "grad_norm": 0.3760896325111389, "learning_rate": 9.560569213419046e-06, "loss": 0.3425, "step": 1849 }, { "epoch": 0.6633994740616782, "grad_norm": 0.4632433354854584, "learning_rate": 9.559713325946362e-06, "loss": 0.3632, "step": 1850 }, { "epoch": 0.6637580683719818, "grad_norm": 0.4013552963733673, "learning_rate": 9.558856644157432e-06, "loss": 0.3658, "step": 1851 }, { "epoch": 0.6641166626822854, "grad_norm": 0.41313841938972473, "learning_rate": 9.55799916820149e-06, "loss": 0.3582, "step": 1852 }, { "epoch": 0.664475256992589, "grad_norm": 0.48834723234176636, "learning_rate": 9.557140898227913e-06, "loss": 0.346, "step": 1853 }, { "epoch": 0.6648338513028926, "grad_norm": 0.41744160652160645, "learning_rate": 9.556281834386212e-06, "loss": 0.3824, "step": 1854 }, { "epoch": 0.6651924456131962, "grad_norm": 0.44249096512794495, "learning_rate": 9.555421976826041e-06, "loss": 0.3634, "step": 1855 }, { "epoch": 0.6655510399234998, "grad_norm": 0.46319201588630676, "learning_rate": 9.554561325697189e-06, "loss": 0.3354, "step": 1856 }, { "epoch": 0.6659096342338034, "grad_norm": 0.4305669367313385, "learning_rate": 9.553699881149583e-06, "loss": 0.3596, "step": 1857 }, { "epoch": 0.666268228544107, "grad_norm": 0.4101371169090271, "learning_rate": 9.552837643333291e-06, "loss": 0.3585, "step": 1858 }, { "epoch": 0.6666268228544107, "grad_norm": 0.4292386472225189, "learning_rate": 9.551974612398517e-06, "loss": 0.353, "step": 1859 }, { "epoch": 0.6669854171647143, "grad_norm": 0.46649712324142456, "learning_rate": 9.551110788495603e-06, "loss": 0.3639, "step": 1860 }, { "epoch": 0.667344011475018, "grad_norm": 0.4122944474220276, "learning_rate": 9.550246171775032e-06, "loss": 0.3535, "step": 1861 }, { "epoch": 0.6677026057853216, "grad_norm": 0.42105230689048767, "learning_rate": 9.54938076238742e-06, "loss": 0.3924, "step": 1862 }, { "epoch": 0.6680612000956252, "grad_norm": 0.42980125546455383, "learning_rate": 9.548514560483526e-06, "loss": 0.3521, "step": 1863 }, { "epoch": 0.6684197944059288, "grad_norm": 0.38371995091438293, "learning_rate": 9.547647566214242e-06, "loss": 0.373, "step": 1864 }, { "epoch": 0.6687783887162324, "grad_norm": 0.38646170496940613, "learning_rate": 9.546779779730607e-06, "loss": 0.3707, "step": 1865 }, { "epoch": 0.669136983026536, "grad_norm": 0.4057123363018036, "learning_rate": 9.545911201183786e-06, "loss": 0.3675, "step": 1866 }, { "epoch": 0.6694955773368396, "grad_norm": 0.3848406970500946, "learning_rate": 9.545041830725091e-06, "loss": 0.3531, "step": 1867 }, { "epoch": 0.6698541716471432, "grad_norm": 0.41321423649787903, "learning_rate": 9.544171668505968e-06, "loss": 0.3656, "step": 1868 }, { "epoch": 0.6702127659574468, "grad_norm": 0.3715020716190338, "learning_rate": 9.543300714678004e-06, "loss": 0.3502, "step": 1869 }, { "epoch": 0.6705713602677504, "grad_norm": 0.3930765986442566, "learning_rate": 9.54242896939292e-06, "loss": 0.3518, "step": 1870 }, { "epoch": 0.670929954578054, "grad_norm": 0.42161867022514343, "learning_rate": 9.541556432802577e-06, "loss": 0.367, "step": 1871 }, { "epoch": 0.6712885488883577, "grad_norm": 0.3502013087272644, "learning_rate": 9.540683105058974e-06, "loss": 0.3551, "step": 1872 }, { "epoch": 0.6716471431986613, "grad_norm": 0.38261154294013977, "learning_rate": 9.539808986314247e-06, "loss": 0.355, "step": 1873 }, { "epoch": 0.6720057375089649, "grad_norm": 0.3921460807323456, "learning_rate": 9.53893407672067e-06, "loss": 0.367, "step": 1874 }, { "epoch": 0.6723643318192685, "grad_norm": 0.3974815905094147, "learning_rate": 9.538058376430657e-06, "loss": 0.3465, "step": 1875 }, { "epoch": 0.6727229261295721, "grad_norm": 0.3679879903793335, "learning_rate": 9.537181885596755e-06, "loss": 0.3732, "step": 1876 }, { "epoch": 0.6730815204398757, "grad_norm": 0.3723456859588623, "learning_rate": 9.536304604371653e-06, "loss": 0.3476, "step": 1877 }, { "epoch": 0.6734401147501793, "grad_norm": 0.41995689272880554, "learning_rate": 9.535426532908175e-06, "loss": 0.391, "step": 1878 }, { "epoch": 0.6737987090604829, "grad_norm": 0.3778621256351471, "learning_rate": 9.534547671359285e-06, "loss": 0.3453, "step": 1879 }, { "epoch": 0.6741573033707865, "grad_norm": 0.4261545240879059, "learning_rate": 9.533668019878083e-06, "loss": 0.3544, "step": 1880 }, { "epoch": 0.6745158976810901, "grad_norm": 0.3552089333534241, "learning_rate": 9.532787578617806e-06, "loss": 0.3517, "step": 1881 }, { "epoch": 0.6748744919913937, "grad_norm": 0.3975965678691864, "learning_rate": 9.531906347731832e-06, "loss": 0.3567, "step": 1882 }, { "epoch": 0.6752330863016973, "grad_norm": 0.4767821133136749, "learning_rate": 9.531024327373672e-06, "loss": 0.3682, "step": 1883 }, { "epoch": 0.6755916806120009, "grad_norm": 0.3638446033000946, "learning_rate": 9.530141517696975e-06, "loss": 0.3545, "step": 1884 }, { "epoch": 0.6759502749223045, "grad_norm": 0.3827984035015106, "learning_rate": 9.529257918855535e-06, "loss": 0.3591, "step": 1885 }, { "epoch": 0.6763088692326081, "grad_norm": 0.39635545015335083, "learning_rate": 9.528373531003274e-06, "loss": 0.3608, "step": 1886 }, { "epoch": 0.6766674635429117, "grad_norm": 0.388190358877182, "learning_rate": 9.527488354294255e-06, "loss": 0.3648, "step": 1887 }, { "epoch": 0.6770260578532153, "grad_norm": 0.4023246467113495, "learning_rate": 9.526602388882677e-06, "loss": 0.3565, "step": 1888 }, { "epoch": 0.677384652163519, "grad_norm": 0.38716360926628113, "learning_rate": 9.52571563492288e-06, "loss": 0.3525, "step": 1889 }, { "epoch": 0.6777432464738227, "grad_norm": 0.3946819305419922, "learning_rate": 9.52482809256934e-06, "loss": 0.3712, "step": 1890 }, { "epoch": 0.6781018407841263, "grad_norm": 0.46437007188796997, "learning_rate": 9.523939761976669e-06, "loss": 0.3741, "step": 1891 }, { "epoch": 0.6784604350944299, "grad_norm": 0.4002824127674103, "learning_rate": 9.523050643299616e-06, "loss": 0.3628, "step": 1892 }, { "epoch": 0.6788190294047335, "grad_norm": 0.45203015208244324, "learning_rate": 9.52216073669307e-06, "loss": 0.3722, "step": 1893 }, { "epoch": 0.6791776237150371, "grad_norm": 0.3960166573524475, "learning_rate": 9.521270042312055e-06, "loss": 0.3465, "step": 1894 }, { "epoch": 0.6795362180253407, "grad_norm": 0.35766029357910156, "learning_rate": 9.52037856031173e-06, "loss": 0.3608, "step": 1895 }, { "epoch": 0.6798948123356443, "grad_norm": 0.44345611333847046, "learning_rate": 9.519486290847396e-06, "loss": 0.3551, "step": 1896 }, { "epoch": 0.6802534066459479, "grad_norm": 0.43939927220344543, "learning_rate": 9.518593234074492e-06, "loss": 0.3443, "step": 1897 }, { "epoch": 0.6806120009562515, "grad_norm": 0.4123672544956207, "learning_rate": 9.517699390148584e-06, "loss": 0.3646, "step": 1898 }, { "epoch": 0.6809705952665551, "grad_norm": 0.47310659289360046, "learning_rate": 9.516804759225391e-06, "loss": 0.3688, "step": 1899 }, { "epoch": 0.6813291895768587, "grad_norm": 0.36326470971107483, "learning_rate": 9.515909341460754e-06, "loss": 0.3294, "step": 1900 }, { "epoch": 0.6816877838871623, "grad_norm": 0.390691339969635, "learning_rate": 9.515013137010663e-06, "loss": 0.3636, "step": 1901 }, { "epoch": 0.6820463781974659, "grad_norm": 0.44771578907966614, "learning_rate": 9.514116146031234e-06, "loss": 0.3529, "step": 1902 }, { "epoch": 0.6824049725077695, "grad_norm": 0.40300214290618896, "learning_rate": 9.513218368678727e-06, "loss": 0.3337, "step": 1903 }, { "epoch": 0.6827635668180732, "grad_norm": 0.4072263240814209, "learning_rate": 9.512319805109542e-06, "loss": 0.3663, "step": 1904 }, { "epoch": 0.6831221611283768, "grad_norm": 0.41157323122024536, "learning_rate": 9.511420455480205e-06, "loss": 0.3781, "step": 1905 }, { "epoch": 0.6834807554386804, "grad_norm": 0.43454718589782715, "learning_rate": 9.510520319947391e-06, "loss": 0.3606, "step": 1906 }, { "epoch": 0.683839349748984, "grad_norm": 0.37764570116996765, "learning_rate": 9.509619398667905e-06, "loss": 0.369, "step": 1907 }, { "epoch": 0.6841979440592876, "grad_norm": 0.4317001402378082, "learning_rate": 9.508717691798687e-06, "loss": 0.3879, "step": 1908 }, { "epoch": 0.6845565383695912, "grad_norm": 0.4399009943008423, "learning_rate": 9.507815199496822e-06, "loss": 0.3564, "step": 1909 }, { "epoch": 0.6849151326798948, "grad_norm": 0.44662052392959595, "learning_rate": 9.506911921919522e-06, "loss": 0.3706, "step": 1910 }, { "epoch": 0.6852737269901984, "grad_norm": 0.4220474660396576, "learning_rate": 9.506007859224144e-06, "loss": 0.3449, "step": 1911 }, { "epoch": 0.685632321300502, "grad_norm": 0.3891599774360657, "learning_rate": 9.50510301156818e-06, "loss": 0.3387, "step": 1912 }, { "epoch": 0.6859909156108056, "grad_norm": 0.40884459018707275, "learning_rate": 9.504197379109255e-06, "loss": 0.3448, "step": 1913 }, { "epoch": 0.6863495099211092, "grad_norm": 0.4019487798213959, "learning_rate": 9.503290962005132e-06, "loss": 0.3465, "step": 1914 }, { "epoch": 0.6867081042314128, "grad_norm": 0.4468240737915039, "learning_rate": 9.502383760413712e-06, "loss": 0.3678, "step": 1915 }, { "epoch": 0.6870666985417164, "grad_norm": 0.4288349151611328, "learning_rate": 9.501475774493034e-06, "loss": 0.377, "step": 1916 }, { "epoch": 0.68742529285202, "grad_norm": 0.39700329303741455, "learning_rate": 9.500567004401273e-06, "loss": 0.3537, "step": 1917 }, { "epoch": 0.6877838871623236, "grad_norm": 0.3933379650115967, "learning_rate": 9.499657450296735e-06, "loss": 0.336, "step": 1918 }, { "epoch": 0.6881424814726274, "grad_norm": 0.4601464867591858, "learning_rate": 9.498747112337871e-06, "loss": 0.3551, "step": 1919 }, { "epoch": 0.688501075782931, "grad_norm": 0.38122591376304626, "learning_rate": 9.497835990683265e-06, "loss": 0.3659, "step": 1920 }, { "epoch": 0.6888596700932346, "grad_norm": 0.39028245210647583, "learning_rate": 9.496924085491635e-06, "loss": 0.3493, "step": 1921 }, { "epoch": 0.6892182644035382, "grad_norm": 0.38584306836128235, "learning_rate": 9.496011396921838e-06, "loss": 0.3839, "step": 1922 }, { "epoch": 0.6895768587138418, "grad_norm": 0.37603431940078735, "learning_rate": 9.495097925132868e-06, "loss": 0.3527, "step": 1923 }, { "epoch": 0.6899354530241454, "grad_norm": 0.38679981231689453, "learning_rate": 9.494183670283854e-06, "loss": 0.3559, "step": 1924 }, { "epoch": 0.690294047334449, "grad_norm": 0.3750241696834564, "learning_rate": 9.493268632534063e-06, "loss": 0.3454, "step": 1925 }, { "epoch": 0.6906526416447526, "grad_norm": 0.4141501784324646, "learning_rate": 9.492352812042898e-06, "loss": 0.3797, "step": 1926 }, { "epoch": 0.6910112359550562, "grad_norm": 0.4069227874279022, "learning_rate": 9.491436208969895e-06, "loss": 0.3583, "step": 1927 }, { "epoch": 0.6913698302653598, "grad_norm": 0.3509413003921509, "learning_rate": 9.490518823474731e-06, "loss": 0.3609, "step": 1928 }, { "epoch": 0.6917284245756634, "grad_norm": 0.4087078273296356, "learning_rate": 9.489600655717217e-06, "loss": 0.3536, "step": 1929 }, { "epoch": 0.692087018885967, "grad_norm": 0.3445793688297272, "learning_rate": 9.488681705857301e-06, "loss": 0.3932, "step": 1930 }, { "epoch": 0.6924456131962706, "grad_norm": 0.4170268774032593, "learning_rate": 9.487761974055068e-06, "loss": 0.3785, "step": 1931 }, { "epoch": 0.6928042075065742, "grad_norm": 0.41200122237205505, "learning_rate": 9.486841460470735e-06, "loss": 0.363, "step": 1932 }, { "epoch": 0.6931628018168778, "grad_norm": 0.3943406343460083, "learning_rate": 9.48592016526466e-06, "loss": 0.3675, "step": 1933 }, { "epoch": 0.6935213961271814, "grad_norm": 0.4250544309616089, "learning_rate": 9.484998088597337e-06, "loss": 0.3669, "step": 1934 }, { "epoch": 0.693879990437485, "grad_norm": 0.3902167081832886, "learning_rate": 9.484075230629393e-06, "loss": 0.3451, "step": 1935 }, { "epoch": 0.6942385847477887, "grad_norm": 0.41075414419174194, "learning_rate": 9.483151591521593e-06, "loss": 0.3764, "step": 1936 }, { "epoch": 0.6945971790580923, "grad_norm": 0.42418068647384644, "learning_rate": 9.48222717143484e-06, "loss": 0.3534, "step": 1937 }, { "epoch": 0.6949557733683959, "grad_norm": 0.4101322889328003, "learning_rate": 9.481301970530166e-06, "loss": 0.3598, "step": 1938 }, { "epoch": 0.6953143676786995, "grad_norm": 0.3806018829345703, "learning_rate": 9.480375988968747e-06, "loss": 0.3521, "step": 1939 }, { "epoch": 0.6956729619890031, "grad_norm": 0.3833078444004059, "learning_rate": 9.479449226911892e-06, "loss": 0.3449, "step": 1940 }, { "epoch": 0.6960315562993067, "grad_norm": 0.40837106108665466, "learning_rate": 9.478521684521045e-06, "loss": 0.3655, "step": 1941 }, { "epoch": 0.6963901506096103, "grad_norm": 0.36993086338043213, "learning_rate": 9.477593361957786e-06, "loss": 0.3469, "step": 1942 }, { "epoch": 0.6967487449199139, "grad_norm": 0.4187993109226227, "learning_rate": 9.476664259383835e-06, "loss": 0.3792, "step": 1943 }, { "epoch": 0.6971073392302175, "grad_norm": 0.37754446268081665, "learning_rate": 9.475734376961042e-06, "loss": 0.348, "step": 1944 }, { "epoch": 0.6974659335405211, "grad_norm": 0.405375599861145, "learning_rate": 9.474803714851395e-06, "loss": 0.3498, "step": 1945 }, { "epoch": 0.6978245278508247, "grad_norm": 0.383340984582901, "learning_rate": 9.473872273217019e-06, "loss": 0.3512, "step": 1946 }, { "epoch": 0.6981831221611283, "grad_norm": 0.43159833550453186, "learning_rate": 9.472940052220174e-06, "loss": 0.3499, "step": 1947 }, { "epoch": 0.698541716471432, "grad_norm": 0.4059131145477295, "learning_rate": 9.472007052023255e-06, "loss": 0.3377, "step": 1948 }, { "epoch": 0.6989003107817356, "grad_norm": 0.3528841435909271, "learning_rate": 9.471073272788796e-06, "loss": 0.3531, "step": 1949 }, { "epoch": 0.6992589050920393, "grad_norm": 0.39193400740623474, "learning_rate": 9.470138714679462e-06, "loss": 0.3762, "step": 1950 }, { "epoch": 0.6996174994023429, "grad_norm": 0.4337261915206909, "learning_rate": 9.469203377858056e-06, "loss": 0.3895, "step": 1951 }, { "epoch": 0.6999760937126465, "grad_norm": 0.3616868853569031, "learning_rate": 9.468267262487516e-06, "loss": 0.3412, "step": 1952 }, { "epoch": 0.7003346880229501, "grad_norm": 0.4037695825099945, "learning_rate": 9.46733036873092e-06, "loss": 0.4035, "step": 1953 }, { "epoch": 0.7006932823332537, "grad_norm": 0.42859041690826416, "learning_rate": 9.466392696751474e-06, "loss": 0.3628, "step": 1954 }, { "epoch": 0.7010518766435573, "grad_norm": 0.4208241403102875, "learning_rate": 9.465454246712524e-06, "loss": 0.3779, "step": 1955 }, { "epoch": 0.7014104709538609, "grad_norm": 0.366580992937088, "learning_rate": 9.464515018777551e-06, "loss": 0.3667, "step": 1956 }, { "epoch": 0.7017690652641645, "grad_norm": 0.39932820200920105, "learning_rate": 9.463575013110173e-06, "loss": 0.3646, "step": 1957 }, { "epoch": 0.7021276595744681, "grad_norm": 0.43439218401908875, "learning_rate": 9.46263422987414e-06, "loss": 0.3661, "step": 1958 }, { "epoch": 0.7024862538847717, "grad_norm": 0.35394051671028137, "learning_rate": 9.46169266923334e-06, "loss": 0.3424, "step": 1959 }, { "epoch": 0.7028448481950753, "grad_norm": 0.3899310529232025, "learning_rate": 9.460750331351795e-06, "loss": 0.3491, "step": 1960 }, { "epoch": 0.7032034425053789, "grad_norm": 0.43662160634994507, "learning_rate": 9.459807216393667e-06, "loss": 0.389, "step": 1961 }, { "epoch": 0.7035620368156825, "grad_norm": 0.3895277678966522, "learning_rate": 9.458863324523244e-06, "loss": 0.3777, "step": 1962 }, { "epoch": 0.7039206311259861, "grad_norm": 0.4320993423461914, "learning_rate": 9.457918655904959e-06, "loss": 0.3817, "step": 1963 }, { "epoch": 0.7042792254362897, "grad_norm": 0.36715683341026306, "learning_rate": 9.456973210703375e-06, "loss": 0.3548, "step": 1964 }, { "epoch": 0.7046378197465933, "grad_norm": 0.3908352255821228, "learning_rate": 9.45602698908319e-06, "loss": 0.3588, "step": 1965 }, { "epoch": 0.704996414056897, "grad_norm": 0.3805690109729767, "learning_rate": 9.45507999120924e-06, "loss": 0.349, "step": 1966 }, { "epoch": 0.7053550083672006, "grad_norm": 0.3768234848976135, "learning_rate": 9.454132217246496e-06, "loss": 0.3864, "step": 1967 }, { "epoch": 0.7057136026775042, "grad_norm": 0.38390815258026123, "learning_rate": 9.453183667360062e-06, "loss": 0.373, "step": 1968 }, { "epoch": 0.7060721969878078, "grad_norm": 0.36246126890182495, "learning_rate": 9.452234341715179e-06, "loss": 0.3511, "step": 1969 }, { "epoch": 0.7064307912981114, "grad_norm": 0.38458922505378723, "learning_rate": 9.45128424047722e-06, "loss": 0.3574, "step": 1970 }, { "epoch": 0.706789385608415, "grad_norm": 0.3808544874191284, "learning_rate": 9.4503333638117e-06, "loss": 0.3291, "step": 1971 }, { "epoch": 0.7071479799187186, "grad_norm": 0.3708624541759491, "learning_rate": 9.44938171188426e-06, "loss": 0.3639, "step": 1972 }, { "epoch": 0.7075065742290222, "grad_norm": 0.4029712378978729, "learning_rate": 9.448429284860684e-06, "loss": 0.3752, "step": 1973 }, { "epoch": 0.7078651685393258, "grad_norm": 0.4237571656703949, "learning_rate": 9.447476082906885e-06, "loss": 0.3659, "step": 1974 }, { "epoch": 0.7082237628496294, "grad_norm": 0.40192681550979614, "learning_rate": 9.446522106188918e-06, "loss": 0.3385, "step": 1975 }, { "epoch": 0.708582357159933, "grad_norm": 0.42712923884391785, "learning_rate": 9.445567354872967e-06, "loss": 0.3847, "step": 1976 }, { "epoch": 0.7089409514702367, "grad_norm": 0.41631412506103516, "learning_rate": 9.44461182912535e-06, "loss": 0.3443, "step": 1977 }, { "epoch": 0.7092995457805403, "grad_norm": 0.3986974358558655, "learning_rate": 9.443655529112523e-06, "loss": 0.3668, "step": 1978 }, { "epoch": 0.7096581400908439, "grad_norm": 0.39414528012275696, "learning_rate": 9.442698455001081e-06, "loss": 0.3575, "step": 1979 }, { "epoch": 0.7100167344011475, "grad_norm": 0.417914479970932, "learning_rate": 9.441740606957743e-06, "loss": 0.3528, "step": 1980 }, { "epoch": 0.7103753287114511, "grad_norm": 0.3982137441635132, "learning_rate": 9.440781985149375e-06, "loss": 0.3544, "step": 1981 }, { "epoch": 0.7107339230217548, "grad_norm": 0.35570192337036133, "learning_rate": 9.439822589742967e-06, "loss": 0.3576, "step": 1982 }, { "epoch": 0.7110925173320584, "grad_norm": 0.38109031319618225, "learning_rate": 9.438862420905652e-06, "loss": 0.3655, "step": 1983 }, { "epoch": 0.711451111642362, "grad_norm": 0.3836709260940552, "learning_rate": 9.437901478804694e-06, "loss": 0.3604, "step": 1984 }, { "epoch": 0.7118097059526656, "grad_norm": 0.3449647128582001, "learning_rate": 9.43693976360749e-06, "loss": 0.3579, "step": 1985 }, { "epoch": 0.7121683002629692, "grad_norm": 0.3763873875141144, "learning_rate": 9.435977275481574e-06, "loss": 0.383, "step": 1986 }, { "epoch": 0.7125268945732728, "grad_norm": 0.3915380537509918, "learning_rate": 9.435014014594617e-06, "loss": 0.385, "step": 1987 }, { "epoch": 0.7128854888835764, "grad_norm": 0.3676697313785553, "learning_rate": 9.434049981114421e-06, "loss": 0.3532, "step": 1988 }, { "epoch": 0.71324408319388, "grad_norm": 0.37124544382095337, "learning_rate": 9.433085175208923e-06, "loss": 0.3625, "step": 1989 }, { "epoch": 0.7136026775041836, "grad_norm": 0.38197505474090576, "learning_rate": 9.432119597046193e-06, "loss": 0.3743, "step": 1990 }, { "epoch": 0.7139612718144872, "grad_norm": 0.38396817445755005, "learning_rate": 9.431153246794442e-06, "loss": 0.3545, "step": 1991 }, { "epoch": 0.7143198661247908, "grad_norm": 0.36828818917274475, "learning_rate": 9.43018612462201e-06, "loss": 0.4144, "step": 1992 }, { "epoch": 0.7146784604350944, "grad_norm": 0.36922335624694824, "learning_rate": 9.42921823069737e-06, "loss": 0.3696, "step": 1993 }, { "epoch": 0.715037054745398, "grad_norm": 0.3920401930809021, "learning_rate": 9.428249565189136e-06, "loss": 0.3502, "step": 1994 }, { "epoch": 0.7153956490557016, "grad_norm": 0.3900884985923767, "learning_rate": 9.427280128266049e-06, "loss": 0.3445, "step": 1995 }, { "epoch": 0.7157542433660052, "grad_norm": 0.33221226930618286, "learning_rate": 9.426309920096992e-06, "loss": 0.337, "step": 1996 }, { "epoch": 0.7161128376763088, "grad_norm": 0.3704846501350403, "learning_rate": 9.425338940850974e-06, "loss": 0.3769, "step": 1997 }, { "epoch": 0.7164714319866125, "grad_norm": 0.3810715973377228, "learning_rate": 9.424367190697146e-06, "loss": 0.3303, "step": 1998 }, { "epoch": 0.7168300262969161, "grad_norm": 0.3524613082408905, "learning_rate": 9.423394669804787e-06, "loss": 0.3412, "step": 1999 }, { "epoch": 0.7171886206072197, "grad_norm": 0.3743881285190582, "learning_rate": 9.422421378343314e-06, "loss": 0.3534, "step": 2000 }, { "epoch": 0.7175472149175233, "grad_norm": 0.3661758005619049, "learning_rate": 9.42144731648228e-06, "loss": 0.3629, "step": 2001 }, { "epoch": 0.7179058092278269, "grad_norm": 0.3964557647705078, "learning_rate": 9.420472484391365e-06, "loss": 0.3716, "step": 2002 }, { "epoch": 0.7182644035381305, "grad_norm": 0.4041043817996979, "learning_rate": 9.419496882240393e-06, "loss": 0.3566, "step": 2003 }, { "epoch": 0.7186229978484341, "grad_norm": 0.3766833245754242, "learning_rate": 9.418520510199313e-06, "loss": 0.3569, "step": 2004 }, { "epoch": 0.7189815921587378, "grad_norm": 0.38120585680007935, "learning_rate": 9.417543368438214e-06, "loss": 0.3625, "step": 2005 }, { "epoch": 0.7193401864690414, "grad_norm": 0.39976388216018677, "learning_rate": 9.416565457127315e-06, "loss": 0.3616, "step": 2006 }, { "epoch": 0.719698780779345, "grad_norm": 0.38916778564453125, "learning_rate": 9.415586776436973e-06, "loss": 0.3797, "step": 2007 }, { "epoch": 0.7200573750896486, "grad_norm": 0.3876936733722687, "learning_rate": 9.414607326537678e-06, "loss": 0.3718, "step": 2008 }, { "epoch": 0.7204159693999522, "grad_norm": 0.3564271330833435, "learning_rate": 9.413627107600052e-06, "loss": 0.3457, "step": 2009 }, { "epoch": 0.7207745637102558, "grad_norm": 0.3817523121833801, "learning_rate": 9.412646119794851e-06, "loss": 0.3503, "step": 2010 }, { "epoch": 0.7211331580205594, "grad_norm": 0.37277600169181824, "learning_rate": 9.411664363292968e-06, "loss": 0.3572, "step": 2011 }, { "epoch": 0.721491752330863, "grad_norm": 0.38603824377059937, "learning_rate": 9.410681838265427e-06, "loss": 0.3417, "step": 2012 }, { "epoch": 0.7218503466411667, "grad_norm": 0.3509560227394104, "learning_rate": 9.40969854488339e-06, "loss": 0.3366, "step": 2013 }, { "epoch": 0.7222089409514703, "grad_norm": 0.3698989748954773, "learning_rate": 9.408714483318145e-06, "loss": 0.3569, "step": 2014 }, { "epoch": 0.7225675352617739, "grad_norm": 0.3726026117801666, "learning_rate": 9.40772965374112e-06, "loss": 0.3536, "step": 2015 }, { "epoch": 0.7229261295720775, "grad_norm": 0.3902990221977234, "learning_rate": 9.406744056323877e-06, "loss": 0.3251, "step": 2016 }, { "epoch": 0.7232847238823811, "grad_norm": 0.34958112239837646, "learning_rate": 9.40575769123811e-06, "loss": 0.326, "step": 2017 }, { "epoch": 0.7236433181926847, "grad_norm": 0.3602043092250824, "learning_rate": 9.404770558655645e-06, "loss": 0.3557, "step": 2018 }, { "epoch": 0.7240019125029883, "grad_norm": 0.37276193499565125, "learning_rate": 9.403782658748443e-06, "loss": 0.377, "step": 2019 }, { "epoch": 0.7243605068132919, "grad_norm": 0.37553995847702026, "learning_rate": 9.402793991688604e-06, "loss": 0.3438, "step": 2020 }, { "epoch": 0.7247191011235955, "grad_norm": 0.3816829025745392, "learning_rate": 9.401804557648354e-06, "loss": 0.359, "step": 2021 }, { "epoch": 0.7250776954338991, "grad_norm": 0.3663354814052582, "learning_rate": 9.400814356800052e-06, "loss": 0.3272, "step": 2022 }, { "epoch": 0.7254362897442027, "grad_norm": 0.35934266448020935, "learning_rate": 9.3998233893162e-06, "loss": 0.3463, "step": 2023 }, { "epoch": 0.7257948840545063, "grad_norm": 0.37038493156433105, "learning_rate": 9.398831655369422e-06, "loss": 0.34, "step": 2024 }, { "epoch": 0.7261534783648099, "grad_norm": 0.3916515111923218, "learning_rate": 9.397839155132487e-06, "loss": 0.3488, "step": 2025 }, { "epoch": 0.7265120726751135, "grad_norm": 0.3606298565864563, "learning_rate": 9.39684588877829e-06, "loss": 0.3483, "step": 2026 }, { "epoch": 0.7268706669854171, "grad_norm": 0.3896675109863281, "learning_rate": 9.395851856479857e-06, "loss": 0.3594, "step": 2027 }, { "epoch": 0.7272292612957207, "grad_norm": 0.4024273455142975, "learning_rate": 9.394857058410353e-06, "loss": 0.3553, "step": 2028 }, { "epoch": 0.7275878556060243, "grad_norm": 0.41454094648361206, "learning_rate": 9.39386149474308e-06, "loss": 0.3596, "step": 2029 }, { "epoch": 0.727946449916328, "grad_norm": 0.3438170254230499, "learning_rate": 9.392865165651462e-06, "loss": 0.3547, "step": 2030 }, { "epoch": 0.7283050442266316, "grad_norm": 0.39066681265830994, "learning_rate": 9.391868071309068e-06, "loss": 0.3325, "step": 2031 }, { "epoch": 0.7286636385369352, "grad_norm": 0.41253945231437683, "learning_rate": 9.39087021188959e-06, "loss": 0.3421, "step": 2032 }, { "epoch": 0.7290222328472388, "grad_norm": 0.36752575635910034, "learning_rate": 9.389871587566859e-06, "loss": 0.3586, "step": 2033 }, { "epoch": 0.7293808271575425, "grad_norm": 0.33993157744407654, "learning_rate": 9.38887219851484e-06, "loss": 0.3392, "step": 2034 }, { "epoch": 0.7297394214678461, "grad_norm": 0.39209017157554626, "learning_rate": 9.38787204490763e-06, "loss": 0.3705, "step": 2035 }, { "epoch": 0.7300980157781497, "grad_norm": 0.4077310860157013, "learning_rate": 9.386871126919461e-06, "loss": 0.3729, "step": 2036 }, { "epoch": 0.7304566100884533, "grad_norm": 0.3823706805706024, "learning_rate": 9.38586944472469e-06, "loss": 0.3747, "step": 2037 }, { "epoch": 0.7308152043987569, "grad_norm": 0.4201653003692627, "learning_rate": 9.38486699849782e-06, "loss": 0.3694, "step": 2038 }, { "epoch": 0.7311737987090605, "grad_norm": 0.3727804720401764, "learning_rate": 9.383863788413474e-06, "loss": 0.3574, "step": 2039 }, { "epoch": 0.7315323930193641, "grad_norm": 0.35369542241096497, "learning_rate": 9.382859814646416e-06, "loss": 0.3483, "step": 2040 }, { "epoch": 0.7318909873296677, "grad_norm": 0.3983795642852783, "learning_rate": 9.381855077371545e-06, "loss": 0.3763, "step": 2041 }, { "epoch": 0.7322495816399713, "grad_norm": 0.3722097873687744, "learning_rate": 9.380849576763886e-06, "loss": 0.3341, "step": 2042 }, { "epoch": 0.7326081759502749, "grad_norm": 0.39467400312423706, "learning_rate": 9.379843312998602e-06, "loss": 0.3646, "step": 2043 }, { "epoch": 0.7329667702605785, "grad_norm": 0.41221240162849426, "learning_rate": 9.378836286250986e-06, "loss": 0.3597, "step": 2044 }, { "epoch": 0.7333253645708822, "grad_norm": 0.38870227336883545, "learning_rate": 9.377828496696465e-06, "loss": 0.3508, "step": 2045 }, { "epoch": 0.7336839588811858, "grad_norm": 0.3793169856071472, "learning_rate": 9.376819944510598e-06, "loss": 0.3664, "step": 2046 }, { "epoch": 0.7340425531914894, "grad_norm": 0.3732714056968689, "learning_rate": 9.375810629869082e-06, "loss": 0.3491, "step": 2047 }, { "epoch": 0.734401147501793, "grad_norm": 0.36908695101737976, "learning_rate": 9.37480055294774e-06, "loss": 0.35, "step": 2048 }, { "epoch": 0.7347597418120966, "grad_norm": 0.4315034747123718, "learning_rate": 9.37378971392253e-06, "loss": 0.355, "step": 2049 }, { "epoch": 0.7351183361224002, "grad_norm": 0.3885057866573334, "learning_rate": 9.372778112969546e-06, "loss": 0.3789, "step": 2050 }, { "epoch": 0.7354769304327038, "grad_norm": 0.3933302164077759, "learning_rate": 9.371765750265008e-06, "loss": 0.3509, "step": 2051 }, { "epoch": 0.7358355247430074, "grad_norm": 0.38688135147094727, "learning_rate": 9.370752625985275e-06, "loss": 0.3723, "step": 2052 }, { "epoch": 0.736194119053311, "grad_norm": 0.39628151059150696, "learning_rate": 9.369738740306837e-06, "loss": 0.3791, "step": 2053 }, { "epoch": 0.7365527133636146, "grad_norm": 0.36329033970832825, "learning_rate": 9.368724093406316e-06, "loss": 0.3442, "step": 2054 }, { "epoch": 0.7369113076739182, "grad_norm": 0.3901868164539337, "learning_rate": 9.367708685460466e-06, "loss": 0.3646, "step": 2055 }, { "epoch": 0.7372699019842218, "grad_norm": 0.4209134876728058, "learning_rate": 9.366692516646173e-06, "loss": 0.3581, "step": 2056 }, { "epoch": 0.7376284962945254, "grad_norm": 0.4151628911495209, "learning_rate": 9.365675587140461e-06, "loss": 0.3487, "step": 2057 }, { "epoch": 0.737987090604829, "grad_norm": 0.3775520622730255, "learning_rate": 9.364657897120476e-06, "loss": 0.363, "step": 2058 }, { "epoch": 0.7383456849151326, "grad_norm": 0.4037609398365021, "learning_rate": 9.363639446763508e-06, "loss": 0.3457, "step": 2059 }, { "epoch": 0.7387042792254362, "grad_norm": 0.38822799921035767, "learning_rate": 9.362620236246973e-06, "loss": 0.3415, "step": 2060 }, { "epoch": 0.7390628735357399, "grad_norm": 0.37429332733154297, "learning_rate": 9.361600265748419e-06, "loss": 0.357, "step": 2061 }, { "epoch": 0.7394214678460435, "grad_norm": 0.4299640655517578, "learning_rate": 9.360579535445529e-06, "loss": 0.3661, "step": 2062 }, { "epoch": 0.7397800621563472, "grad_norm": 0.3582015335559845, "learning_rate": 9.359558045516116e-06, "loss": 0.3529, "step": 2063 }, { "epoch": 0.7401386564666508, "grad_norm": 0.35839077830314636, "learning_rate": 9.35853579613813e-06, "loss": 0.3637, "step": 2064 }, { "epoch": 0.7404972507769544, "grad_norm": 0.4509050250053406, "learning_rate": 9.357512787489646e-06, "loss": 0.3606, "step": 2065 }, { "epoch": 0.740855845087258, "grad_norm": 0.39225664734840393, "learning_rate": 9.35648901974888e-06, "loss": 0.3822, "step": 2066 }, { "epoch": 0.7412144393975616, "grad_norm": 0.38618141412734985, "learning_rate": 9.355464493094173e-06, "loss": 0.3532, "step": 2067 }, { "epoch": 0.7415730337078652, "grad_norm": 0.41065865755081177, "learning_rate": 9.354439207703998e-06, "loss": 0.3549, "step": 2068 }, { "epoch": 0.7419316280181688, "grad_norm": 0.4004566967487335, "learning_rate": 9.353413163756968e-06, "loss": 0.3455, "step": 2069 }, { "epoch": 0.7422902223284724, "grad_norm": 0.39232027530670166, "learning_rate": 9.352386361431819e-06, "loss": 0.3713, "step": 2070 }, { "epoch": 0.742648816638776, "grad_norm": 0.3862834572792053, "learning_rate": 9.351358800907426e-06, "loss": 0.3718, "step": 2071 }, { "epoch": 0.7430074109490796, "grad_norm": 0.3610888123512268, "learning_rate": 9.350330482362791e-06, "loss": 0.3495, "step": 2072 }, { "epoch": 0.7433660052593832, "grad_norm": 0.39378729462623596, "learning_rate": 9.349301405977052e-06, "loss": 0.3568, "step": 2073 }, { "epoch": 0.7437245995696868, "grad_norm": 0.3977683186531067, "learning_rate": 9.348271571929477e-06, "loss": 0.3481, "step": 2074 }, { "epoch": 0.7440831938799904, "grad_norm": 0.40455159544944763, "learning_rate": 9.347240980399466e-06, "loss": 0.3686, "step": 2075 }, { "epoch": 0.744441788190294, "grad_norm": 0.3836708068847656, "learning_rate": 9.34620963156655e-06, "loss": 0.3529, "step": 2076 }, { "epoch": 0.7448003825005977, "grad_norm": 0.3868941068649292, "learning_rate": 9.345177525610394e-06, "loss": 0.3598, "step": 2077 }, { "epoch": 0.7451589768109013, "grad_norm": 0.40979376435279846, "learning_rate": 9.344144662710797e-06, "loss": 0.3519, "step": 2078 }, { "epoch": 0.7455175711212049, "grad_norm": 0.4085884690284729, "learning_rate": 9.343111043047682e-06, "loss": 0.3557, "step": 2079 }, { "epoch": 0.7458761654315085, "grad_norm": 0.40221160650253296, "learning_rate": 9.342076666801112e-06, "loss": 0.3728, "step": 2080 }, { "epoch": 0.7462347597418121, "grad_norm": 0.4617774784564972, "learning_rate": 9.341041534151277e-06, "loss": 0.3902, "step": 2081 }, { "epoch": 0.7465933540521157, "grad_norm": 0.3836444020271301, "learning_rate": 9.340005645278502e-06, "loss": 0.36, "step": 2082 }, { "epoch": 0.7469519483624193, "grad_norm": 0.3441208600997925, "learning_rate": 9.33896900036324e-06, "loss": 0.3538, "step": 2083 }, { "epoch": 0.7473105426727229, "grad_norm": 0.3911764919757843, "learning_rate": 9.337931599586079e-06, "loss": 0.3557, "step": 2084 }, { "epoch": 0.7476691369830265, "grad_norm": 0.37222355604171753, "learning_rate": 9.336893443127739e-06, "loss": 0.3448, "step": 2085 }, { "epoch": 0.7480277312933301, "grad_norm": 0.37651363015174866, "learning_rate": 9.335854531169066e-06, "loss": 0.3537, "step": 2086 }, { "epoch": 0.7483863256036337, "grad_norm": 0.42707857489585876, "learning_rate": 9.334814863891047e-06, "loss": 0.3631, "step": 2087 }, { "epoch": 0.7487449199139373, "grad_norm": 0.3578762114048004, "learning_rate": 9.33377444147479e-06, "loss": 0.3534, "step": 2088 }, { "epoch": 0.7491035142242409, "grad_norm": 0.37557253241539, "learning_rate": 9.33273326410154e-06, "loss": 0.3672, "step": 2089 }, { "epoch": 0.7494621085345445, "grad_norm": 0.41381552815437317, "learning_rate": 9.331691331952679e-06, "loss": 0.3577, "step": 2090 }, { "epoch": 0.7498207028448481, "grad_norm": 0.34574413299560547, "learning_rate": 9.33064864520971e-06, "loss": 0.3571, "step": 2091 }, { "epoch": 0.7501792971551519, "grad_norm": 0.3674044609069824, "learning_rate": 9.329605204054276e-06, "loss": 0.3343, "step": 2092 }, { "epoch": 0.7505378914654555, "grad_norm": 0.3982119858264923, "learning_rate": 9.328561008668143e-06, "loss": 0.3799, "step": 2093 }, { "epoch": 0.7508964857757591, "grad_norm": 0.3712795674800873, "learning_rate": 9.327516059233214e-06, "loss": 0.3754, "step": 2094 }, { "epoch": 0.7512550800860627, "grad_norm": 0.41393813490867615, "learning_rate": 9.326470355931528e-06, "loss": 0.3638, "step": 2095 }, { "epoch": 0.7516136743963663, "grad_norm": 0.5335837602615356, "learning_rate": 9.325423898945242e-06, "loss": 0.3832, "step": 2096 }, { "epoch": 0.7519722687066699, "grad_norm": 0.3771614730358124, "learning_rate": 9.324376688456656e-06, "loss": 0.3697, "step": 2097 }, { "epoch": 0.7523308630169735, "grad_norm": 0.38377219438552856, "learning_rate": 9.323328724648197e-06, "loss": 0.3509, "step": 2098 }, { "epoch": 0.7526894573272771, "grad_norm": 0.4060542583465576, "learning_rate": 9.322280007702425e-06, "loss": 0.3646, "step": 2099 }, { "epoch": 0.7530480516375807, "grad_norm": 0.43353280425071716, "learning_rate": 9.321230537802028e-06, "loss": 0.3569, "step": 2100 }, { "epoch": 0.7534066459478843, "grad_norm": 0.4157603085041046, "learning_rate": 9.320180315129827e-06, "loss": 0.3486, "step": 2101 }, { "epoch": 0.7537652402581879, "grad_norm": 0.37503746151924133, "learning_rate": 9.319129339868773e-06, "loss": 0.3528, "step": 2102 }, { "epoch": 0.7541238345684915, "grad_norm": 0.3738897740840912, "learning_rate": 9.318077612201952e-06, "loss": 0.3588, "step": 2103 }, { "epoch": 0.7544824288787951, "grad_norm": 0.3742627501487732, "learning_rate": 9.317025132312576e-06, "loss": 0.3404, "step": 2104 }, { "epoch": 0.7548410231890987, "grad_norm": 0.38475102186203003, "learning_rate": 9.315971900383991e-06, "loss": 0.3611, "step": 2105 }, { "epoch": 0.7551996174994023, "grad_norm": 0.37441590428352356, "learning_rate": 9.314917916599673e-06, "loss": 0.3477, "step": 2106 }, { "epoch": 0.755558211809706, "grad_norm": 0.389096200466156, "learning_rate": 9.31386318114323e-06, "loss": 0.3847, "step": 2107 }, { "epoch": 0.7559168061200096, "grad_norm": 0.4141106605529785, "learning_rate": 9.312807694198397e-06, "loss": 0.3773, "step": 2108 }, { "epoch": 0.7562754004303132, "grad_norm": 0.3922930657863617, "learning_rate": 9.311751455949049e-06, "loss": 0.3459, "step": 2109 }, { "epoch": 0.7566339947406168, "grad_norm": 0.40208539366722107, "learning_rate": 9.310694466579182e-06, "loss": 0.3641, "step": 2110 }, { "epoch": 0.7569925890509204, "grad_norm": 0.3715185523033142, "learning_rate": 9.309636726272929e-06, "loss": 0.3469, "step": 2111 }, { "epoch": 0.757351183361224, "grad_norm": 0.47743403911590576, "learning_rate": 9.308578235214547e-06, "loss": 0.3463, "step": 2112 }, { "epoch": 0.7577097776715276, "grad_norm": 0.41536179184913635, "learning_rate": 9.307518993588434e-06, "loss": 0.3333, "step": 2113 }, { "epoch": 0.7580683719818312, "grad_norm": 0.3584250807762146, "learning_rate": 9.30645900157911e-06, "loss": 0.3521, "step": 2114 }, { "epoch": 0.7584269662921348, "grad_norm": 0.4049919843673706, "learning_rate": 9.30539825937123e-06, "loss": 0.3489, "step": 2115 }, { "epoch": 0.7587855606024384, "grad_norm": 0.46598780155181885, "learning_rate": 9.304336767149578e-06, "loss": 0.3867, "step": 2116 }, { "epoch": 0.759144154912742, "grad_norm": 0.3886434733867645, "learning_rate": 9.30327452509907e-06, "loss": 0.3331, "step": 2117 }, { "epoch": 0.7595027492230456, "grad_norm": 0.3766120672225952, "learning_rate": 9.30221153340475e-06, "loss": 0.3525, "step": 2118 }, { "epoch": 0.7598613435333492, "grad_norm": 0.4715152680873871, "learning_rate": 9.3011477922518e-06, "loss": 0.3539, "step": 2119 }, { "epoch": 0.7602199378436528, "grad_norm": 0.431681364774704, "learning_rate": 9.300083301825518e-06, "loss": 0.3582, "step": 2120 }, { "epoch": 0.7605785321539565, "grad_norm": 0.42568767070770264, "learning_rate": 9.29901806231135e-06, "loss": 0.3671, "step": 2121 }, { "epoch": 0.7609371264642601, "grad_norm": 0.3766801655292511, "learning_rate": 9.297952073894858e-06, "loss": 0.3347, "step": 2122 }, { "epoch": 0.7612957207745638, "grad_norm": 0.4638417363166809, "learning_rate": 9.296885336761744e-06, "loss": 0.3615, "step": 2123 }, { "epoch": 0.7616543150848674, "grad_norm": 0.3943180441856384, "learning_rate": 9.295817851097836e-06, "loss": 0.3563, "step": 2124 }, { "epoch": 0.762012909395171, "grad_norm": 0.4267231822013855, "learning_rate": 9.294749617089094e-06, "loss": 0.3646, "step": 2125 }, { "epoch": 0.7623715037054746, "grad_norm": 0.352546751499176, "learning_rate": 9.293680634921607e-06, "loss": 0.3544, "step": 2126 }, { "epoch": 0.7627300980157782, "grad_norm": 0.4030573070049286, "learning_rate": 9.292610904781594e-06, "loss": 0.3244, "step": 2127 }, { "epoch": 0.7630886923260818, "grad_norm": 0.3713708221912384, "learning_rate": 9.291540426855408e-06, "loss": 0.375, "step": 2128 }, { "epoch": 0.7634472866363854, "grad_norm": 0.40489518642425537, "learning_rate": 9.290469201329527e-06, "loss": 0.3508, "step": 2129 }, { "epoch": 0.763805880946689, "grad_norm": 0.4003666043281555, "learning_rate": 9.289397228390562e-06, "loss": 0.3434, "step": 2130 }, { "epoch": 0.7641644752569926, "grad_norm": 0.38738366961479187, "learning_rate": 9.288324508225255e-06, "loss": 0.3386, "step": 2131 }, { "epoch": 0.7645230695672962, "grad_norm": 0.435145765542984, "learning_rate": 9.287251041020477e-06, "loss": 0.354, "step": 2132 }, { "epoch": 0.7648816638775998, "grad_norm": 0.42728814482688904, "learning_rate": 9.286176826963229e-06, "loss": 0.3645, "step": 2133 }, { "epoch": 0.7652402581879034, "grad_norm": 0.3870708644390106, "learning_rate": 9.285101866240644e-06, "loss": 0.36, "step": 2134 }, { "epoch": 0.765598852498207, "grad_norm": 0.37688371539115906, "learning_rate": 9.284026159039981e-06, "loss": 0.356, "step": 2135 }, { "epoch": 0.7659574468085106, "grad_norm": 0.3830340504646301, "learning_rate": 9.282949705548632e-06, "loss": 0.3726, "step": 2136 }, { "epoch": 0.7663160411188142, "grad_norm": 0.39926815032958984, "learning_rate": 9.281872505954121e-06, "loss": 0.3466, "step": 2137 }, { "epoch": 0.7666746354291178, "grad_norm": 0.34877490997314453, "learning_rate": 9.280794560444098e-06, "loss": 0.369, "step": 2138 }, { "epoch": 0.7670332297394215, "grad_norm": 0.41009020805358887, "learning_rate": 9.279715869206343e-06, "loss": 0.3614, "step": 2139 }, { "epoch": 0.7673918240497251, "grad_norm": 0.4354444444179535, "learning_rate": 9.278636432428768e-06, "loss": 0.3679, "step": 2140 }, { "epoch": 0.7677504183600287, "grad_norm": 0.3619541525840759, "learning_rate": 9.277556250299415e-06, "loss": 0.3494, "step": 2141 }, { "epoch": 0.7681090126703323, "grad_norm": 0.43971383571624756, "learning_rate": 9.276475323006458e-06, "loss": 0.3546, "step": 2142 }, { "epoch": 0.7684676069806359, "grad_norm": 0.38864752650260925, "learning_rate": 9.275393650738192e-06, "loss": 0.364, "step": 2143 }, { "epoch": 0.7688262012909395, "grad_norm": 0.4216707944869995, "learning_rate": 9.274311233683051e-06, "loss": 0.3714, "step": 2144 }, { "epoch": 0.7691847956012431, "grad_norm": 0.3702012896537781, "learning_rate": 9.273228072029594e-06, "loss": 0.3662, "step": 2145 }, { "epoch": 0.7695433899115467, "grad_norm": 0.36599987745285034, "learning_rate": 9.272144165966513e-06, "loss": 0.3436, "step": 2146 }, { "epoch": 0.7699019842218503, "grad_norm": 0.42614033818244934, "learning_rate": 9.271059515682628e-06, "loss": 0.3562, "step": 2147 }, { "epoch": 0.7702605785321539, "grad_norm": 0.41111502051353455, "learning_rate": 9.269974121366887e-06, "loss": 0.3643, "step": 2148 }, { "epoch": 0.7706191728424575, "grad_norm": 0.36716288328170776, "learning_rate": 9.268887983208367e-06, "loss": 0.3706, "step": 2149 }, { "epoch": 0.7709777671527612, "grad_norm": 0.3776208758354187, "learning_rate": 9.267801101396284e-06, "loss": 0.3693, "step": 2150 }, { "epoch": 0.7713363614630648, "grad_norm": 0.40905457735061646, "learning_rate": 9.266713476119967e-06, "loss": 0.3301, "step": 2151 }, { "epoch": 0.7716949557733684, "grad_norm": 0.3310464918613434, "learning_rate": 9.265625107568888e-06, "loss": 0.3438, "step": 2152 }, { "epoch": 0.772053550083672, "grad_norm": 0.3878943622112274, "learning_rate": 9.264535995932647e-06, "loss": 0.3566, "step": 2153 }, { "epoch": 0.7724121443939757, "grad_norm": 0.38784465193748474, "learning_rate": 9.263446141400964e-06, "loss": 0.3728, "step": 2154 }, { "epoch": 0.7727707387042793, "grad_norm": 0.36248770356178284, "learning_rate": 9.262355544163701e-06, "loss": 0.35, "step": 2155 }, { "epoch": 0.7731293330145829, "grad_norm": 0.4039632976055145, "learning_rate": 9.261264204410843e-06, "loss": 0.3637, "step": 2156 }, { "epoch": 0.7734879273248865, "grad_norm": 0.42829498648643494, "learning_rate": 9.2601721223325e-06, "loss": 0.3488, "step": 2157 }, { "epoch": 0.7738465216351901, "grad_norm": 0.3666122555732727, "learning_rate": 9.259079298118921e-06, "loss": 0.3522, "step": 2158 }, { "epoch": 0.7742051159454937, "grad_norm": 0.39282819628715515, "learning_rate": 9.257985731960476e-06, "loss": 0.3633, "step": 2159 }, { "epoch": 0.7745637102557973, "grad_norm": 0.3794007897377014, "learning_rate": 9.25689142404767e-06, "loss": 0.3294, "step": 2160 }, { "epoch": 0.7749223045661009, "grad_norm": 0.39201727509498596, "learning_rate": 9.255796374571135e-06, "loss": 0.3612, "step": 2161 }, { "epoch": 0.7752808988764045, "grad_norm": 0.3748243451118469, "learning_rate": 9.25470058372163e-06, "loss": 0.3699, "step": 2162 }, { "epoch": 0.7756394931867081, "grad_norm": 0.3795396387577057, "learning_rate": 9.253604051690047e-06, "loss": 0.3453, "step": 2163 }, { "epoch": 0.7759980874970117, "grad_norm": 0.3469505310058594, "learning_rate": 9.252506778667403e-06, "loss": 0.3471, "step": 2164 }, { "epoch": 0.7763566818073153, "grad_norm": 0.36377769708633423, "learning_rate": 9.251408764844847e-06, "loss": 0.3599, "step": 2165 }, { "epoch": 0.7767152761176189, "grad_norm": 0.3809606432914734, "learning_rate": 9.25031001041366e-06, "loss": 0.382, "step": 2166 }, { "epoch": 0.7770738704279225, "grad_norm": 0.40054744482040405, "learning_rate": 9.249210515565243e-06, "loss": 0.3714, "step": 2167 }, { "epoch": 0.7774324647382261, "grad_norm": 0.3787843883037567, "learning_rate": 9.248110280491136e-06, "loss": 0.3748, "step": 2168 }, { "epoch": 0.7777910590485297, "grad_norm": 0.36506497859954834, "learning_rate": 9.247009305383002e-06, "loss": 0.3411, "step": 2169 }, { "epoch": 0.7781496533588333, "grad_norm": 0.37694743275642395, "learning_rate": 9.245907590432632e-06, "loss": 0.3378, "step": 2170 }, { "epoch": 0.778508247669137, "grad_norm": 0.3853205740451813, "learning_rate": 9.244805135831952e-06, "loss": 0.3331, "step": 2171 }, { "epoch": 0.7788668419794406, "grad_norm": 0.35212552547454834, "learning_rate": 9.24370194177301e-06, "loss": 0.3633, "step": 2172 }, { "epoch": 0.7792254362897442, "grad_norm": 0.4176729917526245, "learning_rate": 9.242598008447988e-06, "loss": 0.3401, "step": 2173 }, { "epoch": 0.7795840306000478, "grad_norm": 0.42004629969596863, "learning_rate": 9.24149333604919e-06, "loss": 0.3512, "step": 2174 }, { "epoch": 0.7799426249103514, "grad_norm": 0.3714663088321686, "learning_rate": 9.24038792476906e-06, "loss": 0.3792, "step": 2175 }, { "epoch": 0.780301219220655, "grad_norm": 0.43760767579078674, "learning_rate": 9.239281774800159e-06, "loss": 0.369, "step": 2176 }, { "epoch": 0.7806598135309586, "grad_norm": 0.4036957919597626, "learning_rate": 9.238174886335186e-06, "loss": 0.3596, "step": 2177 }, { "epoch": 0.7810184078412622, "grad_norm": 0.35299479961395264, "learning_rate": 9.237067259566961e-06, "loss": 0.3498, "step": 2178 }, { "epoch": 0.7813770021515659, "grad_norm": 0.37431034445762634, "learning_rate": 9.235958894688437e-06, "loss": 0.3577, "step": 2179 }, { "epoch": 0.7817355964618695, "grad_norm": 0.37591513991355896, "learning_rate": 9.234849791892694e-06, "loss": 0.3609, "step": 2180 }, { "epoch": 0.7820941907721731, "grad_norm": 0.39686834812164307, "learning_rate": 9.233739951372942e-06, "loss": 0.3538, "step": 2181 }, { "epoch": 0.7824527850824767, "grad_norm": 0.3881370723247528, "learning_rate": 9.232629373322519e-06, "loss": 0.3386, "step": 2182 }, { "epoch": 0.7828113793927803, "grad_norm": 0.35243865847587585, "learning_rate": 9.231518057934893e-06, "loss": 0.3491, "step": 2183 }, { "epoch": 0.7831699737030839, "grad_norm": 0.4429170489311218, "learning_rate": 9.230406005403652e-06, "loss": 0.3844, "step": 2184 }, { "epoch": 0.7835285680133875, "grad_norm": 0.40860122442245483, "learning_rate": 9.229293215922526e-06, "loss": 0.3309, "step": 2185 }, { "epoch": 0.7838871623236912, "grad_norm": 0.37233731150627136, "learning_rate": 9.228179689685361e-06, "loss": 0.3667, "step": 2186 }, { "epoch": 0.7842457566339948, "grad_norm": 0.42062973976135254, "learning_rate": 9.227065426886143e-06, "loss": 0.353, "step": 2187 }, { "epoch": 0.7846043509442984, "grad_norm": 0.4401680529117584, "learning_rate": 9.225950427718974e-06, "loss": 0.3474, "step": 2188 }, { "epoch": 0.784962945254602, "grad_norm": 0.3864251375198364, "learning_rate": 9.224834692378095e-06, "loss": 0.3627, "step": 2189 }, { "epoch": 0.7853215395649056, "grad_norm": 0.3772638738155365, "learning_rate": 9.223718221057866e-06, "loss": 0.3598, "step": 2190 }, { "epoch": 0.7856801338752092, "grad_norm": 0.386770099401474, "learning_rate": 9.222601013952784e-06, "loss": 0.346, "step": 2191 }, { "epoch": 0.7860387281855128, "grad_norm": 0.4413785934448242, "learning_rate": 9.221483071257466e-06, "loss": 0.3472, "step": 2192 }, { "epoch": 0.7863973224958164, "grad_norm": 0.3821863830089569, "learning_rate": 9.220364393166665e-06, "loss": 0.3572, "step": 2193 }, { "epoch": 0.78675591680612, "grad_norm": 0.41349831223487854, "learning_rate": 9.219244979875257e-06, "loss": 0.3416, "step": 2194 }, { "epoch": 0.7871145111164236, "grad_norm": 0.4208340048789978, "learning_rate": 9.218124831578246e-06, "loss": 0.374, "step": 2195 }, { "epoch": 0.7874731054267272, "grad_norm": 0.3779623806476593, "learning_rate": 9.217003948470765e-06, "loss": 0.3611, "step": 2196 }, { "epoch": 0.7878316997370308, "grad_norm": 0.4251090884208679, "learning_rate": 9.215882330748079e-06, "loss": 0.335, "step": 2197 }, { "epoch": 0.7881902940473344, "grad_norm": 0.3996030390262604, "learning_rate": 9.214759978605572e-06, "loss": 0.3567, "step": 2198 }, { "epoch": 0.788548888357638, "grad_norm": 0.3663405478000641, "learning_rate": 9.213636892238765e-06, "loss": 0.3803, "step": 2199 }, { "epoch": 0.7889074826679416, "grad_norm": 0.37049320340156555, "learning_rate": 9.212513071843301e-06, "loss": 0.3486, "step": 2200 }, { "epoch": 0.7892660769782452, "grad_norm": 0.3614680767059326, "learning_rate": 9.211388517614955e-06, "loss": 0.3556, "step": 2201 }, { "epoch": 0.7896246712885489, "grad_norm": 0.3690853714942932, "learning_rate": 9.210263229749626e-06, "loss": 0.346, "step": 2202 }, { "epoch": 0.7899832655988525, "grad_norm": 0.4084676504135132, "learning_rate": 9.209137208443344e-06, "loss": 0.365, "step": 2203 }, { "epoch": 0.7903418599091561, "grad_norm": 0.3476482331752777, "learning_rate": 9.208010453892264e-06, "loss": 0.3424, "step": 2204 }, { "epoch": 0.7907004542194597, "grad_norm": 0.4170786738395691, "learning_rate": 9.206882966292671e-06, "loss": 0.3639, "step": 2205 }, { "epoch": 0.7910590485297633, "grad_norm": 0.3653729557991028, "learning_rate": 9.205754745840977e-06, "loss": 0.3854, "step": 2206 }, { "epoch": 0.791417642840067, "grad_norm": 0.353196918964386, "learning_rate": 9.204625792733724e-06, "loss": 0.3398, "step": 2207 }, { "epoch": 0.7917762371503706, "grad_norm": 0.3695617914199829, "learning_rate": 9.203496107167576e-06, "loss": 0.3703, "step": 2208 }, { "epoch": 0.7921348314606742, "grad_norm": 0.37979593873023987, "learning_rate": 9.202365689339325e-06, "loss": 0.3717, "step": 2209 }, { "epoch": 0.7924934257709778, "grad_norm": 0.39562341570854187, "learning_rate": 9.2012345394459e-06, "loss": 0.3529, "step": 2210 }, { "epoch": 0.7928520200812814, "grad_norm": 0.38890814781188965, "learning_rate": 9.200102657684345e-06, "loss": 0.359, "step": 2211 }, { "epoch": 0.793210614391585, "grad_norm": 0.3523649275302887, "learning_rate": 9.198970044251841e-06, "loss": 0.3604, "step": 2212 }, { "epoch": 0.7935692087018886, "grad_norm": 0.3490552306175232, "learning_rate": 9.197836699345692e-06, "loss": 0.3612, "step": 2213 }, { "epoch": 0.7939278030121922, "grad_norm": 0.3534531593322754, "learning_rate": 9.196702623163329e-06, "loss": 0.3513, "step": 2214 }, { "epoch": 0.7942863973224958, "grad_norm": 0.3723076581954956, "learning_rate": 9.195567815902313e-06, "loss": 0.3465, "step": 2215 }, { "epoch": 0.7946449916327994, "grad_norm": 0.39197221398353577, "learning_rate": 9.19443227776033e-06, "loss": 0.3619, "step": 2216 }, { "epoch": 0.795003585943103, "grad_norm": 0.3830562233924866, "learning_rate": 9.193296008935196e-06, "loss": 0.352, "step": 2217 }, { "epoch": 0.7953621802534067, "grad_norm": 0.35924264788627625, "learning_rate": 9.192159009624848e-06, "loss": 0.3589, "step": 2218 }, { "epoch": 0.7957207745637103, "grad_norm": 0.3659489154815674, "learning_rate": 9.191021280027359e-06, "loss": 0.3418, "step": 2219 }, { "epoch": 0.7960793688740139, "grad_norm": 0.4030595123767853, "learning_rate": 9.189882820340925e-06, "loss": 0.3548, "step": 2220 }, { "epoch": 0.7964379631843175, "grad_norm": 0.40808334946632385, "learning_rate": 9.188743630763869e-06, "loss": 0.3314, "step": 2221 }, { "epoch": 0.7967965574946211, "grad_norm": 0.3569730818271637, "learning_rate": 9.18760371149464e-06, "loss": 0.3252, "step": 2222 }, { "epoch": 0.7971551518049247, "grad_norm": 0.38228797912597656, "learning_rate": 9.186463062731814e-06, "loss": 0.3604, "step": 2223 }, { "epoch": 0.7975137461152283, "grad_norm": 0.400971382856369, "learning_rate": 9.1853216846741e-06, "loss": 0.3696, "step": 2224 }, { "epoch": 0.7978723404255319, "grad_norm": 0.3869819641113281, "learning_rate": 9.184179577520326e-06, "loss": 0.3598, "step": 2225 }, { "epoch": 0.7982309347358355, "grad_norm": 0.40037569403648376, "learning_rate": 9.183036741469452e-06, "loss": 0.3872, "step": 2226 }, { "epoch": 0.7985895290461391, "grad_norm": 0.3860408067703247, "learning_rate": 9.181893176720565e-06, "loss": 0.3749, "step": 2227 }, { "epoch": 0.7989481233564427, "grad_norm": 0.3926198482513428, "learning_rate": 9.180748883472874e-06, "loss": 0.3587, "step": 2228 }, { "epoch": 0.7993067176667463, "grad_norm": 0.3599226176738739, "learning_rate": 9.17960386192572e-06, "loss": 0.3532, "step": 2229 }, { "epoch": 0.7996653119770499, "grad_norm": 0.4005785584449768, "learning_rate": 9.178458112278571e-06, "loss": 0.3735, "step": 2230 }, { "epoch": 0.8000239062873535, "grad_norm": 0.3679003119468689, "learning_rate": 9.177311634731021e-06, "loss": 0.3481, "step": 2231 }, { "epoch": 0.8003825005976571, "grad_norm": 0.37158915400505066, "learning_rate": 9.176164429482786e-06, "loss": 0.3633, "step": 2232 }, { "epoch": 0.8007410949079607, "grad_norm": 0.35070598125457764, "learning_rate": 9.175016496733713e-06, "loss": 0.3532, "step": 2233 }, { "epoch": 0.8010996892182644, "grad_norm": 0.3808312714099884, "learning_rate": 9.173867836683779e-06, "loss": 0.3621, "step": 2234 }, { "epoch": 0.801458283528568, "grad_norm": 0.37311479449272156, "learning_rate": 9.172718449533082e-06, "loss": 0.3495, "step": 2235 }, { "epoch": 0.8018168778388717, "grad_norm": 0.38493672013282776, "learning_rate": 9.171568335481849e-06, "loss": 0.3554, "step": 2236 }, { "epoch": 0.8021754721491753, "grad_norm": 0.3694417476654053, "learning_rate": 9.170417494730433e-06, "loss": 0.3639, "step": 2237 }, { "epoch": 0.8025340664594789, "grad_norm": 0.35131850838661194, "learning_rate": 9.169265927479313e-06, "loss": 0.3484, "step": 2238 }, { "epoch": 0.8028926607697825, "grad_norm": 0.3810049593448639, "learning_rate": 9.1681136339291e-06, "loss": 0.3264, "step": 2239 }, { "epoch": 0.8032512550800861, "grad_norm": 0.442504346370697, "learning_rate": 9.166960614280522e-06, "loss": 0.3344, "step": 2240 }, { "epoch": 0.8036098493903897, "grad_norm": 0.3810916841030121, "learning_rate": 9.165806868734444e-06, "loss": 0.3415, "step": 2241 }, { "epoch": 0.8039684437006933, "grad_norm": 0.3743234872817993, "learning_rate": 9.164652397491846e-06, "loss": 0.3657, "step": 2242 }, { "epoch": 0.8043270380109969, "grad_norm": 0.4232233464717865, "learning_rate": 9.163497200753844e-06, "loss": 0.3493, "step": 2243 }, { "epoch": 0.8046856323213005, "grad_norm": 0.38885512948036194, "learning_rate": 9.162341278721676e-06, "loss": 0.3467, "step": 2244 }, { "epoch": 0.8050442266316041, "grad_norm": 0.4188341796398163, "learning_rate": 9.161184631596708e-06, "loss": 0.3492, "step": 2245 }, { "epoch": 0.8054028209419077, "grad_norm": 0.37425491213798523, "learning_rate": 9.16002725958043e-06, "loss": 0.3526, "step": 2246 }, { "epoch": 0.8057614152522113, "grad_norm": 0.35482531785964966, "learning_rate": 9.158869162874463e-06, "loss": 0.3575, "step": 2247 }, { "epoch": 0.806120009562515, "grad_norm": 0.3592093884944916, "learning_rate": 9.157710341680547e-06, "loss": 0.332, "step": 2248 }, { "epoch": 0.8064786038728186, "grad_norm": 0.4011440873146057, "learning_rate": 9.156550796200553e-06, "loss": 0.3613, "step": 2249 }, { "epoch": 0.8068371981831222, "grad_norm": 0.35280317068099976, "learning_rate": 9.155390526636482e-06, "loss": 0.3438, "step": 2250 }, { "epoch": 0.8071957924934258, "grad_norm": 0.39658236503601074, "learning_rate": 9.154229533190451e-06, "loss": 0.351, "step": 2251 }, { "epoch": 0.8075543868037294, "grad_norm": 0.36350172758102417, "learning_rate": 9.153067816064711e-06, "loss": 0.3534, "step": 2252 }, { "epoch": 0.807912981114033, "grad_norm": 0.36577773094177246, "learning_rate": 9.151905375461636e-06, "loss": 0.3419, "step": 2253 }, { "epoch": 0.8082715754243366, "grad_norm": 0.43069592118263245, "learning_rate": 9.150742211583728e-06, "loss": 0.3523, "step": 2254 }, { "epoch": 0.8086301697346402, "grad_norm": 0.3433162271976471, "learning_rate": 9.149578324633613e-06, "loss": 0.3366, "step": 2255 }, { "epoch": 0.8089887640449438, "grad_norm": 0.37179696559906006, "learning_rate": 9.148413714814044e-06, "loss": 0.3564, "step": 2256 }, { "epoch": 0.8093473583552474, "grad_norm": 0.40652045607566833, "learning_rate": 9.1472483823279e-06, "loss": 0.3652, "step": 2257 }, { "epoch": 0.809705952665551, "grad_norm": 0.3680932819843292, "learning_rate": 9.146082327378183e-06, "loss": 0.3453, "step": 2258 }, { "epoch": 0.8100645469758546, "grad_norm": 0.36118781566619873, "learning_rate": 9.144915550168027e-06, "loss": 0.3795, "step": 2259 }, { "epoch": 0.8104231412861582, "grad_norm": 0.3895390033721924, "learning_rate": 9.143748050900685e-06, "loss": 0.3514, "step": 2260 }, { "epoch": 0.8107817355964618, "grad_norm": 0.35362422466278076, "learning_rate": 9.142579829779542e-06, "loss": 0.3717, "step": 2261 }, { "epoch": 0.8111403299067654, "grad_norm": 0.39481309056282043, "learning_rate": 9.141410887008104e-06, "loss": 0.368, "step": 2262 }, { "epoch": 0.811498924217069, "grad_norm": 0.3676830530166626, "learning_rate": 9.140241222790006e-06, "loss": 0.3702, "step": 2263 }, { "epoch": 0.8118575185273726, "grad_norm": 0.38325080275535583, "learning_rate": 9.139070837329002e-06, "loss": 0.3536, "step": 2264 }, { "epoch": 0.8122161128376764, "grad_norm": 0.4031727910041809, "learning_rate": 9.137899730828985e-06, "loss": 0.3559, "step": 2265 }, { "epoch": 0.81257470714798, "grad_norm": 0.3804713189601898, "learning_rate": 9.136727903493958e-06, "loss": 0.3712, "step": 2266 }, { "epoch": 0.8129333014582836, "grad_norm": 0.4170338213443756, "learning_rate": 9.135555355528063e-06, "loss": 0.3607, "step": 2267 }, { "epoch": 0.8132918957685872, "grad_norm": 0.3656937777996063, "learning_rate": 9.134382087135558e-06, "loss": 0.3648, "step": 2268 }, { "epoch": 0.8136504900788908, "grad_norm": 0.42970263957977295, "learning_rate": 9.133208098520829e-06, "loss": 0.3478, "step": 2269 }, { "epoch": 0.8140090843891944, "grad_norm": 0.3424944579601288, "learning_rate": 9.132033389888392e-06, "loss": 0.3692, "step": 2270 }, { "epoch": 0.814367678699498, "grad_norm": 0.3752778470516205, "learning_rate": 9.130857961442883e-06, "loss": 0.3557, "step": 2271 }, { "epoch": 0.8147262730098016, "grad_norm": 0.4034355580806732, "learning_rate": 9.129681813389063e-06, "loss": 0.3648, "step": 2272 }, { "epoch": 0.8150848673201052, "grad_norm": 0.3768298923969269, "learning_rate": 9.128504945931825e-06, "loss": 0.3562, "step": 2273 }, { "epoch": 0.8154434616304088, "grad_norm": 0.36145415902137756, "learning_rate": 9.12732735927618e-06, "loss": 0.3542, "step": 2274 }, { "epoch": 0.8158020559407124, "grad_norm": 0.387369304895401, "learning_rate": 9.126149053627268e-06, "loss": 0.3589, "step": 2275 }, { "epoch": 0.816160650251016, "grad_norm": 0.35943979024887085, "learning_rate": 9.124970029190354e-06, "loss": 0.3392, "step": 2276 }, { "epoch": 0.8165192445613196, "grad_norm": 0.3538007140159607, "learning_rate": 9.123790286170828e-06, "loss": 0.3734, "step": 2277 }, { "epoch": 0.8168778388716232, "grad_norm": 0.3331281244754791, "learning_rate": 9.122609824774203e-06, "loss": 0.3448, "step": 2278 }, { "epoch": 0.8172364331819268, "grad_norm": 0.34627193212509155, "learning_rate": 9.121428645206121e-06, "loss": 0.3629, "step": 2279 }, { "epoch": 0.8175950274922305, "grad_norm": 0.3548154830932617, "learning_rate": 9.120246747672347e-06, "loss": 0.343, "step": 2280 }, { "epoch": 0.8179536218025341, "grad_norm": 0.3355717360973358, "learning_rate": 9.11906413237877e-06, "loss": 0.3568, "step": 2281 }, { "epoch": 0.8183122161128377, "grad_norm": 0.3908214569091797, "learning_rate": 9.117880799531407e-06, "loss": 0.356, "step": 2282 }, { "epoch": 0.8186708104231413, "grad_norm": 0.39158642292022705, "learning_rate": 9.116696749336397e-06, "loss": 0.3751, "step": 2283 }, { "epoch": 0.8190294047334449, "grad_norm": 0.34640195965766907, "learning_rate": 9.115511982000005e-06, "loss": 0.3772, "step": 2284 }, { "epoch": 0.8193879990437485, "grad_norm": 0.37508758902549744, "learning_rate": 9.114326497728621e-06, "loss": 0.3656, "step": 2285 }, { "epoch": 0.8197465933540521, "grad_norm": 0.37802112102508545, "learning_rate": 9.113140296728762e-06, "loss": 0.3402, "step": 2286 }, { "epoch": 0.8201051876643557, "grad_norm": 0.3848439157009125, "learning_rate": 9.111953379207065e-06, "loss": 0.332, "step": 2287 }, { "epoch": 0.8204637819746593, "grad_norm": 0.36274582147598267, "learning_rate": 9.110765745370298e-06, "loss": 0.3481, "step": 2288 }, { "epoch": 0.8208223762849629, "grad_norm": 0.3379366993904114, "learning_rate": 9.109577395425348e-06, "loss": 0.3222, "step": 2289 }, { "epoch": 0.8211809705952665, "grad_norm": 0.36900463700294495, "learning_rate": 9.108388329579232e-06, "loss": 0.3596, "step": 2290 }, { "epoch": 0.8215395649055701, "grad_norm": 0.3537020981311798, "learning_rate": 9.107198548039085e-06, "loss": 0.3507, "step": 2291 }, { "epoch": 0.8218981592158737, "grad_norm": 0.360638290643692, "learning_rate": 9.106008051012172e-06, "loss": 0.3398, "step": 2292 }, { "epoch": 0.8222567535261773, "grad_norm": 0.37464940547943115, "learning_rate": 9.104816838705883e-06, "loss": 0.3621, "step": 2293 }, { "epoch": 0.822615347836481, "grad_norm": 0.3459709882736206, "learning_rate": 9.103624911327731e-06, "loss": 0.3516, "step": 2294 }, { "epoch": 0.8229739421467847, "grad_norm": 0.4258389472961426, "learning_rate": 9.10243226908535e-06, "loss": 0.3751, "step": 2295 }, { "epoch": 0.8233325364570883, "grad_norm": 0.40299683809280396, "learning_rate": 9.101238912186506e-06, "loss": 0.3468, "step": 2296 }, { "epoch": 0.8236911307673919, "grad_norm": 0.3585318326950073, "learning_rate": 9.100044840839085e-06, "loss": 0.3436, "step": 2297 }, { "epoch": 0.8240497250776955, "grad_norm": 0.4135581851005554, "learning_rate": 9.098850055251094e-06, "loss": 0.3657, "step": 2298 }, { "epoch": 0.8244083193879991, "grad_norm": 0.3526172935962677, "learning_rate": 9.097654555630673e-06, "loss": 0.3484, "step": 2299 }, { "epoch": 0.8247669136983027, "grad_norm": 0.3709033727645874, "learning_rate": 9.096458342186078e-06, "loss": 0.376, "step": 2300 }, { "epoch": 0.8251255080086063, "grad_norm": 0.40276408195495605, "learning_rate": 9.095261415125698e-06, "loss": 0.3523, "step": 2301 }, { "epoch": 0.8254841023189099, "grad_norm": 0.36227989196777344, "learning_rate": 9.094063774658034e-06, "loss": 0.3221, "step": 2302 }, { "epoch": 0.8258426966292135, "grad_norm": 0.4298408329486847, "learning_rate": 9.092865420991727e-06, "loss": 0.3765, "step": 2303 }, { "epoch": 0.8262012909395171, "grad_norm": 0.39419451355934143, "learning_rate": 9.091666354335529e-06, "loss": 0.3528, "step": 2304 }, { "epoch": 0.8265598852498207, "grad_norm": 0.362019807100296, "learning_rate": 9.090466574898323e-06, "loss": 0.3304, "step": 2305 }, { "epoch": 0.8269184795601243, "grad_norm": 0.41380879282951355, "learning_rate": 9.089266082889113e-06, "loss": 0.3669, "step": 2306 }, { "epoch": 0.8272770738704279, "grad_norm": 0.35779842734336853, "learning_rate": 9.088064878517027e-06, "loss": 0.3398, "step": 2307 }, { "epoch": 0.8276356681807315, "grad_norm": 0.38779276609420776, "learning_rate": 9.086862961991322e-06, "loss": 0.3372, "step": 2308 }, { "epoch": 0.8279942624910351, "grad_norm": 0.40026524662971497, "learning_rate": 9.085660333521373e-06, "loss": 0.3397, "step": 2309 }, { "epoch": 0.8283528568013387, "grad_norm": 0.35650381445884705, "learning_rate": 9.084456993316683e-06, "loss": 0.3532, "step": 2310 }, { "epoch": 0.8287114511116423, "grad_norm": 0.3560475707054138, "learning_rate": 9.08325294158688e-06, "loss": 0.3541, "step": 2311 }, { "epoch": 0.829070045421946, "grad_norm": 0.4076184630393982, "learning_rate": 9.082048178541708e-06, "loss": 0.3395, "step": 2312 }, { "epoch": 0.8294286397322496, "grad_norm": 0.4355037212371826, "learning_rate": 9.080842704391045e-06, "loss": 0.357, "step": 2313 }, { "epoch": 0.8297872340425532, "grad_norm": 0.37993136048316956, "learning_rate": 9.079636519344886e-06, "loss": 0.3791, "step": 2314 }, { "epoch": 0.8301458283528568, "grad_norm": 0.4204128086566925, "learning_rate": 9.078429623613352e-06, "loss": 0.3629, "step": 2315 }, { "epoch": 0.8305044226631604, "grad_norm": 0.4536810517311096, "learning_rate": 9.077222017406688e-06, "loss": 0.3524, "step": 2316 }, { "epoch": 0.830863016973464, "grad_norm": 0.36556166410446167, "learning_rate": 9.076013700935266e-06, "loss": 0.3611, "step": 2317 }, { "epoch": 0.8312216112837676, "grad_norm": 0.36537328362464905, "learning_rate": 9.074804674409575e-06, "loss": 0.356, "step": 2318 }, { "epoch": 0.8315802055940712, "grad_norm": 0.4722171723842621, "learning_rate": 9.073594938040231e-06, "loss": 0.3593, "step": 2319 }, { "epoch": 0.8319387999043748, "grad_norm": 0.45227310061454773, "learning_rate": 9.072384492037977e-06, "loss": 0.3486, "step": 2320 }, { "epoch": 0.8322973942146784, "grad_norm": 0.3485499918460846, "learning_rate": 9.071173336613675e-06, "loss": 0.3544, "step": 2321 }, { "epoch": 0.832655988524982, "grad_norm": 0.46017932891845703, "learning_rate": 9.06996147197831e-06, "loss": 0.3594, "step": 2322 }, { "epoch": 0.8330145828352857, "grad_norm": 0.4383504390716553, "learning_rate": 9.068748898342996e-06, "loss": 0.3436, "step": 2323 }, { "epoch": 0.8333731771455893, "grad_norm": 0.40630608797073364, "learning_rate": 9.067535615918964e-06, "loss": 0.3764, "step": 2324 }, { "epoch": 0.833731771455893, "grad_norm": 0.3565645217895508, "learning_rate": 9.066321624917573e-06, "loss": 0.3478, "step": 2325 }, { "epoch": 0.8340903657661966, "grad_norm": 0.3840406835079193, "learning_rate": 9.065106925550305e-06, "loss": 0.351, "step": 2326 }, { "epoch": 0.8344489600765002, "grad_norm": 0.40134546160697937, "learning_rate": 9.06389151802876e-06, "loss": 0.3395, "step": 2327 }, { "epoch": 0.8348075543868038, "grad_norm": 0.34495627880096436, "learning_rate": 9.062675402564674e-06, "loss": 0.3427, "step": 2328 }, { "epoch": 0.8351661486971074, "grad_norm": 0.3630557358264923, "learning_rate": 9.06145857936989e-06, "loss": 0.3465, "step": 2329 }, { "epoch": 0.835524743007411, "grad_norm": 0.39344796538352966, "learning_rate": 9.060241048656385e-06, "loss": 0.3331, "step": 2330 }, { "epoch": 0.8358833373177146, "grad_norm": 0.42807573080062866, "learning_rate": 9.05902281063626e-06, "loss": 0.3661, "step": 2331 }, { "epoch": 0.8362419316280182, "grad_norm": 0.38318169116973877, "learning_rate": 9.05780386552173e-06, "loss": 0.3478, "step": 2332 }, { "epoch": 0.8366005259383218, "grad_norm": 0.3640018105506897, "learning_rate": 9.056584213525144e-06, "loss": 0.341, "step": 2333 }, { "epoch": 0.8369591202486254, "grad_norm": 0.4666867256164551, "learning_rate": 9.055363854858966e-06, "loss": 0.3614, "step": 2334 }, { "epoch": 0.837317714558929, "grad_norm": 0.434843510389328, "learning_rate": 9.05414278973579e-06, "loss": 0.3374, "step": 2335 }, { "epoch": 0.8376763088692326, "grad_norm": 0.35362449288368225, "learning_rate": 9.052921018368323e-06, "loss": 0.3375, "step": 2336 }, { "epoch": 0.8380349031795362, "grad_norm": 0.4746417999267578, "learning_rate": 9.051698540969407e-06, "loss": 0.3563, "step": 2337 }, { "epoch": 0.8383934974898398, "grad_norm": 0.47440558671951294, "learning_rate": 9.050475357752001e-06, "loss": 0.3734, "step": 2338 }, { "epoch": 0.8387520918001434, "grad_norm": 0.4035123884677887, "learning_rate": 9.049251468929184e-06, "loss": 0.3416, "step": 2339 }, { "epoch": 0.839110686110447, "grad_norm": 0.4413609802722931, "learning_rate": 9.048026874714163e-06, "loss": 0.3731, "step": 2340 }, { "epoch": 0.8394692804207506, "grad_norm": 0.3796933591365814, "learning_rate": 9.04680157532027e-06, "loss": 0.3388, "step": 2341 }, { "epoch": 0.8398278747310542, "grad_norm": 0.39830639958381653, "learning_rate": 9.045575570960949e-06, "loss": 0.358, "step": 2342 }, { "epoch": 0.8401864690413579, "grad_norm": 0.4191424548625946, "learning_rate": 9.044348861849777e-06, "loss": 0.3599, "step": 2343 }, { "epoch": 0.8405450633516615, "grad_norm": 0.3686538338661194, "learning_rate": 9.043121448200451e-06, "loss": 0.3356, "step": 2344 }, { "epoch": 0.8409036576619651, "grad_norm": 0.35646313428878784, "learning_rate": 9.04189333022679e-06, "loss": 0.363, "step": 2345 }, { "epoch": 0.8412622519722687, "grad_norm": 0.42456668615341187, "learning_rate": 9.040664508142735e-06, "loss": 0.3382, "step": 2346 }, { "epoch": 0.8416208462825723, "grad_norm": 0.373981773853302, "learning_rate": 9.039434982162354e-06, "loss": 0.3396, "step": 2347 }, { "epoch": 0.8419794405928759, "grad_norm": 0.36375588178634644, "learning_rate": 9.03820475249983e-06, "loss": 0.3394, "step": 2348 }, { "epoch": 0.8423380349031795, "grad_norm": 0.39896953105926514, "learning_rate": 9.036973819369474e-06, "loss": 0.3454, "step": 2349 }, { "epoch": 0.8426966292134831, "grad_norm": 0.40849414467811584, "learning_rate": 9.03574218298572e-06, "loss": 0.3552, "step": 2350 }, { "epoch": 0.8430552235237867, "grad_norm": 0.4496278762817383, "learning_rate": 9.034509843563122e-06, "loss": 0.3516, "step": 2351 }, { "epoch": 0.8434138178340904, "grad_norm": 0.3768489360809326, "learning_rate": 9.033276801316355e-06, "loss": 0.3583, "step": 2352 }, { "epoch": 0.843772412144394, "grad_norm": 0.4394373297691345, "learning_rate": 9.032043056460223e-06, "loss": 0.3528, "step": 2353 }, { "epoch": 0.8441310064546976, "grad_norm": 0.44838663935661316, "learning_rate": 9.030808609209647e-06, "loss": 0.3454, "step": 2354 }, { "epoch": 0.8444896007650012, "grad_norm": 0.3797253370285034, "learning_rate": 9.029573459779669e-06, "loss": 0.3428, "step": 2355 }, { "epoch": 0.8448481950753048, "grad_norm": 0.4224797189235687, "learning_rate": 9.028337608385457e-06, "loss": 0.3408, "step": 2356 }, { "epoch": 0.8452067893856084, "grad_norm": 0.4048996865749359, "learning_rate": 9.027101055242303e-06, "loss": 0.3559, "step": 2357 }, { "epoch": 0.845565383695912, "grad_norm": 0.34832194447517395, "learning_rate": 9.025863800565614e-06, "loss": 0.3306, "step": 2358 }, { "epoch": 0.8459239780062157, "grad_norm": 0.4188747704029083, "learning_rate": 9.024625844570927e-06, "loss": 0.3543, "step": 2359 }, { "epoch": 0.8462825723165193, "grad_norm": 0.4023209810256958, "learning_rate": 9.023387187473896e-06, "loss": 0.3512, "step": 2360 }, { "epoch": 0.8466411666268229, "grad_norm": 0.3680308163166046, "learning_rate": 9.0221478294903e-06, "loss": 0.383, "step": 2361 }, { "epoch": 0.8469997609371265, "grad_norm": 0.4280959367752075, "learning_rate": 9.020907770836038e-06, "loss": 0.3377, "step": 2362 }, { "epoch": 0.8473583552474301, "grad_norm": 0.3758417069911957, "learning_rate": 9.019667011727133e-06, "loss": 0.3914, "step": 2363 }, { "epoch": 0.8477169495577337, "grad_norm": 0.34659233689308167, "learning_rate": 9.018425552379728e-06, "loss": 0.347, "step": 2364 }, { "epoch": 0.8480755438680373, "grad_norm": 0.3794516324996948, "learning_rate": 9.01718339301009e-06, "loss": 0.3363, "step": 2365 }, { "epoch": 0.8484341381783409, "grad_norm": 0.38799580931663513, "learning_rate": 9.015940533834606e-06, "loss": 0.373, "step": 2366 }, { "epoch": 0.8487927324886445, "grad_norm": 0.34814009070396423, "learning_rate": 9.01469697506979e-06, "loss": 0.3365, "step": 2367 }, { "epoch": 0.8491513267989481, "grad_norm": 0.43146517872810364, "learning_rate": 9.013452716932268e-06, "loss": 0.3509, "step": 2368 }, { "epoch": 0.8495099211092517, "grad_norm": 0.39291611313819885, "learning_rate": 9.012207759638794e-06, "loss": 0.3641, "step": 2369 }, { "epoch": 0.8498685154195553, "grad_norm": 0.3820742964744568, "learning_rate": 9.010962103406248e-06, "loss": 0.3439, "step": 2370 }, { "epoch": 0.8502271097298589, "grad_norm": 0.436469167470932, "learning_rate": 9.009715748451625e-06, "loss": 0.3498, "step": 2371 }, { "epoch": 0.8505857040401625, "grad_norm": 0.38871583342552185, "learning_rate": 9.008468694992044e-06, "loss": 0.3547, "step": 2372 }, { "epoch": 0.8509442983504661, "grad_norm": 0.3650503158569336, "learning_rate": 9.007220943244744e-06, "loss": 0.3342, "step": 2373 }, { "epoch": 0.8513028926607697, "grad_norm": 0.43888601660728455, "learning_rate": 9.005972493427088e-06, "loss": 0.3737, "step": 2374 }, { "epoch": 0.8516614869710734, "grad_norm": 0.4087766110897064, "learning_rate": 9.004723345756564e-06, "loss": 0.3288, "step": 2375 }, { "epoch": 0.852020081281377, "grad_norm": 0.37847256660461426, "learning_rate": 9.003473500450771e-06, "loss": 0.3562, "step": 2376 }, { "epoch": 0.8523786755916806, "grad_norm": 0.41861897706985474, "learning_rate": 9.002222957727438e-06, "loss": 0.3649, "step": 2377 }, { "epoch": 0.8527372699019842, "grad_norm": 0.3762253224849701, "learning_rate": 9.000971717804415e-06, "loss": 0.3501, "step": 2378 }, { "epoch": 0.8530958642122878, "grad_norm": 0.35413968563079834, "learning_rate": 8.99971978089967e-06, "loss": 0.3482, "step": 2379 }, { "epoch": 0.8534544585225915, "grad_norm": 0.36459246277809143, "learning_rate": 8.998467147231297e-06, "loss": 0.3671, "step": 2380 }, { "epoch": 0.8538130528328951, "grad_norm": 0.36195236444473267, "learning_rate": 8.997213817017508e-06, "loss": 0.3607, "step": 2381 }, { "epoch": 0.8541716471431987, "grad_norm": 0.34710273146629333, "learning_rate": 8.995959790476633e-06, "loss": 0.3498, "step": 2382 }, { "epoch": 0.8545302414535023, "grad_norm": 0.40385037660598755, "learning_rate": 8.994705067827131e-06, "loss": 0.3519, "step": 2383 }, { "epoch": 0.8548888357638059, "grad_norm": 0.3378605842590332, "learning_rate": 8.99344964928758e-06, "loss": 0.3502, "step": 2384 }, { "epoch": 0.8552474300741095, "grad_norm": 0.37880754470825195, "learning_rate": 8.992193535076673e-06, "loss": 0.3592, "step": 2385 }, { "epoch": 0.8556060243844131, "grad_norm": 0.40741488337516785, "learning_rate": 8.990936725413233e-06, "loss": 0.3586, "step": 2386 }, { "epoch": 0.8559646186947167, "grad_norm": 0.3696553707122803, "learning_rate": 8.9896792205162e-06, "loss": 0.3563, "step": 2387 }, { "epoch": 0.8563232130050203, "grad_norm": 0.3898650109767914, "learning_rate": 8.988421020604635e-06, "loss": 0.3703, "step": 2388 }, { "epoch": 0.856681807315324, "grad_norm": 0.39729636907577515, "learning_rate": 8.987162125897718e-06, "loss": 0.3654, "step": 2389 }, { "epoch": 0.8570404016256276, "grad_norm": 0.3448394536972046, "learning_rate": 8.985902536614752e-06, "loss": 0.3669, "step": 2390 }, { "epoch": 0.8573989959359312, "grad_norm": 0.36236372590065, "learning_rate": 8.984642252975167e-06, "loss": 0.3429, "step": 2391 }, { "epoch": 0.8577575902462348, "grad_norm": 0.38021960854530334, "learning_rate": 8.983381275198502e-06, "loss": 0.3341, "step": 2392 }, { "epoch": 0.8581161845565384, "grad_norm": 0.36025333404541016, "learning_rate": 8.982119603504426e-06, "loss": 0.3497, "step": 2393 }, { "epoch": 0.858474778866842, "grad_norm": 0.32650306820869446, "learning_rate": 8.980857238112726e-06, "loss": 0.3271, "step": 2394 }, { "epoch": 0.8588333731771456, "grad_norm": 0.37119120359420776, "learning_rate": 8.979594179243312e-06, "loss": 0.3332, "step": 2395 }, { "epoch": 0.8591919674874492, "grad_norm": 0.3815891742706299, "learning_rate": 8.978330427116208e-06, "loss": 0.3523, "step": 2396 }, { "epoch": 0.8595505617977528, "grad_norm": 0.3953537940979004, "learning_rate": 8.977065981951567e-06, "loss": 0.3764, "step": 2397 }, { "epoch": 0.8599091561080564, "grad_norm": 0.357017457485199, "learning_rate": 8.975800843969659e-06, "loss": 0.3536, "step": 2398 }, { "epoch": 0.86026775041836, "grad_norm": 0.43168655037879944, "learning_rate": 8.974535013390875e-06, "loss": 0.353, "step": 2399 }, { "epoch": 0.8606263447286636, "grad_norm": 0.3407629132270813, "learning_rate": 8.973268490435727e-06, "loss": 0.349, "step": 2400 }, { "epoch": 0.8609849390389672, "grad_norm": 0.3765570819377899, "learning_rate": 8.972001275324843e-06, "loss": 0.3707, "step": 2401 }, { "epoch": 0.8613435333492708, "grad_norm": 0.42577287554740906, "learning_rate": 8.970733368278982e-06, "loss": 0.367, "step": 2402 }, { "epoch": 0.8617021276595744, "grad_norm": 0.37916138768196106, "learning_rate": 8.969464769519017e-06, "loss": 0.355, "step": 2403 }, { "epoch": 0.862060721969878, "grad_norm": 0.39396169781684875, "learning_rate": 8.968195479265935e-06, "loss": 0.3668, "step": 2404 }, { "epoch": 0.8624193162801816, "grad_norm": 0.3772008717060089, "learning_rate": 8.966925497740857e-06, "loss": 0.3561, "step": 2405 }, { "epoch": 0.8627779105904853, "grad_norm": 0.3597481846809387, "learning_rate": 8.965654825165015e-06, "loss": 0.3228, "step": 2406 }, { "epoch": 0.8631365049007889, "grad_norm": 0.35259515047073364, "learning_rate": 8.964383461759764e-06, "loss": 0.3769, "step": 2407 }, { "epoch": 0.8634950992110925, "grad_norm": 0.40199384093284607, "learning_rate": 8.963111407746581e-06, "loss": 0.3541, "step": 2408 }, { "epoch": 0.8638536935213962, "grad_norm": 0.35034340620040894, "learning_rate": 8.96183866334706e-06, "loss": 0.3453, "step": 2409 }, { "epoch": 0.8642122878316998, "grad_norm": 0.3366229236125946, "learning_rate": 8.960565228782918e-06, "loss": 0.3483, "step": 2410 }, { "epoch": 0.8645708821420034, "grad_norm": 0.37330350279808044, "learning_rate": 8.959291104275991e-06, "loss": 0.3788, "step": 2411 }, { "epoch": 0.864929476452307, "grad_norm": 0.34344351291656494, "learning_rate": 8.958016290048235e-06, "loss": 0.354, "step": 2412 }, { "epoch": 0.8652880707626106, "grad_norm": 0.38492709398269653, "learning_rate": 8.95674078632173e-06, "loss": 0.3742, "step": 2413 }, { "epoch": 0.8656466650729142, "grad_norm": 0.3624281585216522, "learning_rate": 8.955464593318667e-06, "loss": 0.3551, "step": 2414 }, { "epoch": 0.8660052593832178, "grad_norm": 0.37460047006607056, "learning_rate": 8.954187711261366e-06, "loss": 0.3488, "step": 2415 }, { "epoch": 0.8663638536935214, "grad_norm": 0.38286709785461426, "learning_rate": 8.952910140372264e-06, "loss": 0.3698, "step": 2416 }, { "epoch": 0.866722448003825, "grad_norm": 0.36351078748703003, "learning_rate": 8.951631880873918e-06, "loss": 0.3407, "step": 2417 }, { "epoch": 0.8670810423141286, "grad_norm": 0.35471779108047485, "learning_rate": 8.950352932989004e-06, "loss": 0.3605, "step": 2418 }, { "epoch": 0.8674396366244322, "grad_norm": 0.347326397895813, "learning_rate": 8.949073296940319e-06, "loss": 0.3271, "step": 2419 }, { "epoch": 0.8677982309347358, "grad_norm": 0.3617226779460907, "learning_rate": 8.947792972950779e-06, "loss": 0.3573, "step": 2420 }, { "epoch": 0.8681568252450395, "grad_norm": 0.3548377454280853, "learning_rate": 8.94651196124342e-06, "loss": 0.3437, "step": 2421 }, { "epoch": 0.8685154195553431, "grad_norm": 0.38453933596611023, "learning_rate": 8.9452302620414e-06, "loss": 0.3442, "step": 2422 }, { "epoch": 0.8688740138656467, "grad_norm": 0.39330926537513733, "learning_rate": 8.943947875567993e-06, "loss": 0.3676, "step": 2423 }, { "epoch": 0.8692326081759503, "grad_norm": 0.3591073453426361, "learning_rate": 8.942664802046597e-06, "loss": 0.3527, "step": 2424 }, { "epoch": 0.8695912024862539, "grad_norm": 0.3390296399593353, "learning_rate": 8.941381041700726e-06, "loss": 0.3248, "step": 2425 }, { "epoch": 0.8699497967965575, "grad_norm": 0.3814776539802551, "learning_rate": 8.940096594754015e-06, "loss": 0.3467, "step": 2426 }, { "epoch": 0.8703083911068611, "grad_norm": 0.35481879115104675, "learning_rate": 8.938811461430219e-06, "loss": 0.3634, "step": 2427 }, { "epoch": 0.8706669854171647, "grad_norm": 0.37267786264419556, "learning_rate": 8.937525641953212e-06, "loss": 0.3504, "step": 2428 }, { "epoch": 0.8710255797274683, "grad_norm": 0.3520088791847229, "learning_rate": 8.936239136546987e-06, "loss": 0.3577, "step": 2429 }, { "epoch": 0.8713841740377719, "grad_norm": 0.3446783721446991, "learning_rate": 8.93495194543566e-06, "loss": 0.3346, "step": 2430 }, { "epoch": 0.8717427683480755, "grad_norm": 0.351759135723114, "learning_rate": 8.933664068843459e-06, "loss": 0.3352, "step": 2431 }, { "epoch": 0.8721013626583791, "grad_norm": 0.3578600585460663, "learning_rate": 8.932375506994741e-06, "loss": 0.3515, "step": 2432 }, { "epoch": 0.8724599569686827, "grad_norm": 0.3545148968696594, "learning_rate": 8.931086260113976e-06, "loss": 0.3593, "step": 2433 }, { "epoch": 0.8728185512789863, "grad_norm": 0.34869927167892456, "learning_rate": 8.929796328425752e-06, "loss": 0.355, "step": 2434 }, { "epoch": 0.8731771455892899, "grad_norm": 0.3599458932876587, "learning_rate": 8.928505712154783e-06, "loss": 0.3739, "step": 2435 }, { "epoch": 0.8735357398995935, "grad_norm": 0.3730643391609192, "learning_rate": 8.927214411525895e-06, "loss": 0.339, "step": 2436 }, { "epoch": 0.8738943342098972, "grad_norm": 0.34727683663368225, "learning_rate": 8.92592242676404e-06, "loss": 0.3599, "step": 2437 }, { "epoch": 0.8742529285202009, "grad_norm": 0.34206974506378174, "learning_rate": 8.924629758094281e-06, "loss": 0.3262, "step": 2438 }, { "epoch": 0.8746115228305045, "grad_norm": 0.3685920834541321, "learning_rate": 8.92333640574181e-06, "loss": 0.3539, "step": 2439 }, { "epoch": 0.8749701171408081, "grad_norm": 0.4361799657344818, "learning_rate": 8.922042369931933e-06, "loss": 0.3588, "step": 2440 }, { "epoch": 0.8753287114511117, "grad_norm": 0.3654775023460388, "learning_rate": 8.92074765089007e-06, "loss": 0.3338, "step": 2441 }, { "epoch": 0.8756873057614153, "grad_norm": 0.37759190797805786, "learning_rate": 8.91945224884177e-06, "loss": 0.3639, "step": 2442 }, { "epoch": 0.8760459000717189, "grad_norm": 0.3650391697883606, "learning_rate": 8.918156164012692e-06, "loss": 0.3561, "step": 2443 }, { "epoch": 0.8764044943820225, "grad_norm": 0.40065985918045044, "learning_rate": 8.916859396628623e-06, "loss": 0.3497, "step": 2444 }, { "epoch": 0.8767630886923261, "grad_norm": 0.351608544588089, "learning_rate": 8.915561946915458e-06, "loss": 0.3324, "step": 2445 }, { "epoch": 0.8771216830026297, "grad_norm": 0.365223228931427, "learning_rate": 8.914263815099222e-06, "loss": 0.3481, "step": 2446 }, { "epoch": 0.8774802773129333, "grad_norm": 0.4977547228336334, "learning_rate": 8.912965001406051e-06, "loss": 0.3737, "step": 2447 }, { "epoch": 0.8778388716232369, "grad_norm": 0.42224636673927307, "learning_rate": 8.911665506062203e-06, "loss": 0.3694, "step": 2448 }, { "epoch": 0.8781974659335405, "grad_norm": 0.3717834949493408, "learning_rate": 8.910365329294053e-06, "loss": 0.3548, "step": 2449 }, { "epoch": 0.8785560602438441, "grad_norm": 0.4464116096496582, "learning_rate": 8.909064471328098e-06, "loss": 0.3348, "step": 2450 }, { "epoch": 0.8789146545541477, "grad_norm": 0.44305315613746643, "learning_rate": 8.907762932390948e-06, "loss": 0.3535, "step": 2451 }, { "epoch": 0.8792732488644514, "grad_norm": 0.37925803661346436, "learning_rate": 8.90646071270934e-06, "loss": 0.3523, "step": 2452 }, { "epoch": 0.879631843174755, "grad_norm": 0.4093431234359741, "learning_rate": 8.905157812510122e-06, "loss": 0.3516, "step": 2453 }, { "epoch": 0.8799904374850586, "grad_norm": 0.4188474416732788, "learning_rate": 8.903854232020261e-06, "loss": 0.3627, "step": 2454 }, { "epoch": 0.8803490317953622, "grad_norm": 0.43983447551727295, "learning_rate": 8.90254997146685e-06, "loss": 0.3647, "step": 2455 }, { "epoch": 0.8807076261056658, "grad_norm": 0.39032450318336487, "learning_rate": 8.90124503107709e-06, "loss": 0.3745, "step": 2456 }, { "epoch": 0.8810662204159694, "grad_norm": 0.4455496370792389, "learning_rate": 8.89993941107831e-06, "loss": 0.352, "step": 2457 }, { "epoch": 0.881424814726273, "grad_norm": 0.40285950899124146, "learning_rate": 8.89863311169795e-06, "loss": 0.3484, "step": 2458 }, { "epoch": 0.8817834090365766, "grad_norm": 0.38715797662734985, "learning_rate": 8.897326133163574e-06, "loss": 0.3581, "step": 2459 }, { "epoch": 0.8821420033468802, "grad_norm": 0.4176146388053894, "learning_rate": 8.89601847570286e-06, "loss": 0.3487, "step": 2460 }, { "epoch": 0.8825005976571838, "grad_norm": 0.4214140474796295, "learning_rate": 8.894710139543605e-06, "loss": 0.3466, "step": 2461 }, { "epoch": 0.8828591919674874, "grad_norm": 0.3783847689628601, "learning_rate": 8.893401124913727e-06, "loss": 0.3999, "step": 2462 }, { "epoch": 0.883217786277791, "grad_norm": 0.4211769700050354, "learning_rate": 8.892091432041262e-06, "loss": 0.3401, "step": 2463 }, { "epoch": 0.8835763805880946, "grad_norm": 0.356523722410202, "learning_rate": 8.890781061154358e-06, "loss": 0.3565, "step": 2464 }, { "epoch": 0.8839349748983982, "grad_norm": 0.3973652124404907, "learning_rate": 8.88947001248129e-06, "loss": 0.3676, "step": 2465 }, { "epoch": 0.8842935692087018, "grad_norm": 0.3799475133419037, "learning_rate": 8.888158286250443e-06, "loss": 0.3498, "step": 2466 }, { "epoch": 0.8846521635190056, "grad_norm": 0.3773035705089569, "learning_rate": 8.886845882690326e-06, "loss": 0.3452, "step": 2467 }, { "epoch": 0.8850107578293092, "grad_norm": 0.3399107754230499, "learning_rate": 8.885532802029566e-06, "loss": 0.3545, "step": 2468 }, { "epoch": 0.8853693521396128, "grad_norm": 0.354875773191452, "learning_rate": 8.884219044496903e-06, "loss": 0.3643, "step": 2469 }, { "epoch": 0.8857279464499164, "grad_norm": 0.38265883922576904, "learning_rate": 8.882904610321196e-06, "loss": 0.3541, "step": 2470 }, { "epoch": 0.88608654076022, "grad_norm": 0.3724645972251892, "learning_rate": 8.881589499731427e-06, "loss": 0.3648, "step": 2471 }, { "epoch": 0.8864451350705236, "grad_norm": 0.33835694193840027, "learning_rate": 8.880273712956688e-06, "loss": 0.3258, "step": 2472 }, { "epoch": 0.8868037293808272, "grad_norm": 0.3530130386352539, "learning_rate": 8.878957250226199e-06, "loss": 0.3474, "step": 2473 }, { "epoch": 0.8871623236911308, "grad_norm": 0.3697161078453064, "learning_rate": 8.877640111769288e-06, "loss": 0.3527, "step": 2474 }, { "epoch": 0.8875209180014344, "grad_norm": 0.361643522977829, "learning_rate": 8.876322297815406e-06, "loss": 0.3627, "step": 2475 }, { "epoch": 0.887879512311738, "grad_norm": 0.382407546043396, "learning_rate": 8.875003808594118e-06, "loss": 0.3392, "step": 2476 }, { "epoch": 0.8882381066220416, "grad_norm": 0.37301716208457947, "learning_rate": 8.873684644335113e-06, "loss": 0.3572, "step": 2477 }, { "epoch": 0.8885967009323452, "grad_norm": 0.3749648928642273, "learning_rate": 8.87236480526819e-06, "loss": 0.367, "step": 2478 }, { "epoch": 0.8889552952426488, "grad_norm": 0.3862573206424713, "learning_rate": 8.87104429162327e-06, "loss": 0.3711, "step": 2479 }, { "epoch": 0.8893138895529524, "grad_norm": 0.36906927824020386, "learning_rate": 8.869723103630392e-06, "loss": 0.3495, "step": 2480 }, { "epoch": 0.889672483863256, "grad_norm": 0.3661883771419525, "learning_rate": 8.86840124151971e-06, "loss": 0.3497, "step": 2481 }, { "epoch": 0.8900310781735596, "grad_norm": 0.3671169579029083, "learning_rate": 8.867078705521494e-06, "loss": 0.3771, "step": 2482 }, { "epoch": 0.8903896724838632, "grad_norm": 0.3677719831466675, "learning_rate": 8.86575549586614e-06, "loss": 0.3504, "step": 2483 }, { "epoch": 0.8907482667941669, "grad_norm": 0.38348788022994995, "learning_rate": 8.86443161278415e-06, "loss": 0.3414, "step": 2484 }, { "epoch": 0.8911068611044705, "grad_norm": 0.3912539482116699, "learning_rate": 8.863107056506148e-06, "loss": 0.3654, "step": 2485 }, { "epoch": 0.8914654554147741, "grad_norm": 0.41111883521080017, "learning_rate": 8.861781827262882e-06, "loss": 0.3568, "step": 2486 }, { "epoch": 0.8918240497250777, "grad_norm": 0.40318477153778076, "learning_rate": 8.860455925285203e-06, "loss": 0.366, "step": 2487 }, { "epoch": 0.8921826440353813, "grad_norm": 0.35599055886268616, "learning_rate": 8.859129350804095e-06, "loss": 0.3458, "step": 2488 }, { "epoch": 0.8925412383456849, "grad_norm": 0.36483168601989746, "learning_rate": 8.857802104050644e-06, "loss": 0.3609, "step": 2489 }, { "epoch": 0.8928998326559885, "grad_norm": 0.3972878158092499, "learning_rate": 8.856474185256066e-06, "loss": 0.3448, "step": 2490 }, { "epoch": 0.8932584269662921, "grad_norm": 0.3673331141471863, "learning_rate": 8.855145594651687e-06, "loss": 0.3604, "step": 2491 }, { "epoch": 0.8936170212765957, "grad_norm": 0.37866324186325073, "learning_rate": 8.853816332468952e-06, "loss": 0.3617, "step": 2492 }, { "epoch": 0.8939756155868993, "grad_norm": 0.3908226191997528, "learning_rate": 8.85248639893942e-06, "loss": 0.3114, "step": 2493 }, { "epoch": 0.8943342098972029, "grad_norm": 0.3336457312107086, "learning_rate": 8.851155794294773e-06, "loss": 0.3507, "step": 2494 }, { "epoch": 0.8946928042075065, "grad_norm": 0.4089634418487549, "learning_rate": 8.849824518766804e-06, "loss": 0.3341, "step": 2495 }, { "epoch": 0.8950513985178102, "grad_norm": 0.414035439491272, "learning_rate": 8.848492572587426e-06, "loss": 0.3537, "step": 2496 }, { "epoch": 0.8954099928281138, "grad_norm": 0.3728415369987488, "learning_rate": 8.84715995598867e-06, "loss": 0.3275, "step": 2497 }, { "epoch": 0.8957685871384174, "grad_norm": 0.38268613815307617, "learning_rate": 8.84582666920268e-06, "loss": 0.3483, "step": 2498 }, { "epoch": 0.896127181448721, "grad_norm": 0.37874776124954224, "learning_rate": 8.844492712461719e-06, "loss": 0.3774, "step": 2499 }, { "epoch": 0.8964857757590247, "grad_norm": 0.34827232360839844, "learning_rate": 8.843158085998166e-06, "loss": 0.3394, "step": 2500 }, { "epoch": 0.8968443700693283, "grad_norm": 0.37626010179519653, "learning_rate": 8.841822790044519e-06, "loss": 0.3555, "step": 2501 }, { "epoch": 0.8972029643796319, "grad_norm": 0.3280787467956543, "learning_rate": 8.840486824833389e-06, "loss": 0.3274, "step": 2502 }, { "epoch": 0.8975615586899355, "grad_norm": 0.36396175622940063, "learning_rate": 8.839150190597505e-06, "loss": 0.3414, "step": 2503 }, { "epoch": 0.8979201530002391, "grad_norm": 0.3608286678791046, "learning_rate": 8.837812887569715e-06, "loss": 0.3388, "step": 2504 }, { "epoch": 0.8982787473105427, "grad_norm": 0.3722715675830841, "learning_rate": 8.836474915982977e-06, "loss": 0.3346, "step": 2505 }, { "epoch": 0.8986373416208463, "grad_norm": 0.3561643064022064, "learning_rate": 8.835136276070375e-06, "loss": 0.3352, "step": 2506 }, { "epoch": 0.8989959359311499, "grad_norm": 0.369765967130661, "learning_rate": 8.833796968065101e-06, "loss": 0.356, "step": 2507 }, { "epoch": 0.8993545302414535, "grad_norm": 0.3784436583518982, "learning_rate": 8.83245699220047e-06, "loss": 0.3793, "step": 2508 }, { "epoch": 0.8997131245517571, "grad_norm": 0.4114683270454407, "learning_rate": 8.831116348709903e-06, "loss": 0.3462, "step": 2509 }, { "epoch": 0.9000717188620607, "grad_norm": 0.3796180188655853, "learning_rate": 8.829775037826951e-06, "loss": 0.3649, "step": 2510 }, { "epoch": 0.9004303131723643, "grad_norm": 0.46950989961624146, "learning_rate": 8.828433059785273e-06, "loss": 0.3824, "step": 2511 }, { "epoch": 0.9007889074826679, "grad_norm": 0.3641186058521271, "learning_rate": 8.827090414818645e-06, "loss": 0.3522, "step": 2512 }, { "epoch": 0.9011475017929715, "grad_norm": 0.4337126314640045, "learning_rate": 8.82574710316096e-06, "loss": 0.361, "step": 2513 }, { "epoch": 0.9015060961032751, "grad_norm": 0.4458220303058624, "learning_rate": 8.824403125046225e-06, "loss": 0.3641, "step": 2514 }, { "epoch": 0.9018646904135788, "grad_norm": 0.3852304518222809, "learning_rate": 8.823058480708569e-06, "loss": 0.3441, "step": 2515 }, { "epoch": 0.9022232847238824, "grad_norm": 0.4857217073440552, "learning_rate": 8.821713170382232e-06, "loss": 0.3449, "step": 2516 }, { "epoch": 0.902581879034186, "grad_norm": 0.4345681071281433, "learning_rate": 8.82036719430157e-06, "loss": 0.3697, "step": 2517 }, { "epoch": 0.9029404733444896, "grad_norm": 0.35830628871917725, "learning_rate": 8.819020552701056e-06, "loss": 0.3399, "step": 2518 }, { "epoch": 0.9032990676547932, "grad_norm": 0.477615624666214, "learning_rate": 8.817673245815282e-06, "loss": 0.3616, "step": 2519 }, { "epoch": 0.9036576619650968, "grad_norm": 0.3376538157463074, "learning_rate": 8.81632527387895e-06, "loss": 0.3359, "step": 2520 }, { "epoch": 0.9040162562754004, "grad_norm": 0.3871312737464905, "learning_rate": 8.814976637126882e-06, "loss": 0.3403, "step": 2521 }, { "epoch": 0.904374850585704, "grad_norm": 0.3445852994918823, "learning_rate": 8.813627335794015e-06, "loss": 0.3602, "step": 2522 }, { "epoch": 0.9047334448960076, "grad_norm": 0.33969375491142273, "learning_rate": 8.8122773701154e-06, "loss": 0.3438, "step": 2523 }, { "epoch": 0.9050920392063112, "grad_norm": 0.3460400104522705, "learning_rate": 8.810926740326208e-06, "loss": 0.3445, "step": 2524 }, { "epoch": 0.9054506335166149, "grad_norm": 0.3909369111061096, "learning_rate": 8.80957544666172e-06, "loss": 0.3446, "step": 2525 }, { "epoch": 0.9058092278269185, "grad_norm": 0.3221897482872009, "learning_rate": 8.808223489357338e-06, "loss": 0.3115, "step": 2526 }, { "epoch": 0.9061678221372221, "grad_norm": 0.35584527254104614, "learning_rate": 8.806870868648576e-06, "loss": 0.3785, "step": 2527 }, { "epoch": 0.9065264164475257, "grad_norm": 0.38341760635375977, "learning_rate": 8.805517584771064e-06, "loss": 0.3294, "step": 2528 }, { "epoch": 0.9068850107578293, "grad_norm": 0.35215699672698975, "learning_rate": 8.80416363796055e-06, "loss": 0.3463, "step": 2529 }, { "epoch": 0.907243605068133, "grad_norm": 0.34763652086257935, "learning_rate": 8.802809028452895e-06, "loss": 0.3403, "step": 2530 }, { "epoch": 0.9076021993784366, "grad_norm": 0.37292686104774475, "learning_rate": 8.801453756484077e-06, "loss": 0.3544, "step": 2531 }, { "epoch": 0.9079607936887402, "grad_norm": 0.35156524181365967, "learning_rate": 8.800097822290186e-06, "loss": 0.3497, "step": 2532 }, { "epoch": 0.9083193879990438, "grad_norm": 0.3608061671257019, "learning_rate": 8.79874122610743e-06, "loss": 0.3762, "step": 2533 }, { "epoch": 0.9086779823093474, "grad_norm": 0.34855329990386963, "learning_rate": 8.797383968172137e-06, "loss": 0.3397, "step": 2534 }, { "epoch": 0.909036576619651, "grad_norm": 0.3210538625717163, "learning_rate": 8.79602604872074e-06, "loss": 0.3494, "step": 2535 }, { "epoch": 0.9093951709299546, "grad_norm": 0.36824241280555725, "learning_rate": 8.794667467989797e-06, "loss": 0.3502, "step": 2536 }, { "epoch": 0.9097537652402582, "grad_norm": 0.3687673807144165, "learning_rate": 8.793308226215976e-06, "loss": 0.3573, "step": 2537 }, { "epoch": 0.9101123595505618, "grad_norm": 0.39913204312324524, "learning_rate": 8.791948323636059e-06, "loss": 0.3722, "step": 2538 }, { "epoch": 0.9104709538608654, "grad_norm": 0.3341626226902008, "learning_rate": 8.790587760486946e-06, "loss": 0.3255, "step": 2539 }, { "epoch": 0.910829548171169, "grad_norm": 0.33783966302871704, "learning_rate": 8.789226537005651e-06, "loss": 0.3694, "step": 2540 }, { "epoch": 0.9111881424814726, "grad_norm": 0.346622496843338, "learning_rate": 8.787864653429307e-06, "loss": 0.3504, "step": 2541 }, { "epoch": 0.9115467367917762, "grad_norm": 0.33189189434051514, "learning_rate": 8.786502109995155e-06, "loss": 0.3361, "step": 2542 }, { "epoch": 0.9119053311020798, "grad_norm": 0.34694215655326843, "learning_rate": 8.785138906940554e-06, "loss": 0.3486, "step": 2543 }, { "epoch": 0.9122639254123834, "grad_norm": 0.38361379504203796, "learning_rate": 8.783775044502978e-06, "loss": 0.3415, "step": 2544 }, { "epoch": 0.912622519722687, "grad_norm": 0.3811246454715729, "learning_rate": 8.78241052292002e-06, "loss": 0.348, "step": 2545 }, { "epoch": 0.9129811140329906, "grad_norm": 0.33122703433036804, "learning_rate": 8.78104534242938e-06, "loss": 0.3595, "step": 2546 }, { "epoch": 0.9133397083432943, "grad_norm": 0.361448734998703, "learning_rate": 8.779679503268877e-06, "loss": 0.3511, "step": 2547 }, { "epoch": 0.9136983026535979, "grad_norm": 0.3683726191520691, "learning_rate": 8.778313005676446e-06, "loss": 0.3568, "step": 2548 }, { "epoch": 0.9140568969639015, "grad_norm": 0.3584938943386078, "learning_rate": 8.776945849890135e-06, "loss": 0.3578, "step": 2549 }, { "epoch": 0.9144154912742051, "grad_norm": 0.39094334840774536, "learning_rate": 8.775578036148104e-06, "loss": 0.3632, "step": 2550 }, { "epoch": 0.9147740855845087, "grad_norm": 0.39231517910957336, "learning_rate": 8.774209564688635e-06, "loss": 0.3752, "step": 2551 }, { "epoch": 0.9151326798948123, "grad_norm": 0.32923436164855957, "learning_rate": 8.772840435750116e-06, "loss": 0.3525, "step": 2552 }, { "epoch": 0.9154912742051159, "grad_norm": 0.33209189772605896, "learning_rate": 8.771470649571056e-06, "loss": 0.3395, "step": 2553 }, { "epoch": 0.9158498685154196, "grad_norm": 0.3747885823249817, "learning_rate": 8.770100206390075e-06, "loss": 0.3705, "step": 2554 }, { "epoch": 0.9162084628257232, "grad_norm": 0.34452664852142334, "learning_rate": 8.76872910644591e-06, "loss": 0.3736, "step": 2555 }, { "epoch": 0.9165670571360268, "grad_norm": 0.3545683026313782, "learning_rate": 8.767357349977408e-06, "loss": 0.3596, "step": 2556 }, { "epoch": 0.9169256514463304, "grad_norm": 0.3781019449234009, "learning_rate": 8.765984937223535e-06, "loss": 0.35, "step": 2557 }, { "epoch": 0.917284245756634, "grad_norm": 0.33548396825790405, "learning_rate": 8.76461186842337e-06, "loss": 0.3443, "step": 2558 }, { "epoch": 0.9176428400669376, "grad_norm": 0.375886470079422, "learning_rate": 8.763238143816105e-06, "loss": 0.3653, "step": 2559 }, { "epoch": 0.9180014343772412, "grad_norm": 0.34516069293022156, "learning_rate": 8.761863763641045e-06, "loss": 0.3426, "step": 2560 }, { "epoch": 0.9183600286875448, "grad_norm": 0.3899666368961334, "learning_rate": 8.760488728137615e-06, "loss": 0.3595, "step": 2561 }, { "epoch": 0.9187186229978485, "grad_norm": 0.34846755862236023, "learning_rate": 8.75911303754535e-06, "loss": 0.3623, "step": 2562 }, { "epoch": 0.9190772173081521, "grad_norm": 0.34234166145324707, "learning_rate": 8.757736692103897e-06, "loss": 0.3332, "step": 2563 }, { "epoch": 0.9194358116184557, "grad_norm": 0.34345290064811707, "learning_rate": 8.756359692053022e-06, "loss": 0.3569, "step": 2564 }, { "epoch": 0.9197944059287593, "grad_norm": 0.34491202235221863, "learning_rate": 8.754982037632602e-06, "loss": 0.3594, "step": 2565 }, { "epoch": 0.9201530002390629, "grad_norm": 0.36487120389938354, "learning_rate": 8.753603729082629e-06, "loss": 0.3592, "step": 2566 }, { "epoch": 0.9205115945493665, "grad_norm": 0.35835328698158264, "learning_rate": 8.752224766643206e-06, "loss": 0.3476, "step": 2567 }, { "epoch": 0.9208701888596701, "grad_norm": 0.3669874966144562, "learning_rate": 8.750845150554557e-06, "loss": 0.3336, "step": 2568 }, { "epoch": 0.9212287831699737, "grad_norm": 0.34092339873313904, "learning_rate": 8.749464881057014e-06, "loss": 0.3503, "step": 2569 }, { "epoch": 0.9215873774802773, "grad_norm": 0.3612149953842163, "learning_rate": 8.748083958391022e-06, "loss": 0.3518, "step": 2570 }, { "epoch": 0.9219459717905809, "grad_norm": 0.37521079182624817, "learning_rate": 8.746702382797143e-06, "loss": 0.3667, "step": 2571 }, { "epoch": 0.9223045661008845, "grad_norm": 0.3916221261024475, "learning_rate": 8.745320154516054e-06, "loss": 0.3464, "step": 2572 }, { "epoch": 0.9226631604111881, "grad_norm": 0.3556962013244629, "learning_rate": 8.74393727378854e-06, "loss": 0.3642, "step": 2573 }, { "epoch": 0.9230217547214917, "grad_norm": 0.36459964513778687, "learning_rate": 8.742553740855507e-06, "loss": 0.3664, "step": 2574 }, { "epoch": 0.9233803490317953, "grad_norm": 0.3935241103172302, "learning_rate": 8.741169555957966e-06, "loss": 0.3759, "step": 2575 }, { "epoch": 0.9237389433420989, "grad_norm": 0.34197789430618286, "learning_rate": 8.73978471933705e-06, "loss": 0.3405, "step": 2576 }, { "epoch": 0.9240975376524025, "grad_norm": 0.37397751212120056, "learning_rate": 8.738399231234e-06, "loss": 0.3719, "step": 2577 }, { "epoch": 0.9244561319627062, "grad_norm": 0.38494426012039185, "learning_rate": 8.737013091890176e-06, "loss": 0.3273, "step": 2578 }, { "epoch": 0.9248147262730098, "grad_norm": 0.33781954646110535, "learning_rate": 8.735626301547042e-06, "loss": 0.3457, "step": 2579 }, { "epoch": 0.9251733205833134, "grad_norm": 0.4163830876350403, "learning_rate": 8.734238860446184e-06, "loss": 0.3635, "step": 2580 }, { "epoch": 0.925531914893617, "grad_norm": 0.3360899090766907, "learning_rate": 8.732850768829301e-06, "loss": 0.3545, "step": 2581 }, { "epoch": 0.9258905092039207, "grad_norm": 0.3279424011707306, "learning_rate": 8.7314620269382e-06, "loss": 0.3659, "step": 2582 }, { "epoch": 0.9262491035142243, "grad_norm": 0.36606091260910034, "learning_rate": 8.730072635014803e-06, "loss": 0.3548, "step": 2583 }, { "epoch": 0.9266076978245279, "grad_norm": 0.3471266031265259, "learning_rate": 8.728682593301149e-06, "loss": 0.3293, "step": 2584 }, { "epoch": 0.9269662921348315, "grad_norm": 0.3417438864707947, "learning_rate": 8.727291902039386e-06, "loss": 0.3454, "step": 2585 }, { "epoch": 0.9273248864451351, "grad_norm": 0.3756808936595917, "learning_rate": 8.725900561471778e-06, "loss": 0.3411, "step": 2586 }, { "epoch": 0.9276834807554387, "grad_norm": 0.3448803424835205, "learning_rate": 8.7245085718407e-06, "loss": 0.3337, "step": 2587 }, { "epoch": 0.9280420750657423, "grad_norm": 0.4053977429866791, "learning_rate": 8.723115933388641e-06, "loss": 0.3432, "step": 2588 }, { "epoch": 0.9284006693760459, "grad_norm": 0.3965558409690857, "learning_rate": 8.721722646358204e-06, "loss": 0.3557, "step": 2589 }, { "epoch": 0.9287592636863495, "grad_norm": 0.33434680104255676, "learning_rate": 8.720328710992104e-06, "loss": 0.363, "step": 2590 }, { "epoch": 0.9291178579966531, "grad_norm": 0.3725975751876831, "learning_rate": 8.718934127533165e-06, "loss": 0.3676, "step": 2591 }, { "epoch": 0.9294764523069567, "grad_norm": 0.3713395297527313, "learning_rate": 8.717538896224333e-06, "loss": 0.3607, "step": 2592 }, { "epoch": 0.9298350466172604, "grad_norm": 0.3830143213272095, "learning_rate": 8.716143017308658e-06, "loss": 0.339, "step": 2593 }, { "epoch": 0.930193640927564, "grad_norm": 0.34927359223365784, "learning_rate": 8.714746491029308e-06, "loss": 0.3355, "step": 2594 }, { "epoch": 0.9305522352378676, "grad_norm": 0.43157634139060974, "learning_rate": 8.713349317629563e-06, "loss": 0.3759, "step": 2595 }, { "epoch": 0.9309108295481712, "grad_norm": 0.4357191324234009, "learning_rate": 8.711951497352813e-06, "loss": 0.3464, "step": 2596 }, { "epoch": 0.9312694238584748, "grad_norm": 0.36743804812431335, "learning_rate": 8.710553030442563e-06, "loss": 0.342, "step": 2597 }, { "epoch": 0.9316280181687784, "grad_norm": 0.408102422952652, "learning_rate": 8.709153917142433e-06, "loss": 0.3429, "step": 2598 }, { "epoch": 0.931986612479082, "grad_norm": 0.36393821239471436, "learning_rate": 8.70775415769615e-06, "loss": 0.3448, "step": 2599 }, { "epoch": 0.9323452067893856, "grad_norm": 0.4241810739040375, "learning_rate": 8.706353752347556e-06, "loss": 0.362, "step": 2600 }, { "epoch": 0.9327038010996892, "grad_norm": 0.41553595662117004, "learning_rate": 8.70495270134061e-06, "loss": 0.3511, "step": 2601 }, { "epoch": 0.9330623954099928, "grad_norm": 0.38056299090385437, "learning_rate": 8.703551004919373e-06, "loss": 0.3355, "step": 2602 }, { "epoch": 0.9334209897202964, "grad_norm": 0.3876306712627411, "learning_rate": 8.702148663328032e-06, "loss": 0.3444, "step": 2603 }, { "epoch": 0.9337795840306, "grad_norm": 0.3676066994667053, "learning_rate": 8.700745676810873e-06, "loss": 0.3495, "step": 2604 }, { "epoch": 0.9341381783409036, "grad_norm": 0.38914960622787476, "learning_rate": 8.699342045612304e-06, "loss": 0.3186, "step": 2605 }, { "epoch": 0.9344967726512072, "grad_norm": 0.3856305181980133, "learning_rate": 8.697937769976844e-06, "loss": 0.3653, "step": 2606 }, { "epoch": 0.9348553669615108, "grad_norm": 0.4340197741985321, "learning_rate": 8.696532850149117e-06, "loss": 0.3822, "step": 2607 }, { "epoch": 0.9352139612718144, "grad_norm": 0.4005105793476105, "learning_rate": 8.695127286373867e-06, "loss": 0.3454, "step": 2608 }, { "epoch": 0.935572555582118, "grad_norm": 0.339857816696167, "learning_rate": 8.693721078895947e-06, "loss": 0.3501, "step": 2609 }, { "epoch": 0.9359311498924217, "grad_norm": 0.3869835436344147, "learning_rate": 8.692314227960325e-06, "loss": 0.3526, "step": 2610 }, { "epoch": 0.9362897442027254, "grad_norm": 0.3510091006755829, "learning_rate": 8.690906733812077e-06, "loss": 0.3506, "step": 2611 }, { "epoch": 0.936648338513029, "grad_norm": 0.39825791120529175, "learning_rate": 8.689498596696391e-06, "loss": 0.3547, "step": 2612 }, { "epoch": 0.9370069328233326, "grad_norm": 0.3305557668209076, "learning_rate": 8.688089816858571e-06, "loss": 0.3523, "step": 2613 }, { "epoch": 0.9373655271336362, "grad_norm": 0.3807455599308014, "learning_rate": 8.686680394544031e-06, "loss": 0.3675, "step": 2614 }, { "epoch": 0.9377241214439398, "grad_norm": 0.377908319234848, "learning_rate": 8.6852703299983e-06, "loss": 0.3519, "step": 2615 }, { "epoch": 0.9380827157542434, "grad_norm": 0.3365219235420227, "learning_rate": 8.683859623467006e-06, "loss": 0.3551, "step": 2616 }, { "epoch": 0.938441310064547, "grad_norm": 0.36781609058380127, "learning_rate": 8.682448275195909e-06, "loss": 0.3129, "step": 2617 }, { "epoch": 0.9387999043748506, "grad_norm": 0.40904155373573303, "learning_rate": 8.681036285430864e-06, "loss": 0.3635, "step": 2618 }, { "epoch": 0.9391584986851542, "grad_norm": 0.3550563454627991, "learning_rate": 8.679623654417845e-06, "loss": 0.3721, "step": 2619 }, { "epoch": 0.9395170929954578, "grad_norm": 0.3745860159397125, "learning_rate": 8.678210382402938e-06, "loss": 0.3496, "step": 2620 }, { "epoch": 0.9398756873057614, "grad_norm": 0.3900327980518341, "learning_rate": 8.67679646963234e-06, "loss": 0.3672, "step": 2621 }, { "epoch": 0.940234281616065, "grad_norm": 0.3725915253162384, "learning_rate": 8.675381916352356e-06, "loss": 0.3591, "step": 2622 }, { "epoch": 0.9405928759263686, "grad_norm": 0.34069082140922546, "learning_rate": 8.673966722809408e-06, "loss": 0.3518, "step": 2623 }, { "epoch": 0.9409514702366722, "grad_norm": 0.4285392463207245, "learning_rate": 8.672550889250027e-06, "loss": 0.347, "step": 2624 }, { "epoch": 0.9413100645469759, "grad_norm": 0.43357977271080017, "learning_rate": 8.671134415920855e-06, "loss": 0.3685, "step": 2625 }, { "epoch": 0.9416686588572795, "grad_norm": 0.3543405532836914, "learning_rate": 8.669717303068645e-06, "loss": 0.3569, "step": 2626 }, { "epoch": 0.9420272531675831, "grad_norm": 0.3714234232902527, "learning_rate": 8.668299550940265e-06, "loss": 0.3451, "step": 2627 }, { "epoch": 0.9423858474778867, "grad_norm": 0.49402710795402527, "learning_rate": 8.66688115978269e-06, "loss": 0.3635, "step": 2628 }, { "epoch": 0.9427444417881903, "grad_norm": 0.3900119364261627, "learning_rate": 8.66546212984301e-06, "loss": 0.3593, "step": 2629 }, { "epoch": 0.9431030360984939, "grad_norm": 0.3757719099521637, "learning_rate": 8.664042461368421e-06, "loss": 0.3579, "step": 2630 }, { "epoch": 0.9434616304087975, "grad_norm": 0.3793429434299469, "learning_rate": 8.662622154606238e-06, "loss": 0.3354, "step": 2631 }, { "epoch": 0.9438202247191011, "grad_norm": 0.42325499653816223, "learning_rate": 8.661201209803878e-06, "loss": 0.3478, "step": 2632 }, { "epoch": 0.9441788190294047, "grad_norm": 0.37490707635879517, "learning_rate": 8.659779627208879e-06, "loss": 0.3526, "step": 2633 }, { "epoch": 0.9445374133397083, "grad_norm": 0.3385685980319977, "learning_rate": 8.65835740706888e-06, "loss": 0.3563, "step": 2634 }, { "epoch": 0.9448960076500119, "grad_norm": 0.4113914668560028, "learning_rate": 8.656934549631641e-06, "loss": 0.3332, "step": 2635 }, { "epoch": 0.9452546019603155, "grad_norm": 0.4271351993083954, "learning_rate": 8.655511055145027e-06, "loss": 0.3577, "step": 2636 }, { "epoch": 0.9456131962706191, "grad_norm": 0.3761797845363617, "learning_rate": 8.654086923857012e-06, "loss": 0.3594, "step": 2637 }, { "epoch": 0.9459717905809227, "grad_norm": 0.3633987307548523, "learning_rate": 8.652662156015688e-06, "loss": 0.3358, "step": 2638 }, { "epoch": 0.9463303848912263, "grad_norm": 0.39259907603263855, "learning_rate": 8.651236751869251e-06, "loss": 0.3665, "step": 2639 }, { "epoch": 0.9466889792015301, "grad_norm": 0.35492897033691406, "learning_rate": 8.649810711666012e-06, "loss": 0.3402, "step": 2640 }, { "epoch": 0.9470475735118337, "grad_norm": 0.3673602044582367, "learning_rate": 8.648384035654393e-06, "loss": 0.3536, "step": 2641 }, { "epoch": 0.9474061678221373, "grad_norm": 0.3800713121891022, "learning_rate": 8.646956724082925e-06, "loss": 0.378, "step": 2642 }, { "epoch": 0.9477647621324409, "grad_norm": 0.3916242718696594, "learning_rate": 8.645528777200246e-06, "loss": 0.3627, "step": 2643 }, { "epoch": 0.9481233564427445, "grad_norm": 0.36197736859321594, "learning_rate": 8.644100195255114e-06, "loss": 0.3353, "step": 2644 }, { "epoch": 0.9484819507530481, "grad_norm": 0.3605659306049347, "learning_rate": 8.642670978496393e-06, "loss": 0.3357, "step": 2645 }, { "epoch": 0.9488405450633517, "grad_norm": 0.39247795939445496, "learning_rate": 8.64124112717305e-06, "loss": 0.3534, "step": 2646 }, { "epoch": 0.9491991393736553, "grad_norm": 0.3873645067214966, "learning_rate": 8.639810641534178e-06, "loss": 0.3359, "step": 2647 }, { "epoch": 0.9495577336839589, "grad_norm": 0.3753073215484619, "learning_rate": 8.638379521828966e-06, "loss": 0.3356, "step": 2648 }, { "epoch": 0.9499163279942625, "grad_norm": 0.3843511641025543, "learning_rate": 8.636947768306722e-06, "loss": 0.3308, "step": 2649 }, { "epoch": 0.9502749223045661, "grad_norm": 0.3909527659416199, "learning_rate": 8.635515381216861e-06, "loss": 0.3518, "step": 2650 }, { "epoch": 0.9506335166148697, "grad_norm": 0.39099881052970886, "learning_rate": 8.63408236080891e-06, "loss": 0.3432, "step": 2651 }, { "epoch": 0.9509921109251733, "grad_norm": 0.38027849793434143, "learning_rate": 8.632648707332507e-06, "loss": 0.359, "step": 2652 }, { "epoch": 0.9513507052354769, "grad_norm": 0.3482306897640228, "learning_rate": 8.631214421037398e-06, "loss": 0.3457, "step": 2653 }, { "epoch": 0.9517092995457805, "grad_norm": 0.4368753135204315, "learning_rate": 8.629779502173436e-06, "loss": 0.3648, "step": 2654 }, { "epoch": 0.9520678938560841, "grad_norm": 0.37262582778930664, "learning_rate": 8.628343950990594e-06, "loss": 0.3474, "step": 2655 }, { "epoch": 0.9524264881663878, "grad_norm": 0.4139047861099243, "learning_rate": 8.626907767738948e-06, "loss": 0.3539, "step": 2656 }, { "epoch": 0.9527850824766914, "grad_norm": 0.36952731013298035, "learning_rate": 8.625470952668685e-06, "loss": 0.3411, "step": 2657 }, { "epoch": 0.953143676786995, "grad_norm": 0.4174867868423462, "learning_rate": 8.624033506030101e-06, "loss": 0.3561, "step": 2658 }, { "epoch": 0.9535022710972986, "grad_norm": 0.3407187759876251, "learning_rate": 8.622595428073606e-06, "loss": 0.3462, "step": 2659 }, { "epoch": 0.9538608654076022, "grad_norm": 0.3750357925891876, "learning_rate": 8.621156719049717e-06, "loss": 0.3436, "step": 2660 }, { "epoch": 0.9542194597179058, "grad_norm": 0.36740779876708984, "learning_rate": 8.61971737920906e-06, "loss": 0.3389, "step": 2661 }, { "epoch": 0.9545780540282094, "grad_norm": 0.37082937359809875, "learning_rate": 8.618277408802375e-06, "loss": 0.3452, "step": 2662 }, { "epoch": 0.954936648338513, "grad_norm": 0.355161190032959, "learning_rate": 8.616836808080508e-06, "loss": 0.3434, "step": 2663 }, { "epoch": 0.9552952426488166, "grad_norm": 0.3571675717830658, "learning_rate": 8.61539557729442e-06, "loss": 0.3458, "step": 2664 }, { "epoch": 0.9556538369591202, "grad_norm": 0.3521101176738739, "learning_rate": 8.613953716695169e-06, "loss": 0.3453, "step": 2665 }, { "epoch": 0.9560124312694238, "grad_norm": 0.35146695375442505, "learning_rate": 8.61251122653394e-06, "loss": 0.3399, "step": 2666 }, { "epoch": 0.9563710255797274, "grad_norm": 0.3537027835845947, "learning_rate": 8.611068107062016e-06, "loss": 0.3473, "step": 2667 }, { "epoch": 0.956729619890031, "grad_norm": 0.37516435980796814, "learning_rate": 8.609624358530792e-06, "loss": 0.3493, "step": 2668 }, { "epoch": 0.9570882142003347, "grad_norm": 0.339772492647171, "learning_rate": 8.608179981191773e-06, "loss": 0.3438, "step": 2669 }, { "epoch": 0.9574468085106383, "grad_norm": 0.37372368574142456, "learning_rate": 8.606734975296578e-06, "loss": 0.3639, "step": 2670 }, { "epoch": 0.957805402820942, "grad_norm": 0.4070110321044922, "learning_rate": 8.605289341096929e-06, "loss": 0.339, "step": 2671 }, { "epoch": 0.9581639971312456, "grad_norm": 0.3517730236053467, "learning_rate": 8.60384307884466e-06, "loss": 0.3619, "step": 2672 }, { "epoch": 0.9585225914415492, "grad_norm": 0.377472847700119, "learning_rate": 8.602396188791714e-06, "loss": 0.3498, "step": 2673 }, { "epoch": 0.9588811857518528, "grad_norm": 0.4167480170726776, "learning_rate": 8.600948671190143e-06, "loss": 0.3516, "step": 2674 }, { "epoch": 0.9592397800621564, "grad_norm": 0.3630529046058655, "learning_rate": 8.599500526292112e-06, "loss": 0.3509, "step": 2675 }, { "epoch": 0.95959837437246, "grad_norm": 0.35381513833999634, "learning_rate": 8.598051754349892e-06, "loss": 0.3362, "step": 2676 }, { "epoch": 0.9599569686827636, "grad_norm": 0.4330054521560669, "learning_rate": 8.59660235561586e-06, "loss": 0.3605, "step": 2677 }, { "epoch": 0.9603155629930672, "grad_norm": 0.3781980276107788, "learning_rate": 8.59515233034251e-06, "loss": 0.3696, "step": 2678 }, { "epoch": 0.9606741573033708, "grad_norm": 0.36627617478370667, "learning_rate": 8.593701678782436e-06, "loss": 0.3424, "step": 2679 }, { "epoch": 0.9610327516136744, "grad_norm": 0.3843725323677063, "learning_rate": 8.59225040118835e-06, "loss": 0.348, "step": 2680 }, { "epoch": 0.961391345923978, "grad_norm": 0.3387001156806946, "learning_rate": 8.59079849781307e-06, "loss": 0.3264, "step": 2681 }, { "epoch": 0.9617499402342816, "grad_norm": 0.3537123501300812, "learning_rate": 8.589345968909517e-06, "loss": 0.3645, "step": 2682 }, { "epoch": 0.9621085345445852, "grad_norm": 0.3354322612285614, "learning_rate": 8.58789281473073e-06, "loss": 0.3383, "step": 2683 }, { "epoch": 0.9624671288548888, "grad_norm": 0.4306068420410156, "learning_rate": 8.586439035529852e-06, "loss": 0.3518, "step": 2684 }, { "epoch": 0.9628257231651924, "grad_norm": 0.4085122048854828, "learning_rate": 8.584984631560138e-06, "loss": 0.3336, "step": 2685 }, { "epoch": 0.963184317475496, "grad_norm": 0.33062994480133057, "learning_rate": 8.583529603074945e-06, "loss": 0.3274, "step": 2686 }, { "epoch": 0.9635429117857996, "grad_norm": 0.42859509587287903, "learning_rate": 8.582073950327748e-06, "loss": 0.348, "step": 2687 }, { "epoch": 0.9639015060961033, "grad_norm": 0.38728511333465576, "learning_rate": 8.580617673572123e-06, "loss": 0.357, "step": 2688 }, { "epoch": 0.9642601004064069, "grad_norm": 0.4168683588504791, "learning_rate": 8.579160773061761e-06, "loss": 0.3607, "step": 2689 }, { "epoch": 0.9646186947167105, "grad_norm": 0.3668745458126068, "learning_rate": 8.577703249050456e-06, "loss": 0.3513, "step": 2690 }, { "epoch": 0.9649772890270141, "grad_norm": 0.32363957166671753, "learning_rate": 8.576245101792114e-06, "loss": 0.3635, "step": 2691 }, { "epoch": 0.9653358833373177, "grad_norm": 0.3853330910205841, "learning_rate": 8.57478633154075e-06, "loss": 0.3358, "step": 2692 }, { "epoch": 0.9656944776476213, "grad_norm": 0.3742659389972687, "learning_rate": 8.573326938550484e-06, "loss": 0.3625, "step": 2693 }, { "epoch": 0.9660530719579249, "grad_norm": 0.3602696359157562, "learning_rate": 8.571866923075547e-06, "loss": 0.3527, "step": 2694 }, { "epoch": 0.9664116662682285, "grad_norm": 0.36186107993125916, "learning_rate": 8.570406285370282e-06, "loss": 0.3436, "step": 2695 }, { "epoch": 0.9667702605785321, "grad_norm": 0.35238897800445557, "learning_rate": 8.568945025689132e-06, "loss": 0.3515, "step": 2696 }, { "epoch": 0.9671288548888357, "grad_norm": 0.36667346954345703, "learning_rate": 8.567483144286656e-06, "loss": 0.3258, "step": 2697 }, { "epoch": 0.9674874491991394, "grad_norm": 0.46498721837997437, "learning_rate": 8.566020641417516e-06, "loss": 0.3736, "step": 2698 }, { "epoch": 0.967846043509443, "grad_norm": 0.3435336947441101, "learning_rate": 8.564557517336487e-06, "loss": 0.3367, "step": 2699 }, { "epoch": 0.9682046378197466, "grad_norm": 0.38573309779167175, "learning_rate": 8.563093772298448e-06, "loss": 0.3442, "step": 2700 }, { "epoch": 0.9685632321300502, "grad_norm": 0.38786935806274414, "learning_rate": 8.561629406558388e-06, "loss": 0.3769, "step": 2701 }, { "epoch": 0.9689218264403539, "grad_norm": 0.39504286646842957, "learning_rate": 8.560164420371404e-06, "loss": 0.3514, "step": 2702 }, { "epoch": 0.9692804207506575, "grad_norm": 0.37325549125671387, "learning_rate": 8.558698813992704e-06, "loss": 0.3482, "step": 2703 }, { "epoch": 0.9696390150609611, "grad_norm": 0.40570053458213806, "learning_rate": 8.557232587677597e-06, "loss": 0.338, "step": 2704 }, { "epoch": 0.9699976093712647, "grad_norm": 0.4405112564563751, "learning_rate": 8.555765741681506e-06, "loss": 0.3501, "step": 2705 }, { "epoch": 0.9703562036815683, "grad_norm": 0.3473097085952759, "learning_rate": 8.554298276259962e-06, "loss": 0.378, "step": 2706 }, { "epoch": 0.9707147979918719, "grad_norm": 0.34742841124534607, "learning_rate": 8.552830191668597e-06, "loss": 0.3538, "step": 2707 }, { "epoch": 0.9710733923021755, "grad_norm": 0.38723981380462646, "learning_rate": 8.551361488163162e-06, "loss": 0.3545, "step": 2708 }, { "epoch": 0.9714319866124791, "grad_norm": 0.38712453842163086, "learning_rate": 8.549892165999505e-06, "loss": 0.3538, "step": 2709 }, { "epoch": 0.9717905809227827, "grad_norm": 0.3799491226673126, "learning_rate": 8.54842222543359e-06, "loss": 0.3469, "step": 2710 }, { "epoch": 0.9721491752330863, "grad_norm": 0.3917875289916992, "learning_rate": 8.546951666721483e-06, "loss": 0.3608, "step": 2711 }, { "epoch": 0.9725077695433899, "grad_norm": 0.35903263092041016, "learning_rate": 8.54548049011936e-06, "loss": 0.3745, "step": 2712 }, { "epoch": 0.9728663638536935, "grad_norm": 0.37339580059051514, "learning_rate": 8.544008695883506e-06, "loss": 0.3383, "step": 2713 }, { "epoch": 0.9732249581639971, "grad_norm": 0.38502711057662964, "learning_rate": 8.54253628427031e-06, "loss": 0.3397, "step": 2714 }, { "epoch": 0.9735835524743007, "grad_norm": 0.3820286989212036, "learning_rate": 8.541063255536273e-06, "loss": 0.333, "step": 2715 }, { "epoch": 0.9739421467846043, "grad_norm": 0.40578171610832214, "learning_rate": 8.539589609937999e-06, "loss": 0.3347, "step": 2716 }, { "epoch": 0.9743007410949079, "grad_norm": 0.3539571762084961, "learning_rate": 8.538115347732202e-06, "loss": 0.3282, "step": 2717 }, { "epoch": 0.9746593354052115, "grad_norm": 0.41249334812164307, "learning_rate": 8.536640469175705e-06, "loss": 0.3405, "step": 2718 }, { "epoch": 0.9750179297155152, "grad_norm": 0.38533735275268555, "learning_rate": 8.535164974525433e-06, "loss": 0.3581, "step": 2719 }, { "epoch": 0.9753765240258188, "grad_norm": 0.37697070837020874, "learning_rate": 8.533688864038428e-06, "loss": 0.3527, "step": 2720 }, { "epoch": 0.9757351183361224, "grad_norm": 0.37526142597198486, "learning_rate": 8.532212137971825e-06, "loss": 0.3456, "step": 2721 }, { "epoch": 0.976093712646426, "grad_norm": 0.3525881767272949, "learning_rate": 8.53073479658288e-06, "loss": 0.3367, "step": 2722 }, { "epoch": 0.9764523069567296, "grad_norm": 0.39559653401374817, "learning_rate": 8.529256840128949e-06, "loss": 0.3501, "step": 2723 }, { "epoch": 0.9768109012670332, "grad_norm": 0.33686548471450806, "learning_rate": 8.527778268867497e-06, "loss": 0.3717, "step": 2724 }, { "epoch": 0.9771694955773368, "grad_norm": 0.34716734290122986, "learning_rate": 8.526299083056092e-06, "loss": 0.3491, "step": 2725 }, { "epoch": 0.9775280898876404, "grad_norm": 0.38101932406425476, "learning_rate": 8.524819282952419e-06, "loss": 0.376, "step": 2726 }, { "epoch": 0.9778866841979441, "grad_norm": 0.4112134873867035, "learning_rate": 8.52333886881426e-06, "loss": 0.3397, "step": 2727 }, { "epoch": 0.9782452785082477, "grad_norm": 0.40201064944267273, "learning_rate": 8.521857840899505e-06, "loss": 0.3787, "step": 2728 }, { "epoch": 0.9786038728185513, "grad_norm": 0.3870185911655426, "learning_rate": 8.52037619946616e-06, "loss": 0.3555, "step": 2729 }, { "epoch": 0.9789624671288549, "grad_norm": 0.4137263000011444, "learning_rate": 8.518893944772328e-06, "loss": 0.3599, "step": 2730 }, { "epoch": 0.9793210614391585, "grad_norm": 0.40090376138687134, "learning_rate": 8.517411077076223e-06, "loss": 0.3541, "step": 2731 }, { "epoch": 0.9796796557494621, "grad_norm": 0.35137009620666504, "learning_rate": 8.515927596636167e-06, "loss": 0.3297, "step": 2732 }, { "epoch": 0.9800382500597657, "grad_norm": 0.3804851770401001, "learning_rate": 8.514443503710582e-06, "loss": 0.353, "step": 2733 }, { "epoch": 0.9803968443700694, "grad_norm": 0.35090699791908264, "learning_rate": 8.512958798558005e-06, "loss": 0.333, "step": 2734 }, { "epoch": 0.980755438680373, "grad_norm": 0.38634011149406433, "learning_rate": 8.511473481437079e-06, "loss": 0.3463, "step": 2735 }, { "epoch": 0.9811140329906766, "grad_norm": 0.3696722686290741, "learning_rate": 8.509987552606544e-06, "loss": 0.3623, "step": 2736 }, { "epoch": 0.9814726273009802, "grad_norm": 0.40353333950042725, "learning_rate": 8.508501012325257e-06, "loss": 0.3605, "step": 2737 }, { "epoch": 0.9818312216112838, "grad_norm": 0.3793852925300598, "learning_rate": 8.50701386085218e-06, "loss": 0.3522, "step": 2738 }, { "epoch": 0.9821898159215874, "grad_norm": 0.35881325602531433, "learning_rate": 8.505526098446377e-06, "loss": 0.3752, "step": 2739 }, { "epoch": 0.982548410231891, "grad_norm": 0.41322726011276245, "learning_rate": 8.504037725367022e-06, "loss": 0.3701, "step": 2740 }, { "epoch": 0.9829070045421946, "grad_norm": 0.36259526014328003, "learning_rate": 8.502548741873393e-06, "loss": 0.3395, "step": 2741 }, { "epoch": 0.9832655988524982, "grad_norm": 0.35688963532447815, "learning_rate": 8.501059148224876e-06, "loss": 0.3325, "step": 2742 }, { "epoch": 0.9836241931628018, "grad_norm": 0.38992753624916077, "learning_rate": 8.499568944680962e-06, "loss": 0.3475, "step": 2743 }, { "epoch": 0.9839827874731054, "grad_norm": 0.4081912934780121, "learning_rate": 8.498078131501252e-06, "loss": 0.3357, "step": 2744 }, { "epoch": 0.984341381783409, "grad_norm": 0.3642245829105377, "learning_rate": 8.496586708945446e-06, "loss": 0.377, "step": 2745 }, { "epoch": 0.9846999760937126, "grad_norm": 0.35196733474731445, "learning_rate": 8.495094677273358e-06, "loss": 0.327, "step": 2746 }, { "epoch": 0.9850585704040162, "grad_norm": 0.3688993453979492, "learning_rate": 8.493602036744904e-06, "loss": 0.3561, "step": 2747 }, { "epoch": 0.9854171647143198, "grad_norm": 0.3575614392757416, "learning_rate": 8.492108787620106e-06, "loss": 0.3519, "step": 2748 }, { "epoch": 0.9857757590246234, "grad_norm": 0.4070189595222473, "learning_rate": 8.490614930159092e-06, "loss": 0.358, "step": 2749 }, { "epoch": 0.986134353334927, "grad_norm": 0.33736538887023926, "learning_rate": 8.489120464622098e-06, "loss": 0.3471, "step": 2750 }, { "epoch": 0.9864929476452307, "grad_norm": 0.39548438787460327, "learning_rate": 8.487625391269463e-06, "loss": 0.3421, "step": 2751 }, { "epoch": 0.9868515419555343, "grad_norm": 0.3965354263782501, "learning_rate": 8.486129710361634e-06, "loss": 0.3363, "step": 2752 }, { "epoch": 0.9872101362658379, "grad_norm": 0.3549104332923889, "learning_rate": 8.484633422159163e-06, "loss": 0.3411, "step": 2753 }, { "epoch": 0.9875687305761415, "grad_norm": 0.36027058959007263, "learning_rate": 8.48313652692271e-06, "loss": 0.3401, "step": 2754 }, { "epoch": 0.9879273248864451, "grad_norm": 0.3895283639431, "learning_rate": 8.481639024913038e-06, "loss": 0.3282, "step": 2755 }, { "epoch": 0.9882859191967488, "grad_norm": 0.388168066740036, "learning_rate": 8.480140916391013e-06, "loss": 0.3633, "step": 2756 }, { "epoch": 0.9886445135070524, "grad_norm": 0.4107179045677185, "learning_rate": 8.478642201617615e-06, "loss": 0.3471, "step": 2757 }, { "epoch": 0.989003107817356, "grad_norm": 0.36343634128570557, "learning_rate": 8.477142880853923e-06, "loss": 0.349, "step": 2758 }, { "epoch": 0.9893617021276596, "grad_norm": 0.4104796051979065, "learning_rate": 8.475642954361121e-06, "loss": 0.3464, "step": 2759 }, { "epoch": 0.9897202964379632, "grad_norm": 0.3921293318271637, "learning_rate": 8.474142422400505e-06, "loss": 0.3408, "step": 2760 }, { "epoch": 0.9900788907482668, "grad_norm": 0.39047902822494507, "learning_rate": 8.47264128523347e-06, "loss": 0.3489, "step": 2761 }, { "epoch": 0.9904374850585704, "grad_norm": 0.4375460743904114, "learning_rate": 8.47113954312152e-06, "loss": 0.3581, "step": 2762 }, { "epoch": 0.990796079368874, "grad_norm": 0.40973344445228577, "learning_rate": 8.469637196326261e-06, "loss": 0.3755, "step": 2763 }, { "epoch": 0.9911546736791776, "grad_norm": 0.37713509798049927, "learning_rate": 8.468134245109408e-06, "loss": 0.3396, "step": 2764 }, { "epoch": 0.9915132679894813, "grad_norm": 0.4158433973789215, "learning_rate": 8.46663068973278e-06, "loss": 0.3446, "step": 2765 }, { "epoch": 0.9918718622997849, "grad_norm": 0.4120822846889496, "learning_rate": 8.465126530458299e-06, "loss": 0.3498, "step": 2766 }, { "epoch": 0.9922304566100885, "grad_norm": 0.4814397096633911, "learning_rate": 8.463621767547998e-06, "loss": 0.3497, "step": 2767 }, { "epoch": 0.9925890509203921, "grad_norm": 0.37950095534324646, "learning_rate": 8.462116401264007e-06, "loss": 0.3567, "step": 2768 }, { "epoch": 0.9929476452306957, "grad_norm": 0.40903428196907043, "learning_rate": 8.46061043186857e-06, "loss": 0.3634, "step": 2769 }, { "epoch": 0.9933062395409993, "grad_norm": 0.4470074474811554, "learning_rate": 8.459103859624028e-06, "loss": 0.3725, "step": 2770 }, { "epoch": 0.9936648338513029, "grad_norm": 0.35088464617729187, "learning_rate": 8.45759668479283e-06, "loss": 0.3665, "step": 2771 }, { "epoch": 0.9940234281616065, "grad_norm": 0.3608323633670807, "learning_rate": 8.456088907637535e-06, "loss": 0.3614, "step": 2772 }, { "epoch": 0.9943820224719101, "grad_norm": 0.35383543372154236, "learning_rate": 8.454580528420798e-06, "loss": 0.3356, "step": 2773 }, { "epoch": 0.9947406167822137, "grad_norm": 0.4329584538936615, "learning_rate": 8.453071547405387e-06, "loss": 0.3618, "step": 2774 }, { "epoch": 0.9950992110925173, "grad_norm": 0.3608362376689911, "learning_rate": 8.451561964854169e-06, "loss": 0.3445, "step": 2775 }, { "epoch": 0.9954578054028209, "grad_norm": 0.4028618037700653, "learning_rate": 8.450051781030117e-06, "loss": 0.3491, "step": 2776 }, { "epoch": 0.9958163997131245, "grad_norm": 0.3762362599372864, "learning_rate": 8.44854099619631e-06, "loss": 0.3762, "step": 2777 }, { "epoch": 0.9961749940234281, "grad_norm": 0.37563541531562805, "learning_rate": 8.447029610615936e-06, "loss": 0.3636, "step": 2778 }, { "epoch": 0.9965335883337317, "grad_norm": 0.3822872042655945, "learning_rate": 8.445517624552277e-06, "loss": 0.357, "step": 2779 }, { "epoch": 0.9968921826440353, "grad_norm": 0.3895609974861145, "learning_rate": 8.444005038268728e-06, "loss": 0.3562, "step": 2780 }, { "epoch": 0.997250776954339, "grad_norm": 0.36931517720222473, "learning_rate": 8.442491852028786e-06, "loss": 0.3365, "step": 2781 }, { "epoch": 0.9976093712646426, "grad_norm": 0.3750978410243988, "learning_rate": 8.440978066096055e-06, "loss": 0.3548, "step": 2782 }, { "epoch": 0.9979679655749462, "grad_norm": 0.40199658274650574, "learning_rate": 8.439463680734238e-06, "loss": 0.3477, "step": 2783 }, { "epoch": 0.9983265598852499, "grad_norm": 0.39142102003097534, "learning_rate": 8.43794869620715e-06, "loss": 0.3711, "step": 2784 }, { "epoch": 0.9986851541955535, "grad_norm": 0.4043262302875519, "learning_rate": 8.4364331127787e-06, "loss": 0.3476, "step": 2785 }, { "epoch": 0.9990437485058571, "grad_norm": 0.43574947118759155, "learning_rate": 8.434916930712911e-06, "loss": 0.3356, "step": 2786 }, { "epoch": 0.9994023428161607, "grad_norm": 0.44782131910324097, "learning_rate": 8.433400150273907e-06, "loss": 0.3535, "step": 2787 }, { "epoch": 0.9997609371264643, "grad_norm": 0.368845671415329, "learning_rate": 8.431882771725913e-06, "loss": 0.3466, "step": 2788 }, { "epoch": 1.0002390628735358, "grad_norm": 0.6547194719314575, "learning_rate": 8.430364795333264e-06, "loss": 0.513, "step": 2789 }, { "epoch": 1.0005976571838393, "grad_norm": 0.43506598472595215, "learning_rate": 8.428846221360396e-06, "loss": 0.3368, "step": 2790 }, { "epoch": 1.000956251494143, "grad_norm": 0.45603877305984497, "learning_rate": 8.427327050071848e-06, "loss": 0.3304, "step": 2791 }, { "epoch": 1.0013148458044465, "grad_norm": 0.377022922039032, "learning_rate": 8.425807281732263e-06, "loss": 0.3209, "step": 2792 }, { "epoch": 1.0016734401147502, "grad_norm": 0.4223003685474396, "learning_rate": 8.424286916606394e-06, "loss": 0.3165, "step": 2793 }, { "epoch": 1.0020320344250537, "grad_norm": 0.42066872119903564, "learning_rate": 8.42276595495909e-06, "loss": 0.3437, "step": 2794 }, { "epoch": 1.0023906287353574, "grad_norm": 0.3563951551914215, "learning_rate": 8.421244397055305e-06, "loss": 0.3203, "step": 2795 }, { "epoch": 1.002749223045661, "grad_norm": 0.37510964274406433, "learning_rate": 8.419722243160105e-06, "loss": 0.3162, "step": 2796 }, { "epoch": 1.0031078173559647, "grad_norm": 0.3688391149044037, "learning_rate": 8.41819949353865e-06, "loss": 0.3068, "step": 2797 }, { "epoch": 1.0034664116662682, "grad_norm": 0.35164907574653625, "learning_rate": 8.416676148456208e-06, "loss": 0.3117, "step": 2798 }, { "epoch": 1.0038250059765719, "grad_norm": 0.4284220039844513, "learning_rate": 8.415152208178153e-06, "loss": 0.3184, "step": 2799 }, { "epoch": 1.0041836002868754, "grad_norm": 0.3561032712459564, "learning_rate": 8.413627672969958e-06, "loss": 0.3282, "step": 2800 }, { "epoch": 1.004542194597179, "grad_norm": 0.3712702989578247, "learning_rate": 8.4121025430972e-06, "loss": 0.3182, "step": 2801 }, { "epoch": 1.0049007889074826, "grad_norm": 0.3794056475162506, "learning_rate": 8.410576818825564e-06, "loss": 0.3634, "step": 2802 }, { "epoch": 1.0052593832177863, "grad_norm": 0.3413942754268646, "learning_rate": 8.409050500420835e-06, "loss": 0.3216, "step": 2803 }, { "epoch": 1.0056179775280898, "grad_norm": 0.3895426094532013, "learning_rate": 8.407523588148904e-06, "loss": 0.3507, "step": 2804 }, { "epoch": 1.0059765718383935, "grad_norm": 0.35505685210227966, "learning_rate": 8.40599608227576e-06, "loss": 0.3052, "step": 2805 }, { "epoch": 1.006335166148697, "grad_norm": 0.37495484948158264, "learning_rate": 8.404467983067504e-06, "loss": 0.3563, "step": 2806 }, { "epoch": 1.0066937604590007, "grad_norm": 0.36900439858436584, "learning_rate": 8.402939290790335e-06, "loss": 0.2981, "step": 2807 }, { "epoch": 1.0070523547693044, "grad_norm": 0.3984183967113495, "learning_rate": 8.40141000571055e-06, "loss": 0.3698, "step": 2808 }, { "epoch": 1.007410949079608, "grad_norm": 0.3570784032344818, "learning_rate": 8.399880128094562e-06, "loss": 0.2948, "step": 2809 }, { "epoch": 1.0077695433899116, "grad_norm": 0.35862818360328674, "learning_rate": 8.39834965820888e-06, "loss": 0.308, "step": 2810 }, { "epoch": 1.0081281377002151, "grad_norm": 0.42994409799575806, "learning_rate": 8.39681859632011e-06, "loss": 0.4081, "step": 2811 }, { "epoch": 1.0084867320105189, "grad_norm": 0.3845164477825165, "learning_rate": 8.395286942694977e-06, "loss": 0.3066, "step": 2812 }, { "epoch": 1.0088453263208224, "grad_norm": 0.41559088230133057, "learning_rate": 8.393754697600291e-06, "loss": 0.3496, "step": 2813 }, { "epoch": 1.009203920631126, "grad_norm": 0.3877028524875641, "learning_rate": 8.392221861302982e-06, "loss": 0.3363, "step": 2814 }, { "epoch": 1.0095625149414296, "grad_norm": 0.40473201870918274, "learning_rate": 8.390688434070068e-06, "loss": 0.3143, "step": 2815 }, { "epoch": 1.0099211092517333, "grad_norm": 0.38878074288368225, "learning_rate": 8.389154416168681e-06, "loss": 0.335, "step": 2816 }, { "epoch": 1.0102797035620368, "grad_norm": 0.3945930302143097, "learning_rate": 8.387619807866052e-06, "loss": 0.344, "step": 2817 }, { "epoch": 1.0106382978723405, "grad_norm": 0.352280855178833, "learning_rate": 8.386084609429513e-06, "loss": 0.2884, "step": 2818 }, { "epoch": 1.010996892182644, "grad_norm": 0.3647370934486389, "learning_rate": 8.384548821126497e-06, "loss": 0.297, "step": 2819 }, { "epoch": 1.0113554864929477, "grad_norm": 0.43928974866867065, "learning_rate": 8.383012443224549e-06, "loss": 0.3849, "step": 2820 }, { "epoch": 1.0117140808032512, "grad_norm": 0.34051448106765747, "learning_rate": 8.381475475991307e-06, "loss": 0.2971, "step": 2821 }, { "epoch": 1.012072675113555, "grad_norm": 0.3800179660320282, "learning_rate": 8.379937919694517e-06, "loss": 0.3309, "step": 2822 }, { "epoch": 1.0124312694238584, "grad_norm": 0.39694660902023315, "learning_rate": 8.378399774602026e-06, "loss": 0.3419, "step": 2823 }, { "epoch": 1.0127898637341621, "grad_norm": 0.373401939868927, "learning_rate": 8.376861040981784e-06, "loss": 0.3363, "step": 2824 }, { "epoch": 1.0131484580444656, "grad_norm": 0.37300604581832886, "learning_rate": 8.375321719101843e-06, "loss": 0.3439, "step": 2825 }, { "epoch": 1.0135070523547693, "grad_norm": 0.3410003185272217, "learning_rate": 8.373781809230355e-06, "loss": 0.3035, "step": 2826 }, { "epoch": 1.0138656466650728, "grad_norm": 0.33707913756370544, "learning_rate": 8.372241311635581e-06, "loss": 0.3065, "step": 2827 }, { "epoch": 1.0142242409753766, "grad_norm": 0.3145730495452881, "learning_rate": 8.37070022658588e-06, "loss": 0.3012, "step": 2828 }, { "epoch": 1.01458283528568, "grad_norm": 0.3821297287940979, "learning_rate": 8.36915855434971e-06, "loss": 0.3385, "step": 2829 }, { "epoch": 1.0149414295959838, "grad_norm": 0.4267553985118866, "learning_rate": 8.367616295195639e-06, "loss": 0.3623, "step": 2830 }, { "epoch": 1.0153000239062873, "grad_norm": 0.3541501462459564, "learning_rate": 8.366073449392333e-06, "loss": 0.3266, "step": 2831 }, { "epoch": 1.015658618216591, "grad_norm": 0.3379872739315033, "learning_rate": 8.36453001720856e-06, "loss": 0.3131, "step": 2832 }, { "epoch": 1.0160172125268945, "grad_norm": 0.35726022720336914, "learning_rate": 8.362985998913188e-06, "loss": 0.3045, "step": 2833 }, { "epoch": 1.0163758068371982, "grad_norm": 0.3656623661518097, "learning_rate": 8.361441394775194e-06, "loss": 0.3496, "step": 2834 }, { "epoch": 1.0167344011475017, "grad_norm": 0.37902411818504333, "learning_rate": 8.359896205063651e-06, "loss": 0.336, "step": 2835 }, { "epoch": 1.0170929954578054, "grad_norm": 0.4055723249912262, "learning_rate": 8.358350430047734e-06, "loss": 0.33, "step": 2836 }, { "epoch": 1.0174515897681091, "grad_norm": 0.3761613965034485, "learning_rate": 8.356804069996726e-06, "loss": 0.3534, "step": 2837 }, { "epoch": 1.0178101840784126, "grad_norm": 0.33750176429748535, "learning_rate": 8.355257125180006e-06, "loss": 0.297, "step": 2838 }, { "epoch": 1.0181687783887163, "grad_norm": 0.4332830607891083, "learning_rate": 8.353709595867052e-06, "loss": 0.3509, "step": 2839 }, { "epoch": 1.0185273726990198, "grad_norm": 0.3511948883533478, "learning_rate": 8.352161482327456e-06, "loss": 0.3231, "step": 2840 }, { "epoch": 1.0188859670093235, "grad_norm": 0.3783717155456543, "learning_rate": 8.350612784830899e-06, "loss": 0.3242, "step": 2841 }, { "epoch": 1.019244561319627, "grad_norm": 0.3708619475364685, "learning_rate": 8.34906350364717e-06, "loss": 0.3242, "step": 2842 }, { "epoch": 1.0196031556299308, "grad_norm": 0.36664342880249023, "learning_rate": 8.34751363904616e-06, "loss": 0.3749, "step": 2843 }, { "epoch": 1.0199617499402343, "grad_norm": 0.31134897470474243, "learning_rate": 8.345963191297861e-06, "loss": 0.2974, "step": 2844 }, { "epoch": 1.020320344250538, "grad_norm": 0.3504027724266052, "learning_rate": 8.344412160672362e-06, "loss": 0.3598, "step": 2845 }, { "epoch": 1.0206789385608415, "grad_norm": 0.3727473318576813, "learning_rate": 8.34286054743986e-06, "loss": 0.3665, "step": 2846 }, { "epoch": 1.0210375328711452, "grad_norm": 0.34911254048347473, "learning_rate": 8.341308351870652e-06, "loss": 0.309, "step": 2847 }, { "epoch": 1.0213961271814487, "grad_norm": 0.35391440987586975, "learning_rate": 8.33975557423513e-06, "loss": 0.36, "step": 2848 }, { "epoch": 1.0217547214917524, "grad_norm": 0.36159613728523254, "learning_rate": 8.3382022148038e-06, "loss": 0.3511, "step": 2849 }, { "epoch": 1.0221133158020559, "grad_norm": 0.3618909418582916, "learning_rate": 8.336648273847257e-06, "loss": 0.327, "step": 2850 }, { "epoch": 1.0224719101123596, "grad_norm": 0.3594950735569, "learning_rate": 8.335093751636202e-06, "loss": 0.3252, "step": 2851 }, { "epoch": 1.022830504422663, "grad_norm": 0.3332076370716095, "learning_rate": 8.333538648441444e-06, "loss": 0.3218, "step": 2852 }, { "epoch": 1.0231890987329668, "grad_norm": 0.3852955996990204, "learning_rate": 8.331982964533878e-06, "loss": 0.3147, "step": 2853 }, { "epoch": 1.0235476930432703, "grad_norm": 0.334274560213089, "learning_rate": 8.330426700184516e-06, "loss": 0.3216, "step": 2854 }, { "epoch": 1.023906287353574, "grad_norm": 0.3475138247013092, "learning_rate": 8.32886985566446e-06, "loss": 0.3169, "step": 2855 }, { "epoch": 1.0242648816638775, "grad_norm": 0.3788859248161316, "learning_rate": 8.327312431244918e-06, "loss": 0.3783, "step": 2856 }, { "epoch": 1.0246234759741812, "grad_norm": 0.3142269551753998, "learning_rate": 8.3257544271972e-06, "loss": 0.2752, "step": 2857 }, { "epoch": 1.0249820702844847, "grad_norm": 0.35361480712890625, "learning_rate": 8.324195843792713e-06, "loss": 0.3656, "step": 2858 }, { "epoch": 1.0253406645947885, "grad_norm": 0.36604586243629456, "learning_rate": 8.322636681302966e-06, "loss": 0.3256, "step": 2859 }, { "epoch": 1.025699258905092, "grad_norm": 0.35707956552505493, "learning_rate": 8.321076939999574e-06, "loss": 0.3227, "step": 2860 }, { "epoch": 1.0260578532153957, "grad_norm": 0.31562539935112, "learning_rate": 8.319516620154247e-06, "loss": 0.3041, "step": 2861 }, { "epoch": 1.0264164475256992, "grad_norm": 0.4116665720939636, "learning_rate": 8.317955722038795e-06, "loss": 0.3415, "step": 2862 }, { "epoch": 1.0267750418360029, "grad_norm": 0.36402827501296997, "learning_rate": 8.316394245925134e-06, "loss": 0.3061, "step": 2863 }, { "epoch": 1.0271336361463064, "grad_norm": 0.35657480359077454, "learning_rate": 8.31483219208528e-06, "loss": 0.3642, "step": 2864 }, { "epoch": 1.02749223045661, "grad_norm": 0.35331034660339355, "learning_rate": 8.313269560791343e-06, "loss": 0.2917, "step": 2865 }, { "epoch": 1.0278508247669138, "grad_norm": 0.335231751203537, "learning_rate": 8.31170635231554e-06, "loss": 0.3297, "step": 2866 }, { "epoch": 1.0282094190772173, "grad_norm": 0.35152944922447205, "learning_rate": 8.310142566930189e-06, "loss": 0.3461, "step": 2867 }, { "epoch": 1.028568013387521, "grad_norm": 0.3463350832462311, "learning_rate": 8.308578204907702e-06, "loss": 0.3205, "step": 2868 }, { "epoch": 1.0289266076978245, "grad_norm": 0.3378060758113861, "learning_rate": 8.3070132665206e-06, "loss": 0.3125, "step": 2869 }, { "epoch": 1.0292852020081282, "grad_norm": 0.36740559339523315, "learning_rate": 8.305447752041496e-06, "loss": 0.3266, "step": 2870 }, { "epoch": 1.0296437963184317, "grad_norm": 0.36589184403419495, "learning_rate": 8.303881661743111e-06, "loss": 0.3052, "step": 2871 }, { "epoch": 1.0300023906287354, "grad_norm": 0.3650801181793213, "learning_rate": 8.302314995898264e-06, "loss": 0.3324, "step": 2872 }, { "epoch": 1.030360984939039, "grad_norm": 0.3891318440437317, "learning_rate": 8.300747754779867e-06, "loss": 0.3023, "step": 2873 }, { "epoch": 1.0307195792493427, "grad_norm": 0.37505292892456055, "learning_rate": 8.299179938660944e-06, "loss": 0.3505, "step": 2874 }, { "epoch": 1.0310781735596461, "grad_norm": 0.34785234928131104, "learning_rate": 8.297611547814612e-06, "loss": 0.3332, "step": 2875 }, { "epoch": 1.0314367678699499, "grad_norm": 0.4072534441947937, "learning_rate": 8.296042582514085e-06, "loss": 0.3094, "step": 2876 }, { "epoch": 1.0317953621802534, "grad_norm": 0.3790331184864044, "learning_rate": 8.294473043032688e-06, "loss": 0.3402, "step": 2877 }, { "epoch": 1.032153956490557, "grad_norm": 0.3045091927051544, "learning_rate": 8.29290292964384e-06, "loss": 0.3028, "step": 2878 }, { "epoch": 1.0325125508008606, "grad_norm": 0.41408249735832214, "learning_rate": 8.291332242621053e-06, "loss": 0.3803, "step": 2879 }, { "epoch": 1.0328711451111643, "grad_norm": 0.3553624749183655, "learning_rate": 8.289760982237951e-06, "loss": 0.3097, "step": 2880 }, { "epoch": 1.0332297394214678, "grad_norm": 0.3369748592376709, "learning_rate": 8.28818914876825e-06, "loss": 0.3024, "step": 2881 }, { "epoch": 1.0335883337317715, "grad_norm": 0.3483077585697174, "learning_rate": 8.286616742485771e-06, "loss": 0.3153, "step": 2882 }, { "epoch": 1.033946928042075, "grad_norm": 0.3473050892353058, "learning_rate": 8.28504376366443e-06, "loss": 0.3329, "step": 2883 }, { "epoch": 1.0343055223523787, "grad_norm": 0.370595246553421, "learning_rate": 8.283470212578243e-06, "loss": 0.3439, "step": 2884 }, { "epoch": 1.0346641166626822, "grad_norm": 0.3417757749557495, "learning_rate": 8.28189608950133e-06, "loss": 0.3234, "step": 2885 }, { "epoch": 1.035022710972986, "grad_norm": 0.36978933215141296, "learning_rate": 8.280321394707909e-06, "loss": 0.3173, "step": 2886 }, { "epoch": 1.0353813052832894, "grad_norm": 0.3806365132331848, "learning_rate": 8.278746128472294e-06, "loss": 0.3546, "step": 2887 }, { "epoch": 1.0357398995935931, "grad_norm": 0.3603343665599823, "learning_rate": 8.277170291068904e-06, "loss": 0.3223, "step": 2888 }, { "epoch": 1.0360984939038966, "grad_norm": 0.32979851961135864, "learning_rate": 8.275593882772252e-06, "loss": 0.3022, "step": 2889 }, { "epoch": 1.0364570882142004, "grad_norm": 0.37887656688690186, "learning_rate": 8.274016903856953e-06, "loss": 0.3319, "step": 2890 }, { "epoch": 1.0368156825245038, "grad_norm": 0.34117591381073, "learning_rate": 8.272439354597728e-06, "loss": 0.3311, "step": 2891 }, { "epoch": 1.0371742768348076, "grad_norm": 0.33498960733413696, "learning_rate": 8.270861235269384e-06, "loss": 0.322, "step": 2892 }, { "epoch": 1.037532871145111, "grad_norm": 0.3328034281730652, "learning_rate": 8.269282546146835e-06, "loss": 0.3094, "step": 2893 }, { "epoch": 1.0378914654554148, "grad_norm": 0.36888888478279114, "learning_rate": 8.267703287505097e-06, "loss": 0.3198, "step": 2894 }, { "epoch": 1.0382500597657183, "grad_norm": 0.3363962173461914, "learning_rate": 8.26612345961928e-06, "loss": 0.3125, "step": 2895 }, { "epoch": 1.038608654076022, "grad_norm": 0.35726621747016907, "learning_rate": 8.264543062764594e-06, "loss": 0.3344, "step": 2896 }, { "epoch": 1.0389672483863257, "grad_norm": 0.35002830624580383, "learning_rate": 8.262962097216351e-06, "loss": 0.3257, "step": 2897 }, { "epoch": 1.0393258426966292, "grad_norm": 0.3617922365665436, "learning_rate": 8.261380563249958e-06, "loss": 0.3487, "step": 2898 }, { "epoch": 1.039684437006933, "grad_norm": 0.33474430441856384, "learning_rate": 8.259798461140925e-06, "loss": 0.3365, "step": 2899 }, { "epoch": 1.0400430313172364, "grad_norm": 0.3757830262184143, "learning_rate": 8.258215791164856e-06, "loss": 0.3466, "step": 2900 }, { "epoch": 1.0404016256275401, "grad_norm": 0.3403554856777191, "learning_rate": 8.256632553597462e-06, "loss": 0.3242, "step": 2901 }, { "epoch": 1.0407602199378436, "grad_norm": 0.3710082471370697, "learning_rate": 8.255048748714544e-06, "loss": 0.336, "step": 2902 }, { "epoch": 1.0411188142481473, "grad_norm": 0.36893975734710693, "learning_rate": 8.253464376792007e-06, "loss": 0.3495, "step": 2903 }, { "epoch": 1.0414774085584508, "grad_norm": 0.42950353026390076, "learning_rate": 8.251879438105854e-06, "loss": 0.3331, "step": 2904 }, { "epoch": 1.0418360028687546, "grad_norm": 0.360411137342453, "learning_rate": 8.250293932932185e-06, "loss": 0.2996, "step": 2905 }, { "epoch": 1.042194597179058, "grad_norm": 0.32686296105384827, "learning_rate": 8.248707861547201e-06, "loss": 0.2991, "step": 2906 }, { "epoch": 1.0425531914893618, "grad_norm": 0.3871396780014038, "learning_rate": 8.247121224227199e-06, "loss": 0.3572, "step": 2907 }, { "epoch": 1.0429117857996653, "grad_norm": 0.4071953296661377, "learning_rate": 8.24553402124858e-06, "loss": 0.3181, "step": 2908 }, { "epoch": 1.043270380109969, "grad_norm": 0.3722195327281952, "learning_rate": 8.243946252887834e-06, "loss": 0.3168, "step": 2909 }, { "epoch": 1.0436289744202725, "grad_norm": 0.3445601165294647, "learning_rate": 8.24235791942156e-06, "loss": 0.3486, "step": 2910 }, { "epoch": 1.0439875687305762, "grad_norm": 0.3811541199684143, "learning_rate": 8.24076902112645e-06, "loss": 0.321, "step": 2911 }, { "epoch": 1.0443461630408797, "grad_norm": 0.3795638978481293, "learning_rate": 8.239179558279292e-06, "loss": 0.3294, "step": 2912 }, { "epoch": 1.0447047573511834, "grad_norm": 0.32207581400871277, "learning_rate": 8.237589531156977e-06, "loss": 0.2999, "step": 2913 }, { "epoch": 1.045063351661487, "grad_norm": 0.3458655774593353, "learning_rate": 8.235998940036494e-06, "loss": 0.324, "step": 2914 }, { "epoch": 1.0454219459717906, "grad_norm": 0.4343853294849396, "learning_rate": 8.234407785194929e-06, "loss": 0.3753, "step": 2915 }, { "epoch": 1.045780540282094, "grad_norm": 0.3900754451751709, "learning_rate": 8.232816066909462e-06, "loss": 0.3341, "step": 2916 }, { "epoch": 1.0461391345923978, "grad_norm": 0.33803486824035645, "learning_rate": 8.23122378545738e-06, "loss": 0.3146, "step": 2917 }, { "epoch": 1.0464977289027013, "grad_norm": 0.35210350155830383, "learning_rate": 8.229630941116063e-06, "loss": 0.3283, "step": 2918 }, { "epoch": 1.046856323213005, "grad_norm": 0.38211607933044434, "learning_rate": 8.228037534162986e-06, "loss": 0.2999, "step": 2919 }, { "epoch": 1.0472149175233085, "grad_norm": 0.408741295337677, "learning_rate": 8.22644356487573e-06, "loss": 0.3689, "step": 2920 }, { "epoch": 1.0475735118336122, "grad_norm": 0.3470117449760437, "learning_rate": 8.224849033531965e-06, "loss": 0.3214, "step": 2921 }, { "epoch": 1.0479321061439157, "grad_norm": 0.43620750308036804, "learning_rate": 8.223253940409464e-06, "loss": 0.3496, "step": 2922 }, { "epoch": 1.0482907004542195, "grad_norm": 0.3521103858947754, "learning_rate": 8.2216582857861e-06, "loss": 0.348, "step": 2923 }, { "epoch": 1.0486492947645232, "grad_norm": 0.33230942487716675, "learning_rate": 8.22006206993984e-06, "loss": 0.3234, "step": 2924 }, { "epoch": 1.0490078890748267, "grad_norm": 0.3759195804595947, "learning_rate": 8.218465293148749e-06, "loss": 0.3241, "step": 2925 }, { "epoch": 1.0493664833851304, "grad_norm": 0.3620462119579315, "learning_rate": 8.21686795569099e-06, "loss": 0.3173, "step": 2926 }, { "epoch": 1.0497250776954339, "grad_norm": 0.3473825752735138, "learning_rate": 8.215270057844826e-06, "loss": 0.3123, "step": 2927 }, { "epoch": 1.0500836720057376, "grad_norm": 0.3728044629096985, "learning_rate": 8.213671599888613e-06, "loss": 0.3059, "step": 2928 }, { "epoch": 1.050442266316041, "grad_norm": 0.38644149899482727, "learning_rate": 8.212072582100812e-06, "loss": 0.3655, "step": 2929 }, { "epoch": 1.0508008606263448, "grad_norm": 0.3637278974056244, "learning_rate": 8.21047300475997e-06, "loss": 0.3273, "step": 2930 }, { "epoch": 1.0511594549366483, "grad_norm": 0.37909889221191406, "learning_rate": 8.208872868144744e-06, "loss": 0.3275, "step": 2931 }, { "epoch": 1.051518049246952, "grad_norm": 0.34559062123298645, "learning_rate": 8.207272172533883e-06, "loss": 0.283, "step": 2932 }, { "epoch": 1.0518766435572555, "grad_norm": 0.3584957420825958, "learning_rate": 8.205670918206227e-06, "loss": 0.337, "step": 2933 }, { "epoch": 1.0522352378675592, "grad_norm": 0.3611052632331848, "learning_rate": 8.204069105440726e-06, "loss": 0.322, "step": 2934 }, { "epoch": 1.0525938321778627, "grad_norm": 0.3609042465686798, "learning_rate": 8.20246673451642e-06, "loss": 0.3268, "step": 2935 }, { "epoch": 1.0529524264881664, "grad_norm": 0.3660302758216858, "learning_rate": 8.200863805712442e-06, "loss": 0.341, "step": 2936 }, { "epoch": 1.05331102079847, "grad_norm": 0.35141119360923767, "learning_rate": 8.199260319308033e-06, "loss": 0.3158, "step": 2937 }, { "epoch": 1.0536696151087737, "grad_norm": 0.37913429737091064, "learning_rate": 8.197656275582523e-06, "loss": 0.3163, "step": 2938 }, { "epoch": 1.0540282094190772, "grad_norm": 0.4084859788417816, "learning_rate": 8.19605167481534e-06, "loss": 0.3529, "step": 2939 }, { "epoch": 1.0543868037293809, "grad_norm": 0.32973238825798035, "learning_rate": 8.194446517286012e-06, "loss": 0.3074, "step": 2940 }, { "epoch": 1.0547453980396844, "grad_norm": 0.385111927986145, "learning_rate": 8.192840803274163e-06, "loss": 0.3525, "step": 2941 }, { "epoch": 1.055103992349988, "grad_norm": 0.3469538390636444, "learning_rate": 8.191234533059511e-06, "loss": 0.2814, "step": 2942 }, { "epoch": 1.0554625866602916, "grad_norm": 0.4046466648578644, "learning_rate": 8.189627706921876e-06, "loss": 0.3075, "step": 2943 }, { "epoch": 1.0558211809705953, "grad_norm": 0.3691580891609192, "learning_rate": 8.188020325141171e-06, "loss": 0.3112, "step": 2944 }, { "epoch": 1.0561797752808988, "grad_norm": 0.3732447922229767, "learning_rate": 8.186412387997407e-06, "loss": 0.3436, "step": 2945 }, { "epoch": 1.0565383695912025, "grad_norm": 0.35107704997062683, "learning_rate": 8.184803895770691e-06, "loss": 0.3253, "step": 2946 }, { "epoch": 1.056896963901506, "grad_norm": 0.3862822353839874, "learning_rate": 8.183194848741228e-06, "loss": 0.3002, "step": 2947 }, { "epoch": 1.0572555582118097, "grad_norm": 0.43775591254234314, "learning_rate": 8.181585247189319e-06, "loss": 0.3422, "step": 2948 }, { "epoch": 1.0576141525221132, "grad_norm": 0.36133062839508057, "learning_rate": 8.179975091395363e-06, "loss": 0.3166, "step": 2949 }, { "epoch": 1.057972746832417, "grad_norm": 0.382436066865921, "learning_rate": 8.17836438163985e-06, "loss": 0.3523, "step": 2950 }, { "epoch": 1.0583313411427206, "grad_norm": 0.32505467534065247, "learning_rate": 8.176753118203374e-06, "loss": 0.2807, "step": 2951 }, { "epoch": 1.0586899354530241, "grad_norm": 0.3629365563392639, "learning_rate": 8.175141301366621e-06, "loss": 0.3489, "step": 2952 }, { "epoch": 1.0590485297633279, "grad_norm": 0.33509182929992676, "learning_rate": 8.173528931410375e-06, "loss": 0.2948, "step": 2953 }, { "epoch": 1.0594071240736314, "grad_norm": 0.36225900053977966, "learning_rate": 8.171916008615517e-06, "loss": 0.3808, "step": 2954 }, { "epoch": 1.059765718383935, "grad_norm": 0.3078058660030365, "learning_rate": 8.170302533263022e-06, "loss": 0.2868, "step": 2955 }, { "epoch": 1.0601243126942386, "grad_norm": 0.3837610185146332, "learning_rate": 8.168688505633962e-06, "loss": 0.3537, "step": 2956 }, { "epoch": 1.0604829070045423, "grad_norm": 0.3186891973018646, "learning_rate": 8.167073926009504e-06, "loss": 0.2889, "step": 2957 }, { "epoch": 1.0608415013148458, "grad_norm": 0.3643110990524292, "learning_rate": 8.165458794670917e-06, "loss": 0.3648, "step": 2958 }, { "epoch": 1.0612000956251495, "grad_norm": 0.3375709652900696, "learning_rate": 8.163843111899558e-06, "loss": 0.3323, "step": 2959 }, { "epoch": 1.061558689935453, "grad_norm": 0.3803700804710388, "learning_rate": 8.162226877976886e-06, "loss": 0.3428, "step": 2960 }, { "epoch": 1.0619172842457567, "grad_norm": 0.36355045437812805, "learning_rate": 8.160610093184456e-06, "loss": 0.3186, "step": 2961 }, { "epoch": 1.0622758785560602, "grad_norm": 0.3615235388278961, "learning_rate": 8.158992757803912e-06, "loss": 0.3318, "step": 2962 }, { "epoch": 1.062634472866364, "grad_norm": 0.374379962682724, "learning_rate": 8.157374872117003e-06, "loss": 0.3352, "step": 2963 }, { "epoch": 1.0629930671766674, "grad_norm": 0.3829643130302429, "learning_rate": 8.155756436405566e-06, "loss": 0.3359, "step": 2964 }, { "epoch": 1.0633516614869711, "grad_norm": 0.40713053941726685, "learning_rate": 8.15413745095154e-06, "loss": 0.3096, "step": 2965 }, { "epoch": 1.0637102557972746, "grad_norm": 0.4217376410961151, "learning_rate": 8.152517916036959e-06, "loss": 0.3258, "step": 2966 }, { "epoch": 1.0640688501075783, "grad_norm": 0.3575074076652527, "learning_rate": 8.150897831943947e-06, "loss": 0.2979, "step": 2967 }, { "epoch": 1.0644274444178818, "grad_norm": 0.3932551443576813, "learning_rate": 8.14927719895473e-06, "loss": 0.3382, "step": 2968 }, { "epoch": 1.0647860387281856, "grad_norm": 0.36468973755836487, "learning_rate": 8.147656017351629e-06, "loss": 0.3228, "step": 2969 }, { "epoch": 1.065144633038489, "grad_norm": 0.374577134847641, "learning_rate": 8.146034287417055e-06, "loss": 0.3333, "step": 2970 }, { "epoch": 1.0655032273487928, "grad_norm": 0.37259289622306824, "learning_rate": 8.14441200943352e-06, "loss": 0.2915, "step": 2971 }, { "epoch": 1.0658618216590963, "grad_norm": 0.34148168563842773, "learning_rate": 8.142789183683632e-06, "loss": 0.3272, "step": 2972 }, { "epoch": 1.0662204159694, "grad_norm": 0.34870386123657227, "learning_rate": 8.141165810450089e-06, "loss": 0.3212, "step": 2973 }, { "epoch": 1.0665790102797035, "grad_norm": 0.3364277780056, "learning_rate": 8.13954189001569e-06, "loss": 0.346, "step": 2974 }, { "epoch": 1.0669376045900072, "grad_norm": 0.3220863342285156, "learning_rate": 8.137917422663324e-06, "loss": 0.3244, "step": 2975 }, { "epoch": 1.0672961989003107, "grad_norm": 0.3771611750125885, "learning_rate": 8.13629240867598e-06, "loss": 0.3165, "step": 2976 }, { "epoch": 1.0676547932106144, "grad_norm": 0.3617149293422699, "learning_rate": 8.134666848336745e-06, "loss": 0.3425, "step": 2977 }, { "epoch": 1.068013387520918, "grad_norm": 0.3496668338775635, "learning_rate": 8.13304074192879e-06, "loss": 0.3186, "step": 2978 }, { "epoch": 1.0683719818312216, "grad_norm": 0.32843321561813354, "learning_rate": 8.13141408973539e-06, "loss": 0.3368, "step": 2979 }, { "epoch": 1.0687305761415251, "grad_norm": 0.33478689193725586, "learning_rate": 8.129786892039912e-06, "loss": 0.3092, "step": 2980 }, { "epoch": 1.0690891704518288, "grad_norm": 0.38355734944343567, "learning_rate": 8.128159149125823e-06, "loss": 0.3014, "step": 2981 }, { "epoch": 1.0694477647621325, "grad_norm": 0.3041651248931885, "learning_rate": 8.126530861276677e-06, "loss": 0.2803, "step": 2982 }, { "epoch": 1.069806359072436, "grad_norm": 0.41124096512794495, "learning_rate": 8.124902028776128e-06, "loss": 0.3513, "step": 2983 }, { "epoch": 1.0701649533827398, "grad_norm": 0.37642237544059753, "learning_rate": 8.123272651907923e-06, "loss": 0.2996, "step": 2984 }, { "epoch": 1.0705235476930433, "grad_norm": 0.3543640077114105, "learning_rate": 8.121642730955903e-06, "loss": 0.334, "step": 2985 }, { "epoch": 1.070882142003347, "grad_norm": 0.42270427942276, "learning_rate": 8.12001226620401e-06, "loss": 0.3232, "step": 2986 }, { "epoch": 1.0712407363136505, "grad_norm": 0.29854580760002136, "learning_rate": 8.118381257936272e-06, "loss": 0.2913, "step": 2987 }, { "epoch": 1.0715993306239542, "grad_norm": 0.33681929111480713, "learning_rate": 8.116749706436817e-06, "loss": 0.3267, "step": 2988 }, { "epoch": 1.0719579249342577, "grad_norm": 0.3440076410770416, "learning_rate": 8.115117611989867e-06, "loss": 0.3442, "step": 2989 }, { "epoch": 1.0723165192445614, "grad_norm": 0.3354615867137909, "learning_rate": 8.113484974879737e-06, "loss": 0.3485, "step": 2990 }, { "epoch": 1.072675113554865, "grad_norm": 0.32631686329841614, "learning_rate": 8.111851795390838e-06, "loss": 0.3238, "step": 2991 }, { "epoch": 1.0730337078651686, "grad_norm": 0.3499821126461029, "learning_rate": 8.110218073807674e-06, "loss": 0.3286, "step": 2992 }, { "epoch": 1.073392302175472, "grad_norm": 0.37320998311042786, "learning_rate": 8.108583810414848e-06, "loss": 0.3337, "step": 2993 }, { "epoch": 1.0737508964857758, "grad_norm": 0.3590715229511261, "learning_rate": 8.106949005497047e-06, "loss": 0.343, "step": 2994 }, { "epoch": 1.0741094907960793, "grad_norm": 0.3318147659301758, "learning_rate": 8.105313659339065e-06, "loss": 0.306, "step": 2995 }, { "epoch": 1.074468085106383, "grad_norm": 0.3804825246334076, "learning_rate": 8.103677772225783e-06, "loss": 0.3661, "step": 2996 }, { "epoch": 1.0748266794166865, "grad_norm": 0.328520804643631, "learning_rate": 8.102041344442175e-06, "loss": 0.2865, "step": 2997 }, { "epoch": 1.0751852737269902, "grad_norm": 0.3660850524902344, "learning_rate": 8.100404376273315e-06, "loss": 0.3517, "step": 2998 }, { "epoch": 1.0755438680372937, "grad_norm": 0.32265451550483704, "learning_rate": 8.098766868004366e-06, "loss": 0.3115, "step": 2999 }, { "epoch": 1.0759024623475975, "grad_norm": 0.332674503326416, "learning_rate": 8.097128819920587e-06, "loss": 0.3195, "step": 3000 }, { "epoch": 1.076261056657901, "grad_norm": 0.37155669927597046, "learning_rate": 8.095490232307334e-06, "loss": 0.3395, "step": 3001 }, { "epoch": 1.0766196509682047, "grad_norm": 0.36246684193611145, "learning_rate": 8.093851105450052e-06, "loss": 0.3122, "step": 3002 }, { "epoch": 1.0769782452785082, "grad_norm": 0.32547518610954285, "learning_rate": 8.092211439634281e-06, "loss": 0.2776, "step": 3003 }, { "epoch": 1.0773368395888119, "grad_norm": 0.36750784516334534, "learning_rate": 8.090571235145658e-06, "loss": 0.3218, "step": 3004 }, { "epoch": 1.0776954338991154, "grad_norm": 0.43401506543159485, "learning_rate": 8.088930492269908e-06, "loss": 0.3358, "step": 3005 }, { "epoch": 1.078054028209419, "grad_norm": 0.3543030619621277, "learning_rate": 8.087289211292856e-06, "loss": 0.2996, "step": 3006 }, { "epoch": 1.0784126225197226, "grad_norm": 0.3873533010482788, "learning_rate": 8.08564739250042e-06, "loss": 0.318, "step": 3007 }, { "epoch": 1.0787712168300263, "grad_norm": 0.4060046374797821, "learning_rate": 8.084005036178608e-06, "loss": 0.3241, "step": 3008 }, { "epoch": 1.07912981114033, "grad_norm": 0.3499407172203064, "learning_rate": 8.082362142613525e-06, "loss": 0.3215, "step": 3009 }, { "epoch": 1.0794884054506335, "grad_norm": 0.36314070224761963, "learning_rate": 8.080718712091364e-06, "loss": 0.2907, "step": 3010 }, { "epoch": 1.079846999760937, "grad_norm": 0.40329480171203613, "learning_rate": 8.07907474489842e-06, "loss": 0.331, "step": 3011 }, { "epoch": 1.0802055940712407, "grad_norm": 0.413316011428833, "learning_rate": 8.077430241321076e-06, "loss": 0.3338, "step": 3012 }, { "epoch": 1.0805641883815444, "grad_norm": 0.3645705580711365, "learning_rate": 8.07578520164581e-06, "loss": 0.3214, "step": 3013 }, { "epoch": 1.080922782691848, "grad_norm": 0.37915292382240295, "learning_rate": 8.074139626159194e-06, "loss": 0.3286, "step": 3014 }, { "epoch": 1.0812813770021517, "grad_norm": 0.39032480120658875, "learning_rate": 8.072493515147888e-06, "loss": 0.2916, "step": 3015 }, { "epoch": 1.0816399713124552, "grad_norm": 0.44461849331855774, "learning_rate": 8.070846868898654e-06, "loss": 0.3296, "step": 3016 }, { "epoch": 1.0819985656227589, "grad_norm": 0.32843953371047974, "learning_rate": 8.069199687698341e-06, "loss": 0.2991, "step": 3017 }, { "epoch": 1.0823571599330624, "grad_norm": 0.440586119890213, "learning_rate": 8.067551971833894e-06, "loss": 0.3409, "step": 3018 }, { "epoch": 1.082715754243366, "grad_norm": 0.42083653807640076, "learning_rate": 8.06590372159235e-06, "loss": 0.3176, "step": 3019 }, { "epoch": 1.0830743485536696, "grad_norm": 0.33633095026016235, "learning_rate": 8.064254937260837e-06, "loss": 0.3309, "step": 3020 }, { "epoch": 1.0834329428639733, "grad_norm": 0.34480583667755127, "learning_rate": 8.062605619126585e-06, "loss": 0.3096, "step": 3021 }, { "epoch": 1.0837915371742768, "grad_norm": 0.394378662109375, "learning_rate": 8.060955767476902e-06, "loss": 0.3138, "step": 3022 }, { "epoch": 1.0841501314845805, "grad_norm": 0.3879993259906769, "learning_rate": 8.059305382599203e-06, "loss": 0.305, "step": 3023 }, { "epoch": 1.084508725794884, "grad_norm": 0.4305036962032318, "learning_rate": 8.057654464780988e-06, "loss": 0.3483, "step": 3024 }, { "epoch": 1.0848673201051877, "grad_norm": 0.41007208824157715, "learning_rate": 8.056003014309851e-06, "loss": 0.3356, "step": 3025 }, { "epoch": 1.0852259144154912, "grad_norm": 0.44756636023521423, "learning_rate": 8.054351031473482e-06, "loss": 0.3222, "step": 3026 }, { "epoch": 1.085584508725795, "grad_norm": 0.3969649374485016, "learning_rate": 8.052698516559661e-06, "loss": 0.3292, "step": 3027 }, { "epoch": 1.0859431030360984, "grad_norm": 0.37151703238487244, "learning_rate": 8.05104546985626e-06, "loss": 0.3159, "step": 3028 }, { "epoch": 1.0863016973464021, "grad_norm": 0.3973095417022705, "learning_rate": 8.049391891651246e-06, "loss": 0.3162, "step": 3029 }, { "epoch": 1.0866602916567056, "grad_norm": 0.4055176079273224, "learning_rate": 8.04773778223268e-06, "loss": 0.3289, "step": 3030 }, { "epoch": 1.0870188859670094, "grad_norm": 0.4199112057685852, "learning_rate": 8.046083141888707e-06, "loss": 0.3595, "step": 3031 }, { "epoch": 1.0873774802773128, "grad_norm": 0.32945457100868225, "learning_rate": 8.044427970907575e-06, "loss": 0.2975, "step": 3032 }, { "epoch": 1.0877360745876166, "grad_norm": 0.3967323899269104, "learning_rate": 8.042772269577618e-06, "loss": 0.3598, "step": 3033 }, { "epoch": 1.08809466889792, "grad_norm": 0.389565110206604, "learning_rate": 8.041116038187266e-06, "loss": 0.3264, "step": 3034 }, { "epoch": 1.0884532632082238, "grad_norm": 0.36629465222358704, "learning_rate": 8.039459277025039e-06, "loss": 0.3286, "step": 3035 }, { "epoch": 1.0888118575185273, "grad_norm": 0.342124879360199, "learning_rate": 8.037801986379548e-06, "loss": 0.28, "step": 3036 }, { "epoch": 1.089170451828831, "grad_norm": 0.33902570605278015, "learning_rate": 8.036144166539501e-06, "loss": 0.3031, "step": 3037 }, { "epoch": 1.0895290461391345, "grad_norm": 0.421079158782959, "learning_rate": 8.034485817793693e-06, "loss": 0.3888, "step": 3038 }, { "epoch": 1.0898876404494382, "grad_norm": 0.3512687385082245, "learning_rate": 8.032826940431015e-06, "loss": 0.2854, "step": 3039 }, { "epoch": 1.090246234759742, "grad_norm": 0.37160125374794006, "learning_rate": 8.031167534740451e-06, "loss": 0.375, "step": 3040 }, { "epoch": 1.0906048290700454, "grad_norm": 0.4065544307231903, "learning_rate": 8.02950760101107e-06, "loss": 0.2959, "step": 3041 }, { "epoch": 1.0909634233803491, "grad_norm": 0.4146993160247803, "learning_rate": 8.027847139532039e-06, "loss": 0.3708, "step": 3042 }, { "epoch": 1.0913220176906526, "grad_norm": 0.3310873806476593, "learning_rate": 8.026186150592617e-06, "loss": 0.3116, "step": 3043 }, { "epoch": 1.0916806120009563, "grad_norm": 0.3753752112388611, "learning_rate": 8.024524634482152e-06, "loss": 0.3302, "step": 3044 }, { "epoch": 1.0920392063112598, "grad_norm": 0.35803866386413574, "learning_rate": 8.022862591490087e-06, "loss": 0.3556, "step": 3045 }, { "epoch": 1.0923978006215636, "grad_norm": 0.3592599630355835, "learning_rate": 8.021200021905951e-06, "loss": 0.316, "step": 3046 }, { "epoch": 1.092756394931867, "grad_norm": 0.3650077283382416, "learning_rate": 8.019536926019373e-06, "loss": 0.3217, "step": 3047 }, { "epoch": 1.0931149892421708, "grad_norm": 0.42585334181785583, "learning_rate": 8.017873304120069e-06, "loss": 0.3475, "step": 3048 }, { "epoch": 1.0934735835524743, "grad_norm": 0.34558960795402527, "learning_rate": 8.016209156497847e-06, "loss": 0.3131, "step": 3049 }, { "epoch": 1.093832177862778, "grad_norm": 0.35876473784446716, "learning_rate": 8.014544483442604e-06, "loss": 0.3438, "step": 3050 }, { "epoch": 1.0941907721730815, "grad_norm": 0.3996133804321289, "learning_rate": 8.012879285244335e-06, "loss": 0.3214, "step": 3051 }, { "epoch": 1.0945493664833852, "grad_norm": 0.35023292899131775, "learning_rate": 8.01121356219312e-06, "loss": 0.3469, "step": 3052 }, { "epoch": 1.0949079607936887, "grad_norm": 0.3637547492980957, "learning_rate": 8.009547314579133e-06, "loss": 0.314, "step": 3053 }, { "epoch": 1.0952665551039924, "grad_norm": 0.3855295777320862, "learning_rate": 8.007880542692643e-06, "loss": 0.3218, "step": 3054 }, { "epoch": 1.095625149414296, "grad_norm": 0.352434366941452, "learning_rate": 8.006213246824003e-06, "loss": 0.3605, "step": 3055 }, { "epoch": 1.0959837437245996, "grad_norm": 0.34474414587020874, "learning_rate": 8.004545427263663e-06, "loss": 0.3012, "step": 3056 }, { "epoch": 1.096342338034903, "grad_norm": 0.38119107484817505, "learning_rate": 8.002877084302162e-06, "loss": 0.3366, "step": 3057 }, { "epoch": 1.0967009323452068, "grad_norm": 0.36053138971328735, "learning_rate": 8.00120821823013e-06, "loss": 0.3027, "step": 3058 }, { "epoch": 1.0970595266555103, "grad_norm": 0.36540865898132324, "learning_rate": 7.99953882933829e-06, "loss": 0.3538, "step": 3059 }, { "epoch": 1.097418120965814, "grad_norm": 0.3423882722854614, "learning_rate": 7.997868917917453e-06, "loss": 0.2886, "step": 3060 }, { "epoch": 1.0977767152761175, "grad_norm": 0.32862135767936707, "learning_rate": 7.996198484258524e-06, "loss": 0.3126, "step": 3061 }, { "epoch": 1.0981353095864212, "grad_norm": 0.39592114090919495, "learning_rate": 7.994527528652495e-06, "loss": 0.3586, "step": 3062 }, { "epoch": 1.0984939038967247, "grad_norm": 0.40410295128822327, "learning_rate": 7.992856051390457e-06, "loss": 0.3502, "step": 3063 }, { "epoch": 1.0988524982070285, "grad_norm": 0.31578129529953003, "learning_rate": 7.991184052763582e-06, "loss": 0.3108, "step": 3064 }, { "epoch": 1.099211092517332, "grad_norm": 0.3779357671737671, "learning_rate": 7.989511533063138e-06, "loss": 0.3376, "step": 3065 }, { "epoch": 1.0995696868276357, "grad_norm": 0.35341909527778625, "learning_rate": 7.987838492580486e-06, "loss": 0.3271, "step": 3066 }, { "epoch": 1.0999282811379394, "grad_norm": 0.34463751316070557, "learning_rate": 7.986164931607073e-06, "loss": 0.3303, "step": 3067 }, { "epoch": 1.1002868754482429, "grad_norm": 0.31600064039230347, "learning_rate": 7.984490850434437e-06, "loss": 0.2895, "step": 3068 }, { "epoch": 1.1006454697585464, "grad_norm": 0.3048847019672394, "learning_rate": 7.982816249354212e-06, "loss": 0.288, "step": 3069 }, { "epoch": 1.10100406406885, "grad_norm": 0.36485978960990906, "learning_rate": 7.981141128658114e-06, "loss": 0.3848, "step": 3070 }, { "epoch": 1.1013626583791538, "grad_norm": 0.4107610285282135, "learning_rate": 7.979465488637957e-06, "loss": 0.3688, "step": 3071 }, { "epoch": 1.1017212526894573, "grad_norm": 0.3202988803386688, "learning_rate": 7.977789329585643e-06, "loss": 0.2838, "step": 3072 }, { "epoch": 1.102079846999761, "grad_norm": 0.3314446210861206, "learning_rate": 7.976112651793162e-06, "loss": 0.3334, "step": 3073 }, { "epoch": 1.1024384413100645, "grad_norm": 0.3229313790798187, "learning_rate": 7.974435455552598e-06, "loss": 0.3389, "step": 3074 }, { "epoch": 1.1027970356203682, "grad_norm": 0.36233043670654297, "learning_rate": 7.972757741156126e-06, "loss": 0.3479, "step": 3075 }, { "epoch": 1.1031556299306717, "grad_norm": 0.3466538190841675, "learning_rate": 7.971079508896002e-06, "loss": 0.3238, "step": 3076 }, { "epoch": 1.1035142242409754, "grad_norm": 0.3573911786079407, "learning_rate": 7.96940075906459e-06, "loss": 0.311, "step": 3077 }, { "epoch": 1.103872818551279, "grad_norm": 0.35009926557540894, "learning_rate": 7.967721491954322e-06, "loss": 0.3062, "step": 3078 }, { "epoch": 1.1042314128615827, "grad_norm": 0.3260801136493683, "learning_rate": 7.96604170785774e-06, "loss": 0.3033, "step": 3079 }, { "epoch": 1.1045900071718862, "grad_norm": 0.3517276346683502, "learning_rate": 7.964361407067464e-06, "loss": 0.3152, "step": 3080 }, { "epoch": 1.1049486014821899, "grad_norm": 0.35070234537124634, "learning_rate": 7.962680589876209e-06, "loss": 0.3314, "step": 3081 }, { "epoch": 1.1053071957924934, "grad_norm": 0.3567127287387848, "learning_rate": 7.960999256576776e-06, "loss": 0.3307, "step": 3082 }, { "epoch": 1.105665790102797, "grad_norm": 0.3145703971385956, "learning_rate": 7.959317407462062e-06, "loss": 0.3012, "step": 3083 }, { "epoch": 1.1060243844131006, "grad_norm": 0.3618406355381012, "learning_rate": 7.957635042825048e-06, "loss": 0.3583, "step": 3084 }, { "epoch": 1.1063829787234043, "grad_norm": 0.33087673783302307, "learning_rate": 7.955952162958807e-06, "loss": 0.2931, "step": 3085 }, { "epoch": 1.1067415730337078, "grad_norm": 0.34868088364601135, "learning_rate": 7.954268768156504e-06, "loss": 0.3506, "step": 3086 }, { "epoch": 1.1071001673440115, "grad_norm": 0.35709676146507263, "learning_rate": 7.952584858711389e-06, "loss": 0.3364, "step": 3087 }, { "epoch": 1.107458761654315, "grad_norm": 0.32512256503105164, "learning_rate": 7.950900434916807e-06, "loss": 0.3213, "step": 3088 }, { "epoch": 1.1078173559646187, "grad_norm": 0.35130780935287476, "learning_rate": 7.949215497066189e-06, "loss": 0.3249, "step": 3089 }, { "epoch": 1.1081759502749222, "grad_norm": 0.3505745530128479, "learning_rate": 7.947530045453054e-06, "loss": 0.312, "step": 3090 }, { "epoch": 1.108534544585226, "grad_norm": 0.3158123791217804, "learning_rate": 7.945844080371016e-06, "loss": 0.3041, "step": 3091 }, { "epoch": 1.1088931388955294, "grad_norm": 0.3654189109802246, "learning_rate": 7.944157602113774e-06, "loss": 0.362, "step": 3092 }, { "epoch": 1.1092517332058331, "grad_norm": 0.3075120151042938, "learning_rate": 7.942470610975118e-06, "loss": 0.2651, "step": 3093 }, { "epoch": 1.1096103275161366, "grad_norm": 0.36129602789878845, "learning_rate": 7.940783107248925e-06, "loss": 0.3605, "step": 3094 }, { "epoch": 1.1099689218264404, "grad_norm": 0.32914093136787415, "learning_rate": 7.939095091229168e-06, "loss": 0.3054, "step": 3095 }, { "epoch": 1.1103275161367439, "grad_norm": 0.3142252266407013, "learning_rate": 7.937406563209902e-06, "loss": 0.3383, "step": 3096 }, { "epoch": 1.1106861104470476, "grad_norm": 0.34489211440086365, "learning_rate": 7.935717523485273e-06, "loss": 0.3473, "step": 3097 }, { "epoch": 1.1110447047573513, "grad_norm": 0.34784993529319763, "learning_rate": 7.934027972349518e-06, "loss": 0.284, "step": 3098 }, { "epoch": 1.1114032990676548, "grad_norm": 0.3551425337791443, "learning_rate": 7.93233791009696e-06, "loss": 0.3705, "step": 3099 }, { "epoch": 1.1117618933779585, "grad_norm": 0.3549191653728485, "learning_rate": 7.930647337022018e-06, "loss": 0.3332, "step": 3100 }, { "epoch": 1.112120487688262, "grad_norm": 0.3363524377346039, "learning_rate": 7.928956253419191e-06, "loss": 0.307, "step": 3101 }, { "epoch": 1.1124790819985657, "grad_norm": 0.3372959792613983, "learning_rate": 7.927264659583071e-06, "loss": 0.3152, "step": 3102 }, { "epoch": 1.1128376763088692, "grad_norm": 0.33312222361564636, "learning_rate": 7.925572555808344e-06, "loss": 0.2975, "step": 3103 }, { "epoch": 1.113196270619173, "grad_norm": 0.3192470669746399, "learning_rate": 7.923879942389771e-06, "loss": 0.3054, "step": 3104 }, { "epoch": 1.1135548649294764, "grad_norm": 0.33662641048431396, "learning_rate": 7.922186819622218e-06, "loss": 0.355, "step": 3105 }, { "epoch": 1.1139134592397801, "grad_norm": 0.34055858850479126, "learning_rate": 7.920493187800629e-06, "loss": 0.306, "step": 3106 }, { "epoch": 1.1142720535500836, "grad_norm": 0.33250460028648376, "learning_rate": 7.91879904722004e-06, "loss": 0.3081, "step": 3107 }, { "epoch": 1.1146306478603873, "grad_norm": 0.319227933883667, "learning_rate": 7.917104398175577e-06, "loss": 0.3079, "step": 3108 }, { "epoch": 1.1149892421706908, "grad_norm": 0.3513329327106476, "learning_rate": 7.915409240962452e-06, "loss": 0.3366, "step": 3109 }, { "epoch": 1.1153478364809946, "grad_norm": 0.37033921480178833, "learning_rate": 7.913713575875965e-06, "loss": 0.3278, "step": 3110 }, { "epoch": 1.115706430791298, "grad_norm": 0.3498457074165344, "learning_rate": 7.912017403211511e-06, "loss": 0.3622, "step": 3111 }, { "epoch": 1.1160650251016018, "grad_norm": 0.33362478017807007, "learning_rate": 7.910320723264563e-06, "loss": 0.2928, "step": 3112 }, { "epoch": 1.1164236194119053, "grad_norm": 0.3910290598869324, "learning_rate": 7.908623536330693e-06, "loss": 0.3294, "step": 3113 }, { "epoch": 1.116782213722209, "grad_norm": 0.3520137369632721, "learning_rate": 7.906925842705551e-06, "loss": 0.2999, "step": 3114 }, { "epoch": 1.1171408080325125, "grad_norm": 0.3686507046222687, "learning_rate": 7.905227642684886e-06, "loss": 0.3389, "step": 3115 }, { "epoch": 1.1174994023428162, "grad_norm": 0.34329384565353394, "learning_rate": 7.903528936564524e-06, "loss": 0.3197, "step": 3116 }, { "epoch": 1.1178579966531197, "grad_norm": 0.380535364151001, "learning_rate": 7.90182972464039e-06, "loss": 0.3309, "step": 3117 }, { "epoch": 1.1182165909634234, "grad_norm": 0.3365139663219452, "learning_rate": 7.90013000720849e-06, "loss": 0.3034, "step": 3118 }, { "epoch": 1.118575185273727, "grad_norm": 0.3580665588378906, "learning_rate": 7.89842978456492e-06, "loss": 0.3158, "step": 3119 }, { "epoch": 1.1189337795840306, "grad_norm": 0.3649415075778961, "learning_rate": 7.896729057005862e-06, "loss": 0.3134, "step": 3120 }, { "epoch": 1.1192923738943341, "grad_norm": 0.35954955220222473, "learning_rate": 7.895027824827593e-06, "loss": 0.3274, "step": 3121 }, { "epoch": 1.1196509682046378, "grad_norm": 0.32658931612968445, "learning_rate": 7.893326088326467e-06, "loss": 0.3425, "step": 3122 }, { "epoch": 1.1200095625149413, "grad_norm": 0.36586815118789673, "learning_rate": 7.891623847798936e-06, "loss": 0.3534, "step": 3123 }, { "epoch": 1.120368156825245, "grad_norm": 0.3217116594314575, "learning_rate": 7.889921103541534e-06, "loss": 0.2716, "step": 3124 }, { "epoch": 1.1207267511355488, "grad_norm": 0.3602891266345978, "learning_rate": 7.888217855850885e-06, "loss": 0.3605, "step": 3125 }, { "epoch": 1.1210853454458523, "grad_norm": 0.3248378038406372, "learning_rate": 7.8865141050237e-06, "loss": 0.3043, "step": 3126 }, { "epoch": 1.121443939756156, "grad_norm": 0.36713603138923645, "learning_rate": 7.884809851356777e-06, "loss": 0.3118, "step": 3127 }, { "epoch": 1.1218025340664595, "grad_norm": 0.39120733737945557, "learning_rate": 7.883105095147004e-06, "loss": 0.3336, "step": 3128 }, { "epoch": 1.1221611283767632, "grad_norm": 0.3810417056083679, "learning_rate": 7.881399836691352e-06, "loss": 0.3695, "step": 3129 }, { "epoch": 1.1225197226870667, "grad_norm": 0.3265887200832367, "learning_rate": 7.879694076286884e-06, "loss": 0.2753, "step": 3130 }, { "epoch": 1.1228783169973704, "grad_norm": 0.3853415250778198, "learning_rate": 7.87798781423075e-06, "loss": 0.3537, "step": 3131 }, { "epoch": 1.123236911307674, "grad_norm": 0.3525622487068176, "learning_rate": 7.876281050820185e-06, "loss": 0.3199, "step": 3132 }, { "epoch": 1.1235955056179776, "grad_norm": 0.3953767418861389, "learning_rate": 7.87457378635251e-06, "loss": 0.3578, "step": 3133 }, { "epoch": 1.123954099928281, "grad_norm": 0.3570346236228943, "learning_rate": 7.87286602112514e-06, "loss": 0.3142, "step": 3134 }, { "epoch": 1.1243126942385848, "grad_norm": 0.3513626456260681, "learning_rate": 7.87115775543557e-06, "loss": 0.3474, "step": 3135 }, { "epoch": 1.1246712885488883, "grad_norm": 0.36239224672317505, "learning_rate": 7.869448989581388e-06, "loss": 0.3308, "step": 3136 }, { "epoch": 1.125029882859192, "grad_norm": 0.3560558259487152, "learning_rate": 7.867739723860263e-06, "loss": 0.3367, "step": 3137 }, { "epoch": 1.1253884771694955, "grad_norm": 0.3469524681568146, "learning_rate": 7.866029958569956e-06, "loss": 0.3176, "step": 3138 }, { "epoch": 1.1257470714797992, "grad_norm": 0.3727914094924927, "learning_rate": 7.864319694008314e-06, "loss": 0.3382, "step": 3139 }, { "epoch": 1.1261056657901027, "grad_norm": 0.3756181597709656, "learning_rate": 7.86260893047327e-06, "loss": 0.3453, "step": 3140 }, { "epoch": 1.1264642601004065, "grad_norm": 0.30089452862739563, "learning_rate": 7.860897668262845e-06, "loss": 0.3176, "step": 3141 }, { "epoch": 1.12682285441071, "grad_norm": 0.349147230386734, "learning_rate": 7.859185907675144e-06, "loss": 0.3671, "step": 3142 }, { "epoch": 1.1271814487210137, "grad_norm": 0.32084622979164124, "learning_rate": 7.857473649008364e-06, "loss": 0.3027, "step": 3143 }, { "epoch": 1.1275400430313172, "grad_norm": 0.37044522166252136, "learning_rate": 7.855760892560783e-06, "loss": 0.3428, "step": 3144 }, { "epoch": 1.1278986373416209, "grad_norm": 0.3495577275753021, "learning_rate": 7.85404763863077e-06, "loss": 0.3477, "step": 3145 }, { "epoch": 1.1282572316519244, "grad_norm": 0.33610427379608154, "learning_rate": 7.852333887516778e-06, "loss": 0.3046, "step": 3146 }, { "epoch": 1.128615825962228, "grad_norm": 0.36087027192115784, "learning_rate": 7.850619639517349e-06, "loss": 0.32, "step": 3147 }, { "epoch": 1.1289744202725316, "grad_norm": 0.3420476019382477, "learning_rate": 7.848904894931112e-06, "loss": 0.3302, "step": 3148 }, { "epoch": 1.1293330145828353, "grad_norm": 0.32096555829048157, "learning_rate": 7.847189654056777e-06, "loss": 0.3248, "step": 3149 }, { "epoch": 1.1296916088931388, "grad_norm": 0.3925534188747406, "learning_rate": 7.845473917193148e-06, "loss": 0.3416, "step": 3150 }, { "epoch": 1.1300502032034425, "grad_norm": 0.37024274468421936, "learning_rate": 7.843757684639108e-06, "loss": 0.3395, "step": 3151 }, { "epoch": 1.1304087975137462, "grad_norm": 0.32578736543655396, "learning_rate": 7.842040956693632e-06, "loss": 0.2835, "step": 3152 }, { "epoch": 1.1307673918240497, "grad_norm": 0.3606867492198944, "learning_rate": 7.84032373365578e-06, "loss": 0.3244, "step": 3153 }, { "epoch": 1.1311259861343532, "grad_norm": 0.3526366353034973, "learning_rate": 7.838606015824698e-06, "loss": 0.2851, "step": 3154 }, { "epoch": 1.131484580444657, "grad_norm": 0.3521553575992584, "learning_rate": 7.836887803499613e-06, "loss": 0.3446, "step": 3155 }, { "epoch": 1.1318431747549607, "grad_norm": 0.34778931736946106, "learning_rate": 7.835169096979849e-06, "loss": 0.2992, "step": 3156 }, { "epoch": 1.1322017690652642, "grad_norm": 0.37094733119010925, "learning_rate": 7.833449896564807e-06, "loss": 0.3547, "step": 3157 }, { "epoch": 1.1325603633755679, "grad_norm": 0.33963027596473694, "learning_rate": 7.831730202553979e-06, "loss": 0.312, "step": 3158 }, { "epoch": 1.1329189576858714, "grad_norm": 0.33799704909324646, "learning_rate": 7.830010015246939e-06, "loss": 0.331, "step": 3159 }, { "epoch": 1.133277551996175, "grad_norm": 0.3622550070285797, "learning_rate": 7.828289334943349e-06, "loss": 0.3554, "step": 3160 }, { "epoch": 1.1336361463064786, "grad_norm": 0.3578420877456665, "learning_rate": 7.826568161942958e-06, "loss": 0.3053, "step": 3161 }, { "epoch": 1.1339947406167823, "grad_norm": 0.3518799841403961, "learning_rate": 7.824846496545599e-06, "loss": 0.2967, "step": 3162 }, { "epoch": 1.1343533349270858, "grad_norm": 0.379058837890625, "learning_rate": 7.823124339051192e-06, "loss": 0.3763, "step": 3163 }, { "epoch": 1.1347119292373895, "grad_norm": 0.3387133479118347, "learning_rate": 7.82140168975974e-06, "loss": 0.3088, "step": 3164 }, { "epoch": 1.135070523547693, "grad_norm": 0.3414107859134674, "learning_rate": 7.819678548971339e-06, "loss": 0.3404, "step": 3165 }, { "epoch": 1.1354291178579967, "grad_norm": 0.36605140566825867, "learning_rate": 7.81795491698616e-06, "loss": 0.3119, "step": 3166 }, { "epoch": 1.1357877121683002, "grad_norm": 0.3665158748626709, "learning_rate": 7.816230794104467e-06, "loss": 0.3596, "step": 3167 }, { "epoch": 1.136146306478604, "grad_norm": 0.3500676453113556, "learning_rate": 7.81450618062661e-06, "loss": 0.3259, "step": 3168 }, { "epoch": 1.1365049007889074, "grad_norm": 0.33941611647605896, "learning_rate": 7.81278107685302e-06, "loss": 0.2888, "step": 3169 }, { "epoch": 1.1368634950992111, "grad_norm": 0.4016305208206177, "learning_rate": 7.811055483084214e-06, "loss": 0.3423, "step": 3170 }, { "epoch": 1.1372220894095146, "grad_norm": 0.3572543263435364, "learning_rate": 7.809329399620796e-06, "loss": 0.3279, "step": 3171 }, { "epoch": 1.1375806837198184, "grad_norm": 0.35812681913375854, "learning_rate": 7.807602826763459e-06, "loss": 0.3085, "step": 3172 }, { "epoch": 1.1379392780301218, "grad_norm": 0.3988063335418701, "learning_rate": 7.805875764812971e-06, "loss": 0.3111, "step": 3173 }, { "epoch": 1.1382978723404256, "grad_norm": 0.36046648025512695, "learning_rate": 7.8041482140702e-06, "loss": 0.3383, "step": 3174 }, { "epoch": 1.138656466650729, "grad_norm": 0.352774053812027, "learning_rate": 7.802420174836082e-06, "loss": 0.3287, "step": 3175 }, { "epoch": 1.1390150609610328, "grad_norm": 0.36856821179389954, "learning_rate": 7.80069164741165e-06, "loss": 0.3213, "step": 3176 }, { "epoch": 1.1393736552713363, "grad_norm": 0.3558560311794281, "learning_rate": 7.798962632098024e-06, "loss": 0.314, "step": 3177 }, { "epoch": 1.13973224958164, "grad_norm": 0.3592563569545746, "learning_rate": 7.797233129196398e-06, "loss": 0.3204, "step": 3178 }, { "epoch": 1.1400908438919437, "grad_norm": 0.36754873394966125, "learning_rate": 7.795503139008056e-06, "loss": 0.3136, "step": 3179 }, { "epoch": 1.1404494382022472, "grad_norm": 0.37230199575424194, "learning_rate": 7.793772661834372e-06, "loss": 0.3329, "step": 3180 }, { "epoch": 1.1408080325125507, "grad_norm": 0.3417327404022217, "learning_rate": 7.792041697976797e-06, "loss": 0.3272, "step": 3181 }, { "epoch": 1.1411666268228544, "grad_norm": 0.36234545707702637, "learning_rate": 7.790310247736872e-06, "loss": 0.3061, "step": 3182 }, { "epoch": 1.1415252211331581, "grad_norm": 0.3499959409236908, "learning_rate": 7.788578311416222e-06, "loss": 0.3445, "step": 3183 }, { "epoch": 1.1418838154434616, "grad_norm": 0.37087178230285645, "learning_rate": 7.786845889316552e-06, "loss": 0.341, "step": 3184 }, { "epoch": 1.1422424097537651, "grad_norm": 0.36402082443237305, "learning_rate": 7.785112981739659e-06, "loss": 0.326, "step": 3185 }, { "epoch": 1.1426010040640688, "grad_norm": 0.34812164306640625, "learning_rate": 7.78337958898742e-06, "loss": 0.3395, "step": 3186 }, { "epoch": 1.1429595983743726, "grad_norm": 0.3454589247703552, "learning_rate": 7.781645711361796e-06, "loss": 0.2976, "step": 3187 }, { "epoch": 1.143318192684676, "grad_norm": 0.3585636019706726, "learning_rate": 7.779911349164835e-06, "loss": 0.3236, "step": 3188 }, { "epoch": 1.1436767869949798, "grad_norm": 0.3659423589706421, "learning_rate": 7.77817650269867e-06, "loss": 0.3359, "step": 3189 }, { "epoch": 1.1440353813052833, "grad_norm": 0.3312259316444397, "learning_rate": 7.776441172265511e-06, "loss": 0.3199, "step": 3190 }, { "epoch": 1.144393975615587, "grad_norm": 0.33640754222869873, "learning_rate": 7.774705358167667e-06, "loss": 0.3201, "step": 3191 }, { "epoch": 1.1447525699258905, "grad_norm": 0.38042911887168884, "learning_rate": 7.772969060707514e-06, "loss": 0.3258, "step": 3192 }, { "epoch": 1.1451111642361942, "grad_norm": 0.34857770800590515, "learning_rate": 7.771232280187523e-06, "loss": 0.2839, "step": 3193 }, { "epoch": 1.1454697585464977, "grad_norm": 0.3627225458621979, "learning_rate": 7.769495016910247e-06, "loss": 0.3445, "step": 3194 }, { "epoch": 1.1458283528568014, "grad_norm": 0.3542725145816803, "learning_rate": 7.767757271178324e-06, "loss": 0.3161, "step": 3195 }, { "epoch": 1.146186947167105, "grad_norm": 0.35872986912727356, "learning_rate": 7.766019043294473e-06, "loss": 0.3244, "step": 3196 }, { "epoch": 1.1465455414774086, "grad_norm": 0.3497318923473358, "learning_rate": 7.764280333561498e-06, "loss": 0.3008, "step": 3197 }, { "epoch": 1.146904135787712, "grad_norm": 0.35826146602630615, "learning_rate": 7.76254114228229e-06, "loss": 0.3151, "step": 3198 }, { "epoch": 1.1472627300980158, "grad_norm": 0.3405020236968994, "learning_rate": 7.76080146975982e-06, "loss": 0.2828, "step": 3199 }, { "epoch": 1.1476213244083193, "grad_norm": 0.38488566875457764, "learning_rate": 7.759061316297146e-06, "loss": 0.3473, "step": 3200 }, { "epoch": 1.147979918718623, "grad_norm": 0.36455702781677246, "learning_rate": 7.757320682197404e-06, "loss": 0.3555, "step": 3201 }, { "epoch": 1.1483385130289265, "grad_norm": 0.3347936272621155, "learning_rate": 7.755579567763821e-06, "loss": 0.3023, "step": 3202 }, { "epoch": 1.1486971073392303, "grad_norm": 0.4199085235595703, "learning_rate": 7.753837973299706e-06, "loss": 0.3982, "step": 3203 }, { "epoch": 1.1490557016495337, "grad_norm": 0.3268807530403137, "learning_rate": 7.752095899108448e-06, "loss": 0.2886, "step": 3204 }, { "epoch": 1.1494142959598375, "grad_norm": 0.38803768157958984, "learning_rate": 7.75035334549352e-06, "loss": 0.3491, "step": 3205 }, { "epoch": 1.149772890270141, "grad_norm": 0.35538679361343384, "learning_rate": 7.748610312758484e-06, "loss": 0.3345, "step": 3206 }, { "epoch": 1.1501314845804447, "grad_norm": 0.35520339012145996, "learning_rate": 7.74686680120698e-06, "loss": 0.3021, "step": 3207 }, { "epoch": 1.1504900788907482, "grad_norm": 0.34158509969711304, "learning_rate": 7.745122811142733e-06, "loss": 0.3129, "step": 3208 }, { "epoch": 1.1508486732010519, "grad_norm": 0.36350852251052856, "learning_rate": 7.743378342869553e-06, "loss": 0.2973, "step": 3209 }, { "epoch": 1.1512072675113556, "grad_norm": 0.3778868317604065, "learning_rate": 7.741633396691329e-06, "loss": 0.3452, "step": 3210 }, { "epoch": 1.151565861821659, "grad_norm": 0.3446861505508423, "learning_rate": 7.73988797291204e-06, "loss": 0.3307, "step": 3211 }, { "epoch": 1.1519244561319626, "grad_norm": 0.30980929732322693, "learning_rate": 7.738142071835739e-06, "loss": 0.2838, "step": 3212 }, { "epoch": 1.1522830504422663, "grad_norm": 0.3803001344203949, "learning_rate": 7.736395693766571e-06, "loss": 0.3498, "step": 3213 }, { "epoch": 1.15264164475257, "grad_norm": 0.3757089376449585, "learning_rate": 7.73464883900876e-06, "loss": 0.3086, "step": 3214 }, { "epoch": 1.1530002390628735, "grad_norm": 0.3951210379600525, "learning_rate": 7.732901507866614e-06, "loss": 0.3622, "step": 3215 }, { "epoch": 1.1533588333731772, "grad_norm": 0.3473862111568451, "learning_rate": 7.73115370064452e-06, "loss": 0.3115, "step": 3216 }, { "epoch": 1.1537174276834807, "grad_norm": 0.3902221620082855, "learning_rate": 7.729405417646958e-06, "loss": 0.3326, "step": 3217 }, { "epoch": 1.1540760219937845, "grad_norm": 0.49382925033569336, "learning_rate": 7.727656659178478e-06, "loss": 0.329, "step": 3218 }, { "epoch": 1.154434616304088, "grad_norm": 0.35622450709342957, "learning_rate": 7.725907425543722e-06, "loss": 0.3084, "step": 3219 }, { "epoch": 1.1547932106143917, "grad_norm": 0.3699657618999481, "learning_rate": 7.724157717047414e-06, "loss": 0.286, "step": 3220 }, { "epoch": 1.1551518049246952, "grad_norm": 0.3894866704940796, "learning_rate": 7.722407533994354e-06, "loss": 0.3309, "step": 3221 }, { "epoch": 1.1555103992349989, "grad_norm": 0.3833305537700653, "learning_rate": 7.720656876689433e-06, "loss": 0.3453, "step": 3222 }, { "epoch": 1.1558689935453024, "grad_norm": 0.3735564649105072, "learning_rate": 7.718905745437619e-06, "loss": 0.3381, "step": 3223 }, { "epoch": 1.156227587855606, "grad_norm": 0.3552407920360565, "learning_rate": 7.717154140543963e-06, "loss": 0.3035, "step": 3224 }, { "epoch": 1.1565861821659096, "grad_norm": 0.37146610021591187, "learning_rate": 7.715402062313605e-06, "loss": 0.3254, "step": 3225 }, { "epoch": 1.1569447764762133, "grad_norm": 0.35227420926094055, "learning_rate": 7.71364951105176e-06, "loss": 0.3021, "step": 3226 }, { "epoch": 1.1573033707865168, "grad_norm": 0.3656979501247406, "learning_rate": 7.711896487063725e-06, "loss": 0.3396, "step": 3227 }, { "epoch": 1.1576619650968205, "grad_norm": 0.3845427334308624, "learning_rate": 7.710142990654886e-06, "loss": 0.354, "step": 3228 }, { "epoch": 1.158020559407124, "grad_norm": 0.3555806875228882, "learning_rate": 7.708389022130706e-06, "loss": 0.3093, "step": 3229 }, { "epoch": 1.1583791537174277, "grad_norm": 0.34569209814071655, "learning_rate": 7.706634581796734e-06, "loss": 0.3244, "step": 3230 }, { "epoch": 1.1587377480277312, "grad_norm": 0.34589120745658875, "learning_rate": 7.704879669958596e-06, "loss": 0.3304, "step": 3231 }, { "epoch": 1.159096342338035, "grad_norm": 0.3595709502696991, "learning_rate": 7.703124286922004e-06, "loss": 0.3372, "step": 3232 }, { "epoch": 1.1594549366483384, "grad_norm": 0.3708602786064148, "learning_rate": 7.70136843299275e-06, "loss": 0.3488, "step": 3233 }, { "epoch": 1.1598135309586421, "grad_norm": 0.3362174928188324, "learning_rate": 7.699612108476712e-06, "loss": 0.31, "step": 3234 }, { "epoch": 1.1601721252689456, "grad_norm": 0.31665679812431335, "learning_rate": 7.697855313679844e-06, "loss": 0.3106, "step": 3235 }, { "epoch": 1.1605307195792494, "grad_norm": 0.40506744384765625, "learning_rate": 7.69609804890819e-06, "loss": 0.3501, "step": 3236 }, { "epoch": 1.160889313889553, "grad_norm": 0.3331606090068817, "learning_rate": 7.694340314467866e-06, "loss": 0.2883, "step": 3237 }, { "epoch": 1.1612479081998566, "grad_norm": 0.3400909900665283, "learning_rate": 7.692582110665077e-06, "loss": 0.3086, "step": 3238 }, { "epoch": 1.16160650251016, "grad_norm": 0.34089022874832153, "learning_rate": 7.690823437806105e-06, "loss": 0.3265, "step": 3239 }, { "epoch": 1.1619650968204638, "grad_norm": 0.34089604020118713, "learning_rate": 7.689064296197322e-06, "loss": 0.3042, "step": 3240 }, { "epoch": 1.1623236911307675, "grad_norm": 0.3561393618583679, "learning_rate": 7.687304686145169e-06, "loss": 0.3503, "step": 3241 }, { "epoch": 1.162682285441071, "grad_norm": 0.3319580554962158, "learning_rate": 7.685544607956182e-06, "loss": 0.3303, "step": 3242 }, { "epoch": 1.1630408797513745, "grad_norm": 0.32852035760879517, "learning_rate": 7.683784061936967e-06, "loss": 0.3175, "step": 3243 }, { "epoch": 1.1633994740616782, "grad_norm": 0.4206533133983612, "learning_rate": 7.682023048394221e-06, "loss": 0.3476, "step": 3244 }, { "epoch": 1.163758068371982, "grad_norm": 0.34246742725372314, "learning_rate": 7.680261567634713e-06, "loss": 0.2991, "step": 3245 }, { "epoch": 1.1641166626822854, "grad_norm": 0.3435295522212982, "learning_rate": 7.678499619965303e-06, "loss": 0.3076, "step": 3246 }, { "epoch": 1.1644752569925891, "grad_norm": 0.4216267466545105, "learning_rate": 7.676737205692925e-06, "loss": 0.3726, "step": 3247 }, { "epoch": 1.1648338513028926, "grad_norm": 0.35641223192214966, "learning_rate": 7.674974325124599e-06, "loss": 0.3458, "step": 3248 }, { "epoch": 1.1651924456131963, "grad_norm": 0.3405619263648987, "learning_rate": 7.673210978567422e-06, "loss": 0.3023, "step": 3249 }, { "epoch": 1.1655510399234998, "grad_norm": 0.369035542011261, "learning_rate": 7.671447166328576e-06, "loss": 0.3164, "step": 3250 }, { "epoch": 1.1659096342338036, "grad_norm": 0.4017321765422821, "learning_rate": 7.669682888715323e-06, "loss": 0.3759, "step": 3251 }, { "epoch": 1.166268228544107, "grad_norm": 0.3847483992576599, "learning_rate": 7.667918146035005e-06, "loss": 0.3098, "step": 3252 }, { "epoch": 1.1666268228544108, "grad_norm": 0.3768741488456726, "learning_rate": 7.666152938595044e-06, "loss": 0.3231, "step": 3253 }, { "epoch": 1.1669854171647143, "grad_norm": 0.3389769196510315, "learning_rate": 7.664387266702948e-06, "loss": 0.3132, "step": 3254 }, { "epoch": 1.167344011475018, "grad_norm": 0.4042021334171295, "learning_rate": 7.6626211306663e-06, "loss": 0.3531, "step": 3255 }, { "epoch": 1.1677026057853215, "grad_norm": 0.38667768239974976, "learning_rate": 7.660854530792767e-06, "loss": 0.3, "step": 3256 }, { "epoch": 1.1680612000956252, "grad_norm": 0.33103322982788086, "learning_rate": 7.659087467390097e-06, "loss": 0.3051, "step": 3257 }, { "epoch": 1.1684197944059287, "grad_norm": 0.4015848934650421, "learning_rate": 7.657319940766117e-06, "loss": 0.3319, "step": 3258 }, { "epoch": 1.1687783887162324, "grad_norm": 0.35756930708885193, "learning_rate": 7.655551951228736e-06, "loss": 0.3403, "step": 3259 }, { "epoch": 1.169136983026536, "grad_norm": 0.32468464970588684, "learning_rate": 7.653783499085942e-06, "loss": 0.335, "step": 3260 }, { "epoch": 1.1694955773368396, "grad_norm": 0.3326076865196228, "learning_rate": 7.652014584645809e-06, "loss": 0.3062, "step": 3261 }, { "epoch": 1.1698541716471431, "grad_norm": 0.3201964199542999, "learning_rate": 7.65024520821648e-06, "loss": 0.3072, "step": 3262 }, { "epoch": 1.1702127659574468, "grad_norm": 0.3812856674194336, "learning_rate": 7.648475370106194e-06, "loss": 0.3303, "step": 3263 }, { "epoch": 1.1705713602677503, "grad_norm": 0.36687734723091125, "learning_rate": 7.646705070623257e-06, "loss": 0.3395, "step": 3264 }, { "epoch": 1.170929954578054, "grad_norm": 0.3584085702896118, "learning_rate": 7.644934310076065e-06, "loss": 0.3022, "step": 3265 }, { "epoch": 1.1712885488883575, "grad_norm": 0.35072579979896545, "learning_rate": 7.643163088773085e-06, "loss": 0.3215, "step": 3266 }, { "epoch": 1.1716471431986613, "grad_norm": 0.3774937689304352, "learning_rate": 7.641391407022872e-06, "loss": 0.3425, "step": 3267 }, { "epoch": 1.172005737508965, "grad_norm": 0.35717010498046875, "learning_rate": 7.639619265134056e-06, "loss": 0.3225, "step": 3268 }, { "epoch": 1.1723643318192685, "grad_norm": 0.3748975098133087, "learning_rate": 7.637846663415353e-06, "loss": 0.3355, "step": 3269 }, { "epoch": 1.172722926129572, "grad_norm": 0.37466639280319214, "learning_rate": 7.636073602175555e-06, "loss": 0.3058, "step": 3270 }, { "epoch": 1.1730815204398757, "grad_norm": 0.3962196707725525, "learning_rate": 7.634300081723535e-06, "loss": 0.3402, "step": 3271 }, { "epoch": 1.1734401147501794, "grad_norm": 0.37283819913864136, "learning_rate": 7.632526102368244e-06, "loss": 0.3541, "step": 3272 }, { "epoch": 1.173798709060483, "grad_norm": 0.33749163150787354, "learning_rate": 7.630751664418716e-06, "loss": 0.3121, "step": 3273 }, { "epoch": 1.1741573033707866, "grad_norm": 0.39087986946105957, "learning_rate": 7.628976768184062e-06, "loss": 0.329, "step": 3274 }, { "epoch": 1.17451589768109, "grad_norm": 0.3744421601295471, "learning_rate": 7.627201413973478e-06, "loss": 0.3287, "step": 3275 }, { "epoch": 1.1748744919913938, "grad_norm": 0.3180910646915436, "learning_rate": 7.6254256020962325e-06, "loss": 0.3224, "step": 3276 }, { "epoch": 1.1752330863016973, "grad_norm": 0.3850322663784027, "learning_rate": 7.62364933286168e-06, "loss": 0.3599, "step": 3277 }, { "epoch": 1.175591680612001, "grad_norm": 0.3517675995826721, "learning_rate": 7.62187260657925e-06, "loss": 0.3236, "step": 3278 }, { "epoch": 1.1759502749223045, "grad_norm": 0.3568272888660431, "learning_rate": 7.620095423558456e-06, "loss": 0.3101, "step": 3279 }, { "epoch": 1.1763088692326082, "grad_norm": 0.37312981486320496, "learning_rate": 7.618317784108887e-06, "loss": 0.3257, "step": 3280 }, { "epoch": 1.1766674635429117, "grad_norm": 0.4257490634918213, "learning_rate": 7.616539688540212e-06, "loss": 0.3631, "step": 3281 }, { "epoch": 1.1770260578532155, "grad_norm": 0.3526257276535034, "learning_rate": 7.614761137162185e-06, "loss": 0.3342, "step": 3282 }, { "epoch": 1.177384652163519, "grad_norm": 0.335030198097229, "learning_rate": 7.612982130284634e-06, "loss": 0.3109, "step": 3283 }, { "epoch": 1.1777432464738227, "grad_norm": 0.33850178122520447, "learning_rate": 7.611202668217465e-06, "loss": 0.2946, "step": 3284 }, { "epoch": 1.1781018407841262, "grad_norm": 0.39422091841697693, "learning_rate": 7.609422751270666e-06, "loss": 0.3789, "step": 3285 }, { "epoch": 1.1784604350944299, "grad_norm": 0.3731013834476471, "learning_rate": 7.607642379754308e-06, "loss": 0.297, "step": 3286 }, { "epoch": 1.1788190294047334, "grad_norm": 0.3514670133590698, "learning_rate": 7.605861553978533e-06, "loss": 0.3564, "step": 3287 }, { "epoch": 1.179177623715037, "grad_norm": 0.32759809494018555, "learning_rate": 7.604080274253568e-06, "loss": 0.3169, "step": 3288 }, { "epoch": 1.1795362180253406, "grad_norm": 0.38914355635643005, "learning_rate": 7.6022985408897186e-06, "loss": 0.3603, "step": 3289 }, { "epoch": 1.1798948123356443, "grad_norm": 0.32615384459495544, "learning_rate": 7.600516354197365e-06, "loss": 0.3029, "step": 3290 }, { "epoch": 1.1802534066459478, "grad_norm": 0.3537333905696869, "learning_rate": 7.5987337144869735e-06, "loss": 0.3651, "step": 3291 }, { "epoch": 1.1806120009562515, "grad_norm": 0.3649899363517761, "learning_rate": 7.596950622069083e-06, "loss": 0.3424, "step": 3292 }, { "epoch": 1.180970595266555, "grad_norm": 0.3721587657928467, "learning_rate": 7.595167077254315e-06, "loss": 0.331, "step": 3293 }, { "epoch": 1.1813291895768587, "grad_norm": 0.32295680046081543, "learning_rate": 7.593383080353369e-06, "loss": 0.3047, "step": 3294 }, { "epoch": 1.1816877838871624, "grad_norm": 0.3337714970111847, "learning_rate": 7.591598631677022e-06, "loss": 0.3275, "step": 3295 }, { "epoch": 1.182046378197466, "grad_norm": 0.3040936589241028, "learning_rate": 7.589813731536128e-06, "loss": 0.2879, "step": 3296 }, { "epoch": 1.1824049725077694, "grad_norm": 0.38268956542015076, "learning_rate": 7.5880283802416255e-06, "loss": 0.3649, "step": 3297 }, { "epoch": 1.1827635668180732, "grad_norm": 0.34735941886901855, "learning_rate": 7.586242578104528e-06, "loss": 0.3175, "step": 3298 }, { "epoch": 1.1831221611283769, "grad_norm": 0.35322174429893494, "learning_rate": 7.584456325435927e-06, "loss": 0.33, "step": 3299 }, { "epoch": 1.1834807554386804, "grad_norm": 0.34673747420310974, "learning_rate": 7.582669622546994e-06, "loss": 0.351, "step": 3300 }, { "epoch": 1.1838393497489839, "grad_norm": 0.3533152937889099, "learning_rate": 7.5808824697489766e-06, "loss": 0.3302, "step": 3301 }, { "epoch": 1.1841979440592876, "grad_norm": 0.35028529167175293, "learning_rate": 7.579094867353205e-06, "loss": 0.2901, "step": 3302 }, { "epoch": 1.1845565383695913, "grad_norm": 0.3266279101371765, "learning_rate": 7.5773068156710815e-06, "loss": 0.3375, "step": 3303 }, { "epoch": 1.1849151326798948, "grad_norm": 0.3627232313156128, "learning_rate": 7.575518315014092e-06, "loss": 0.3125, "step": 3304 }, { "epoch": 1.1852737269901985, "grad_norm": 0.35721123218536377, "learning_rate": 7.573729365693802e-06, "loss": 0.3137, "step": 3305 }, { "epoch": 1.185632321300502, "grad_norm": 0.3860536217689514, "learning_rate": 7.571939968021847e-06, "loss": 0.3284, "step": 3306 }, { "epoch": 1.1859909156108057, "grad_norm": 0.3485368490219116, "learning_rate": 7.570150122309947e-06, "loss": 0.3336, "step": 3307 }, { "epoch": 1.1863495099211092, "grad_norm": 0.3441995084285736, "learning_rate": 7.568359828869901e-06, "loss": 0.3008, "step": 3308 }, { "epoch": 1.186708104231413, "grad_norm": 0.40141257643699646, "learning_rate": 7.566569088013582e-06, "loss": 0.3797, "step": 3309 }, { "epoch": 1.1870666985417164, "grad_norm": 0.36611729860305786, "learning_rate": 7.564777900052943e-06, "loss": 0.3077, "step": 3310 }, { "epoch": 1.1874252928520201, "grad_norm": 0.363263338804245, "learning_rate": 7.562986265300014e-06, "loss": 0.3159, "step": 3311 }, { "epoch": 1.1877838871623236, "grad_norm": 0.4007851779460907, "learning_rate": 7.561194184066904e-06, "loss": 0.3112, "step": 3312 }, { "epoch": 1.1881424814726274, "grad_norm": 0.367647647857666, "learning_rate": 7.559401656665799e-06, "loss": 0.3159, "step": 3313 }, { "epoch": 1.1885010757829308, "grad_norm": 0.3860938549041748, "learning_rate": 7.557608683408962e-06, "loss": 0.3335, "step": 3314 }, { "epoch": 1.1888596700932346, "grad_norm": 0.38077861070632935, "learning_rate": 7.555815264608735e-06, "loss": 0.3365, "step": 3315 }, { "epoch": 1.189218264403538, "grad_norm": 0.4142000079154968, "learning_rate": 7.554021400577538e-06, "loss": 0.3318, "step": 3316 }, { "epoch": 1.1895768587138418, "grad_norm": 0.3456551730632782, "learning_rate": 7.552227091627866e-06, "loss": 0.3339, "step": 3317 }, { "epoch": 1.1899354530241453, "grad_norm": 0.35934221744537354, "learning_rate": 7.550432338072295e-06, "loss": 0.303, "step": 3318 }, { "epoch": 1.190294047334449, "grad_norm": 0.4569445550441742, "learning_rate": 7.548637140223476e-06, "loss": 0.3397, "step": 3319 }, { "epoch": 1.1906526416447525, "grad_norm": 0.35621315240859985, "learning_rate": 7.546841498394137e-06, "loss": 0.3097, "step": 3320 }, { "epoch": 1.1910112359550562, "grad_norm": 0.3563738167285919, "learning_rate": 7.545045412897086e-06, "loss": 0.2986, "step": 3321 }, { "epoch": 1.1913698302653597, "grad_norm": 0.322083979845047, "learning_rate": 7.543248884045207e-06, "loss": 0.3161, "step": 3322 }, { "epoch": 1.1917284245756634, "grad_norm": 0.38380706310272217, "learning_rate": 7.5414519121514586e-06, "loss": 0.3629, "step": 3323 }, { "epoch": 1.192087018885967, "grad_norm": 0.31534627079963684, "learning_rate": 7.53965449752888e-06, "loss": 0.2975, "step": 3324 }, { "epoch": 1.1924456131962706, "grad_norm": 0.39265498518943787, "learning_rate": 7.5378566404905885e-06, "loss": 0.361, "step": 3325 }, { "epoch": 1.1928042075065743, "grad_norm": 0.3422025144100189, "learning_rate": 7.536058341349772e-06, "loss": 0.3189, "step": 3326 }, { "epoch": 1.1931628018168778, "grad_norm": 0.36320048570632935, "learning_rate": 7.534259600419702e-06, "loss": 0.3054, "step": 3327 }, { "epoch": 1.1935213961271813, "grad_norm": 0.3598645031452179, "learning_rate": 7.5324604180137275e-06, "loss": 0.3218, "step": 3328 }, { "epoch": 1.193879990437485, "grad_norm": 0.36531734466552734, "learning_rate": 7.530660794445268e-06, "loss": 0.3274, "step": 3329 }, { "epoch": 1.1942385847477888, "grad_norm": 0.39026981592178345, "learning_rate": 7.528860730027825e-06, "loss": 0.3169, "step": 3330 }, { "epoch": 1.1945971790580923, "grad_norm": 0.31644904613494873, "learning_rate": 7.5270602250749755e-06, "loss": 0.296, "step": 3331 }, { "epoch": 1.194955773368396, "grad_norm": 0.3648725748062134, "learning_rate": 7.525259279900372e-06, "loss": 0.3375, "step": 3332 }, { "epoch": 1.1953143676786995, "grad_norm": 0.3599945902824402, "learning_rate": 7.523457894817745e-06, "loss": 0.33, "step": 3333 }, { "epoch": 1.1956729619890032, "grad_norm": 0.34375691413879395, "learning_rate": 7.5216560701409035e-06, "loss": 0.2872, "step": 3334 }, { "epoch": 1.1960315562993067, "grad_norm": 0.3628087341785431, "learning_rate": 7.5198538061837275e-06, "loss": 0.3145, "step": 3335 }, { "epoch": 1.1963901506096104, "grad_norm": 0.34448379278182983, "learning_rate": 7.51805110326018e-06, "loss": 0.3237, "step": 3336 }, { "epoch": 1.196748744919914, "grad_norm": 0.3542300760746002, "learning_rate": 7.516247961684295e-06, "loss": 0.3259, "step": 3337 }, { "epoch": 1.1971073392302176, "grad_norm": 0.3432222604751587, "learning_rate": 7.514444381770187e-06, "loss": 0.3346, "step": 3338 }, { "epoch": 1.197465933540521, "grad_norm": 0.4025283455848694, "learning_rate": 7.512640363832045e-06, "loss": 0.3403, "step": 3339 }, { "epoch": 1.1978245278508248, "grad_norm": 0.3782102167606354, "learning_rate": 7.510835908184135e-06, "loss": 0.2833, "step": 3340 }, { "epoch": 1.1981831221611283, "grad_norm": 0.40020549297332764, "learning_rate": 7.509031015140797e-06, "loss": 0.4035, "step": 3341 }, { "epoch": 1.198541716471432, "grad_norm": 0.33501484990119934, "learning_rate": 7.507225685016452e-06, "loss": 0.2745, "step": 3342 }, { "epoch": 1.1989003107817355, "grad_norm": 0.44856464862823486, "learning_rate": 7.505419918125589e-06, "loss": 0.3365, "step": 3343 }, { "epoch": 1.1992589050920393, "grad_norm": 0.43982967734336853, "learning_rate": 7.503613714782785e-06, "loss": 0.3497, "step": 3344 }, { "epoch": 1.1996174994023427, "grad_norm": 0.31890353560447693, "learning_rate": 7.501807075302681e-06, "loss": 0.3186, "step": 3345 }, { "epoch": 1.1999760937126465, "grad_norm": 0.42137274146080017, "learning_rate": 7.500000000000001e-06, "loss": 0.3463, "step": 3346 }, { "epoch": 1.20033468802295, "grad_norm": 0.37322232127189636, "learning_rate": 7.498192489189543e-06, "loss": 0.3108, "step": 3347 }, { "epoch": 1.2006932823332537, "grad_norm": 0.41691863536834717, "learning_rate": 7.4963845431861815e-06, "loss": 0.342, "step": 3348 }, { "epoch": 1.2010518766435572, "grad_norm": 0.34258466958999634, "learning_rate": 7.494576162304865e-06, "loss": 0.3343, "step": 3349 }, { "epoch": 1.2014104709538609, "grad_norm": 0.4194484353065491, "learning_rate": 7.49276734686062e-06, "loss": 0.3349, "step": 3350 }, { "epoch": 1.2017690652641644, "grad_norm": 0.3642909526824951, "learning_rate": 7.490958097168548e-06, "loss": 0.3234, "step": 3351 }, { "epoch": 1.202127659574468, "grad_norm": 0.3642926514148712, "learning_rate": 7.4891484135438275e-06, "loss": 0.3324, "step": 3352 }, { "epoch": 1.2024862538847718, "grad_norm": 0.3201279640197754, "learning_rate": 7.487338296301706e-06, "loss": 0.3023, "step": 3353 }, { "epoch": 1.2028448481950753, "grad_norm": 0.3691498041152954, "learning_rate": 7.485527745757517e-06, "loss": 0.3397, "step": 3354 }, { "epoch": 1.2032034425053788, "grad_norm": 0.3123743534088135, "learning_rate": 7.4837167622266606e-06, "loss": 0.3026, "step": 3355 }, { "epoch": 1.2035620368156825, "grad_norm": 0.4100337624549866, "learning_rate": 7.481905346024617e-06, "loss": 0.3527, "step": 3356 }, { "epoch": 1.2039206311259862, "grad_norm": 0.2953363358974457, "learning_rate": 7.4800934974669415e-06, "loss": 0.2902, "step": 3357 }, { "epoch": 1.2042792254362897, "grad_norm": 0.35388630628585815, "learning_rate": 7.478281216869261e-06, "loss": 0.3419, "step": 3358 }, { "epoch": 1.2046378197465932, "grad_norm": 0.3752861022949219, "learning_rate": 7.476468504547284e-06, "loss": 0.3176, "step": 3359 }, { "epoch": 1.204996414056897, "grad_norm": 0.32803401350975037, "learning_rate": 7.474655360816789e-06, "loss": 0.3255, "step": 3360 }, { "epoch": 1.2053550083672007, "grad_norm": 0.3213651478290558, "learning_rate": 7.47284178599363e-06, "loss": 0.315, "step": 3361 }, { "epoch": 1.2057136026775042, "grad_norm": 0.31859999895095825, "learning_rate": 7.471027780393737e-06, "loss": 0.2963, "step": 3362 }, { "epoch": 1.2060721969878079, "grad_norm": 0.33134546875953674, "learning_rate": 7.469213344333119e-06, "loss": 0.349, "step": 3363 }, { "epoch": 1.2064307912981114, "grad_norm": 0.32637733221054077, "learning_rate": 7.467398478127853e-06, "loss": 0.3137, "step": 3364 }, { "epoch": 1.206789385608415, "grad_norm": 0.3154071867465973, "learning_rate": 7.465583182094097e-06, "loss": 0.3236, "step": 3365 }, { "epoch": 1.2071479799187186, "grad_norm": 0.33839479088783264, "learning_rate": 7.463767456548079e-06, "loss": 0.3332, "step": 3366 }, { "epoch": 1.2075065742290223, "grad_norm": 0.3614792823791504, "learning_rate": 7.4619513018061045e-06, "loss": 0.3134, "step": 3367 }, { "epoch": 1.2078651685393258, "grad_norm": 0.34820112586021423, "learning_rate": 7.460134718184554e-06, "loss": 0.322, "step": 3368 }, { "epoch": 1.2082237628496295, "grad_norm": 0.33273082971572876, "learning_rate": 7.458317705999882e-06, "loss": 0.3147, "step": 3369 }, { "epoch": 1.208582357159933, "grad_norm": 0.33370763063430786, "learning_rate": 7.456500265568617e-06, "loss": 0.2861, "step": 3370 }, { "epoch": 1.2089409514702367, "grad_norm": 0.3658653199672699, "learning_rate": 7.454682397207363e-06, "loss": 0.3278, "step": 3371 }, { "epoch": 1.2092995457805402, "grad_norm": 0.38375455141067505, "learning_rate": 7.452864101232798e-06, "loss": 0.3259, "step": 3372 }, { "epoch": 1.209658140090844, "grad_norm": 0.35361430048942566, "learning_rate": 7.451045377961676e-06, "loss": 0.3127, "step": 3373 }, { "epoch": 1.2100167344011474, "grad_norm": 0.34792718291282654, "learning_rate": 7.449226227710824e-06, "loss": 0.3481, "step": 3374 }, { "epoch": 1.2103753287114511, "grad_norm": 0.3905819356441498, "learning_rate": 7.447406650797143e-06, "loss": 0.3624, "step": 3375 }, { "epoch": 1.2107339230217546, "grad_norm": 0.34159398078918457, "learning_rate": 7.445586647537608e-06, "loss": 0.3325, "step": 3376 }, { "epoch": 1.2110925173320584, "grad_norm": 0.3319905996322632, "learning_rate": 7.443766218249272e-06, "loss": 0.2948, "step": 3377 }, { "epoch": 1.2114511116423619, "grad_norm": 0.41032174229621887, "learning_rate": 7.4419453632492566e-06, "loss": 0.3618, "step": 3378 }, { "epoch": 1.2118097059526656, "grad_norm": 0.3360083997249603, "learning_rate": 7.4401240828547605e-06, "loss": 0.3141, "step": 3379 }, { "epoch": 1.212168300262969, "grad_norm": 0.3407900035381317, "learning_rate": 7.43830237738306e-06, "loss": 0.3352, "step": 3380 }, { "epoch": 1.2125268945732728, "grad_norm": 0.36873260140419006, "learning_rate": 7.436480247151498e-06, "loss": 0.3618, "step": 3381 }, { "epoch": 1.2128854888835763, "grad_norm": 0.3285355567932129, "learning_rate": 7.434657692477496e-06, "loss": 0.299, "step": 3382 }, { "epoch": 1.21324408319388, "grad_norm": 0.3431253433227539, "learning_rate": 7.4328347136785516e-06, "loss": 0.3248, "step": 3383 }, { "epoch": 1.2136026775041837, "grad_norm": 0.36524155735969543, "learning_rate": 7.43101131107223e-06, "loss": 0.2886, "step": 3384 }, { "epoch": 1.2139612718144872, "grad_norm": 0.461744099855423, "learning_rate": 7.429187484976172e-06, "loss": 0.3616, "step": 3385 }, { "epoch": 1.2143198661247907, "grad_norm": 0.3461199998855591, "learning_rate": 7.427363235708101e-06, "loss": 0.3169, "step": 3386 }, { "epoch": 1.2146784604350944, "grad_norm": 0.36429738998413086, "learning_rate": 7.425538563585799e-06, "loss": 0.3298, "step": 3387 }, { "epoch": 1.2150370547453981, "grad_norm": 0.4400508999824524, "learning_rate": 7.423713468927134e-06, "loss": 0.3327, "step": 3388 }, { "epoch": 1.2153956490557016, "grad_norm": 0.43485790491104126, "learning_rate": 7.421887952050043e-06, "loss": 0.3412, "step": 3389 }, { "epoch": 1.2157542433660053, "grad_norm": 0.3217335045337677, "learning_rate": 7.420062013272535e-06, "loss": 0.3455, "step": 3390 }, { "epoch": 1.2161128376763088, "grad_norm": 0.3803882896900177, "learning_rate": 7.418235652912694e-06, "loss": 0.3205, "step": 3391 }, { "epoch": 1.2164714319866126, "grad_norm": 0.38772109150886536, "learning_rate": 7.41640887128868e-06, "loss": 0.2786, "step": 3392 }, { "epoch": 1.216830026296916, "grad_norm": 0.4726371467113495, "learning_rate": 7.414581668718724e-06, "loss": 0.3467, "step": 3393 }, { "epoch": 1.2171886206072198, "grad_norm": 0.35520991683006287, "learning_rate": 7.4127540455211265e-06, "loss": 0.347, "step": 3394 }, { "epoch": 1.2175472149175233, "grad_norm": 0.3809390068054199, "learning_rate": 7.410926002014269e-06, "loss": 0.2975, "step": 3395 }, { "epoch": 1.217905809227827, "grad_norm": 0.41924965381622314, "learning_rate": 7.409097538516601e-06, "loss": 0.3288, "step": 3396 }, { "epoch": 1.2182644035381305, "grad_norm": 0.43525370955467224, "learning_rate": 7.407268655346646e-06, "loss": 0.3571, "step": 3397 }, { "epoch": 1.2186229978484342, "grad_norm": 0.3490234315395355, "learning_rate": 7.4054393528230025e-06, "loss": 0.3343, "step": 3398 }, { "epoch": 1.2189815921587377, "grad_norm": 0.4056638777256012, "learning_rate": 7.403609631264342e-06, "loss": 0.3215, "step": 3399 }, { "epoch": 1.2193401864690414, "grad_norm": 0.40134114027023315, "learning_rate": 7.401779490989404e-06, "loss": 0.3267, "step": 3400 }, { "epoch": 1.219698780779345, "grad_norm": 0.40713736414909363, "learning_rate": 7.399948932317005e-06, "loss": 0.33, "step": 3401 }, { "epoch": 1.2200573750896486, "grad_norm": 0.3905607759952545, "learning_rate": 7.398117955566035e-06, "loss": 0.3646, "step": 3402 }, { "epoch": 1.2204159693999521, "grad_norm": 0.35916486382484436, "learning_rate": 7.396286561055459e-06, "loss": 0.2929, "step": 3403 }, { "epoch": 1.2207745637102558, "grad_norm": 0.396329402923584, "learning_rate": 7.394454749104307e-06, "loss": 0.3362, "step": 3404 }, { "epoch": 1.2211331580205593, "grad_norm": 0.3572594225406647, "learning_rate": 7.392622520031689e-06, "loss": 0.3398, "step": 3405 }, { "epoch": 1.221491752330863, "grad_norm": 0.3413158655166626, "learning_rate": 7.390789874156784e-06, "loss": 0.3045, "step": 3406 }, { "epoch": 1.2218503466411665, "grad_norm": 0.3568262755870819, "learning_rate": 7.3889568117988444e-06, "loss": 0.3287, "step": 3407 }, { "epoch": 1.2222089409514703, "grad_norm": 0.33477917313575745, "learning_rate": 7.3871233332771956e-06, "loss": 0.3235, "step": 3408 }, { "epoch": 1.2225675352617738, "grad_norm": 0.33742040395736694, "learning_rate": 7.385289438911235e-06, "loss": 0.3336, "step": 3409 }, { "epoch": 1.2229261295720775, "grad_norm": 0.35567936301231384, "learning_rate": 7.383455129020434e-06, "loss": 0.3442, "step": 3410 }, { "epoch": 1.2232847238823812, "grad_norm": 0.3342415690422058, "learning_rate": 7.381620403924333e-06, "loss": 0.3044, "step": 3411 }, { "epoch": 1.2236433181926847, "grad_norm": 0.3766205310821533, "learning_rate": 7.379785263942549e-06, "loss": 0.3418, "step": 3412 }, { "epoch": 1.2240019125029882, "grad_norm": 0.299453467130661, "learning_rate": 7.377949709394767e-06, "loss": 0.2719, "step": 3413 }, { "epoch": 1.224360506813292, "grad_norm": 0.3710145056247711, "learning_rate": 7.376113740600749e-06, "loss": 0.3677, "step": 3414 }, { "epoch": 1.2247191011235956, "grad_norm": 0.3465885519981384, "learning_rate": 7.374277357880323e-06, "loss": 0.3526, "step": 3415 }, { "epoch": 1.225077695433899, "grad_norm": 0.3789970278739929, "learning_rate": 7.372440561553395e-06, "loss": 0.3338, "step": 3416 }, { "epoch": 1.2254362897442028, "grad_norm": 0.3766601085662842, "learning_rate": 7.3706033519399425e-06, "loss": 0.3327, "step": 3417 }, { "epoch": 1.2257948840545063, "grad_norm": 0.3872912526130676, "learning_rate": 7.368765729360008e-06, "loss": 0.3407, "step": 3418 }, { "epoch": 1.22615347836481, "grad_norm": 0.3507443368434906, "learning_rate": 7.366927694133716e-06, "loss": 0.3166, "step": 3419 }, { "epoch": 1.2265120726751135, "grad_norm": 0.4338586926460266, "learning_rate": 7.365089246581253e-06, "loss": 0.368, "step": 3420 }, { "epoch": 1.2268706669854172, "grad_norm": 0.3725344240665436, "learning_rate": 7.363250387022887e-06, "loss": 0.3002, "step": 3421 }, { "epoch": 1.2272292612957207, "grad_norm": 0.35913586616516113, "learning_rate": 7.36141111577895e-06, "loss": 0.3263, "step": 3422 }, { "epoch": 1.2275878556060245, "grad_norm": 0.3491273820400238, "learning_rate": 7.359571433169851e-06, "loss": 0.3246, "step": 3423 }, { "epoch": 1.227946449916328, "grad_norm": 0.3865163326263428, "learning_rate": 7.357731339516067e-06, "loss": 0.3067, "step": 3424 }, { "epoch": 1.2283050442266317, "grad_norm": 0.36414635181427, "learning_rate": 7.355890835138146e-06, "loss": 0.3233, "step": 3425 }, { "epoch": 1.2286636385369352, "grad_norm": 0.3244519829750061, "learning_rate": 7.354049920356715e-06, "loss": 0.3258, "step": 3426 }, { "epoch": 1.2290222328472389, "grad_norm": 0.36726030707359314, "learning_rate": 7.352208595492463e-06, "loss": 0.3345, "step": 3427 }, { "epoch": 1.2293808271575424, "grad_norm": 0.37833598256111145, "learning_rate": 7.350366860866156e-06, "loss": 0.329, "step": 3428 }, { "epoch": 1.229739421467846, "grad_norm": 0.3248935043811798, "learning_rate": 7.348524716798629e-06, "loss": 0.2982, "step": 3429 }, { "epoch": 1.2300980157781496, "grad_norm": 0.3451377749443054, "learning_rate": 7.346682163610789e-06, "loss": 0.3036, "step": 3430 }, { "epoch": 1.2304566100884533, "grad_norm": 0.36541929841041565, "learning_rate": 7.344839201623615e-06, "loss": 0.3376, "step": 3431 }, { "epoch": 1.2308152043987568, "grad_norm": 0.33748385310173035, "learning_rate": 7.3429958311581585e-06, "loss": 0.2828, "step": 3432 }, { "epoch": 1.2311737987090605, "grad_norm": 0.3673052489757538, "learning_rate": 7.341152052535538e-06, "loss": 0.3531, "step": 3433 }, { "epoch": 1.231532393019364, "grad_norm": 0.3427518606185913, "learning_rate": 7.339307866076946e-06, "loss": 0.3113, "step": 3434 }, { "epoch": 1.2318909873296677, "grad_norm": 0.34791556000709534, "learning_rate": 7.337463272103646e-06, "loss": 0.3101, "step": 3435 }, { "epoch": 1.2322495816399712, "grad_norm": 0.39159953594207764, "learning_rate": 7.33561827093697e-06, "loss": 0.3253, "step": 3436 }, { "epoch": 1.232608175950275, "grad_norm": 0.3481651246547699, "learning_rate": 7.333772862898328e-06, "loss": 0.3199, "step": 3437 }, { "epoch": 1.2329667702605784, "grad_norm": 0.34016528725624084, "learning_rate": 7.331927048309189e-06, "loss": 0.3316, "step": 3438 }, { "epoch": 1.2333253645708822, "grad_norm": 0.37207040190696716, "learning_rate": 7.330080827491105e-06, "loss": 0.3336, "step": 3439 }, { "epoch": 1.2336839588811856, "grad_norm": 0.3415721356868744, "learning_rate": 7.3282342007656915e-06, "loss": 0.2749, "step": 3440 }, { "epoch": 1.2340425531914894, "grad_norm": 0.34765952825546265, "learning_rate": 7.326387168454636e-06, "loss": 0.3183, "step": 3441 }, { "epoch": 1.234401147501793, "grad_norm": 0.3972608149051666, "learning_rate": 7.3245397308796995e-06, "loss": 0.3318, "step": 3442 }, { "epoch": 1.2347597418120966, "grad_norm": 0.368572473526001, "learning_rate": 7.322691888362708e-06, "loss": 0.3173, "step": 3443 }, { "epoch": 1.2351183361224, "grad_norm": 0.3244161605834961, "learning_rate": 7.320843641225564e-06, "loss": 0.3056, "step": 3444 }, { "epoch": 1.2354769304327038, "grad_norm": 0.3544963002204895, "learning_rate": 7.318994989790238e-06, "loss": 0.3409, "step": 3445 }, { "epoch": 1.2358355247430075, "grad_norm": 0.39275917410850525, "learning_rate": 7.31714593437877e-06, "loss": 0.2845, "step": 3446 }, { "epoch": 1.236194119053311, "grad_norm": 0.3499184846878052, "learning_rate": 7.315296475313269e-06, "loss": 0.3055, "step": 3447 }, { "epoch": 1.2365527133636147, "grad_norm": 0.3643210232257843, "learning_rate": 7.313446612915921e-06, "loss": 0.3434, "step": 3448 }, { "epoch": 1.2369113076739182, "grad_norm": 0.3727255165576935, "learning_rate": 7.311596347508974e-06, "loss": 0.3088, "step": 3449 }, { "epoch": 1.237269901984222, "grad_norm": 0.3478955328464508, "learning_rate": 7.309745679414751e-06, "loss": 0.3218, "step": 3450 }, { "epoch": 1.2376284962945254, "grad_norm": 0.34177955985069275, "learning_rate": 7.307894608955647e-06, "loss": 0.2921, "step": 3451 }, { "epoch": 1.2379870906048291, "grad_norm": 0.35913363099098206, "learning_rate": 7.30604313645412e-06, "loss": 0.318, "step": 3452 }, { "epoch": 1.2383456849151326, "grad_norm": 0.3662901818752289, "learning_rate": 7.304191262232705e-06, "loss": 0.3248, "step": 3453 }, { "epoch": 1.2387042792254364, "grad_norm": 0.3582971692085266, "learning_rate": 7.3023389866140025e-06, "loss": 0.3477, "step": 3454 }, { "epoch": 1.2390628735357399, "grad_norm": 0.34695762395858765, "learning_rate": 7.300486309920686e-06, "loss": 0.3066, "step": 3455 }, { "epoch": 1.2394214678460436, "grad_norm": 0.3510013520717621, "learning_rate": 7.298633232475497e-06, "loss": 0.3188, "step": 3456 }, { "epoch": 1.239780062156347, "grad_norm": 0.3483639061450958, "learning_rate": 7.296779754601249e-06, "loss": 0.315, "step": 3457 }, { "epoch": 1.2401386564666508, "grad_norm": 0.3670503795146942, "learning_rate": 7.29492587662082e-06, "loss": 0.3142, "step": 3458 }, { "epoch": 1.2404972507769543, "grad_norm": 0.3445195257663727, "learning_rate": 7.2930715988571645e-06, "loss": 0.3076, "step": 3459 }, { "epoch": 1.240855845087258, "grad_norm": 0.34750664234161377, "learning_rate": 7.291216921633301e-06, "loss": 0.3676, "step": 3460 }, { "epoch": 1.2412144393975615, "grad_norm": 0.3126395046710968, "learning_rate": 7.2893618452723215e-06, "loss": 0.2895, "step": 3461 }, { "epoch": 1.2415730337078652, "grad_norm": 0.40591564774513245, "learning_rate": 7.287506370097387e-06, "loss": 0.3181, "step": 3462 }, { "epoch": 1.2419316280181687, "grad_norm": 0.32441866397857666, "learning_rate": 7.285650496431726e-06, "loss": 0.3231, "step": 3463 }, { "epoch": 1.2422902223284724, "grad_norm": 0.3533647358417511, "learning_rate": 7.283794224598637e-06, "loss": 0.3334, "step": 3464 }, { "epoch": 1.242648816638776, "grad_norm": 0.33578863739967346, "learning_rate": 7.28193755492149e-06, "loss": 0.3281, "step": 3465 }, { "epoch": 1.2430074109490796, "grad_norm": 0.32767271995544434, "learning_rate": 7.280080487723719e-06, "loss": 0.323, "step": 3466 }, { "epoch": 1.2433660052593831, "grad_norm": 0.3500251770019531, "learning_rate": 7.2782230233288345e-06, "loss": 0.3638, "step": 3467 }, { "epoch": 1.2437245995696868, "grad_norm": 0.34365615248680115, "learning_rate": 7.276365162060411e-06, "loss": 0.3068, "step": 3468 }, { "epoch": 1.2440831938799906, "grad_norm": 0.3710424602031708, "learning_rate": 7.274506904242096e-06, "loss": 0.3357, "step": 3469 }, { "epoch": 1.244441788190294, "grad_norm": 0.40083077549934387, "learning_rate": 7.2726482501976e-06, "loss": 0.3283, "step": 3470 }, { "epoch": 1.2448003825005975, "grad_norm": 0.3511347770690918, "learning_rate": 7.27078920025071e-06, "loss": 0.3006, "step": 3471 }, { "epoch": 1.2451589768109013, "grad_norm": 0.3602675795555115, "learning_rate": 7.268929754725274e-06, "loss": 0.3474, "step": 3472 }, { "epoch": 1.245517571121205, "grad_norm": 0.3884657621383667, "learning_rate": 7.267069913945216e-06, "loss": 0.3514, "step": 3473 }, { "epoch": 1.2458761654315085, "grad_norm": 0.31391263008117676, "learning_rate": 7.265209678234527e-06, "loss": 0.293, "step": 3474 }, { "epoch": 1.2462347597418122, "grad_norm": 0.35179632902145386, "learning_rate": 7.263349047917264e-06, "loss": 0.3421, "step": 3475 }, { "epoch": 1.2465933540521157, "grad_norm": 0.36979007720947266, "learning_rate": 7.261488023317555e-06, "loss": 0.3338, "step": 3476 }, { "epoch": 1.2469519483624194, "grad_norm": 0.33658865094184875, "learning_rate": 7.259626604759597e-06, "loss": 0.3325, "step": 3477 }, { "epoch": 1.247310542672723, "grad_norm": 0.36049509048461914, "learning_rate": 7.257764792567654e-06, "loss": 0.358, "step": 3478 }, { "epoch": 1.2476691369830266, "grad_norm": 0.29036009311676025, "learning_rate": 7.2559025870660605e-06, "loss": 0.2981, "step": 3479 }, { "epoch": 1.2480277312933301, "grad_norm": 0.3550233542919159, "learning_rate": 7.254039988579218e-06, "loss": 0.3553, "step": 3480 }, { "epoch": 1.2483863256036338, "grad_norm": 0.34106481075286865, "learning_rate": 7.252176997431595e-06, "loss": 0.3122, "step": 3481 }, { "epoch": 1.2487449199139373, "grad_norm": 0.3467804491519928, "learning_rate": 7.250313613947733e-06, "loss": 0.3131, "step": 3482 }, { "epoch": 1.249103514224241, "grad_norm": 0.38537880778312683, "learning_rate": 7.24844983845224e-06, "loss": 0.3301, "step": 3483 }, { "epoch": 1.2494621085345445, "grad_norm": 0.37186068296432495, "learning_rate": 7.246585671269787e-06, "loss": 0.3204, "step": 3484 }, { "epoch": 1.2498207028448483, "grad_norm": 0.3615798354148865, "learning_rate": 7.244721112725122e-06, "loss": 0.307, "step": 3485 }, { "epoch": 1.2501792971551517, "grad_norm": 0.36999601125717163, "learning_rate": 7.242856163143056e-06, "loss": 0.353, "step": 3486 }, { "epoch": 1.2505378914654555, "grad_norm": 0.3448038101196289, "learning_rate": 7.240990822848467e-06, "loss": 0.3299, "step": 3487 }, { "epoch": 1.250896485775759, "grad_norm": 0.3832123577594757, "learning_rate": 7.239125092166306e-06, "loss": 0.3176, "step": 3488 }, { "epoch": 1.2512550800860627, "grad_norm": 0.35043492913246155, "learning_rate": 7.237258971421587e-06, "loss": 0.3214, "step": 3489 }, { "epoch": 1.2516136743963662, "grad_norm": 0.3095817565917969, "learning_rate": 7.235392460939393e-06, "loss": 0.3144, "step": 3490 }, { "epoch": 1.2519722687066699, "grad_norm": 0.34570378065109253, "learning_rate": 7.233525561044881e-06, "loss": 0.332, "step": 3491 }, { "epoch": 1.2523308630169734, "grad_norm": 0.34694716334342957, "learning_rate": 7.231658272063264e-06, "loss": 0.3291, "step": 3492 }, { "epoch": 1.252689457327277, "grad_norm": 0.36518749594688416, "learning_rate": 7.229790594319836e-06, "loss": 0.3277, "step": 3493 }, { "epoch": 1.2530480516375806, "grad_norm": 0.30725881457328796, "learning_rate": 7.227922528139948e-06, "loss": 0.315, "step": 3494 }, { "epoch": 1.2534066459478843, "grad_norm": 0.36477968096733093, "learning_rate": 7.2260540738490225e-06, "loss": 0.3784, "step": 3495 }, { "epoch": 1.253765240258188, "grad_norm": 0.28824007511138916, "learning_rate": 7.224185231772554e-06, "loss": 0.2917, "step": 3496 }, { "epoch": 1.2541238345684915, "grad_norm": 0.35962432622909546, "learning_rate": 7.222316002236097e-06, "loss": 0.336, "step": 3497 }, { "epoch": 1.254482428878795, "grad_norm": 0.3733093738555908, "learning_rate": 7.220446385565279e-06, "loss": 0.3279, "step": 3498 }, { "epoch": 1.2548410231890987, "grad_norm": 0.3501530587673187, "learning_rate": 7.218576382085792e-06, "loss": 0.3198, "step": 3499 }, { "epoch": 1.2551996174994025, "grad_norm": 0.38513338565826416, "learning_rate": 7.216705992123397e-06, "loss": 0.3292, "step": 3500 }, { "epoch": 1.255558211809706, "grad_norm": 0.35514315962791443, "learning_rate": 7.2148352160039215e-06, "loss": 0.331, "step": 3501 }, { "epoch": 1.2559168061200094, "grad_norm": 0.3887440860271454, "learning_rate": 7.212964054053259e-06, "loss": 0.33, "step": 3502 }, { "epoch": 1.2562754004303132, "grad_norm": 0.3438399136066437, "learning_rate": 7.211092506597377e-06, "loss": 0.3266, "step": 3503 }, { "epoch": 1.2566339947406169, "grad_norm": 0.3730560839176178, "learning_rate": 7.209220573962299e-06, "loss": 0.3241, "step": 3504 }, { "epoch": 1.2569925890509204, "grad_norm": 0.3350818455219269, "learning_rate": 7.2073482564741235e-06, "loss": 0.326, "step": 3505 }, { "epoch": 1.2573511833612239, "grad_norm": 0.36591073870658875, "learning_rate": 7.205475554459016e-06, "loss": 0.368, "step": 3506 }, { "epoch": 1.2577097776715276, "grad_norm": 0.38469433784484863, "learning_rate": 7.2036024682432035e-06, "loss": 0.2995, "step": 3507 }, { "epoch": 1.2580683719818313, "grad_norm": 0.3956853449344635, "learning_rate": 7.2017289981529855e-06, "loss": 0.3499, "step": 3508 }, { "epoch": 1.2584269662921348, "grad_norm": 0.3161185085773468, "learning_rate": 7.199855144514727e-06, "loss": 0.3019, "step": 3509 }, { "epoch": 1.2587855606024385, "grad_norm": 0.38032394647598267, "learning_rate": 7.197980907654857e-06, "loss": 0.3207, "step": 3510 }, { "epoch": 1.259144154912742, "grad_norm": 0.3401540219783783, "learning_rate": 7.196106287899875e-06, "loss": 0.3384, "step": 3511 }, { "epoch": 1.2595027492230457, "grad_norm": 0.3110048770904541, "learning_rate": 7.194231285576345e-06, "loss": 0.2865, "step": 3512 }, { "epoch": 1.2598613435333492, "grad_norm": 0.33506935834884644, "learning_rate": 7.192355901010899e-06, "loss": 0.3373, "step": 3513 }, { "epoch": 1.260219937843653, "grad_norm": 0.36045652627944946, "learning_rate": 7.190480134530234e-06, "loss": 0.3012, "step": 3514 }, { "epoch": 1.2605785321539564, "grad_norm": 0.35623180866241455, "learning_rate": 7.1886039864611145e-06, "loss": 0.3292, "step": 3515 }, { "epoch": 1.2609371264642601, "grad_norm": 0.31110668182373047, "learning_rate": 7.186727457130371e-06, "loss": 0.2967, "step": 3516 }, { "epoch": 1.2612957207745636, "grad_norm": 0.3606438636779785, "learning_rate": 7.184850546864903e-06, "loss": 0.3567, "step": 3517 }, { "epoch": 1.2616543150848674, "grad_norm": 0.4031972587108612, "learning_rate": 7.182973255991671e-06, "loss": 0.334, "step": 3518 }, { "epoch": 1.2620129093951709, "grad_norm": 0.37056905031204224, "learning_rate": 7.181095584837705e-06, "loss": 0.3106, "step": 3519 }, { "epoch": 1.2623715037054746, "grad_norm": 0.33501601219177246, "learning_rate": 7.179217533730104e-06, "loss": 0.3224, "step": 3520 }, { "epoch": 1.262730098015778, "grad_norm": 0.3610056936740875, "learning_rate": 7.177339102996028e-06, "loss": 0.3344, "step": 3521 }, { "epoch": 1.2630886923260818, "grad_norm": 0.3408975601196289, "learning_rate": 7.175460292962707e-06, "loss": 0.3135, "step": 3522 }, { "epoch": 1.2634472866363855, "grad_norm": 0.33700481057167053, "learning_rate": 7.173581103957434e-06, "loss": 0.3464, "step": 3523 }, { "epoch": 1.263805880946689, "grad_norm": 0.3306880593299866, "learning_rate": 7.17170153630757e-06, "loss": 0.3157, "step": 3524 }, { "epoch": 1.2641644752569925, "grad_norm": 0.344623327255249, "learning_rate": 7.169821590340542e-06, "loss": 0.3044, "step": 3525 }, { "epoch": 1.2645230695672962, "grad_norm": 0.3672276735305786, "learning_rate": 7.167941266383843e-06, "loss": 0.3343, "step": 3526 }, { "epoch": 1.2648816638776, "grad_norm": 0.35132482647895813, "learning_rate": 7.1660605647650295e-06, "loss": 0.3181, "step": 3527 }, { "epoch": 1.2652402581879034, "grad_norm": 0.3934905230998993, "learning_rate": 7.164179485811728e-06, "loss": 0.3423, "step": 3528 }, { "epoch": 1.265598852498207, "grad_norm": 0.3443652093410492, "learning_rate": 7.162298029851627e-06, "loss": 0.2951, "step": 3529 }, { "epoch": 1.2659574468085106, "grad_norm": 0.33601847290992737, "learning_rate": 7.160416197212479e-06, "loss": 0.2879, "step": 3530 }, { "epoch": 1.2663160411188144, "grad_norm": 0.3561622202396393, "learning_rate": 7.158533988222111e-06, "loss": 0.3689, "step": 3531 }, { "epoch": 1.2666746354291178, "grad_norm": 0.31428030133247375, "learning_rate": 7.1566514032084044e-06, "loss": 0.3003, "step": 3532 }, { "epoch": 1.2670332297394213, "grad_norm": 0.3630898594856262, "learning_rate": 7.154768442499316e-06, "loss": 0.3093, "step": 3533 }, { "epoch": 1.267391824049725, "grad_norm": 0.42046916484832764, "learning_rate": 7.15288510642286e-06, "loss": 0.3529, "step": 3534 }, { "epoch": 1.2677504183600288, "grad_norm": 0.30027273297309875, "learning_rate": 7.15100139530712e-06, "loss": 0.2815, "step": 3535 }, { "epoch": 1.2681090126703323, "grad_norm": 0.3872110843658447, "learning_rate": 7.1491173094802455e-06, "loss": 0.3626, "step": 3536 }, { "epoch": 1.268467606980636, "grad_norm": 0.32678598165512085, "learning_rate": 7.147232849270449e-06, "loss": 0.2967, "step": 3537 }, { "epoch": 1.2688262012909395, "grad_norm": 0.4012565016746521, "learning_rate": 7.145348015006009e-06, "loss": 0.3131, "step": 3538 }, { "epoch": 1.2691847956012432, "grad_norm": 0.38361817598342896, "learning_rate": 7.143462807015271e-06, "loss": 0.3574, "step": 3539 }, { "epoch": 1.2695433899115467, "grad_norm": 0.3161303400993347, "learning_rate": 7.1415772256266445e-06, "loss": 0.3158, "step": 3540 }, { "epoch": 1.2699019842218504, "grad_norm": 0.37607479095458984, "learning_rate": 7.139691271168601e-06, "loss": 0.3599, "step": 3541 }, { "epoch": 1.270260578532154, "grad_norm": 0.31060919165611267, "learning_rate": 7.13780494396968e-06, "loss": 0.3158, "step": 3542 }, { "epoch": 1.2706191728424576, "grad_norm": 0.3613177537918091, "learning_rate": 7.135918244358489e-06, "loss": 0.3237, "step": 3543 }, { "epoch": 1.2709777671527611, "grad_norm": 0.3637107014656067, "learning_rate": 7.134031172663693e-06, "loss": 0.3132, "step": 3544 }, { "epoch": 1.2713363614630648, "grad_norm": 0.3435966372489929, "learning_rate": 7.1321437292140295e-06, "loss": 0.329, "step": 3545 }, { "epoch": 1.2716949557733683, "grad_norm": 0.32413768768310547, "learning_rate": 7.130255914338293e-06, "loss": 0.3133, "step": 3546 }, { "epoch": 1.272053550083672, "grad_norm": 0.4468753933906555, "learning_rate": 7.12836772836535e-06, "loss": 0.3252, "step": 3547 }, { "epoch": 1.2724121443939755, "grad_norm": 0.37875673174858093, "learning_rate": 7.1264791716241254e-06, "loss": 0.3356, "step": 3548 }, { "epoch": 1.2727707387042793, "grad_norm": 0.3109451234340668, "learning_rate": 7.124590244443614e-06, "loss": 0.3, "step": 3549 }, { "epoch": 1.2731293330145828, "grad_norm": 0.35661277174949646, "learning_rate": 7.122700947152872e-06, "loss": 0.3428, "step": 3550 }, { "epoch": 1.2734879273248865, "grad_norm": 0.35323628783226013, "learning_rate": 7.120811280081021e-06, "loss": 0.3191, "step": 3551 }, { "epoch": 1.27384652163519, "grad_norm": 0.3986012637615204, "learning_rate": 7.118921243557247e-06, "loss": 0.3524, "step": 3552 }, { "epoch": 1.2742051159454937, "grad_norm": 0.3140433430671692, "learning_rate": 7.1170308379108e-06, "loss": 0.309, "step": 3553 }, { "epoch": 1.2745637102557974, "grad_norm": 0.3899165689945221, "learning_rate": 7.1151400634709935e-06, "loss": 0.3601, "step": 3554 }, { "epoch": 1.274922304566101, "grad_norm": 0.35167717933654785, "learning_rate": 7.1132489205672075e-06, "loss": 0.3399, "step": 3555 }, { "epoch": 1.2752808988764044, "grad_norm": 0.3378182649612427, "learning_rate": 7.111357409528886e-06, "loss": 0.3044, "step": 3556 }, { "epoch": 1.275639493186708, "grad_norm": 0.3385235667228699, "learning_rate": 7.1094655306855355e-06, "loss": 0.3064, "step": 3557 }, { "epoch": 1.2759980874970118, "grad_norm": 0.3602060079574585, "learning_rate": 7.107573284366724e-06, "loss": 0.3407, "step": 3558 }, { "epoch": 1.2763566818073153, "grad_norm": 0.3653680980205536, "learning_rate": 7.105680670902092e-06, "loss": 0.3638, "step": 3559 }, { "epoch": 1.2767152761176188, "grad_norm": 0.3231348693370819, "learning_rate": 7.103787690621334e-06, "loss": 0.3007, "step": 3560 }, { "epoch": 1.2770738704279225, "grad_norm": 0.36467263102531433, "learning_rate": 7.101894343854217e-06, "loss": 0.3245, "step": 3561 }, { "epoch": 1.2774324647382262, "grad_norm": 0.370537668466568, "learning_rate": 7.1000006309305645e-06, "loss": 0.3076, "step": 3562 }, { "epoch": 1.2777910590485297, "grad_norm": 0.3442441523075104, "learning_rate": 7.09810655218027e-06, "loss": 0.3276, "step": 3563 }, { "epoch": 1.2781496533588332, "grad_norm": 0.32598957419395447, "learning_rate": 7.0962121079332845e-06, "loss": 0.3552, "step": 3564 }, { "epoch": 1.278508247669137, "grad_norm": 0.3629119098186493, "learning_rate": 7.09431729851963e-06, "loss": 0.3481, "step": 3565 }, { "epoch": 1.2788668419794407, "grad_norm": 0.31330356001853943, "learning_rate": 7.092422124269385e-06, "loss": 0.2963, "step": 3566 }, { "epoch": 1.2792254362897442, "grad_norm": 0.3612512946128845, "learning_rate": 7.090526585512696e-06, "loss": 0.3415, "step": 3567 }, { "epoch": 1.2795840306000479, "grad_norm": 0.31099405884742737, "learning_rate": 7.088630682579772e-06, "loss": 0.3146, "step": 3568 }, { "epoch": 1.2799426249103514, "grad_norm": 0.34080275893211365, "learning_rate": 7.086734415800887e-06, "loss": 0.3459, "step": 3569 }, { "epoch": 1.280301219220655, "grad_norm": 0.32410603761672974, "learning_rate": 7.084837785506372e-06, "loss": 0.3153, "step": 3570 }, { "epoch": 1.2806598135309586, "grad_norm": 0.350014328956604, "learning_rate": 7.082940792026629e-06, "loss": 0.3601, "step": 3571 }, { "epoch": 1.2810184078412623, "grad_norm": 0.34480851888656616, "learning_rate": 7.081043435692119e-06, "loss": 0.2949, "step": 3572 }, { "epoch": 1.2813770021515658, "grad_norm": 0.3291606903076172, "learning_rate": 7.07914571683337e-06, "loss": 0.2923, "step": 3573 }, { "epoch": 1.2817355964618695, "grad_norm": 0.3483106791973114, "learning_rate": 7.0772476357809685e-06, "loss": 0.3443, "step": 3574 }, { "epoch": 1.282094190772173, "grad_norm": 0.33473092317581177, "learning_rate": 7.075349192865564e-06, "loss": 0.3121, "step": 3575 }, { "epoch": 1.2824527850824767, "grad_norm": 0.3781321048736572, "learning_rate": 7.073450388417875e-06, "loss": 0.3503, "step": 3576 }, { "epoch": 1.2828113793927802, "grad_norm": 0.3480997681617737, "learning_rate": 7.071551222768677e-06, "loss": 0.3287, "step": 3577 }, { "epoch": 1.283169973703084, "grad_norm": 0.34359216690063477, "learning_rate": 7.0696516962488095e-06, "loss": 0.3512, "step": 3578 }, { "epoch": 1.2835285680133874, "grad_norm": 0.3367169201374054, "learning_rate": 7.067751809189179e-06, "loss": 0.2927, "step": 3579 }, { "epoch": 1.2838871623236912, "grad_norm": 0.39557623863220215, "learning_rate": 7.065851561920751e-06, "loss": 0.3243, "step": 3580 }, { "epoch": 1.2842457566339949, "grad_norm": 0.37285536527633667, "learning_rate": 7.0639509547745526e-06, "loss": 0.3389, "step": 3581 }, { "epoch": 1.2846043509442984, "grad_norm": 0.3616439998149872, "learning_rate": 7.062049988081678e-06, "loss": 0.3184, "step": 3582 }, { "epoch": 1.2849629452546019, "grad_norm": 0.32875484228134155, "learning_rate": 7.0601486621732785e-06, "loss": 0.3106, "step": 3583 }, { "epoch": 1.2853215395649056, "grad_norm": 0.3595896065235138, "learning_rate": 7.058246977380572e-06, "loss": 0.3187, "step": 3584 }, { "epoch": 1.2856801338752093, "grad_norm": 0.3902831971645355, "learning_rate": 7.05634493403484e-06, "loss": 0.3423, "step": 3585 }, { "epoch": 1.2860387281855128, "grad_norm": 0.35002776980400085, "learning_rate": 7.054442532467423e-06, "loss": 0.3311, "step": 3586 }, { "epoch": 1.2863973224958163, "grad_norm": 0.34533390402793884, "learning_rate": 7.052539773009723e-06, "loss": 0.3505, "step": 3587 }, { "epoch": 1.28675591680612, "grad_norm": 0.3535301685333252, "learning_rate": 7.05063665599321e-06, "loss": 0.3176, "step": 3588 }, { "epoch": 1.2871145111164237, "grad_norm": 0.3757075369358063, "learning_rate": 7.048733181749412e-06, "loss": 0.3104, "step": 3589 }, { "epoch": 1.2874731054267272, "grad_norm": 0.3479163348674774, "learning_rate": 7.04682935060992e-06, "loss": 0.3317, "step": 3590 }, { "epoch": 1.2878316997370307, "grad_norm": 0.3246660828590393, "learning_rate": 7.044925162906387e-06, "loss": 0.325, "step": 3591 }, { "epoch": 1.2881902940473344, "grad_norm": 0.3274662494659424, "learning_rate": 7.043020618970526e-06, "loss": 0.3068, "step": 3592 }, { "epoch": 1.2885488883576381, "grad_norm": 0.3515101671218872, "learning_rate": 7.04111571913412e-06, "loss": 0.3411, "step": 3593 }, { "epoch": 1.2889074826679416, "grad_norm": 0.37256866693496704, "learning_rate": 7.039210463729005e-06, "loss": 0.3266, "step": 3594 }, { "epoch": 1.2892660769782454, "grad_norm": 0.35757210850715637, "learning_rate": 7.037304853087083e-06, "loss": 0.3014, "step": 3595 }, { "epoch": 1.2896246712885489, "grad_norm": 0.3740459978580475, "learning_rate": 7.035398887540316e-06, "loss": 0.3365, "step": 3596 }, { "epoch": 1.2899832655988526, "grad_norm": 0.3434391915798187, "learning_rate": 7.033492567420731e-06, "loss": 0.3097, "step": 3597 }, { "epoch": 1.290341859909156, "grad_norm": 0.35417553782463074, "learning_rate": 7.031585893060413e-06, "loss": 0.3433, "step": 3598 }, { "epoch": 1.2907004542194598, "grad_norm": 0.3795361816883087, "learning_rate": 7.0296788647915135e-06, "loss": 0.3587, "step": 3599 }, { "epoch": 1.2910590485297633, "grad_norm": 0.3573233187198639, "learning_rate": 7.0277714829462395e-06, "loss": 0.3287, "step": 3600 }, { "epoch": 1.291417642840067, "grad_norm": 0.34660157561302185, "learning_rate": 7.025863747856865e-06, "loss": 0.3012, "step": 3601 }, { "epoch": 1.2917762371503705, "grad_norm": 0.3552277982234955, "learning_rate": 7.023955659855724e-06, "loss": 0.3519, "step": 3602 }, { "epoch": 1.2921348314606742, "grad_norm": 0.35070326924324036, "learning_rate": 7.02204721927521e-06, "loss": 0.3214, "step": 3603 }, { "epoch": 1.2924934257709777, "grad_norm": 0.3510231077671051, "learning_rate": 7.020138426447778e-06, "loss": 0.3067, "step": 3604 }, { "epoch": 1.2928520200812814, "grad_norm": 0.34107208251953125, "learning_rate": 7.018229281705948e-06, "loss": 0.3217, "step": 3605 }, { "epoch": 1.293210614391585, "grad_norm": 0.3175535500049591, "learning_rate": 7.0163197853822975e-06, "loss": 0.319, "step": 3606 }, { "epoch": 1.2935692087018886, "grad_norm": 0.3540089428424835, "learning_rate": 7.014409937809468e-06, "loss": 0.3297, "step": 3607 }, { "epoch": 1.2939278030121921, "grad_norm": 0.37753942608833313, "learning_rate": 7.01249973932016e-06, "loss": 0.3452, "step": 3608 }, { "epoch": 1.2942863973224958, "grad_norm": 0.3316223919391632, "learning_rate": 7.010589190247135e-06, "loss": 0.2939, "step": 3609 }, { "epoch": 1.2946449916327993, "grad_norm": 0.38747355341911316, "learning_rate": 7.0086782909232195e-06, "loss": 0.3329, "step": 3610 }, { "epoch": 1.295003585943103, "grad_norm": 0.35363730788230896, "learning_rate": 7.006767041681296e-06, "loss": 0.316, "step": 3611 }, { "epoch": 1.2953621802534068, "grad_norm": 0.31421464681625366, "learning_rate": 7.00485544285431e-06, "loss": 0.3226, "step": 3612 }, { "epoch": 1.2957207745637103, "grad_norm": 0.36692848801612854, "learning_rate": 7.002943494775267e-06, "loss": 0.3607, "step": 3613 }, { "epoch": 1.2960793688740138, "grad_norm": 0.3258468806743622, "learning_rate": 7.001031197777238e-06, "loss": 0.3127, "step": 3614 }, { "epoch": 1.2964379631843175, "grad_norm": 0.31226858496665955, "learning_rate": 6.999118552193348e-06, "loss": 0.3053, "step": 3615 }, { "epoch": 1.2967965574946212, "grad_norm": 0.3951227366924286, "learning_rate": 6.997205558356787e-06, "loss": 0.3259, "step": 3616 }, { "epoch": 1.2971551518049247, "grad_norm": 0.3511606752872467, "learning_rate": 6.995292216600804e-06, "loss": 0.3592, "step": 3617 }, { "epoch": 1.2975137461152282, "grad_norm": 0.33893781900405884, "learning_rate": 6.993378527258707e-06, "loss": 0.2888, "step": 3618 }, { "epoch": 1.297872340425532, "grad_norm": 0.34769120812416077, "learning_rate": 6.991464490663871e-06, "loss": 0.2952, "step": 3619 }, { "epoch": 1.2982309347358356, "grad_norm": 0.33644458651542664, "learning_rate": 6.989550107149725e-06, "loss": 0.316, "step": 3620 }, { "epoch": 1.2985895290461391, "grad_norm": 0.34155362844467163, "learning_rate": 6.9876353770497595e-06, "loss": 0.3329, "step": 3621 }, { "epoch": 1.2989481233564426, "grad_norm": 0.30510076880455017, "learning_rate": 6.9857203006975285e-06, "loss": 0.3074, "step": 3622 }, { "epoch": 1.2993067176667463, "grad_norm": 0.35958901047706604, "learning_rate": 6.983804878426643e-06, "loss": 0.3616, "step": 3623 }, { "epoch": 1.29966531197705, "grad_norm": 0.3349123001098633, "learning_rate": 6.9818891105707745e-06, "loss": 0.3386, "step": 3624 }, { "epoch": 1.3000239062873535, "grad_norm": 0.3689821660518646, "learning_rate": 6.979972997463658e-06, "loss": 0.3088, "step": 3625 }, { "epoch": 1.3003825005976573, "grad_norm": 0.3528105616569519, "learning_rate": 6.978056539439085e-06, "loss": 0.3136, "step": 3626 }, { "epoch": 1.3007410949079607, "grad_norm": 0.34454265236854553, "learning_rate": 6.976139736830908e-06, "loss": 0.3804, "step": 3627 }, { "epoch": 1.3010996892182645, "grad_norm": 0.3373897671699524, "learning_rate": 6.974222589973042e-06, "loss": 0.3255, "step": 3628 }, { "epoch": 1.301458283528568, "grad_norm": 0.3209005296230316, "learning_rate": 6.972305099199458e-06, "loss": 0.3136, "step": 3629 }, { "epoch": 1.3018168778388717, "grad_norm": 0.320194274187088, "learning_rate": 6.970387264844189e-06, "loss": 0.3034, "step": 3630 }, { "epoch": 1.3021754721491752, "grad_norm": 0.30278971791267395, "learning_rate": 6.9684690872413295e-06, "loss": 0.277, "step": 3631 }, { "epoch": 1.302534066459479, "grad_norm": 0.3397359251976013, "learning_rate": 6.96655056672503e-06, "loss": 0.3304, "step": 3632 }, { "epoch": 1.3028926607697824, "grad_norm": 0.33861660957336426, "learning_rate": 6.964631703629504e-06, "loss": 0.3369, "step": 3633 }, { "epoch": 1.303251255080086, "grad_norm": 0.34509018063545227, "learning_rate": 6.962712498289023e-06, "loss": 0.3368, "step": 3634 }, { "epoch": 1.3036098493903896, "grad_norm": 0.3253213167190552, "learning_rate": 6.960792951037918e-06, "loss": 0.305, "step": 3635 }, { "epoch": 1.3039684437006933, "grad_norm": 0.339284747838974, "learning_rate": 6.9588730622105795e-06, "loss": 0.3348, "step": 3636 }, { "epoch": 1.3043270380109968, "grad_norm": 0.3505246639251709, "learning_rate": 6.956952832141462e-06, "loss": 0.3092, "step": 3637 }, { "epoch": 1.3046856323213005, "grad_norm": 0.36865124106407166, "learning_rate": 6.955032261165069e-06, "loss": 0.326, "step": 3638 }, { "epoch": 1.3050442266316042, "grad_norm": 0.37320590019226074, "learning_rate": 6.953111349615977e-06, "loss": 0.3029, "step": 3639 }, { "epoch": 1.3054028209419077, "grad_norm": 0.33916032314300537, "learning_rate": 6.95119009782881e-06, "loss": 0.3296, "step": 3640 }, { "epoch": 1.3057614152522112, "grad_norm": 0.3506612181663513, "learning_rate": 6.949268506138257e-06, "loss": 0.3291, "step": 3641 }, { "epoch": 1.306120009562515, "grad_norm": 0.3772229552268982, "learning_rate": 6.947346574879066e-06, "loss": 0.3339, "step": 3642 }, { "epoch": 1.3064786038728187, "grad_norm": 0.39591658115386963, "learning_rate": 6.945424304386043e-06, "loss": 0.3352, "step": 3643 }, { "epoch": 1.3068371981831222, "grad_norm": 0.31407469511032104, "learning_rate": 6.943501694994053e-06, "loss": 0.283, "step": 3644 }, { "epoch": 1.3071957924934257, "grad_norm": 0.3767494261264801, "learning_rate": 6.941578747038024e-06, "loss": 0.3132, "step": 3645 }, { "epoch": 1.3075543868037294, "grad_norm": 0.34158679842948914, "learning_rate": 6.939655460852935e-06, "loss": 0.3099, "step": 3646 }, { "epoch": 1.307912981114033, "grad_norm": 0.3509601354598999, "learning_rate": 6.9377318367738285e-06, "loss": 0.3377, "step": 3647 }, { "epoch": 1.3082715754243366, "grad_norm": 0.349213182926178, "learning_rate": 6.9358078751358095e-06, "loss": 0.3207, "step": 3648 }, { "epoch": 1.30863016973464, "grad_norm": 0.39133140444755554, "learning_rate": 6.933883576274034e-06, "loss": 0.3092, "step": 3649 }, { "epoch": 1.3089887640449438, "grad_norm": 0.35354629158973694, "learning_rate": 6.931958940523725e-06, "loss": 0.3104, "step": 3650 }, { "epoch": 1.3093473583552475, "grad_norm": 0.37216198444366455, "learning_rate": 6.9300339682201575e-06, "loss": 0.3344, "step": 3651 }, { "epoch": 1.309705952665551, "grad_norm": 0.3368598520755768, "learning_rate": 6.928108659698667e-06, "loss": 0.319, "step": 3652 }, { "epoch": 1.3100645469758547, "grad_norm": 0.36306703090667725, "learning_rate": 6.926183015294651e-06, "loss": 0.389, "step": 3653 }, { "epoch": 1.3104231412861582, "grad_norm": 0.33093446493148804, "learning_rate": 6.924257035343559e-06, "loss": 0.3055, "step": 3654 }, { "epoch": 1.310781735596462, "grad_norm": 0.3634485602378845, "learning_rate": 6.922330720180905e-06, "loss": 0.3257, "step": 3655 }, { "epoch": 1.3111403299067654, "grad_norm": 0.3357013165950775, "learning_rate": 6.9204040701422605e-06, "loss": 0.3018, "step": 3656 }, { "epoch": 1.3114989242170692, "grad_norm": 0.32631829380989075, "learning_rate": 6.918477085563251e-06, "loss": 0.3203, "step": 3657 }, { "epoch": 1.3118575185273726, "grad_norm": 0.3385878801345825, "learning_rate": 6.916549766779564e-06, "loss": 0.323, "step": 3658 }, { "epoch": 1.3122161128376764, "grad_norm": 0.34492751955986023, "learning_rate": 6.914622114126945e-06, "loss": 0.311, "step": 3659 }, { "epoch": 1.3125747071479799, "grad_norm": 0.3664289116859436, "learning_rate": 6.912694127941199e-06, "loss": 0.3405, "step": 3660 }, { "epoch": 1.3129333014582836, "grad_norm": 0.3232632875442505, "learning_rate": 6.910765808558183e-06, "loss": 0.3341, "step": 3661 }, { "epoch": 1.313291895768587, "grad_norm": 0.3564242124557495, "learning_rate": 6.90883715631382e-06, "loss": 0.3396, "step": 3662 }, { "epoch": 1.3136504900788908, "grad_norm": 0.3260398805141449, "learning_rate": 6.906908171544086e-06, "loss": 0.3185, "step": 3663 }, { "epoch": 1.3140090843891943, "grad_norm": 0.3402220606803894, "learning_rate": 6.904978854585014e-06, "loss": 0.3367, "step": 3664 }, { "epoch": 1.314367678699498, "grad_norm": 0.36235812306404114, "learning_rate": 6.9030492057726985e-06, "loss": 0.3234, "step": 3665 }, { "epoch": 1.3147262730098017, "grad_norm": 0.385057270526886, "learning_rate": 6.901119225443293e-06, "loss": 0.309, "step": 3666 }, { "epoch": 1.3150848673201052, "grad_norm": 0.30226728320121765, "learning_rate": 6.8991889139330016e-06, "loss": 0.2735, "step": 3667 }, { "epoch": 1.3154434616304087, "grad_norm": 0.3317315876483917, "learning_rate": 6.897258271578093e-06, "loss": 0.3448, "step": 3668 }, { "epoch": 1.3158020559407124, "grad_norm": 0.3430403769016266, "learning_rate": 6.895327298714891e-06, "loss": 0.3253, "step": 3669 }, { "epoch": 1.3161606502510161, "grad_norm": 0.3636515438556671, "learning_rate": 6.893395995679777e-06, "loss": 0.3278, "step": 3670 }, { "epoch": 1.3165192445613196, "grad_norm": 0.34698522090911865, "learning_rate": 6.89146436280919e-06, "loss": 0.3468, "step": 3671 }, { "epoch": 1.3168778388716231, "grad_norm": 0.3422556221485138, "learning_rate": 6.889532400439626e-06, "loss": 0.3238, "step": 3672 }, { "epoch": 1.3172364331819268, "grad_norm": 0.3681071102619171, "learning_rate": 6.88760010890764e-06, "loss": 0.3533, "step": 3673 }, { "epoch": 1.3175950274922306, "grad_norm": 0.34434688091278076, "learning_rate": 6.885667488549842e-06, "loss": 0.3291, "step": 3674 }, { "epoch": 1.317953621802534, "grad_norm": 0.32981961965560913, "learning_rate": 6.8837345397029e-06, "loss": 0.3178, "step": 3675 }, { "epoch": 1.3183122161128376, "grad_norm": 0.31808462738990784, "learning_rate": 6.881801262703541e-06, "loss": 0.3218, "step": 3676 }, { "epoch": 1.3186708104231413, "grad_norm": 0.37207403779029846, "learning_rate": 6.879867657888547e-06, "loss": 0.3597, "step": 3677 }, { "epoch": 1.319029404733445, "grad_norm": 0.36580750346183777, "learning_rate": 6.877933725594759e-06, "loss": 0.313, "step": 3678 }, { "epoch": 1.3193879990437485, "grad_norm": 0.3770889937877655, "learning_rate": 6.875999466159073e-06, "loss": 0.3395, "step": 3679 }, { "epoch": 1.319746593354052, "grad_norm": 0.35567691922187805, "learning_rate": 6.874064879918445e-06, "loss": 0.304, "step": 3680 }, { "epoch": 1.3201051876643557, "grad_norm": 0.3553863763809204, "learning_rate": 6.872129967209883e-06, "loss": 0.3041, "step": 3681 }, { "epoch": 1.3204637819746594, "grad_norm": 0.3732777237892151, "learning_rate": 6.870194728370456e-06, "loss": 0.3435, "step": 3682 }, { "epoch": 1.320822376284963, "grad_norm": 0.3860336244106293, "learning_rate": 6.86825916373729e-06, "loss": 0.332, "step": 3683 }, { "epoch": 1.3211809705952666, "grad_norm": 0.30970829725265503, "learning_rate": 6.866323273647564e-06, "loss": 0.307, "step": 3684 }, { "epoch": 1.3215395649055701, "grad_norm": 0.3667997121810913, "learning_rate": 6.8643870584385176e-06, "loss": 0.3351, "step": 3685 }, { "epoch": 1.3218981592158738, "grad_norm": 0.37091299891471863, "learning_rate": 6.862450518447445e-06, "loss": 0.3295, "step": 3686 }, { "epoch": 1.3222567535261773, "grad_norm": 0.3193114101886749, "learning_rate": 6.860513654011698e-06, "loss": 0.3433, "step": 3687 }, { "epoch": 1.322615347836481, "grad_norm": 0.3540288507938385, "learning_rate": 6.858576465468684e-06, "loss": 0.312, "step": 3688 }, { "epoch": 1.3229739421467845, "grad_norm": 0.3684771955013275, "learning_rate": 6.856638953155867e-06, "loss": 0.3144, "step": 3689 }, { "epoch": 1.3233325364570883, "grad_norm": 0.3511123061180115, "learning_rate": 6.854701117410768e-06, "loss": 0.3166, "step": 3690 }, { "epoch": 1.3236911307673918, "grad_norm": 0.3384224474430084, "learning_rate": 6.852762958570964e-06, "loss": 0.3184, "step": 3691 }, { "epoch": 1.3240497250776955, "grad_norm": 0.34852471947669983, "learning_rate": 6.8508244769740896e-06, "loss": 0.3258, "step": 3692 }, { "epoch": 1.324408319387999, "grad_norm": 0.34794142842292786, "learning_rate": 6.848885672957832e-06, "loss": 0.3376, "step": 3693 }, { "epoch": 1.3247669136983027, "grad_norm": 0.3221017122268677, "learning_rate": 6.846946546859938e-06, "loss": 0.3351, "step": 3694 }, { "epoch": 1.3251255080086062, "grad_norm": 0.3131476044654846, "learning_rate": 6.84500709901821e-06, "loss": 0.3182, "step": 3695 }, { "epoch": 1.32548410231891, "grad_norm": 0.36260712146759033, "learning_rate": 6.8430673297705045e-06, "loss": 0.3393, "step": 3696 }, { "epoch": 1.3258426966292136, "grad_norm": 0.35696378350257874, "learning_rate": 6.841127239454737e-06, "loss": 0.3384, "step": 3697 }, { "epoch": 1.326201290939517, "grad_norm": 0.3231946527957916, "learning_rate": 6.839186828408874e-06, "loss": 0.3302, "step": 3698 }, { "epoch": 1.3265598852498206, "grad_norm": 0.30246153473854065, "learning_rate": 6.837246096970946e-06, "loss": 0.3011, "step": 3699 }, { "epoch": 1.3269184795601243, "grad_norm": 0.3956155776977539, "learning_rate": 6.83530504547903e-06, "loss": 0.3404, "step": 3700 }, { "epoch": 1.327277073870428, "grad_norm": 0.3339563310146332, "learning_rate": 6.833363674271266e-06, "loss": 0.3151, "step": 3701 }, { "epoch": 1.3276356681807315, "grad_norm": 0.3474021852016449, "learning_rate": 6.831421983685846e-06, "loss": 0.3313, "step": 3702 }, { "epoch": 1.327994262491035, "grad_norm": 0.3759939670562744, "learning_rate": 6.829479974061019e-06, "loss": 0.3581, "step": 3703 }, { "epoch": 1.3283528568013387, "grad_norm": 0.35966619849205017, "learning_rate": 6.8275376457350894e-06, "loss": 0.2937, "step": 3704 }, { "epoch": 1.3287114511116425, "grad_norm": 0.3491395115852356, "learning_rate": 6.825594999046415e-06, "loss": 0.3456, "step": 3705 }, { "epoch": 1.329070045421946, "grad_norm": 0.32679829001426697, "learning_rate": 6.823652034333412e-06, "loss": 0.2904, "step": 3706 }, { "epoch": 1.3294286397322495, "grad_norm": 0.3403702974319458, "learning_rate": 6.8217087519345504e-06, "loss": 0.3271, "step": 3707 }, { "epoch": 1.3297872340425532, "grad_norm": 0.3647257685661316, "learning_rate": 6.819765152188358e-06, "loss": 0.3306, "step": 3708 }, { "epoch": 1.3301458283528569, "grad_norm": 0.3353247344493866, "learning_rate": 6.817821235433413e-06, "loss": 0.3212, "step": 3709 }, { "epoch": 1.3305044226631604, "grad_norm": 0.33736351132392883, "learning_rate": 6.815877002008354e-06, "loss": 0.3481, "step": 3710 }, { "epoch": 1.330863016973464, "grad_norm": 0.3749123811721802, "learning_rate": 6.8139324522518725e-06, "loss": 0.3277, "step": 3711 }, { "epoch": 1.3312216112837676, "grad_norm": 0.32993265986442566, "learning_rate": 6.811987586502713e-06, "loss": 0.3053, "step": 3712 }, { "epoch": 1.3315802055940713, "grad_norm": 0.3386860489845276, "learning_rate": 6.8100424050996776e-06, "loss": 0.3025, "step": 3713 }, { "epoch": 1.3319387999043748, "grad_norm": 0.33865857124328613, "learning_rate": 6.808096908381626e-06, "loss": 0.3244, "step": 3714 }, { "epoch": 1.3322973942146785, "grad_norm": 0.333383709192276, "learning_rate": 6.806151096687467e-06, "loss": 0.2951, "step": 3715 }, { "epoch": 1.332655988524982, "grad_norm": 0.30974647402763367, "learning_rate": 6.804204970356167e-06, "loss": 0.3018, "step": 3716 }, { "epoch": 1.3330145828352857, "grad_norm": 0.35435962677001953, "learning_rate": 6.802258529726748e-06, "loss": 0.3528, "step": 3717 }, { "epoch": 1.3333731771455892, "grad_norm": 0.3174069821834564, "learning_rate": 6.8003117751382866e-06, "loss": 0.3108, "step": 3718 }, { "epoch": 1.333731771455893, "grad_norm": 0.3285665512084961, "learning_rate": 6.798364706929913e-06, "loss": 0.3183, "step": 3719 }, { "epoch": 1.3340903657661964, "grad_norm": 0.3218231201171875, "learning_rate": 6.796417325440813e-06, "loss": 0.3192, "step": 3720 }, { "epoch": 1.3344489600765002, "grad_norm": 0.33004412055015564, "learning_rate": 6.794469631010224e-06, "loss": 0.3146, "step": 3721 }, { "epoch": 1.3348075543868037, "grad_norm": 0.37398239970207214, "learning_rate": 6.792521623977445e-06, "loss": 0.327, "step": 3722 }, { "epoch": 1.3351661486971074, "grad_norm": 0.32221847772598267, "learning_rate": 6.79057330468182e-06, "loss": 0.3021, "step": 3723 }, { "epoch": 1.335524743007411, "grad_norm": 0.32356902956962585, "learning_rate": 6.788624673462755e-06, "loss": 0.3189, "step": 3724 }, { "epoch": 1.3358833373177146, "grad_norm": 0.3077397346496582, "learning_rate": 6.786675730659708e-06, "loss": 0.3242, "step": 3725 }, { "epoch": 1.336241931628018, "grad_norm": 0.3931964933872223, "learning_rate": 6.784726476612189e-06, "loss": 0.3145, "step": 3726 }, { "epoch": 1.3366005259383218, "grad_norm": 0.3242512047290802, "learning_rate": 6.782776911659765e-06, "loss": 0.3084, "step": 3727 }, { "epoch": 1.3369591202486255, "grad_norm": 0.38388049602508545, "learning_rate": 6.780827036142059e-06, "loss": 0.357, "step": 3728 }, { "epoch": 1.337317714558929, "grad_norm": 0.3481656312942505, "learning_rate": 6.77887685039874e-06, "loss": 0.2964, "step": 3729 }, { "epoch": 1.3376763088692325, "grad_norm": 0.33316540718078613, "learning_rate": 6.7769263547695396e-06, "loss": 0.3276, "step": 3730 }, { "epoch": 1.3380349031795362, "grad_norm": 0.31426969170570374, "learning_rate": 6.7749755495942415e-06, "loss": 0.333, "step": 3731 }, { "epoch": 1.33839349748984, "grad_norm": 0.3497731387615204, "learning_rate": 6.773024435212678e-06, "loss": 0.3621, "step": 3732 }, { "epoch": 1.3387520918001434, "grad_norm": 0.3351404070854187, "learning_rate": 6.771073011964744e-06, "loss": 0.3245, "step": 3733 }, { "epoch": 1.339110686110447, "grad_norm": 0.3358813226222992, "learning_rate": 6.76912128019038e-06, "loss": 0.3077, "step": 3734 }, { "epoch": 1.3394692804207506, "grad_norm": 0.34345003962516785, "learning_rate": 6.767169240229585e-06, "loss": 0.3132, "step": 3735 }, { "epoch": 1.3398278747310544, "grad_norm": 0.3234782814979553, "learning_rate": 6.76521689242241e-06, "loss": 0.319, "step": 3736 }, { "epoch": 1.3401864690413579, "grad_norm": 0.3390451669692993, "learning_rate": 6.763264237108962e-06, "loss": 0.3402, "step": 3737 }, { "epoch": 1.3405450633516616, "grad_norm": 0.3370974063873291, "learning_rate": 6.761311274629396e-06, "loss": 0.299, "step": 3738 }, { "epoch": 1.340903657661965, "grad_norm": 0.3698820173740387, "learning_rate": 6.759358005323928e-06, "loss": 0.34, "step": 3739 }, { "epoch": 1.3412622519722688, "grad_norm": 0.349324494600296, "learning_rate": 6.7574044295328224e-06, "loss": 0.2979, "step": 3740 }, { "epoch": 1.3416208462825723, "grad_norm": 0.3421362638473511, "learning_rate": 6.755450547596396e-06, "loss": 0.3408, "step": 3741 }, { "epoch": 1.341979440592876, "grad_norm": 0.33788350224494934, "learning_rate": 6.753496359855022e-06, "loss": 0.321, "step": 3742 }, { "epoch": 1.3423380349031795, "grad_norm": 0.36234620213508606, "learning_rate": 6.7515418666491285e-06, "loss": 0.3118, "step": 3743 }, { "epoch": 1.3426966292134832, "grad_norm": 0.3509208559989929, "learning_rate": 6.7495870683191915e-06, "loss": 0.3289, "step": 3744 }, { "epoch": 1.3430552235237867, "grad_norm": 0.3410482406616211, "learning_rate": 6.7476319652057445e-06, "loss": 0.3244, "step": 3745 }, { "epoch": 1.3434138178340904, "grad_norm": 0.32406994700431824, "learning_rate": 6.74567655764937e-06, "loss": 0.2976, "step": 3746 }, { "epoch": 1.343772412144394, "grad_norm": 0.36554086208343506, "learning_rate": 6.74372084599071e-06, "loss": 0.3093, "step": 3747 }, { "epoch": 1.3441310064546976, "grad_norm": 0.3416000008583069, "learning_rate": 6.741764830570453e-06, "loss": 0.3251, "step": 3748 }, { "epoch": 1.3444896007650011, "grad_norm": 0.31214439868927, "learning_rate": 6.739808511729343e-06, "loss": 0.3297, "step": 3749 }, { "epoch": 1.3448481950753048, "grad_norm": 0.3337412476539612, "learning_rate": 6.737851889808179e-06, "loss": 0.3211, "step": 3750 }, { "epoch": 1.3452067893856083, "grad_norm": 0.38408058881759644, "learning_rate": 6.7358949651478085e-06, "loss": 0.3721, "step": 3751 }, { "epoch": 1.345565383695912, "grad_norm": 0.3278135359287262, "learning_rate": 6.733937738089134e-06, "loss": 0.3051, "step": 3752 }, { "epoch": 1.3459239780062155, "grad_norm": 0.3553510904312134, "learning_rate": 6.73198020897311e-06, "loss": 0.318, "step": 3753 }, { "epoch": 1.3462825723165193, "grad_norm": 0.32587572932243347, "learning_rate": 6.730022378140747e-06, "loss": 0.2953, "step": 3754 }, { "epoch": 1.346641166626823, "grad_norm": 0.3542752265930176, "learning_rate": 6.728064245933102e-06, "loss": 0.302, "step": 3755 }, { "epoch": 1.3469997609371265, "grad_norm": 0.37815818190574646, "learning_rate": 6.72610581269129e-06, "loss": 0.3274, "step": 3756 }, { "epoch": 1.34735835524743, "grad_norm": 0.3556056618690491, "learning_rate": 6.724147078756475e-06, "loss": 0.3008, "step": 3757 }, { "epoch": 1.3477169495577337, "grad_norm": 0.37895071506500244, "learning_rate": 6.722188044469874e-06, "loss": 0.3508, "step": 3758 }, { "epoch": 1.3480755438680374, "grad_norm": 0.37447667121887207, "learning_rate": 6.720228710172758e-06, "loss": 0.319, "step": 3759 }, { "epoch": 1.348434138178341, "grad_norm": 0.3462047874927521, "learning_rate": 6.71826907620645e-06, "loss": 0.3277, "step": 3760 }, { "epoch": 1.3487927324886444, "grad_norm": 0.3634292185306549, "learning_rate": 6.7163091429123204e-06, "loss": 0.3363, "step": 3761 }, { "epoch": 1.3491513267989481, "grad_norm": 0.3459502160549164, "learning_rate": 6.7143489106318e-06, "loss": 0.2971, "step": 3762 }, { "epoch": 1.3495099211092518, "grad_norm": 0.3420383930206299, "learning_rate": 6.712388379706367e-06, "loss": 0.3235, "step": 3763 }, { "epoch": 1.3498685154195553, "grad_norm": 0.32220062613487244, "learning_rate": 6.710427550477548e-06, "loss": 0.3081, "step": 3764 }, { "epoch": 1.3502271097298588, "grad_norm": 0.34953999519348145, "learning_rate": 6.70846642328693e-06, "loss": 0.309, "step": 3765 }, { "epoch": 1.3505857040401625, "grad_norm": 0.35184013843536377, "learning_rate": 6.706504998476144e-06, "loss": 0.3219, "step": 3766 }, { "epoch": 1.3509442983504663, "grad_norm": 0.4280894696712494, "learning_rate": 6.70454327638688e-06, "loss": 0.3698, "step": 3767 }, { "epoch": 1.3513028926607697, "grad_norm": 0.3443121314048767, "learning_rate": 6.702581257360874e-06, "loss": 0.3168, "step": 3768 }, { "epoch": 1.3516614869710735, "grad_norm": 0.36106598377227783, "learning_rate": 6.7006189417399145e-06, "loss": 0.3011, "step": 3769 }, { "epoch": 1.352020081281377, "grad_norm": 0.386237233877182, "learning_rate": 6.698656329865846e-06, "loss": 0.3233, "step": 3770 }, { "epoch": 1.3523786755916807, "grad_norm": 0.3521845042705536, "learning_rate": 6.6966934220805594e-06, "loss": 0.3196, "step": 3771 }, { "epoch": 1.3527372699019842, "grad_norm": 0.3699953854084015, "learning_rate": 6.6947302187259985e-06, "loss": 0.3394, "step": 3772 }, { "epoch": 1.353095864212288, "grad_norm": 0.35905522108078003, "learning_rate": 6.692766720144163e-06, "loss": 0.3064, "step": 3773 }, { "epoch": 1.3534544585225914, "grad_norm": 0.34990906715393066, "learning_rate": 6.690802926677098e-06, "loss": 0.3326, "step": 3774 }, { "epoch": 1.353813052832895, "grad_norm": 0.37117621302604675, "learning_rate": 6.688838838666902e-06, "loss": 0.3744, "step": 3775 }, { "epoch": 1.3541716471431986, "grad_norm": 0.3310484290122986, "learning_rate": 6.6868744564557266e-06, "loss": 0.3122, "step": 3776 }, { "epoch": 1.3545302414535023, "grad_norm": 0.3322141468524933, "learning_rate": 6.684909780385773e-06, "loss": 0.3038, "step": 3777 }, { "epoch": 1.3548888357638058, "grad_norm": 0.33567535877227783, "learning_rate": 6.682944810799292e-06, "loss": 0.3281, "step": 3778 }, { "epoch": 1.3552474300741095, "grad_norm": 0.3709191679954529, "learning_rate": 6.680979548038591e-06, "loss": 0.3428, "step": 3779 }, { "epoch": 1.355606024384413, "grad_norm": 0.3579472303390503, "learning_rate": 6.679013992446022e-06, "loss": 0.3083, "step": 3780 }, { "epoch": 1.3559646186947167, "grad_norm": 0.356567919254303, "learning_rate": 6.677048144363991e-06, "loss": 0.3362, "step": 3781 }, { "epoch": 1.3563232130050205, "grad_norm": 0.38915228843688965, "learning_rate": 6.675082004134956e-06, "loss": 0.3458, "step": 3782 }, { "epoch": 1.356681807315324, "grad_norm": 0.3376353979110718, "learning_rate": 6.673115572101426e-06, "loss": 0.318, "step": 3783 }, { "epoch": 1.3570404016256274, "grad_norm": 0.3605036735534668, "learning_rate": 6.671148848605957e-06, "loss": 0.3118, "step": 3784 }, { "epoch": 1.3573989959359312, "grad_norm": 0.3757016956806183, "learning_rate": 6.66918183399116e-06, "loss": 0.3411, "step": 3785 }, { "epoch": 1.3577575902462349, "grad_norm": 0.35057246685028076, "learning_rate": 6.667214528599692e-06, "loss": 0.3285, "step": 3786 }, { "epoch": 1.3581161845565384, "grad_norm": 0.3687611520290375, "learning_rate": 6.665246932774268e-06, "loss": 0.3125, "step": 3787 }, { "epoch": 1.3584747788668419, "grad_norm": 0.34461718797683716, "learning_rate": 6.663279046857647e-06, "loss": 0.3011, "step": 3788 }, { "epoch": 1.3588333731771456, "grad_norm": 0.37581562995910645, "learning_rate": 6.661310871192641e-06, "loss": 0.327, "step": 3789 }, { "epoch": 1.3591919674874493, "grad_norm": 0.377827525138855, "learning_rate": 6.659342406122113e-06, "loss": 0.3309, "step": 3790 }, { "epoch": 1.3595505617977528, "grad_norm": 0.3278774619102478, "learning_rate": 6.657373651988976e-06, "loss": 0.3479, "step": 3791 }, { "epoch": 1.3599091561080563, "grad_norm": 0.3201501965522766, "learning_rate": 6.655404609136193e-06, "loss": 0.2996, "step": 3792 }, { "epoch": 1.36026775041836, "grad_norm": 0.34116825461387634, "learning_rate": 6.6534352779067755e-06, "loss": 0.3297, "step": 3793 }, { "epoch": 1.3606263447286637, "grad_norm": 0.330150306224823, "learning_rate": 6.6514656586437886e-06, "loss": 0.3472, "step": 3794 }, { "epoch": 1.3609849390389672, "grad_norm": 0.3367539346218109, "learning_rate": 6.6494957516903445e-06, "loss": 0.3237, "step": 3795 }, { "epoch": 1.361343533349271, "grad_norm": 0.3289749324321747, "learning_rate": 6.647525557389611e-06, "loss": 0.2916, "step": 3796 }, { "epoch": 1.3617021276595744, "grad_norm": 0.3921591639518738, "learning_rate": 6.6455550760847995e-06, "loss": 0.3527, "step": 3797 }, { "epoch": 1.3620607219698782, "grad_norm": 0.31513649225234985, "learning_rate": 6.643584308119173e-06, "loss": 0.3171, "step": 3798 }, { "epoch": 1.3624193162801816, "grad_norm": 0.34594687819480896, "learning_rate": 6.6416132538360475e-06, "loss": 0.3742, "step": 3799 }, { "epoch": 1.3627779105904854, "grad_norm": 0.35699528455734253, "learning_rate": 6.639641913578785e-06, "loss": 0.3196, "step": 3800 }, { "epoch": 1.3631365049007889, "grad_norm": 0.3463289737701416, "learning_rate": 6.6376702876908e-06, "loss": 0.3128, "step": 3801 }, { "epoch": 1.3634950992110926, "grad_norm": 0.3499366044998169, "learning_rate": 6.635698376515556e-06, "loss": 0.3257, "step": 3802 }, { "epoch": 1.363853693521396, "grad_norm": 0.3149631917476654, "learning_rate": 6.633726180396567e-06, "loss": 0.2744, "step": 3803 }, { "epoch": 1.3642122878316998, "grad_norm": 0.39029791951179504, "learning_rate": 6.631753699677392e-06, "loss": 0.3444, "step": 3804 }, { "epoch": 1.3645708821420033, "grad_norm": 0.3675004839897156, "learning_rate": 6.6297809347016475e-06, "loss": 0.3442, "step": 3805 }, { "epoch": 1.364929476452307, "grad_norm": 0.33081015944480896, "learning_rate": 6.627807885812992e-06, "loss": 0.3121, "step": 3806 }, { "epoch": 1.3652880707626105, "grad_norm": 0.36455434560775757, "learning_rate": 6.62583455335514e-06, "loss": 0.3258, "step": 3807 }, { "epoch": 1.3656466650729142, "grad_norm": 0.34742793440818787, "learning_rate": 6.623860937671851e-06, "loss": 0.2854, "step": 3808 }, { "epoch": 1.3660052593832177, "grad_norm": 0.42023882269859314, "learning_rate": 6.621887039106933e-06, "loss": 0.3548, "step": 3809 }, { "epoch": 1.3663638536935214, "grad_norm": 0.3698314130306244, "learning_rate": 6.619912858004249e-06, "loss": 0.292, "step": 3810 }, { "epoch": 1.366722448003825, "grad_norm": 0.3690159022808075, "learning_rate": 6.6179383947077036e-06, "loss": 0.3208, "step": 3811 }, { "epoch": 1.3670810423141286, "grad_norm": 0.36504092812538147, "learning_rate": 6.615963649561256e-06, "loss": 0.3137, "step": 3812 }, { "epoch": 1.3674396366244324, "grad_norm": 0.3472405970096588, "learning_rate": 6.613988622908915e-06, "loss": 0.3244, "step": 3813 }, { "epoch": 1.3677982309347358, "grad_norm": 0.38033992052078247, "learning_rate": 6.612013315094733e-06, "loss": 0.3259, "step": 3814 }, { "epoch": 1.3681568252450393, "grad_norm": 0.3834106922149658, "learning_rate": 6.6100377264628155e-06, "loss": 0.3626, "step": 3815 }, { "epoch": 1.368515419555343, "grad_norm": 0.3492448925971985, "learning_rate": 6.608061857357319e-06, "loss": 0.3224, "step": 3816 }, { "epoch": 1.3688740138656468, "grad_norm": 0.3224974572658539, "learning_rate": 6.6060857081224416e-06, "loss": 0.3142, "step": 3817 }, { "epoch": 1.3692326081759503, "grad_norm": 0.3707599639892578, "learning_rate": 6.604109279102437e-06, "loss": 0.2948, "step": 3818 }, { "epoch": 1.3695912024862538, "grad_norm": 0.3652895987033844, "learning_rate": 6.602132570641608e-06, "loss": 0.3633, "step": 3819 }, { "epoch": 1.3699497967965575, "grad_norm": 0.32512366771698, "learning_rate": 6.6001555830843e-06, "loss": 0.3293, "step": 3820 }, { "epoch": 1.3703083911068612, "grad_norm": 0.3678995370864868, "learning_rate": 6.59817831677491e-06, "loss": 0.2998, "step": 3821 }, { "epoch": 1.3706669854171647, "grad_norm": 0.34040454030036926, "learning_rate": 6.596200772057886e-06, "loss": 0.3188, "step": 3822 }, { "epoch": 1.3710255797274682, "grad_norm": 0.3292410373687744, "learning_rate": 6.59422294927772e-06, "loss": 0.338, "step": 3823 }, { "epoch": 1.371384174037772, "grad_norm": 0.3296072781085968, "learning_rate": 6.592244848778957e-06, "loss": 0.3161, "step": 3824 }, { "epoch": 1.3717427683480756, "grad_norm": 0.33248525857925415, "learning_rate": 6.590266470906188e-06, "loss": 0.2958, "step": 3825 }, { "epoch": 1.3721013626583791, "grad_norm": 0.374994158744812, "learning_rate": 6.5882878160040505e-06, "loss": 0.3399, "step": 3826 }, { "epoch": 1.3724599569686828, "grad_norm": 0.3496556282043457, "learning_rate": 6.586308884417236e-06, "loss": 0.3439, "step": 3827 }, { "epoch": 1.3728185512789863, "grad_norm": 0.3031177818775177, "learning_rate": 6.584329676490478e-06, "loss": 0.2686, "step": 3828 }, { "epoch": 1.37317714558929, "grad_norm": 0.3697598874568939, "learning_rate": 6.58235019256856e-06, "loss": 0.3287, "step": 3829 }, { "epoch": 1.3735357398995935, "grad_norm": 0.3618963956832886, "learning_rate": 6.580370432996317e-06, "loss": 0.3471, "step": 3830 }, { "epoch": 1.3738943342098973, "grad_norm": 0.3273853063583374, "learning_rate": 6.578390398118628e-06, "loss": 0.2902, "step": 3831 }, { "epoch": 1.3742529285202008, "grad_norm": 0.3819330632686615, "learning_rate": 6.576410088280419e-06, "loss": 0.335, "step": 3832 }, { "epoch": 1.3746115228305045, "grad_norm": 0.33117470145225525, "learning_rate": 6.57442950382667e-06, "loss": 0.3265, "step": 3833 }, { "epoch": 1.374970117140808, "grad_norm": 0.3149867057800293, "learning_rate": 6.572448645102403e-06, "loss": 0.3044, "step": 3834 }, { "epoch": 1.3753287114511117, "grad_norm": 0.3639225661754608, "learning_rate": 6.570467512452688e-06, "loss": 0.344, "step": 3835 }, { "epoch": 1.3756873057614152, "grad_norm": 0.35163259506225586, "learning_rate": 6.568486106222649e-06, "loss": 0.3316, "step": 3836 }, { "epoch": 1.376045900071719, "grad_norm": 0.3128391206264496, "learning_rate": 6.566504426757449e-06, "loss": 0.3018, "step": 3837 }, { "epoch": 1.3764044943820224, "grad_norm": 0.3305722177028656, "learning_rate": 6.564522474402304e-06, "loss": 0.3269, "step": 3838 }, { "epoch": 1.376763088692326, "grad_norm": 0.34093722701072693, "learning_rate": 6.562540249502478e-06, "loss": 0.3455, "step": 3839 }, { "epoch": 1.3771216830026298, "grad_norm": 0.30995067954063416, "learning_rate": 6.560557752403277e-06, "loss": 0.2963, "step": 3840 }, { "epoch": 1.3774802773129333, "grad_norm": 0.3183930516242981, "learning_rate": 6.558574983450061e-06, "loss": 0.3279, "step": 3841 }, { "epoch": 1.3778388716232368, "grad_norm": 0.37360838055610657, "learning_rate": 6.556591942988235e-06, "loss": 0.3499, "step": 3842 }, { "epoch": 1.3781974659335405, "grad_norm": 0.352171391248703, "learning_rate": 6.554608631363249e-06, "loss": 0.322, "step": 3843 }, { "epoch": 1.3785560602438443, "grad_norm": 0.27890434861183167, "learning_rate": 6.552625048920602e-06, "loss": 0.2736, "step": 3844 }, { "epoch": 1.3789146545541477, "grad_norm": 0.3251744508743286, "learning_rate": 6.550641196005842e-06, "loss": 0.3141, "step": 3845 }, { "epoch": 1.3792732488644512, "grad_norm": 0.3378085792064667, "learning_rate": 6.548657072964562e-06, "loss": 0.3051, "step": 3846 }, { "epoch": 1.379631843174755, "grad_norm": 0.345044881105423, "learning_rate": 6.546672680142399e-06, "loss": 0.3238, "step": 3847 }, { "epoch": 1.3799904374850587, "grad_norm": 0.3506249189376831, "learning_rate": 6.5446880178850455e-06, "loss": 0.3619, "step": 3848 }, { "epoch": 1.3803490317953622, "grad_norm": 0.3138086199760437, "learning_rate": 6.542703086538233e-06, "loss": 0.2865, "step": 3849 }, { "epoch": 1.3807076261056657, "grad_norm": 0.3447328209877014, "learning_rate": 6.540717886447744e-06, "loss": 0.3429, "step": 3850 }, { "epoch": 1.3810662204159694, "grad_norm": 0.3606286644935608, "learning_rate": 6.538732417959406e-06, "loss": 0.3342, "step": 3851 }, { "epoch": 1.381424814726273, "grad_norm": 0.3040069043636322, "learning_rate": 6.536746681419091e-06, "loss": 0.2974, "step": 3852 }, { "epoch": 1.3817834090365766, "grad_norm": 0.3662046492099762, "learning_rate": 6.5347606771727245e-06, "loss": 0.3428, "step": 3853 }, { "epoch": 1.3821420033468803, "grad_norm": 0.3320424258708954, "learning_rate": 6.5327744055662735e-06, "loss": 0.2963, "step": 3854 }, { "epoch": 1.3825005976571838, "grad_norm": 0.3536110818386078, "learning_rate": 6.530787866945751e-06, "loss": 0.3528, "step": 3855 }, { "epoch": 1.3828591919674875, "grad_norm": 0.3155308961868286, "learning_rate": 6.528801061657221e-06, "loss": 0.2797, "step": 3856 }, { "epoch": 1.383217786277791, "grad_norm": 0.37259575724601746, "learning_rate": 6.526813990046789e-06, "loss": 0.3701, "step": 3857 }, { "epoch": 1.3835763805880947, "grad_norm": 0.3116655647754669, "learning_rate": 6.52482665246061e-06, "loss": 0.3124, "step": 3858 }, { "epoch": 1.3839349748983982, "grad_norm": 0.3580784499645233, "learning_rate": 6.5228390492448824e-06, "loss": 0.3305, "step": 3859 }, { "epoch": 1.384293569208702, "grad_norm": 0.36205577850341797, "learning_rate": 6.5208511807458574e-06, "loss": 0.3419, "step": 3860 }, { "epoch": 1.3846521635190054, "grad_norm": 0.34645962715148926, "learning_rate": 6.518863047309823e-06, "loss": 0.3034, "step": 3861 }, { "epoch": 1.3850107578293092, "grad_norm": 0.34794899821281433, "learning_rate": 6.516874649283122e-06, "loss": 0.3087, "step": 3862 }, { "epoch": 1.3853693521396127, "grad_norm": 0.34553107619285583, "learning_rate": 6.5148859870121365e-06, "loss": 0.3527, "step": 3863 }, { "epoch": 1.3857279464499164, "grad_norm": 0.364692360162735, "learning_rate": 6.512897060843298e-06, "loss": 0.3318, "step": 3864 }, { "epoch": 1.3860865407602199, "grad_norm": 0.37141600251197815, "learning_rate": 6.510907871123087e-06, "loss": 0.3186, "step": 3865 }, { "epoch": 1.3864451350705236, "grad_norm": 0.30570146441459656, "learning_rate": 6.508918418198023e-06, "loss": 0.3001, "step": 3866 }, { "epoch": 1.386803729380827, "grad_norm": 0.3189466595649719, "learning_rate": 6.5069287024146765e-06, "loss": 0.3313, "step": 3867 }, { "epoch": 1.3871623236911308, "grad_norm": 0.3534683883190155, "learning_rate": 6.5049387241196625e-06, "loss": 0.3356, "step": 3868 }, { "epoch": 1.3875209180014343, "grad_norm": 0.34388861060142517, "learning_rate": 6.5029484836596394e-06, "loss": 0.3271, "step": 3869 }, { "epoch": 1.387879512311738, "grad_norm": 0.37277448177337646, "learning_rate": 6.500957981381313e-06, "loss": 0.3216, "step": 3870 }, { "epoch": 1.3882381066220417, "grad_norm": 0.345643550157547, "learning_rate": 6.498967217631439e-06, "loss": 0.3344, "step": 3871 }, { "epoch": 1.3885967009323452, "grad_norm": 0.3392532169818878, "learning_rate": 6.496976192756811e-06, "loss": 0.3154, "step": 3872 }, { "epoch": 1.3889552952426487, "grad_norm": 0.3930304944515228, "learning_rate": 6.494984907104274e-06, "loss": 0.349, "step": 3873 }, { "epoch": 1.3893138895529524, "grad_norm": 0.35142797231674194, "learning_rate": 6.4929933610207145e-06, "loss": 0.3209, "step": 3874 }, { "epoch": 1.3896724838632561, "grad_norm": 0.36348599195480347, "learning_rate": 6.491001554853066e-06, "loss": 0.3392, "step": 3875 }, { "epoch": 1.3900310781735596, "grad_norm": 0.2919713854789734, "learning_rate": 6.4890094889483065e-06, "loss": 0.2696, "step": 3876 }, { "epoch": 1.3903896724838631, "grad_norm": 0.36330047249794006, "learning_rate": 6.487017163653464e-06, "loss": 0.3722, "step": 3877 }, { "epoch": 1.3907482667941669, "grad_norm": 0.34590527415275574, "learning_rate": 6.4850245793156045e-06, "loss": 0.307, "step": 3878 }, { "epoch": 1.3911068611044706, "grad_norm": 0.3615592420101166, "learning_rate": 6.483031736281843e-06, "loss": 0.3502, "step": 3879 }, { "epoch": 1.391465455414774, "grad_norm": 0.32287517189979553, "learning_rate": 6.481038634899339e-06, "loss": 0.2904, "step": 3880 }, { "epoch": 1.3918240497250776, "grad_norm": 0.3839420974254608, "learning_rate": 6.479045275515297e-06, "loss": 0.341, "step": 3881 }, { "epoch": 1.3921826440353813, "grad_norm": 0.35583189129829407, "learning_rate": 6.477051658476965e-06, "loss": 0.3068, "step": 3882 }, { "epoch": 1.392541238345685, "grad_norm": 0.3414817154407501, "learning_rate": 6.47505778413164e-06, "loss": 0.3307, "step": 3883 }, { "epoch": 1.3928998326559885, "grad_norm": 0.3847240209579468, "learning_rate": 6.473063652826661e-06, "loss": 0.3169, "step": 3884 }, { "epoch": 1.3932584269662922, "grad_norm": 0.3662363588809967, "learning_rate": 6.471069264909409e-06, "loss": 0.3019, "step": 3885 }, { "epoch": 1.3936170212765957, "grad_norm": 0.32991573214530945, "learning_rate": 6.469074620727316e-06, "loss": 0.3084, "step": 3886 }, { "epoch": 1.3939756155868994, "grad_norm": 0.3630506694316864, "learning_rate": 6.467079720627853e-06, "loss": 0.3352, "step": 3887 }, { "epoch": 1.394334209897203, "grad_norm": 0.3435509204864502, "learning_rate": 6.465084564958537e-06, "loss": 0.345, "step": 3888 }, { "epoch": 1.3946928042075066, "grad_norm": 0.3231755197048187, "learning_rate": 6.463089154066932e-06, "loss": 0.2786, "step": 3889 }, { "epoch": 1.3950513985178101, "grad_norm": 0.36690056324005127, "learning_rate": 6.461093488300645e-06, "loss": 0.3076, "step": 3890 }, { "epoch": 1.3954099928281138, "grad_norm": 0.33519676327705383, "learning_rate": 6.459097568007326e-06, "loss": 0.3359, "step": 3891 }, { "epoch": 1.3957685871384173, "grad_norm": 0.3699050843715668, "learning_rate": 6.4571013935346724e-06, "loss": 0.3148, "step": 3892 }, { "epoch": 1.396127181448721, "grad_norm": 0.4150782525539398, "learning_rate": 6.455104965230421e-06, "loss": 0.3292, "step": 3893 }, { "epoch": 1.3964857757590246, "grad_norm": 0.3070867657661438, "learning_rate": 6.453108283442359e-06, "loss": 0.3248, "step": 3894 }, { "epoch": 1.3968443700693283, "grad_norm": 0.31626591086387634, "learning_rate": 6.451111348518313e-06, "loss": 0.3181, "step": 3895 }, { "epoch": 1.3972029643796318, "grad_norm": 0.37044578790664673, "learning_rate": 6.4491141608061555e-06, "loss": 0.3325, "step": 3896 }, { "epoch": 1.3975615586899355, "grad_norm": 0.35269683599472046, "learning_rate": 6.447116720653803e-06, "loss": 0.3336, "step": 3897 }, { "epoch": 1.3979201530002392, "grad_norm": 0.35244113206863403, "learning_rate": 6.445119028409215e-06, "loss": 0.3297, "step": 3898 }, { "epoch": 1.3982787473105427, "grad_norm": 0.35408729314804077, "learning_rate": 6.443121084420395e-06, "loss": 0.3229, "step": 3899 }, { "epoch": 1.3986373416208462, "grad_norm": 0.35514071583747864, "learning_rate": 6.441122889035394e-06, "loss": 0.3344, "step": 3900 }, { "epoch": 1.39899593593115, "grad_norm": 0.35301461815834045, "learning_rate": 6.4391244426023e-06, "loss": 0.2968, "step": 3901 }, { "epoch": 1.3993545302414536, "grad_norm": 0.3441656827926636, "learning_rate": 6.437125745469254e-06, "loss": 0.3281, "step": 3902 }, { "epoch": 1.3997131245517571, "grad_norm": 0.3170117437839508, "learning_rate": 6.435126797984429e-06, "loss": 0.3097, "step": 3903 }, { "epoch": 1.4000717188620606, "grad_norm": 0.3512064218521118, "learning_rate": 6.433127600496053e-06, "loss": 0.3286, "step": 3904 }, { "epoch": 1.4004303131723643, "grad_norm": 0.36441290378570557, "learning_rate": 6.431128153352389e-06, "loss": 0.3577, "step": 3905 }, { "epoch": 1.400788907482668, "grad_norm": 0.35127490758895874, "learning_rate": 6.429128456901748e-06, "loss": 0.3393, "step": 3906 }, { "epoch": 1.4011475017929715, "grad_norm": 0.34610262513160706, "learning_rate": 6.427128511492484e-06, "loss": 0.2972, "step": 3907 }, { "epoch": 1.401506096103275, "grad_norm": 0.3581230640411377, "learning_rate": 6.425128317472995e-06, "loss": 0.3255, "step": 3908 }, { "epoch": 1.4018646904135788, "grad_norm": 0.3678511381149292, "learning_rate": 6.423127875191717e-06, "loss": 0.338, "step": 3909 }, { "epoch": 1.4022232847238825, "grad_norm": 0.3101665675640106, "learning_rate": 6.421127184997135e-06, "loss": 0.2694, "step": 3910 }, { "epoch": 1.402581879034186, "grad_norm": 0.3509877324104309, "learning_rate": 6.419126247237778e-06, "loss": 0.3422, "step": 3911 }, { "epoch": 1.4029404733444897, "grad_norm": 0.3390199840068817, "learning_rate": 6.417125062262213e-06, "loss": 0.2998, "step": 3912 }, { "epoch": 1.4032990676547932, "grad_norm": 0.3471926748752594, "learning_rate": 6.415123630419054e-06, "loss": 0.2957, "step": 3913 }, { "epoch": 1.403657661965097, "grad_norm": 0.3584308326244354, "learning_rate": 6.413121952056954e-06, "loss": 0.3472, "step": 3914 }, { "epoch": 1.4040162562754004, "grad_norm": 0.3444192409515381, "learning_rate": 6.411120027524614e-06, "loss": 0.3035, "step": 3915 }, { "epoch": 1.404374850585704, "grad_norm": 0.36433929204940796, "learning_rate": 6.4091178571707754e-06, "loss": 0.3174, "step": 3916 }, { "epoch": 1.4047334448960076, "grad_norm": 0.3965659439563751, "learning_rate": 6.407115441344222e-06, "loss": 0.3629, "step": 3917 }, { "epoch": 1.4050920392063113, "grad_norm": 0.3296337127685547, "learning_rate": 6.405112780393781e-06, "loss": 0.2937, "step": 3918 }, { "epoch": 1.4054506335166148, "grad_norm": 0.3616819679737091, "learning_rate": 6.4031098746683215e-06, "loss": 0.3381, "step": 3919 }, { "epoch": 1.4058092278269185, "grad_norm": 0.38663533329963684, "learning_rate": 6.401106724516757e-06, "loss": 0.3358, "step": 3920 }, { "epoch": 1.406167822137222, "grad_norm": 0.3147234320640564, "learning_rate": 6.399103330288042e-06, "loss": 0.2856, "step": 3921 }, { "epoch": 1.4065264164475257, "grad_norm": 0.3533361256122589, "learning_rate": 6.397099692331175e-06, "loss": 0.3438, "step": 3922 }, { "epoch": 1.4068850107578292, "grad_norm": 0.3806164860725403, "learning_rate": 6.395095810995192e-06, "loss": 0.3161, "step": 3923 }, { "epoch": 1.407243605068133, "grad_norm": 0.3997611999511719, "learning_rate": 6.3930916866291815e-06, "loss": 0.3237, "step": 3924 }, { "epoch": 1.4076021993784364, "grad_norm": 0.3425874710083008, "learning_rate": 6.391087319582264e-06, "loss": 0.3038, "step": 3925 }, { "epoch": 1.4079607936887402, "grad_norm": 0.32625290751457214, "learning_rate": 6.389082710203607e-06, "loss": 0.3072, "step": 3926 }, { "epoch": 1.4083193879990437, "grad_norm": 0.324617475271225, "learning_rate": 6.387077858842421e-06, "loss": 0.306, "step": 3927 }, { "epoch": 1.4086779823093474, "grad_norm": 0.36304551362991333, "learning_rate": 6.3850727658479565e-06, "loss": 0.3214, "step": 3928 }, { "epoch": 1.409036576619651, "grad_norm": 0.37797486782073975, "learning_rate": 6.383067431569505e-06, "loss": 0.2969, "step": 3929 }, { "epoch": 1.4093951709299546, "grad_norm": 0.34809404611587524, "learning_rate": 6.381061856356406e-06, "loss": 0.3263, "step": 3930 }, { "epoch": 1.409753765240258, "grad_norm": 0.3704679310321808, "learning_rate": 6.379056040558036e-06, "loss": 0.3432, "step": 3931 }, { "epoch": 1.4101123595505618, "grad_norm": 0.37555935978889465, "learning_rate": 6.377049984523811e-06, "loss": 0.3289, "step": 3932 }, { "epoch": 1.4104709538608655, "grad_norm": 0.3568625748157501, "learning_rate": 6.375043688603195e-06, "loss": 0.3167, "step": 3933 }, { "epoch": 1.410829548171169, "grad_norm": 0.40079811215400696, "learning_rate": 6.37303715314569e-06, "loss": 0.321, "step": 3934 }, { "epoch": 1.4111881424814725, "grad_norm": 0.3641504645347595, "learning_rate": 6.371030378500842e-06, "loss": 0.3372, "step": 3935 }, { "epoch": 1.4115467367917762, "grad_norm": 0.32684779167175293, "learning_rate": 6.369023365018236e-06, "loss": 0.3256, "step": 3936 }, { "epoch": 1.41190533110208, "grad_norm": 0.37066778540611267, "learning_rate": 6.367016113047501e-06, "loss": 0.325, "step": 3937 }, { "epoch": 1.4122639254123834, "grad_norm": 0.37318432331085205, "learning_rate": 6.365008622938307e-06, "loss": 0.3076, "step": 3938 }, { "epoch": 1.412622519722687, "grad_norm": 0.35332730412483215, "learning_rate": 6.363000895040363e-06, "loss": 0.3265, "step": 3939 }, { "epoch": 1.4129811140329906, "grad_norm": 0.29168689250946045, "learning_rate": 6.3609929297034226e-06, "loss": 0.2934, "step": 3940 }, { "epoch": 1.4133397083432944, "grad_norm": 0.35546764731407166, "learning_rate": 6.358984727277278e-06, "loss": 0.3117, "step": 3941 }, { "epoch": 1.4136983026535979, "grad_norm": 0.3226073980331421, "learning_rate": 6.356976288111768e-06, "loss": 0.325, "step": 3942 }, { "epoch": 1.4140568969639016, "grad_norm": 0.34813863039016724, "learning_rate": 6.354967612556766e-06, "loss": 0.3307, "step": 3943 }, { "epoch": 1.414415491274205, "grad_norm": 0.3499899208545685, "learning_rate": 6.352958700962191e-06, "loss": 0.3252, "step": 3944 }, { "epoch": 1.4147740855845088, "grad_norm": 0.3323659896850586, "learning_rate": 6.350949553678001e-06, "loss": 0.3222, "step": 3945 }, { "epoch": 1.4151326798948123, "grad_norm": 0.3527906835079193, "learning_rate": 6.3489401710541945e-06, "loss": 0.3576, "step": 3946 }, { "epoch": 1.415491274205116, "grad_norm": 0.40275612473487854, "learning_rate": 6.346930553440813e-06, "loss": 0.384, "step": 3947 }, { "epoch": 1.4158498685154195, "grad_norm": 0.33552315831184387, "learning_rate": 6.344920701187941e-06, "loss": 0.2929, "step": 3948 }, { "epoch": 1.4162084628257232, "grad_norm": 0.3392702043056488, "learning_rate": 6.342910614645696e-06, "loss": 0.313, "step": 3949 }, { "epoch": 1.4165670571360267, "grad_norm": 0.389153391122818, "learning_rate": 6.340900294164246e-06, "loss": 0.3475, "step": 3950 }, { "epoch": 1.4169256514463304, "grad_norm": 0.357768714427948, "learning_rate": 6.338889740093792e-06, "loss": 0.3251, "step": 3951 }, { "epoch": 1.417284245756634, "grad_norm": 0.3520878851413727, "learning_rate": 6.33687895278458e-06, "loss": 0.3488, "step": 3952 }, { "epoch": 1.4176428400669376, "grad_norm": 0.3192225396633148, "learning_rate": 6.334867932586894e-06, "loss": 0.341, "step": 3953 }, { "epoch": 1.4180014343772411, "grad_norm": 0.3662876784801483, "learning_rate": 6.332856679851064e-06, "loss": 0.3256, "step": 3954 }, { "epoch": 1.4183600286875448, "grad_norm": 0.3285267651081085, "learning_rate": 6.330845194927451e-06, "loss": 0.2839, "step": 3955 }, { "epoch": 1.4187186229978486, "grad_norm": 0.3352038264274597, "learning_rate": 6.3288334781664665e-06, "loss": 0.3161, "step": 3956 }, { "epoch": 1.419077217308152, "grad_norm": 0.369874507188797, "learning_rate": 6.3268215299185545e-06, "loss": 0.3491, "step": 3957 }, { "epoch": 1.4194358116184556, "grad_norm": 0.3151629567146301, "learning_rate": 6.324809350534202e-06, "loss": 0.3005, "step": 3958 }, { "epoch": 1.4197944059287593, "grad_norm": 0.3456835150718689, "learning_rate": 6.322796940363942e-06, "loss": 0.3206, "step": 3959 }, { "epoch": 1.420153000239063, "grad_norm": 0.35595130920410156, "learning_rate": 6.320784299758339e-06, "loss": 0.3437, "step": 3960 }, { "epoch": 1.4205115945493665, "grad_norm": 0.3594105839729309, "learning_rate": 6.318771429068002e-06, "loss": 0.3298, "step": 3961 }, { "epoch": 1.42087018885967, "grad_norm": 0.36704057455062866, "learning_rate": 6.31675832864358e-06, "loss": 0.3141, "step": 3962 }, { "epoch": 1.4212287831699737, "grad_norm": 0.34074193239212036, "learning_rate": 6.314744998835758e-06, "loss": 0.3231, "step": 3963 }, { "epoch": 1.4215873774802774, "grad_norm": 0.29930099844932556, "learning_rate": 6.312731439995266e-06, "loss": 0.2899, "step": 3964 }, { "epoch": 1.421945971790581, "grad_norm": 0.3364555835723877, "learning_rate": 6.310717652472876e-06, "loss": 0.3297, "step": 3965 }, { "epoch": 1.4223045661008844, "grad_norm": 0.38506799936294556, "learning_rate": 6.308703636619392e-06, "loss": 0.3236, "step": 3966 }, { "epoch": 1.4226631604111881, "grad_norm": 0.32599276304244995, "learning_rate": 6.3066893927856635e-06, "loss": 0.3227, "step": 3967 }, { "epoch": 1.4230217547214918, "grad_norm": 0.35493963956832886, "learning_rate": 6.304674921322576e-06, "loss": 0.3104, "step": 3968 }, { "epoch": 1.4233803490317953, "grad_norm": 0.34065255522727966, "learning_rate": 6.302660222581059e-06, "loss": 0.3439, "step": 3969 }, { "epoch": 1.423738943342099, "grad_norm": 0.3298160135746002, "learning_rate": 6.300645296912078e-06, "loss": 0.2883, "step": 3970 }, { "epoch": 1.4240975376524025, "grad_norm": 0.37511271238327026, "learning_rate": 6.2986301446666406e-06, "loss": 0.336, "step": 3971 }, { "epoch": 1.4244561319627063, "grad_norm": 0.3575359284877777, "learning_rate": 6.296614766195791e-06, "loss": 0.3384, "step": 3972 }, { "epoch": 1.4248147262730098, "grad_norm": 0.302752286195755, "learning_rate": 6.294599161850616e-06, "loss": 0.3016, "step": 3973 }, { "epoch": 1.4251733205833135, "grad_norm": 0.354873925447464, "learning_rate": 6.292583331982238e-06, "loss": 0.3682, "step": 3974 }, { "epoch": 1.425531914893617, "grad_norm": 0.31226250529289246, "learning_rate": 6.290567276941822e-06, "loss": 0.3207, "step": 3975 }, { "epoch": 1.4258905092039207, "grad_norm": 0.3060482144355774, "learning_rate": 6.2885509970805726e-06, "loss": 0.2906, "step": 3976 }, { "epoch": 1.4262491035142242, "grad_norm": 0.3409822881221771, "learning_rate": 6.286534492749731e-06, "loss": 0.3443, "step": 3977 }, { "epoch": 1.426607697824528, "grad_norm": 0.3193223774433136, "learning_rate": 6.284517764300576e-06, "loss": 0.3194, "step": 3978 }, { "epoch": 1.4269662921348314, "grad_norm": 0.29592165350914, "learning_rate": 6.282500812084431e-06, "loss": 0.2746, "step": 3979 }, { "epoch": 1.427324886445135, "grad_norm": 0.31987497210502625, "learning_rate": 6.280483636452654e-06, "loss": 0.2933, "step": 3980 }, { "epoch": 1.4276834807554386, "grad_norm": 0.3061775863170624, "learning_rate": 6.278466237756644e-06, "loss": 0.3184, "step": 3981 }, { "epoch": 1.4280420750657423, "grad_norm": 0.33120331168174744, "learning_rate": 6.276448616347839e-06, "loss": 0.3062, "step": 3982 }, { "epoch": 1.4284006693760458, "grad_norm": 0.35805782675743103, "learning_rate": 6.274430772577712e-06, "loss": 0.3231, "step": 3983 }, { "epoch": 1.4287592636863495, "grad_norm": 0.34753766655921936, "learning_rate": 6.272412706797781e-06, "loss": 0.3352, "step": 3984 }, { "epoch": 1.429117857996653, "grad_norm": 0.3652752637863159, "learning_rate": 6.270394419359598e-06, "loss": 0.3248, "step": 3985 }, { "epoch": 1.4294764523069567, "grad_norm": 0.3478773534297943, "learning_rate": 6.268375910614754e-06, "loss": 0.331, "step": 3986 }, { "epoch": 1.4298350466172605, "grad_norm": 0.3002125322818756, "learning_rate": 6.266357180914879e-06, "loss": 0.307, "step": 3987 }, { "epoch": 1.430193640927564, "grad_norm": 0.3106337785720825, "learning_rate": 6.264338230611644e-06, "loss": 0.3082, "step": 3988 }, { "epoch": 1.4305522352378675, "grad_norm": 0.34307512640953064, "learning_rate": 6.2623190600567566e-06, "loss": 0.3239, "step": 3989 }, { "epoch": 1.4309108295481712, "grad_norm": 0.3630918264389038, "learning_rate": 6.260299669601961e-06, "loss": 0.3521, "step": 3990 }, { "epoch": 1.4312694238584749, "grad_norm": 0.32570794224739075, "learning_rate": 6.2582800595990425e-06, "loss": 0.322, "step": 3991 }, { "epoch": 1.4316280181687784, "grad_norm": 0.3149537146091461, "learning_rate": 6.256260230399822e-06, "loss": 0.3193, "step": 3992 }, { "epoch": 1.4319866124790819, "grad_norm": 0.3412272334098816, "learning_rate": 6.254240182356161e-06, "loss": 0.3355, "step": 3993 }, { "epoch": 1.4323452067893856, "grad_norm": 0.32780027389526367, "learning_rate": 6.252219915819958e-06, "loss": 0.2894, "step": 3994 }, { "epoch": 1.4327038010996893, "grad_norm": 0.3462631106376648, "learning_rate": 6.25019943114315e-06, "loss": 0.3809, "step": 3995 }, { "epoch": 1.4330623954099928, "grad_norm": 0.31567999720573425, "learning_rate": 6.2481787286777116e-06, "loss": 0.3286, "step": 3996 }, { "epoch": 1.4334209897202963, "grad_norm": 0.3435400724411011, "learning_rate": 6.246157808775656e-06, "loss": 0.3204, "step": 3997 }, { "epoch": 1.4337795840306, "grad_norm": 0.3390352427959442, "learning_rate": 6.244136671789031e-06, "loss": 0.3035, "step": 3998 }, { "epoch": 1.4341381783409037, "grad_norm": 0.3272869288921356, "learning_rate": 6.242115318069929e-06, "loss": 0.3231, "step": 3999 }, { "epoch": 1.4344967726512072, "grad_norm": 0.3162679076194763, "learning_rate": 6.240093747970473e-06, "loss": 0.3223, "step": 4000 }, { "epoch": 1.434855366961511, "grad_norm": 0.36242207884788513, "learning_rate": 6.23807196184283e-06, "loss": 0.3336, "step": 4001 }, { "epoch": 1.4352139612718144, "grad_norm": 0.3235011100769043, "learning_rate": 6.236049960039197e-06, "loss": 0.3114, "step": 4002 }, { "epoch": 1.4355725555821182, "grad_norm": 0.3414252996444702, "learning_rate": 6.2340277429118166e-06, "loss": 0.3358, "step": 4003 }, { "epoch": 1.4359311498924217, "grad_norm": 0.30506032705307007, "learning_rate": 6.232005310812964e-06, "loss": 0.3159, "step": 4004 }, { "epoch": 1.4362897442027254, "grad_norm": 0.32025057077407837, "learning_rate": 6.2299826640949535e-06, "loss": 0.3363, "step": 4005 }, { "epoch": 1.4366483385130289, "grad_norm": 0.34233126044273376, "learning_rate": 6.227959803110135e-06, "loss": 0.3029, "step": 4006 }, { "epoch": 1.4370069328233326, "grad_norm": 0.3271728754043579, "learning_rate": 6.2259367282108996e-06, "loss": 0.3031, "step": 4007 }, { "epoch": 1.437365527133636, "grad_norm": 0.35118380188941956, "learning_rate": 6.223913439749672e-06, "loss": 0.3733, "step": 4008 }, { "epoch": 1.4377241214439398, "grad_norm": 0.31555700302124023, "learning_rate": 6.221889938078916e-06, "loss": 0.2851, "step": 4009 }, { "epoch": 1.4380827157542433, "grad_norm": 0.35800570249557495, "learning_rate": 6.219866223551128e-06, "loss": 0.3014, "step": 4010 }, { "epoch": 1.438441310064547, "grad_norm": 0.34098851680755615, "learning_rate": 6.217842296518853e-06, "loss": 0.3338, "step": 4011 }, { "epoch": 1.4387999043748505, "grad_norm": 0.348324179649353, "learning_rate": 6.215818157334658e-06, "loss": 0.3624, "step": 4012 }, { "epoch": 1.4391584986851542, "grad_norm": 0.3115454614162445, "learning_rate": 6.213793806351158e-06, "loss": 0.323, "step": 4013 }, { "epoch": 1.439517092995458, "grad_norm": 0.31489962339401245, "learning_rate": 6.211769243921002e-06, "loss": 0.3105, "step": 4014 }, { "epoch": 1.4398756873057614, "grad_norm": 0.373869389295578, "learning_rate": 6.209744470396872e-06, "loss": 0.3438, "step": 4015 }, { "epoch": 1.440234281616065, "grad_norm": 0.33719712495803833, "learning_rate": 6.207719486131491e-06, "loss": 0.3015, "step": 4016 }, { "epoch": 1.4405928759263686, "grad_norm": 0.35441145300865173, "learning_rate": 6.205694291477621e-06, "loss": 0.3314, "step": 4017 }, { "epoch": 1.4409514702366724, "grad_norm": 0.3405461013317108, "learning_rate": 6.203668886788052e-06, "loss": 0.3446, "step": 4018 }, { "epoch": 1.4413100645469759, "grad_norm": 0.3298288881778717, "learning_rate": 6.201643272415619e-06, "loss": 0.3271, "step": 4019 }, { "epoch": 1.4416686588572794, "grad_norm": 0.3366185128688812, "learning_rate": 6.199617448713189e-06, "loss": 0.3336, "step": 4020 }, { "epoch": 1.442027253167583, "grad_norm": 0.3227972090244293, "learning_rate": 6.197591416033668e-06, "loss": 0.3221, "step": 4021 }, { "epoch": 1.4423858474778868, "grad_norm": 0.3683101534843445, "learning_rate": 6.195565174729995e-06, "loss": 0.3612, "step": 4022 }, { "epoch": 1.4427444417881903, "grad_norm": 0.334966778755188, "learning_rate": 6.193538725155148e-06, "loss": 0.3393, "step": 4023 }, { "epoch": 1.4431030360984938, "grad_norm": 0.328595370054245, "learning_rate": 6.191512067662144e-06, "loss": 0.2997, "step": 4024 }, { "epoch": 1.4434616304087975, "grad_norm": 0.3481209874153137, "learning_rate": 6.189485202604032e-06, "loss": 0.3225, "step": 4025 }, { "epoch": 1.4438202247191012, "grad_norm": 0.3127344846725464, "learning_rate": 6.1874581303338945e-06, "loss": 0.3049, "step": 4026 }, { "epoch": 1.4441788190294047, "grad_norm": 0.3671319782733917, "learning_rate": 6.185430851204858e-06, "loss": 0.3198, "step": 4027 }, { "epoch": 1.4445374133397084, "grad_norm": 0.3245278596878052, "learning_rate": 6.183403365570078e-06, "loss": 0.3136, "step": 4028 }, { "epoch": 1.444896007650012, "grad_norm": 0.33006930351257324, "learning_rate": 6.181375673782749e-06, "loss": 0.3281, "step": 4029 }, { "epoch": 1.4452546019603156, "grad_norm": 0.3108843266963959, "learning_rate": 6.179347776196103e-06, "loss": 0.2984, "step": 4030 }, { "epoch": 1.4456131962706191, "grad_norm": 0.3719620406627655, "learning_rate": 6.177319673163407e-06, "loss": 0.3417, "step": 4031 }, { "epoch": 1.4459717905809228, "grad_norm": 0.34579169750213623, "learning_rate": 6.175291365037959e-06, "loss": 0.3334, "step": 4032 }, { "epoch": 1.4463303848912263, "grad_norm": 0.29979267716407776, "learning_rate": 6.1732628521731e-06, "loss": 0.301, "step": 4033 }, { "epoch": 1.44668897920153, "grad_norm": 0.3669675886631012, "learning_rate": 6.171234134922199e-06, "loss": 0.3497, "step": 4034 }, { "epoch": 1.4470475735118336, "grad_norm": 0.3460247814655304, "learning_rate": 6.169205213638671e-06, "loss": 0.3363, "step": 4035 }, { "epoch": 1.4474061678221373, "grad_norm": 0.3588554859161377, "learning_rate": 6.167176088675956e-06, "loss": 0.3131, "step": 4036 }, { "epoch": 1.4477647621324408, "grad_norm": 0.34486594796180725, "learning_rate": 6.165146760387534e-06, "loss": 0.338, "step": 4037 }, { "epoch": 1.4481233564427445, "grad_norm": 0.32745158672332764, "learning_rate": 6.163117229126924e-06, "loss": 0.3188, "step": 4038 }, { "epoch": 1.448481950753048, "grad_norm": 0.3170833885669708, "learning_rate": 6.161087495247672e-06, "loss": 0.3237, "step": 4039 }, { "epoch": 1.4488405450633517, "grad_norm": 0.33481964468955994, "learning_rate": 6.159057559103367e-06, "loss": 0.3214, "step": 4040 }, { "epoch": 1.4491991393736554, "grad_norm": 0.3793586492538452, "learning_rate": 6.157027421047628e-06, "loss": 0.3446, "step": 4041 }, { "epoch": 1.449557733683959, "grad_norm": 0.3440397083759308, "learning_rate": 6.154997081434114e-06, "loss": 0.3596, "step": 4042 }, { "epoch": 1.4499163279942624, "grad_norm": 0.3008336126804352, "learning_rate": 6.152966540616514e-06, "loss": 0.3087, "step": 4043 }, { "epoch": 1.4502749223045661, "grad_norm": 0.34358328580856323, "learning_rate": 6.150935798948556e-06, "loss": 0.3274, "step": 4044 }, { "epoch": 1.4506335166148698, "grad_norm": 0.3405058979988098, "learning_rate": 6.1489048567840005e-06, "loss": 0.2981, "step": 4045 }, { "epoch": 1.4509921109251733, "grad_norm": 0.3099876642227173, "learning_rate": 6.146873714476645e-06, "loss": 0.3014, "step": 4046 }, { "epoch": 1.4513507052354768, "grad_norm": 0.3248499631881714, "learning_rate": 6.1448423723803195e-06, "loss": 0.3279, "step": 4047 }, { "epoch": 1.4517092995457805, "grad_norm": 0.3264178931713104, "learning_rate": 6.1428108308488934e-06, "loss": 0.2917, "step": 4048 }, { "epoch": 1.4520678938560843, "grad_norm": 0.328804075717926, "learning_rate": 6.140779090236262e-06, "loss": 0.3167, "step": 4049 }, { "epoch": 1.4524264881663878, "grad_norm": 0.3287566304206848, "learning_rate": 6.138747150896366e-06, "loss": 0.3302, "step": 4050 }, { "epoch": 1.4527850824766912, "grad_norm": 0.3810116648674011, "learning_rate": 6.136715013183172e-06, "loss": 0.3456, "step": 4051 }, { "epoch": 1.453143676786995, "grad_norm": 0.3499535918235779, "learning_rate": 6.134682677450686e-06, "loss": 0.2951, "step": 4052 }, { "epoch": 1.4535022710972987, "grad_norm": 0.34658610820770264, "learning_rate": 6.13265014405295e-06, "loss": 0.3377, "step": 4053 }, { "epoch": 1.4538608654076022, "grad_norm": 0.34259137511253357, "learning_rate": 6.130617413344033e-06, "loss": 0.338, "step": 4054 }, { "epoch": 1.4542194597179057, "grad_norm": 0.33292481303215027, "learning_rate": 6.128584485678045e-06, "loss": 0.3286, "step": 4055 }, { "epoch": 1.4545780540282094, "grad_norm": 0.36370670795440674, "learning_rate": 6.126551361409129e-06, "loss": 0.3325, "step": 4056 }, { "epoch": 1.454936648338513, "grad_norm": 0.3256087303161621, "learning_rate": 6.1245180408914604e-06, "loss": 0.3086, "step": 4057 }, { "epoch": 1.4552952426488166, "grad_norm": 0.37218010425567627, "learning_rate": 6.12248452447925e-06, "loss": 0.3397, "step": 4058 }, { "epoch": 1.4556538369591203, "grad_norm": 0.35601240396499634, "learning_rate": 6.120450812526745e-06, "loss": 0.3592, "step": 4059 }, { "epoch": 1.4560124312694238, "grad_norm": 0.3201351761817932, "learning_rate": 6.11841690538822e-06, "loss": 0.2995, "step": 4060 }, { "epoch": 1.4563710255797275, "grad_norm": 0.3468649089336395, "learning_rate": 6.116382803417993e-06, "loss": 0.3292, "step": 4061 }, { "epoch": 1.456729619890031, "grad_norm": 0.3457306921482086, "learning_rate": 6.114348506970408e-06, "loss": 0.3208, "step": 4062 }, { "epoch": 1.4570882142003347, "grad_norm": 0.3386303782463074, "learning_rate": 6.112314016399844e-06, "loss": 0.364, "step": 4063 }, { "epoch": 1.4574468085106382, "grad_norm": 0.30449363589286804, "learning_rate": 6.110279332060719e-06, "loss": 0.3072, "step": 4064 }, { "epoch": 1.457805402820942, "grad_norm": 0.3188607096672058, "learning_rate": 6.108244454307481e-06, "loss": 0.3028, "step": 4065 }, { "epoch": 1.4581639971312454, "grad_norm": 0.3189346492290497, "learning_rate": 6.10620938349461e-06, "loss": 0.3226, "step": 4066 }, { "epoch": 1.4585225914415492, "grad_norm": 0.33818453550338745, "learning_rate": 6.104174119976624e-06, "loss": 0.3353, "step": 4067 }, { "epoch": 1.4588811857518527, "grad_norm": 0.33712539076805115, "learning_rate": 6.10213866410807e-06, "loss": 0.3265, "step": 4068 }, { "epoch": 1.4592397800621564, "grad_norm": 0.328368604183197, "learning_rate": 6.100103016243531e-06, "loss": 0.3321, "step": 4069 }, { "epoch": 1.4595983743724599, "grad_norm": 0.3217172622680664, "learning_rate": 6.0980671767376256e-06, "loss": 0.3018, "step": 4070 }, { "epoch": 1.4599569686827636, "grad_norm": 0.36333197355270386, "learning_rate": 6.096031145945002e-06, "loss": 0.3414, "step": 4071 }, { "epoch": 1.4603155629930673, "grad_norm": 0.3188776969909668, "learning_rate": 6.0939949242203415e-06, "loss": 0.3032, "step": 4072 }, { "epoch": 1.4606741573033708, "grad_norm": 0.3402807414531708, "learning_rate": 6.091958511918362e-06, "loss": 0.3245, "step": 4073 }, { "epoch": 1.4610327516136743, "grad_norm": 0.33662769198417664, "learning_rate": 6.089921909393812e-06, "loss": 0.325, "step": 4074 }, { "epoch": 1.461391345923978, "grad_norm": 0.3410702347755432, "learning_rate": 6.087885117001474e-06, "loss": 0.3106, "step": 4075 }, { "epoch": 1.4617499402342817, "grad_norm": 0.33042892813682556, "learning_rate": 6.085848135096165e-06, "loss": 0.3236, "step": 4076 }, { "epoch": 1.4621085345445852, "grad_norm": 0.31843623518943787, "learning_rate": 6.083810964032732e-06, "loss": 0.3028, "step": 4077 }, { "epoch": 1.4624671288548887, "grad_norm": 0.32950541377067566, "learning_rate": 6.0817736041660594e-06, "loss": 0.3146, "step": 4078 }, { "epoch": 1.4628257231651924, "grad_norm": 0.3511168658733368, "learning_rate": 6.079736055851058e-06, "loss": 0.3251, "step": 4079 }, { "epoch": 1.4631843174754962, "grad_norm": 0.3575631082057953, "learning_rate": 6.077698319442675e-06, "loss": 0.3349, "step": 4080 }, { "epoch": 1.4635429117857996, "grad_norm": 0.3366102874279022, "learning_rate": 6.075660395295893e-06, "loss": 0.2849, "step": 4081 }, { "epoch": 1.4639015060961031, "grad_norm": 0.3350306749343872, "learning_rate": 6.073622283765723e-06, "loss": 0.3316, "step": 4082 }, { "epoch": 1.4642601004064069, "grad_norm": 0.33558472990989685, "learning_rate": 6.071583985207211e-06, "loss": 0.3161, "step": 4083 }, { "epoch": 1.4646186947167106, "grad_norm": 0.341061532497406, "learning_rate": 6.069545499975436e-06, "loss": 0.324, "step": 4084 }, { "epoch": 1.464977289027014, "grad_norm": 0.34200647473335266, "learning_rate": 6.067506828425507e-06, "loss": 0.342, "step": 4085 }, { "epoch": 1.4653358833373178, "grad_norm": 0.3369915783405304, "learning_rate": 6.0654679709125665e-06, "loss": 0.3061, "step": 4086 }, { "epoch": 1.4656944776476213, "grad_norm": 0.3279882073402405, "learning_rate": 6.06342892779179e-06, "loss": 0.31, "step": 4087 }, { "epoch": 1.466053071957925, "grad_norm": 0.32432499527931213, "learning_rate": 6.061389699418388e-06, "loss": 0.3493, "step": 4088 }, { "epoch": 1.4664116662682285, "grad_norm": 0.3239844739437103, "learning_rate": 6.059350286147596e-06, "loss": 0.3272, "step": 4089 }, { "epoch": 1.4667702605785322, "grad_norm": 0.32353147864341736, "learning_rate": 6.05731068833469e-06, "loss": 0.2953, "step": 4090 }, { "epoch": 1.4671288548888357, "grad_norm": 0.3454834222793579, "learning_rate": 6.055270906334972e-06, "loss": 0.3525, "step": 4091 }, { "epoch": 1.4674874491991394, "grad_norm": 0.3483075797557831, "learning_rate": 6.053230940503778e-06, "loss": 0.3125, "step": 4092 }, { "epoch": 1.467846043509443, "grad_norm": 0.3309766352176666, "learning_rate": 6.0511907911964785e-06, "loss": 0.3102, "step": 4093 }, { "epoch": 1.4682046378197466, "grad_norm": 0.32472530007362366, "learning_rate": 6.0491504587684724e-06, "loss": 0.3287, "step": 4094 }, { "epoch": 1.4685632321300501, "grad_norm": 0.34039434790611267, "learning_rate": 6.047109943575192e-06, "loss": 0.3362, "step": 4095 }, { "epoch": 1.4689218264403539, "grad_norm": 0.3236401379108429, "learning_rate": 6.0450692459721025e-06, "loss": 0.3223, "step": 4096 }, { "epoch": 1.4692804207506573, "grad_norm": 0.33192598819732666, "learning_rate": 6.043028366314698e-06, "loss": 0.3395, "step": 4097 }, { "epoch": 1.469639015060961, "grad_norm": 0.3384690284729004, "learning_rate": 6.0409873049585075e-06, "loss": 0.3642, "step": 4098 }, { "epoch": 1.4699976093712648, "grad_norm": 0.2875082492828369, "learning_rate": 6.03894606225909e-06, "loss": 0.3024, "step": 4099 }, { "epoch": 1.4703562036815683, "grad_norm": 0.367733359336853, "learning_rate": 6.036904638572035e-06, "loss": 0.3638, "step": 4100 }, { "epoch": 1.4707147979918718, "grad_norm": 0.3654639422893524, "learning_rate": 6.034863034252968e-06, "loss": 0.3218, "step": 4101 }, { "epoch": 1.4710733923021755, "grad_norm": 0.33016350865364075, "learning_rate": 6.03282124965754e-06, "loss": 0.3212, "step": 4102 }, { "epoch": 1.4714319866124792, "grad_norm": 0.3114372789859772, "learning_rate": 6.030779285141437e-06, "loss": 0.3459, "step": 4103 }, { "epoch": 1.4717905809227827, "grad_norm": 0.351879745721817, "learning_rate": 6.028737141060374e-06, "loss": 0.306, "step": 4104 }, { "epoch": 1.4721491752330862, "grad_norm": 0.3194299042224884, "learning_rate": 6.0266948177701024e-06, "loss": 0.3246, "step": 4105 }, { "epoch": 1.47250776954339, "grad_norm": 0.33845916390419006, "learning_rate": 6.024652315626398e-06, "loss": 0.3333, "step": 4106 }, { "epoch": 1.4728663638536936, "grad_norm": 0.31621626019477844, "learning_rate": 6.022609634985074e-06, "loss": 0.3013, "step": 4107 }, { "epoch": 1.4732249581639971, "grad_norm": 0.3094860017299652, "learning_rate": 6.020566776201969e-06, "loss": 0.3174, "step": 4108 }, { "epoch": 1.4735835524743006, "grad_norm": 0.29819178581237793, "learning_rate": 6.018523739632956e-06, "loss": 0.3136, "step": 4109 }, { "epoch": 1.4739421467846043, "grad_norm": 0.32538196444511414, "learning_rate": 6.01648052563394e-06, "loss": 0.3137, "step": 4110 }, { "epoch": 1.474300741094908, "grad_norm": 0.3134687542915344, "learning_rate": 6.014437134560853e-06, "loss": 0.3136, "step": 4111 }, { "epoch": 1.4746593354052115, "grad_norm": 0.3234504461288452, "learning_rate": 6.012393566769661e-06, "loss": 0.3421, "step": 4112 }, { "epoch": 1.4750179297155153, "grad_norm": 0.30813512206077576, "learning_rate": 6.01034982261636e-06, "loss": 0.3192, "step": 4113 }, { "epoch": 1.4753765240258188, "grad_norm": 0.3446738123893738, "learning_rate": 6.008305902456978e-06, "loss": 0.3561, "step": 4114 }, { "epoch": 1.4757351183361225, "grad_norm": 0.3397809863090515, "learning_rate": 6.006261806647569e-06, "loss": 0.3114, "step": 4115 }, { "epoch": 1.476093712646426, "grad_norm": 0.31417685747146606, "learning_rate": 6.004217535544222e-06, "loss": 0.3163, "step": 4116 }, { "epoch": 1.4764523069567297, "grad_norm": 0.3100140690803528, "learning_rate": 6.002173089503057e-06, "loss": 0.3407, "step": 4117 }, { "epoch": 1.4768109012670332, "grad_norm": 0.33601444959640503, "learning_rate": 6.000128468880223e-06, "loss": 0.3666, "step": 4118 }, { "epoch": 1.477169495577337, "grad_norm": 0.318121075630188, "learning_rate": 5.998083674031897e-06, "loss": 0.3281, "step": 4119 }, { "epoch": 1.4775280898876404, "grad_norm": 0.30500876903533936, "learning_rate": 5.99603870531429e-06, "loss": 0.3282, "step": 4120 }, { "epoch": 1.4778866841979441, "grad_norm": 0.33794549107551575, "learning_rate": 5.993993563083641e-06, "loss": 0.3195, "step": 4121 }, { "epoch": 1.4782452785082476, "grad_norm": 0.3368840217590332, "learning_rate": 5.991948247696222e-06, "loss": 0.3324, "step": 4122 }, { "epoch": 1.4786038728185513, "grad_norm": 0.32406991720199585, "learning_rate": 5.989902759508331e-06, "loss": 0.3094, "step": 4123 }, { "epoch": 1.4789624671288548, "grad_norm": 0.31518977880477905, "learning_rate": 5.9878570988763e-06, "loss": 0.2984, "step": 4124 }, { "epoch": 1.4793210614391585, "grad_norm": 0.33437323570251465, "learning_rate": 5.9858112661564905e-06, "loss": 0.3253, "step": 4125 }, { "epoch": 1.479679655749462, "grad_norm": 0.3543867766857147, "learning_rate": 5.983765261705289e-06, "loss": 0.3391, "step": 4126 }, { "epoch": 1.4800382500597657, "grad_norm": 0.3464480936527252, "learning_rate": 5.981719085879119e-06, "loss": 0.3357, "step": 4127 }, { "epoch": 1.4803968443700692, "grad_norm": 0.3222384452819824, "learning_rate": 5.979672739034431e-06, "loss": 0.3113, "step": 4128 }, { "epoch": 1.480755438680373, "grad_norm": 0.3561939299106598, "learning_rate": 5.977626221527703e-06, "loss": 0.3482, "step": 4129 }, { "epoch": 1.4811140329906767, "grad_norm": 0.3167296350002289, "learning_rate": 5.975579533715446e-06, "loss": 0.2962, "step": 4130 }, { "epoch": 1.4814726273009802, "grad_norm": 0.3300926089286804, "learning_rate": 5.973532675954199e-06, "loss": 0.3068, "step": 4131 }, { "epoch": 1.4818312216112837, "grad_norm": 0.3606431186199188, "learning_rate": 5.97148564860053e-06, "loss": 0.3317, "step": 4132 }, { "epoch": 1.4821898159215874, "grad_norm": 0.3487732410430908, "learning_rate": 5.969438452011038e-06, "loss": 0.3416, "step": 4133 }, { "epoch": 1.482548410231891, "grad_norm": 0.3340480327606201, "learning_rate": 5.967391086542353e-06, "loss": 0.3234, "step": 4134 }, { "epoch": 1.4829070045421946, "grad_norm": 0.325332373380661, "learning_rate": 5.965343552551132e-06, "loss": 0.3023, "step": 4135 }, { "epoch": 1.483265598852498, "grad_norm": 0.32389160990715027, "learning_rate": 5.963295850394059e-06, "loss": 0.3195, "step": 4136 }, { "epoch": 1.4836241931628018, "grad_norm": 0.3526616096496582, "learning_rate": 5.961247980427853e-06, "loss": 0.3223, "step": 4137 }, { "epoch": 1.4839827874731055, "grad_norm": 0.33145105838775635, "learning_rate": 5.959199943009258e-06, "loss": 0.3456, "step": 4138 }, { "epoch": 1.484341381783409, "grad_norm": 0.33248940110206604, "learning_rate": 5.957151738495048e-06, "loss": 0.3332, "step": 4139 }, { "epoch": 1.4846999760937125, "grad_norm": 0.32703185081481934, "learning_rate": 5.955103367242028e-06, "loss": 0.3299, "step": 4140 }, { "epoch": 1.4850585704040162, "grad_norm": 0.33205556869506836, "learning_rate": 5.953054829607029e-06, "loss": 0.3133, "step": 4141 }, { "epoch": 1.48541716471432, "grad_norm": 0.35763630270957947, "learning_rate": 5.9510061259469154e-06, "loss": 0.3282, "step": 4142 }, { "epoch": 1.4857757590246234, "grad_norm": 0.34154731035232544, "learning_rate": 5.948957256618574e-06, "loss": 0.3223, "step": 4143 }, { "epoch": 1.4861343533349272, "grad_norm": 0.3148634433746338, "learning_rate": 5.946908221978928e-06, "loss": 0.2795, "step": 4144 }, { "epoch": 1.4864929476452307, "grad_norm": 0.362316370010376, "learning_rate": 5.944859022384921e-06, "loss": 0.3831, "step": 4145 }, { "epoch": 1.4868515419555344, "grad_norm": 0.30062443017959595, "learning_rate": 5.9428096581935345e-06, "loss": 0.2979, "step": 4146 }, { "epoch": 1.4872101362658379, "grad_norm": 0.3063413202762604, "learning_rate": 5.940760129761772e-06, "loss": 0.2743, "step": 4147 }, { "epoch": 1.4875687305761416, "grad_norm": 0.37322092056274414, "learning_rate": 5.9387104374466685e-06, "loss": 0.3393, "step": 4148 }, { "epoch": 1.487927324886445, "grad_norm": 0.3194534182548523, "learning_rate": 5.936660581605286e-06, "loss": 0.3504, "step": 4149 }, { "epoch": 1.4882859191967488, "grad_norm": 0.30546271800994873, "learning_rate": 5.934610562594716e-06, "loss": 0.3184, "step": 4150 }, { "epoch": 1.4886445135070523, "grad_norm": 0.330870658159256, "learning_rate": 5.932560380772078e-06, "loss": 0.3249, "step": 4151 }, { "epoch": 1.489003107817356, "grad_norm": 0.3323940932750702, "learning_rate": 5.93051003649452e-06, "loss": 0.3443, "step": 4152 }, { "epoch": 1.4893617021276595, "grad_norm": 0.33140283823013306, "learning_rate": 5.9284595301192195e-06, "loss": 0.3007, "step": 4153 }, { "epoch": 1.4897202964379632, "grad_norm": 0.3216685354709625, "learning_rate": 5.926408862003379e-06, "loss": 0.3475, "step": 4154 }, { "epoch": 1.4900788907482667, "grad_norm": 0.30319902300834656, "learning_rate": 5.924358032504235e-06, "loss": 0.3099, "step": 4155 }, { "epoch": 1.4904374850585704, "grad_norm": 0.34029528498649597, "learning_rate": 5.922307041979043e-06, "loss": 0.3182, "step": 4156 }, { "epoch": 1.4907960793688741, "grad_norm": 0.352529376745224, "learning_rate": 5.920255890785096e-06, "loss": 0.308, "step": 4157 }, { "epoch": 1.4911546736791776, "grad_norm": 0.33048415184020996, "learning_rate": 5.918204579279711e-06, "loss": 0.3362, "step": 4158 }, { "epoch": 1.4915132679894811, "grad_norm": 0.30381515622138977, "learning_rate": 5.916153107820231e-06, "loss": 0.283, "step": 4159 }, { "epoch": 1.4918718622997849, "grad_norm": 0.33588868379592896, "learning_rate": 5.914101476764027e-06, "loss": 0.3269, "step": 4160 }, { "epoch": 1.4922304566100886, "grad_norm": 0.3564424216747284, "learning_rate": 5.912049686468505e-06, "loss": 0.3534, "step": 4161 }, { "epoch": 1.492589050920392, "grad_norm": 0.31908613443374634, "learning_rate": 5.909997737291088e-06, "loss": 0.3179, "step": 4162 }, { "epoch": 1.4929476452306956, "grad_norm": 0.34647315740585327, "learning_rate": 5.907945629589234e-06, "loss": 0.3503, "step": 4163 }, { "epoch": 1.4933062395409993, "grad_norm": 0.6913116574287415, "learning_rate": 5.905893363720427e-06, "loss": 0.3106, "step": 4164 }, { "epoch": 1.493664833851303, "grad_norm": 0.3169073462486267, "learning_rate": 5.903840940042179e-06, "loss": 0.3413, "step": 4165 }, { "epoch": 1.4940234281616065, "grad_norm": 0.3012007772922516, "learning_rate": 5.901788358912026e-06, "loss": 0.3212, "step": 4166 }, { "epoch": 1.49438202247191, "grad_norm": 0.3136209547519684, "learning_rate": 5.8997356206875354e-06, "loss": 0.2989, "step": 4167 }, { "epoch": 1.4947406167822137, "grad_norm": 0.3145228624343872, "learning_rate": 5.8976827257263005e-06, "loss": 0.3378, "step": 4168 }, { "epoch": 1.4950992110925174, "grad_norm": 0.34269633889198303, "learning_rate": 5.895629674385941e-06, "loss": 0.3502, "step": 4169 }, { "epoch": 1.495457805402821, "grad_norm": 0.342051237821579, "learning_rate": 5.893576467024107e-06, "loss": 0.3511, "step": 4170 }, { "epoch": 1.4958163997131246, "grad_norm": 0.30759739875793457, "learning_rate": 5.891523103998473e-06, "loss": 0.3048, "step": 4171 }, { "epoch": 1.4961749940234281, "grad_norm": 0.3297123610973358, "learning_rate": 5.889469585666738e-06, "loss": 0.3183, "step": 4172 }, { "epoch": 1.4965335883337318, "grad_norm": 0.33667469024658203, "learning_rate": 5.887415912386637e-06, "loss": 0.3045, "step": 4173 }, { "epoch": 1.4968921826440353, "grad_norm": 0.39457836747169495, "learning_rate": 5.885362084515921e-06, "loss": 0.3639, "step": 4174 }, { "epoch": 1.497250776954339, "grad_norm": 0.3151298761367798, "learning_rate": 5.883308102412375e-06, "loss": 0.3157, "step": 4175 }, { "epoch": 1.4976093712646426, "grad_norm": 0.3167121112346649, "learning_rate": 5.8812539664338105e-06, "loss": 0.2957, "step": 4176 }, { "epoch": 1.4979679655749463, "grad_norm": 0.35409003496170044, "learning_rate": 5.879199676938063e-06, "loss": 0.3537, "step": 4177 }, { "epoch": 1.4983265598852498, "grad_norm": 0.35508865118026733, "learning_rate": 5.8771452342829975e-06, "loss": 0.3357, "step": 4178 }, { "epoch": 1.4986851541955535, "grad_norm": 0.36215195059776306, "learning_rate": 5.8750906388265026e-06, "loss": 0.3524, "step": 4179 }, { "epoch": 1.499043748505857, "grad_norm": 0.36638274788856506, "learning_rate": 5.873035890926494e-06, "loss": 0.3387, "step": 4180 }, { "epoch": 1.4994023428161607, "grad_norm": 0.3448766767978668, "learning_rate": 5.87098099094092e-06, "loss": 0.3287, "step": 4181 }, { "epoch": 1.4997609371264642, "grad_norm": 0.331049382686615, "learning_rate": 5.868925939227747e-06, "loss": 0.321, "step": 4182 }, { "epoch": 1.500119531436768, "grad_norm": 0.3314874470233917, "learning_rate": 5.866870736144969e-06, "loss": 0.3351, "step": 4183 }, { "epoch": 1.5004781257470716, "grad_norm": 0.3274809420108795, "learning_rate": 5.8648153820506156e-06, "loss": 0.3221, "step": 4184 }, { "epoch": 1.5008367200573751, "grad_norm": 0.29965001344680786, "learning_rate": 5.8627598773027305e-06, "loss": 0.306, "step": 4185 }, { "epoch": 1.5011953143676786, "grad_norm": 0.35506126284599304, "learning_rate": 5.860704222259389e-06, "loss": 0.3033, "step": 4186 }, { "epoch": 1.5015539086779823, "grad_norm": 0.36365148425102234, "learning_rate": 5.858648417278696e-06, "loss": 0.3456, "step": 4187 }, { "epoch": 1.501912502988286, "grad_norm": 0.3112335503101349, "learning_rate": 5.8565924627187785e-06, "loss": 0.3102, "step": 4188 }, { "epoch": 1.5022710972985895, "grad_norm": 0.3226967751979828, "learning_rate": 5.854536358937787e-06, "loss": 0.3318, "step": 4189 }, { "epoch": 1.502629691608893, "grad_norm": 0.35595783591270447, "learning_rate": 5.852480106293904e-06, "loss": 0.3594, "step": 4190 }, { "epoch": 1.5029882859191968, "grad_norm": 0.3320959210395813, "learning_rate": 5.850423705145334e-06, "loss": 0.3185, "step": 4191 }, { "epoch": 1.5033468802295005, "grad_norm": 0.34523263573646545, "learning_rate": 5.848367155850308e-06, "loss": 0.3468, "step": 4192 }, { "epoch": 1.503705474539804, "grad_norm": 0.28340426087379456, "learning_rate": 5.846310458767085e-06, "loss": 0.2727, "step": 4193 }, { "epoch": 1.5040640688501075, "grad_norm": 0.3635442852973938, "learning_rate": 5.844253614253946e-06, "loss": 0.3564, "step": 4194 }, { "epoch": 1.5044226631604112, "grad_norm": 0.3450302481651306, "learning_rate": 5.842196622669203e-06, "loss": 0.3186, "step": 4195 }, { "epoch": 1.504781257470715, "grad_norm": 0.3356356620788574, "learning_rate": 5.840139484371187e-06, "loss": 0.3299, "step": 4196 }, { "epoch": 1.5051398517810184, "grad_norm": 0.3417384922504425, "learning_rate": 5.838082199718258e-06, "loss": 0.35, "step": 4197 }, { "epoch": 1.5054984460913219, "grad_norm": 0.3315131962299347, "learning_rate": 5.836024769068802e-06, "loss": 0.2946, "step": 4198 }, { "epoch": 1.5058570404016256, "grad_norm": 0.3345284163951874, "learning_rate": 5.833967192781231e-06, "loss": 0.3469, "step": 4199 }, { "epoch": 1.5062156347119293, "grad_norm": 0.35113897919654846, "learning_rate": 5.831909471213981e-06, "loss": 0.3081, "step": 4200 }, { "epoch": 1.5065742290222328, "grad_norm": 0.34078189730644226, "learning_rate": 5.829851604725513e-06, "loss": 0.3212, "step": 4201 }, { "epoch": 1.5069328233325363, "grad_norm": 0.33567720651626587, "learning_rate": 5.827793593674314e-06, "loss": 0.3334, "step": 4202 }, { "epoch": 1.50729141764284, "grad_norm": 0.3190380036830902, "learning_rate": 5.825735438418893e-06, "loss": 0.3505, "step": 4203 }, { "epoch": 1.5076500119531437, "grad_norm": 0.32182571291923523, "learning_rate": 5.8236771393177915e-06, "loss": 0.3201, "step": 4204 }, { "epoch": 1.5080086062634472, "grad_norm": 0.3955269157886505, "learning_rate": 5.821618696729571e-06, "loss": 0.3547, "step": 4205 }, { "epoch": 1.5083672005737507, "grad_norm": 0.33923736214637756, "learning_rate": 5.819560111012816e-06, "loss": 0.305, "step": 4206 }, { "epoch": 1.5087257948840547, "grad_norm": 0.3621453046798706, "learning_rate": 5.8175013825261395e-06, "loss": 0.3269, "step": 4207 }, { "epoch": 1.5090843891943582, "grad_norm": 0.34358683228492737, "learning_rate": 5.81544251162818e-06, "loss": 0.2873, "step": 4208 }, { "epoch": 1.5094429835046617, "grad_norm": 0.3430454432964325, "learning_rate": 5.813383498677595e-06, "loss": 0.3019, "step": 4209 }, { "epoch": 1.5098015778149654, "grad_norm": 0.35522010922431946, "learning_rate": 5.811324344033077e-06, "loss": 0.3311, "step": 4210 }, { "epoch": 1.510160172125269, "grad_norm": 0.3283100128173828, "learning_rate": 5.809265048053333e-06, "loss": 0.3132, "step": 4211 }, { "epoch": 1.5105187664355726, "grad_norm": 0.36237505078315735, "learning_rate": 5.8072056110971e-06, "loss": 0.3446, "step": 4212 }, { "epoch": 1.510877360745876, "grad_norm": 0.33408603072166443, "learning_rate": 5.805146033523138e-06, "loss": 0.3114, "step": 4213 }, { "epoch": 1.5112359550561798, "grad_norm": 0.3450746536254883, "learning_rate": 5.803086315690232e-06, "loss": 0.2901, "step": 4214 }, { "epoch": 1.5115945493664835, "grad_norm": 0.33864736557006836, "learning_rate": 5.801026457957191e-06, "loss": 0.3405, "step": 4215 }, { "epoch": 1.511953143676787, "grad_norm": 0.3426058888435364, "learning_rate": 5.79896646068285e-06, "loss": 0.3348, "step": 4216 }, { "epoch": 1.5123117379870905, "grad_norm": 0.3728117048740387, "learning_rate": 5.796906324226064e-06, "loss": 0.2975, "step": 4217 }, { "epoch": 1.5126703322973942, "grad_norm": 0.3629318177700043, "learning_rate": 5.794846048945719e-06, "loss": 0.313, "step": 4218 }, { "epoch": 1.513028926607698, "grad_norm": 0.3729887008666992, "learning_rate": 5.792785635200719e-06, "loss": 0.3571, "step": 4219 }, { "epoch": 1.5133875209180014, "grad_norm": 0.2982260286808014, "learning_rate": 5.790725083349994e-06, "loss": 0.2978, "step": 4220 }, { "epoch": 1.513746115228305, "grad_norm": 0.42886120080947876, "learning_rate": 5.788664393752499e-06, "loss": 0.318, "step": 4221 }, { "epoch": 1.5141047095386087, "grad_norm": 0.36201417446136475, "learning_rate": 5.786603566767214e-06, "loss": 0.3278, "step": 4222 }, { "epoch": 1.5144633038489124, "grad_norm": 0.3264196515083313, "learning_rate": 5.784542602753141e-06, "loss": 0.3081, "step": 4223 }, { "epoch": 1.5148218981592159, "grad_norm": 0.32140034437179565, "learning_rate": 5.7824815020693035e-06, "loss": 0.3158, "step": 4224 }, { "epoch": 1.5151804924695194, "grad_norm": 0.3705422282218933, "learning_rate": 5.7804202650747565e-06, "loss": 0.3066, "step": 4225 }, { "epoch": 1.515539086779823, "grad_norm": 0.40492895245552063, "learning_rate": 5.778358892128569e-06, "loss": 0.3257, "step": 4226 }, { "epoch": 1.5158976810901268, "grad_norm": 0.3548726439476013, "learning_rate": 5.776297383589842e-06, "loss": 0.3223, "step": 4227 }, { "epoch": 1.5162562754004303, "grad_norm": 0.3279911279678345, "learning_rate": 5.774235739817697e-06, "loss": 0.3127, "step": 4228 }, { "epoch": 1.5166148697107338, "grad_norm": 0.37012413144111633, "learning_rate": 5.772173961171275e-06, "loss": 0.3511, "step": 4229 }, { "epoch": 1.5169734640210375, "grad_norm": 0.33682286739349365, "learning_rate": 5.770112048009747e-06, "loss": 0.3347, "step": 4230 }, { "epoch": 1.5173320583313412, "grad_norm": 0.3555898070335388, "learning_rate": 5.768050000692304e-06, "loss": 0.3698, "step": 4231 }, { "epoch": 1.5176906526416447, "grad_norm": 0.3047337234020233, "learning_rate": 5.765987819578163e-06, "loss": 0.2845, "step": 4232 }, { "epoch": 1.5180492469519482, "grad_norm": 0.33289235830307007, "learning_rate": 5.76392550502656e-06, "loss": 0.3366, "step": 4233 }, { "epoch": 1.518407841262252, "grad_norm": 0.3609386086463928, "learning_rate": 5.761863057396756e-06, "loss": 0.32, "step": 4234 }, { "epoch": 1.5187664355725556, "grad_norm": 0.3337680399417877, "learning_rate": 5.759800477048039e-06, "loss": 0.3405, "step": 4235 }, { "epoch": 1.5191250298828591, "grad_norm": 0.310817152261734, "learning_rate": 5.7577377643397146e-06, "loss": 0.3296, "step": 4236 }, { "epoch": 1.5194836241931629, "grad_norm": 0.3415580987930298, "learning_rate": 5.755674919631113e-06, "loss": 0.3199, "step": 4237 }, { "epoch": 1.5198422185034666, "grad_norm": 0.39430367946624756, "learning_rate": 5.753611943281589e-06, "loss": 0.3811, "step": 4238 }, { "epoch": 1.52020081281377, "grad_norm": 0.31341561675071716, "learning_rate": 5.751548835650522e-06, "loss": 0.3016, "step": 4239 }, { "epoch": 1.5205594071240736, "grad_norm": 0.3534560799598694, "learning_rate": 5.749485597097309e-06, "loss": 0.3776, "step": 4240 }, { "epoch": 1.5209180014343773, "grad_norm": 0.2797226011753082, "learning_rate": 5.747422227981373e-06, "loss": 0.2767, "step": 4241 }, { "epoch": 1.521276595744681, "grad_norm": 0.36121317744255066, "learning_rate": 5.745358728662159e-06, "loss": 0.3507, "step": 4242 }, { "epoch": 1.5216351900549845, "grad_norm": 0.3602997362613678, "learning_rate": 5.743295099499135e-06, "loss": 0.3352, "step": 4243 }, { "epoch": 1.521993784365288, "grad_norm": 0.34212321043014526, "learning_rate": 5.741231340851794e-06, "loss": 0.3228, "step": 4244 }, { "epoch": 1.5223523786755917, "grad_norm": 0.32166552543640137, "learning_rate": 5.739167453079646e-06, "loss": 0.3133, "step": 4245 }, { "epoch": 1.5227109729858954, "grad_norm": 0.35026925802230835, "learning_rate": 5.737103436542229e-06, "loss": 0.3515, "step": 4246 }, { "epoch": 1.523069567296199, "grad_norm": 0.3400267958641052, "learning_rate": 5.735039291599099e-06, "loss": 0.3126, "step": 4247 }, { "epoch": 1.5234281616065024, "grad_norm": 0.30023643374443054, "learning_rate": 5.732975018609839e-06, "loss": 0.3167, "step": 4248 }, { "epoch": 1.5237867559168061, "grad_norm": 0.32304611802101135, "learning_rate": 5.730910617934049e-06, "loss": 0.355, "step": 4249 }, { "epoch": 1.5241453502271098, "grad_norm": 0.3294595777988434, "learning_rate": 5.728846089931355e-06, "loss": 0.3062, "step": 4250 }, { "epoch": 1.5245039445374133, "grad_norm": 0.3350067138671875, "learning_rate": 5.7267814349614046e-06, "loss": 0.3398, "step": 4251 }, { "epoch": 1.5248625388477168, "grad_norm": 0.30627891421318054, "learning_rate": 5.724716653383868e-06, "loss": 0.2982, "step": 4252 }, { "epoch": 1.5252211331580205, "grad_norm": 0.30666041374206543, "learning_rate": 5.722651745558436e-06, "loss": 0.2791, "step": 4253 }, { "epoch": 1.5255797274683243, "grad_norm": 0.34869250655174255, "learning_rate": 5.720586711844821e-06, "loss": 0.3693, "step": 4254 }, { "epoch": 1.5259383217786278, "grad_norm": 0.33012473583221436, "learning_rate": 5.7185215526027595e-06, "loss": 0.3115, "step": 4255 }, { "epoch": 1.5262969160889313, "grad_norm": 0.3454163670539856, "learning_rate": 5.716456268192007e-06, "loss": 0.3042, "step": 4256 }, { "epoch": 1.526655510399235, "grad_norm": 0.33154135942459106, "learning_rate": 5.714390858972344e-06, "loss": 0.3085, "step": 4257 }, { "epoch": 1.5270141047095387, "grad_norm": 0.32150423526763916, "learning_rate": 5.712325325303571e-06, "loss": 0.3351, "step": 4258 }, { "epoch": 1.5273726990198422, "grad_norm": 0.3180641829967499, "learning_rate": 5.71025966754551e-06, "loss": 0.3427, "step": 4259 }, { "epoch": 1.5277312933301457, "grad_norm": 0.34512007236480713, "learning_rate": 5.7081938860580055e-06, "loss": 0.3309, "step": 4260 }, { "epoch": 1.5280898876404494, "grad_norm": 0.3169928193092346, "learning_rate": 5.706127981200923e-06, "loss": 0.3112, "step": 4261 }, { "epoch": 1.5284484819507531, "grad_norm": 0.3616637587547302, "learning_rate": 5.704061953334148e-06, "loss": 0.3188, "step": 4262 }, { "epoch": 1.5288070762610566, "grad_norm": 0.3502345085144043, "learning_rate": 5.7019958028175905e-06, "loss": 0.308, "step": 4263 }, { "epoch": 1.52916567057136, "grad_norm": 0.31518077850341797, "learning_rate": 5.699929530011181e-06, "loss": 0.3208, "step": 4264 }, { "epoch": 1.529524264881664, "grad_norm": 0.3750615119934082, "learning_rate": 5.6978631352748706e-06, "loss": 0.3317, "step": 4265 }, { "epoch": 1.5298828591919675, "grad_norm": 0.33951354026794434, "learning_rate": 5.695796618968629e-06, "loss": 0.2816, "step": 4266 }, { "epoch": 1.530241453502271, "grad_norm": 0.3684043288230896, "learning_rate": 5.6937299814524516e-06, "loss": 0.3541, "step": 4267 }, { "epoch": 1.5306000478125747, "grad_norm": 0.3238110840320587, "learning_rate": 5.691663223086354e-06, "loss": 0.2991, "step": 4268 }, { "epoch": 1.5309586421228785, "grad_norm": 0.35166943073272705, "learning_rate": 5.68959634423037e-06, "loss": 0.3224, "step": 4269 }, { "epoch": 1.531317236433182, "grad_norm": 0.35529181361198425, "learning_rate": 5.687529345244559e-06, "loss": 0.3337, "step": 4270 }, { "epoch": 1.5316758307434855, "grad_norm": 0.3234633207321167, "learning_rate": 5.6854622264889955e-06, "loss": 0.2938, "step": 4271 }, { "epoch": 1.5320344250537892, "grad_norm": 0.35051071643829346, "learning_rate": 5.683394988323781e-06, "loss": 0.3049, "step": 4272 }, { "epoch": 1.532393019364093, "grad_norm": 0.330775648355484, "learning_rate": 5.681327631109033e-06, "loss": 0.319, "step": 4273 }, { "epoch": 1.5327516136743964, "grad_norm": 0.33456090092658997, "learning_rate": 5.679260155204891e-06, "loss": 0.3217, "step": 4274 }, { "epoch": 1.5331102079846999, "grad_norm": 0.32017871737480164, "learning_rate": 5.677192560971517e-06, "loss": 0.2952, "step": 4275 }, { "epoch": 1.5334688022950036, "grad_norm": 0.3960675597190857, "learning_rate": 5.675124848769094e-06, "loss": 0.3428, "step": 4276 }, { "epoch": 1.5338273966053073, "grad_norm": 0.3262293040752411, "learning_rate": 5.6730570189578194e-06, "loss": 0.3325, "step": 4277 }, { "epoch": 1.5341859909156108, "grad_norm": 0.3418356478214264, "learning_rate": 5.670989071897919e-06, "loss": 0.312, "step": 4278 }, { "epoch": 1.5345445852259143, "grad_norm": 0.35295575857162476, "learning_rate": 5.668921007949635e-06, "loss": 0.3169, "step": 4279 }, { "epoch": 1.534903179536218, "grad_norm": 0.37212806940078735, "learning_rate": 5.666852827473228e-06, "loss": 0.3371, "step": 4280 }, { "epoch": 1.5352617738465217, "grad_norm": 0.3483004570007324, "learning_rate": 5.664784530828987e-06, "loss": 0.335, "step": 4281 }, { "epoch": 1.5356203681568252, "grad_norm": 0.3168235421180725, "learning_rate": 5.662716118377212e-06, "loss": 0.2944, "step": 4282 }, { "epoch": 1.5359789624671287, "grad_norm": 0.3885766267776489, "learning_rate": 5.660647590478225e-06, "loss": 0.3205, "step": 4283 }, { "epoch": 1.5363375567774324, "grad_norm": 0.34757566452026367, "learning_rate": 5.658578947492375e-06, "loss": 0.3114, "step": 4284 }, { "epoch": 1.5366961510877362, "grad_norm": 0.3315841257572174, "learning_rate": 5.656510189780021e-06, "loss": 0.3168, "step": 4285 }, { "epoch": 1.5370547453980397, "grad_norm": 0.3315229117870331, "learning_rate": 5.654441317701551e-06, "loss": 0.3305, "step": 4286 }, { "epoch": 1.5374133397083432, "grad_norm": 0.3382154405117035, "learning_rate": 5.6523723316173676e-06, "loss": 0.3124, "step": 4287 }, { "epoch": 1.5377719340186469, "grad_norm": 0.3247446119785309, "learning_rate": 5.650303231887893e-06, "loss": 0.3049, "step": 4288 }, { "epoch": 1.5381305283289506, "grad_norm": 0.35700199007987976, "learning_rate": 5.648234018873573e-06, "loss": 0.336, "step": 4289 }, { "epoch": 1.538489122639254, "grad_norm": 0.34671878814697266, "learning_rate": 5.646164692934872e-06, "loss": 0.3222, "step": 4290 }, { "epoch": 1.5388477169495576, "grad_norm": 0.334737092256546, "learning_rate": 5.644095254432268e-06, "loss": 0.3117, "step": 4291 }, { "epoch": 1.5392063112598613, "grad_norm": 0.36445897817611694, "learning_rate": 5.642025703726267e-06, "loss": 0.3286, "step": 4292 }, { "epoch": 1.539564905570165, "grad_norm": 0.31186220049858093, "learning_rate": 5.6399560411773936e-06, "loss": 0.3177, "step": 4293 }, { "epoch": 1.5399234998804685, "grad_norm": 0.36387941241264343, "learning_rate": 5.6378862671461844e-06, "loss": 0.3317, "step": 4294 }, { "epoch": 1.5402820941907722, "grad_norm": 0.34120455384254456, "learning_rate": 5.635816381993204e-06, "loss": 0.3037, "step": 4295 }, { "epoch": 1.540640688501076, "grad_norm": 0.3228357434272766, "learning_rate": 5.633746386079032e-06, "loss": 0.3147, "step": 4296 }, { "epoch": 1.5409992828113794, "grad_norm": 0.3431609272956848, "learning_rate": 5.631676279764266e-06, "loss": 0.338, "step": 4297 }, { "epoch": 1.541357877121683, "grad_norm": 0.3350958228111267, "learning_rate": 5.6296060634095285e-06, "loss": 0.3572, "step": 4298 }, { "epoch": 1.5417164714319866, "grad_norm": 0.32823964953422546, "learning_rate": 5.627535737375456e-06, "loss": 0.3223, "step": 4299 }, { "epoch": 1.5420750657422904, "grad_norm": 0.3212375044822693, "learning_rate": 5.625465302022704e-06, "loss": 0.309, "step": 4300 }, { "epoch": 1.5424336600525939, "grad_norm": 0.33512237668037415, "learning_rate": 5.623394757711953e-06, "loss": 0.3281, "step": 4301 }, { "epoch": 1.5427922543628974, "grad_norm": 0.3133206069469452, "learning_rate": 5.621324104803894e-06, "loss": 0.3035, "step": 4302 }, { "epoch": 1.543150848673201, "grad_norm": 0.3150657117366791, "learning_rate": 5.619253343659242e-06, "loss": 0.3224, "step": 4303 }, { "epoch": 1.5435094429835048, "grad_norm": 0.3562255799770355, "learning_rate": 5.617182474638733e-06, "loss": 0.3366, "step": 4304 }, { "epoch": 1.5438680372938083, "grad_norm": 0.3235214948654175, "learning_rate": 5.615111498103116e-06, "loss": 0.3151, "step": 4305 }, { "epoch": 1.5442266316041118, "grad_norm": 0.3342101573944092, "learning_rate": 5.613040414413162e-06, "loss": 0.3116, "step": 4306 }, { "epoch": 1.5445852259144155, "grad_norm": 0.36874720454216003, "learning_rate": 5.610969223929662e-06, "loss": 0.33, "step": 4307 }, { "epoch": 1.5449438202247192, "grad_norm": 0.3567386269569397, "learning_rate": 5.60889792701342e-06, "loss": 0.3289, "step": 4308 }, { "epoch": 1.5453024145350227, "grad_norm": 0.3375410735607147, "learning_rate": 5.6068265240252665e-06, "loss": 0.3422, "step": 4309 }, { "epoch": 1.5456610088453262, "grad_norm": 0.32652944326400757, "learning_rate": 5.604755015326043e-06, "loss": 0.3236, "step": 4310 }, { "epoch": 1.54601960315563, "grad_norm": 0.37577250599861145, "learning_rate": 5.6026834012766155e-06, "loss": 0.3164, "step": 4311 }, { "epoch": 1.5463781974659336, "grad_norm": 0.31693199276924133, "learning_rate": 5.600611682237864e-06, "loss": 0.2935, "step": 4312 }, { "epoch": 1.5467367917762371, "grad_norm": 0.3712199032306671, "learning_rate": 5.59853985857069e-06, "loss": 0.3602, "step": 4313 }, { "epoch": 1.5470953860865406, "grad_norm": 0.3677234649658203, "learning_rate": 5.596467930636009e-06, "loss": 0.3499, "step": 4314 }, { "epoch": 1.5474539803968443, "grad_norm": 0.3227739632129669, "learning_rate": 5.594395898794759e-06, "loss": 0.3013, "step": 4315 }, { "epoch": 1.547812574707148, "grad_norm": 0.32911887764930725, "learning_rate": 5.592323763407895e-06, "loss": 0.3477, "step": 4316 }, { "epoch": 1.5481711690174516, "grad_norm": 0.33791792392730713, "learning_rate": 5.590251524836388e-06, "loss": 0.3208, "step": 4317 }, { "epoch": 1.548529763327755, "grad_norm": 0.36336222290992737, "learning_rate": 5.588179183441229e-06, "loss": 0.3113, "step": 4318 }, { "epoch": 1.5488883576380588, "grad_norm": 0.3707115650177002, "learning_rate": 5.5861067395834276e-06, "loss": 0.3446, "step": 4319 }, { "epoch": 1.5492469519483625, "grad_norm": 0.325323224067688, "learning_rate": 5.584034193624006e-06, "loss": 0.32, "step": 4320 }, { "epoch": 1.549605546258666, "grad_norm": 0.318177193403244, "learning_rate": 5.581961545924013e-06, "loss": 0.3043, "step": 4321 }, { "epoch": 1.5499641405689695, "grad_norm": 0.34990131855010986, "learning_rate": 5.579888796844507e-06, "loss": 0.3405, "step": 4322 }, { "epoch": 1.5503227348792734, "grad_norm": 0.3329474925994873, "learning_rate": 5.577815946746568e-06, "loss": 0.3272, "step": 4323 }, { "epoch": 1.550681329189577, "grad_norm": 0.3085208237171173, "learning_rate": 5.575742995991295e-06, "loss": 0.292, "step": 4324 }, { "epoch": 1.5510399234998804, "grad_norm": 0.35032036900520325, "learning_rate": 5.573669944939802e-06, "loss": 0.368, "step": 4325 }, { "epoch": 1.5513985178101841, "grad_norm": 0.2826087176799774, "learning_rate": 5.571596793953216e-06, "loss": 0.2815, "step": 4326 }, { "epoch": 1.5517571121204878, "grad_norm": 0.32471272349357605, "learning_rate": 5.569523543392692e-06, "loss": 0.3151, "step": 4327 }, { "epoch": 1.5521157064307913, "grad_norm": 0.31104615330696106, "learning_rate": 5.567450193619396e-06, "loss": 0.3058, "step": 4328 }, { "epoch": 1.5524743007410948, "grad_norm": 0.34025540947914124, "learning_rate": 5.565376744994509e-06, "loss": 0.3449, "step": 4329 }, { "epoch": 1.5528328950513985, "grad_norm": 0.32722511887550354, "learning_rate": 5.563303197879235e-06, "loss": 0.3331, "step": 4330 }, { "epoch": 1.5531914893617023, "grad_norm": 0.308909147977829, "learning_rate": 5.56122955263479e-06, "loss": 0.2751, "step": 4331 }, { "epoch": 1.5535500836720058, "grad_norm": 0.34149909019470215, "learning_rate": 5.559155809622412e-06, "loss": 0.3203, "step": 4332 }, { "epoch": 1.5539086779823092, "grad_norm": 0.3280397951602936, "learning_rate": 5.557081969203353e-06, "loss": 0.3487, "step": 4333 }, { "epoch": 1.554267272292613, "grad_norm": 0.30969908833503723, "learning_rate": 5.5550080317388814e-06, "loss": 0.3482, "step": 4334 }, { "epoch": 1.5546258666029167, "grad_norm": 0.34491997957229614, "learning_rate": 5.552933997590284e-06, "loss": 0.3355, "step": 4335 }, { "epoch": 1.5549844609132202, "grad_norm": 0.38148170709609985, "learning_rate": 5.550859867118864e-06, "loss": 0.362, "step": 4336 }, { "epoch": 1.5553430552235237, "grad_norm": 0.33532658219337463, "learning_rate": 5.54878564068594e-06, "loss": 0.3084, "step": 4337 }, { "epoch": 1.5557016495338274, "grad_norm": 0.3365311920642853, "learning_rate": 5.546711318652851e-06, "loss": 0.3121, "step": 4338 }, { "epoch": 1.556060243844131, "grad_norm": 0.3776874244213104, "learning_rate": 5.54463690138095e-06, "loss": 0.325, "step": 4339 }, { "epoch": 1.5564188381544346, "grad_norm": 0.34817755222320557, "learning_rate": 5.542562389231605e-06, "loss": 0.3354, "step": 4340 }, { "epoch": 1.556777432464738, "grad_norm": 0.3365228474140167, "learning_rate": 5.540487782566204e-06, "loss": 0.3302, "step": 4341 }, { "epoch": 1.5571360267750418, "grad_norm": 0.3134651184082031, "learning_rate": 5.538413081746151e-06, "loss": 0.3032, "step": 4342 }, { "epoch": 1.5574946210853455, "grad_norm": 0.390245258808136, "learning_rate": 5.536338287132863e-06, "loss": 0.3529, "step": 4343 }, { "epoch": 1.557853215395649, "grad_norm": 0.3625478744506836, "learning_rate": 5.534263399087776e-06, "loss": 0.296, "step": 4344 }, { "epoch": 1.5582118097059525, "grad_norm": 0.41351771354675293, "learning_rate": 5.532188417972345e-06, "loss": 0.3638, "step": 4345 }, { "epoch": 1.5585704040162562, "grad_norm": 0.3384473919868469, "learning_rate": 5.530113344148033e-06, "loss": 0.3053, "step": 4346 }, { "epoch": 1.55892899832656, "grad_norm": 0.38422641158103943, "learning_rate": 5.52803817797633e-06, "loss": 0.3034, "step": 4347 }, { "epoch": 1.5592875926368635, "grad_norm": 0.3845027685165405, "learning_rate": 5.525962919818732e-06, "loss": 0.3277, "step": 4348 }, { "epoch": 1.559646186947167, "grad_norm": 0.38037654757499695, "learning_rate": 5.523887570036758e-06, "loss": 0.3573, "step": 4349 }, { "epoch": 1.5600047812574707, "grad_norm": 0.30220478773117065, "learning_rate": 5.521812128991938e-06, "loss": 0.3118, "step": 4350 }, { "epoch": 1.5603633755677744, "grad_norm": 0.3346523940563202, "learning_rate": 5.5197365970458215e-06, "loss": 0.3254, "step": 4351 }, { "epoch": 1.5607219698780779, "grad_norm": 0.3840430974960327, "learning_rate": 5.517660974559974e-06, "loss": 0.3259, "step": 4352 }, { "epoch": 1.5610805641883816, "grad_norm": 0.37250831723213196, "learning_rate": 5.5155852618959745e-06, "loss": 0.3484, "step": 4353 }, { "epoch": 1.5614391584986853, "grad_norm": 0.3118446171283722, "learning_rate": 5.513509459415417e-06, "loss": 0.3062, "step": 4354 }, { "epoch": 1.5617977528089888, "grad_norm": 0.3570483326911926, "learning_rate": 5.511433567479914e-06, "loss": 0.3276, "step": 4355 }, { "epoch": 1.5621563471192923, "grad_norm": 0.371726393699646, "learning_rate": 5.5093575864510936e-06, "loss": 0.3547, "step": 4356 }, { "epoch": 1.562514941429596, "grad_norm": 0.34316548705101013, "learning_rate": 5.507281516690595e-06, "loss": 0.3124, "step": 4357 }, { "epoch": 1.5628735357398997, "grad_norm": 0.31448718905448914, "learning_rate": 5.505205358560081e-06, "loss": 0.2827, "step": 4358 }, { "epoch": 1.5632321300502032, "grad_norm": 0.343770295381546, "learning_rate": 5.503129112421221e-06, "loss": 0.3564, "step": 4359 }, { "epoch": 1.5635907243605067, "grad_norm": 0.3759769797325134, "learning_rate": 5.501052778635703e-06, "loss": 0.3293, "step": 4360 }, { "epoch": 1.5639493186708104, "grad_norm": 0.3221164643764496, "learning_rate": 5.498976357565233e-06, "loss": 0.3173, "step": 4361 }, { "epoch": 1.5643079129811142, "grad_norm": 0.3132743239402771, "learning_rate": 5.4968998495715275e-06, "loss": 0.3331, "step": 4362 }, { "epoch": 1.5646665072914177, "grad_norm": 0.3503887355327606, "learning_rate": 5.494823255016323e-06, "loss": 0.3032, "step": 4363 }, { "epoch": 1.5650251016017211, "grad_norm": 0.38504594564437866, "learning_rate": 5.4927465742613695e-06, "loss": 0.3307, "step": 4364 }, { "epoch": 1.5653836959120249, "grad_norm": 0.2822467088699341, "learning_rate": 5.4906698076684285e-06, "loss": 0.2683, "step": 4365 }, { "epoch": 1.5657422902223286, "grad_norm": 0.3588356077671051, "learning_rate": 5.48859295559928e-06, "loss": 0.3474, "step": 4366 }, { "epoch": 1.566100884532632, "grad_norm": 0.3223552107810974, "learning_rate": 5.486516018415718e-06, "loss": 0.2858, "step": 4367 }, { "epoch": 1.5664594788429356, "grad_norm": 0.3476199209690094, "learning_rate": 5.484438996479553e-06, "loss": 0.3206, "step": 4368 }, { "epoch": 1.5668180731532393, "grad_norm": 0.3482986390590668, "learning_rate": 5.482361890152609e-06, "loss": 0.3283, "step": 4369 }, { "epoch": 1.567176667463543, "grad_norm": 0.31736990809440613, "learning_rate": 5.480284699796722e-06, "loss": 0.297, "step": 4370 }, { "epoch": 1.5675352617738465, "grad_norm": 0.3531707227230072, "learning_rate": 5.478207425773745e-06, "loss": 0.3147, "step": 4371 }, { "epoch": 1.56789385608415, "grad_norm": 0.3499375283718109, "learning_rate": 5.476130068445549e-06, "loss": 0.3205, "step": 4372 }, { "epoch": 1.5682524503944537, "grad_norm": 0.3605358898639679, "learning_rate": 5.4740526281740106e-06, "loss": 0.3348, "step": 4373 }, { "epoch": 1.5686110447047574, "grad_norm": 0.3146045506000519, "learning_rate": 5.47197510532103e-06, "loss": 0.3033, "step": 4374 }, { "epoch": 1.568969639015061, "grad_norm": 0.33127573132514954, "learning_rate": 5.46989750024852e-06, "loss": 0.3125, "step": 4375 }, { "epoch": 1.5693282333253644, "grad_norm": 0.3271101713180542, "learning_rate": 5.467819813318402e-06, "loss": 0.3209, "step": 4376 }, { "epoch": 1.5696868276356681, "grad_norm": 0.33609965443611145, "learning_rate": 5.465742044892618e-06, "loss": 0.329, "step": 4377 }, { "epoch": 1.5700454219459719, "grad_norm": 0.37008893489837646, "learning_rate": 5.463664195333121e-06, "loss": 0.3499, "step": 4378 }, { "epoch": 1.5704040162562753, "grad_norm": 0.3309059143066406, "learning_rate": 5.461586265001876e-06, "loss": 0.3035, "step": 4379 }, { "epoch": 1.570762610566579, "grad_norm": 0.34610891342163086, "learning_rate": 5.459508254260869e-06, "loss": 0.3643, "step": 4380 }, { "epoch": 1.5711212048768828, "grad_norm": 0.31988510489463806, "learning_rate": 5.457430163472095e-06, "loss": 0.291, "step": 4381 }, { "epoch": 1.5714797991871863, "grad_norm": 0.34993964433670044, "learning_rate": 5.455351992997564e-06, "loss": 0.3322, "step": 4382 }, { "epoch": 1.5718383934974898, "grad_norm": 0.32947108149528503, "learning_rate": 5.453273743199298e-06, "loss": 0.306, "step": 4383 }, { "epoch": 1.5721969878077935, "grad_norm": 0.34236928820610046, "learning_rate": 5.451195414439335e-06, "loss": 0.3437, "step": 4384 }, { "epoch": 1.5725555821180972, "grad_norm": 0.3301115334033966, "learning_rate": 5.449117007079728e-06, "loss": 0.3067, "step": 4385 }, { "epoch": 1.5729141764284007, "grad_norm": 0.35726606845855713, "learning_rate": 5.447038521482542e-06, "loss": 0.3183, "step": 4386 }, { "epoch": 1.5732727707387042, "grad_norm": 0.3451405465602875, "learning_rate": 5.444959958009854e-06, "loss": 0.3292, "step": 4387 }, { "epoch": 1.573631365049008, "grad_norm": 0.34323954582214355, "learning_rate": 5.442881317023756e-06, "loss": 0.329, "step": 4388 }, { "epoch": 1.5739899593593116, "grad_norm": 0.3930697739124298, "learning_rate": 5.440802598886356e-06, "loss": 0.3343, "step": 4389 }, { "epoch": 1.5743485536696151, "grad_norm": 0.31970489025115967, "learning_rate": 5.4387238039597715e-06, "loss": 0.299, "step": 4390 }, { "epoch": 1.5747071479799186, "grad_norm": 0.358233779668808, "learning_rate": 5.436644932606135e-06, "loss": 0.3624, "step": 4391 }, { "epoch": 1.5750657422902223, "grad_norm": 0.32882723212242126, "learning_rate": 5.4345659851875934e-06, "loss": 0.3127, "step": 4392 }, { "epoch": 1.575424336600526, "grad_norm": 0.33565253019332886, "learning_rate": 5.432486962066305e-06, "loss": 0.3197, "step": 4393 }, { "epoch": 1.5757829309108295, "grad_norm": 0.31714051961898804, "learning_rate": 5.430407863604441e-06, "loss": 0.3157, "step": 4394 }, { "epoch": 1.576141525221133, "grad_norm": 0.3127841353416443, "learning_rate": 5.4283286901641904e-06, "loss": 0.2862, "step": 4395 }, { "epoch": 1.5765001195314368, "grad_norm": 0.34370604157447815, "learning_rate": 5.426249442107747e-06, "loss": 0.3315, "step": 4396 }, { "epoch": 1.5768587138417405, "grad_norm": 0.32987821102142334, "learning_rate": 5.424170119797325e-06, "loss": 0.3413, "step": 4397 }, { "epoch": 1.577217308152044, "grad_norm": 0.35080263018608093, "learning_rate": 5.42209072359515e-06, "loss": 0.3021, "step": 4398 }, { "epoch": 1.5775759024623475, "grad_norm": 0.31915825605392456, "learning_rate": 5.420011253863458e-06, "loss": 0.3217, "step": 4399 }, { "epoch": 1.5779344967726512, "grad_norm": 0.29128068685531616, "learning_rate": 5.417931710964497e-06, "loss": 0.2976, "step": 4400 }, { "epoch": 1.578293091082955, "grad_norm": 0.3364444971084595, "learning_rate": 5.4158520952605334e-06, "loss": 0.3538, "step": 4401 }, { "epoch": 1.5786516853932584, "grad_norm": 0.3358033299446106, "learning_rate": 5.41377240711384e-06, "loss": 0.3218, "step": 4402 }, { "epoch": 1.579010279703562, "grad_norm": 0.3283160626888275, "learning_rate": 5.411692646886705e-06, "loss": 0.3061, "step": 4403 }, { "epoch": 1.5793688740138656, "grad_norm": 0.3262569308280945, "learning_rate": 5.409612814941432e-06, "loss": 0.3035, "step": 4404 }, { "epoch": 1.5797274683241693, "grad_norm": 0.3182334899902344, "learning_rate": 5.407532911640332e-06, "loss": 0.3092, "step": 4405 }, { "epoch": 1.5800860626344728, "grad_norm": 0.31409308314323425, "learning_rate": 5.40545293734573e-06, "loss": 0.3117, "step": 4406 }, { "epoch": 1.5804446569447763, "grad_norm": 0.2952927052974701, "learning_rate": 5.403372892419966e-06, "loss": 0.2974, "step": 4407 }, { "epoch": 1.58080325125508, "grad_norm": 0.3239227831363678, "learning_rate": 5.401292777225388e-06, "loss": 0.3058, "step": 4408 }, { "epoch": 1.5811618455653838, "grad_norm": 0.34909936785697937, "learning_rate": 5.399212592124359e-06, "loss": 0.3161, "step": 4409 }, { "epoch": 1.5815204398756872, "grad_norm": 0.36472758650779724, "learning_rate": 5.397132337479256e-06, "loss": 0.3051, "step": 4410 }, { "epoch": 1.581879034185991, "grad_norm": 0.3179176151752472, "learning_rate": 5.395052013652463e-06, "loss": 0.3357, "step": 4411 }, { "epoch": 1.5822376284962947, "grad_norm": 0.3410685360431671, "learning_rate": 5.392971621006382e-06, "loss": 0.3008, "step": 4412 }, { "epoch": 1.5825962228065982, "grad_norm": 0.3520962595939636, "learning_rate": 5.39089115990342e-06, "loss": 0.3135, "step": 4413 }, { "epoch": 1.5829548171169017, "grad_norm": 0.3558984398841858, "learning_rate": 5.388810630706002e-06, "loss": 0.3332, "step": 4414 }, { "epoch": 1.5833134114272054, "grad_norm": 0.3311256468296051, "learning_rate": 5.386730033776564e-06, "loss": 0.3282, "step": 4415 }, { "epoch": 1.583672005737509, "grad_norm": 0.3318168520927429, "learning_rate": 5.38464936947755e-06, "loss": 0.3472, "step": 4416 }, { "epoch": 1.5840306000478126, "grad_norm": 0.36561134457588196, "learning_rate": 5.382568638171419e-06, "loss": 0.309, "step": 4417 }, { "epoch": 1.584389194358116, "grad_norm": 0.3490801155567169, "learning_rate": 5.380487840220641e-06, "loss": 0.3114, "step": 4418 }, { "epoch": 1.5847477886684198, "grad_norm": 0.3354800343513489, "learning_rate": 5.378406975987699e-06, "loss": 0.3141, "step": 4419 }, { "epoch": 1.5851063829787235, "grad_norm": 0.34874674677848816, "learning_rate": 5.376326045835082e-06, "loss": 0.3486, "step": 4420 }, { "epoch": 1.585464977289027, "grad_norm": 0.32906660437583923, "learning_rate": 5.374245050125299e-06, "loss": 0.2996, "step": 4421 }, { "epoch": 1.5858235715993305, "grad_norm": 0.3325408697128296, "learning_rate": 5.3721639892208635e-06, "loss": 0.3581, "step": 4422 }, { "epoch": 1.5861821659096342, "grad_norm": 0.3188903331756592, "learning_rate": 5.370082863484303e-06, "loss": 0.2921, "step": 4423 }, { "epoch": 1.586540760219938, "grad_norm": 0.3210955560207367, "learning_rate": 5.368001673278156e-06, "loss": 0.3283, "step": 4424 }, { "epoch": 1.5868993545302414, "grad_norm": 0.3417406976222992, "learning_rate": 5.365920418964973e-06, "loss": 0.3528, "step": 4425 }, { "epoch": 1.587257948840545, "grad_norm": 0.3136041462421417, "learning_rate": 5.363839100907313e-06, "loss": 0.2962, "step": 4426 }, { "epoch": 1.5876165431508487, "grad_norm": 0.3307868540287018, "learning_rate": 5.361757719467752e-06, "loss": 0.3509, "step": 4427 }, { "epoch": 1.5879751374611524, "grad_norm": 0.2952059805393219, "learning_rate": 5.359676275008869e-06, "loss": 0.3046, "step": 4428 }, { "epoch": 1.5883337317714559, "grad_norm": 0.3080075681209564, "learning_rate": 5.357594767893262e-06, "loss": 0.3131, "step": 4429 }, { "epoch": 1.5886923260817594, "grad_norm": 0.33023735880851746, "learning_rate": 5.355513198483532e-06, "loss": 0.3217, "step": 4430 }, { "epoch": 1.589050920392063, "grad_norm": 0.3199808895587921, "learning_rate": 5.3534315671422966e-06, "loss": 0.3074, "step": 4431 }, { "epoch": 1.5894095147023668, "grad_norm": 0.30993640422821045, "learning_rate": 5.351349874232184e-06, "loss": 0.3103, "step": 4432 }, { "epoch": 1.5897681090126703, "grad_norm": 0.3205249607563019, "learning_rate": 5.349268120115829e-06, "loss": 0.3119, "step": 4433 }, { "epoch": 1.5901267033229738, "grad_norm": 0.3446029722690582, "learning_rate": 5.347186305155881e-06, "loss": 0.322, "step": 4434 }, { "epoch": 1.5904852976332775, "grad_norm": 0.3261599838733673, "learning_rate": 5.345104429714999e-06, "loss": 0.3048, "step": 4435 }, { "epoch": 1.5908438919435812, "grad_norm": 0.3323497772216797, "learning_rate": 5.343022494155853e-06, "loss": 0.3642, "step": 4436 }, { "epoch": 1.5912024862538847, "grad_norm": 0.31365951895713806, "learning_rate": 5.340940498841118e-06, "loss": 0.3179, "step": 4437 }, { "epoch": 1.5915610805641884, "grad_norm": 0.33512383699417114, "learning_rate": 5.338858444133489e-06, "loss": 0.2892, "step": 4438 }, { "epoch": 1.5919196748744922, "grad_norm": 0.3432907164096832, "learning_rate": 5.336776330395666e-06, "loss": 0.355, "step": 4439 }, { "epoch": 1.5922782691847956, "grad_norm": 0.3534168303012848, "learning_rate": 5.334694157990357e-06, "loss": 0.3194, "step": 4440 }, { "epoch": 1.5926368634950991, "grad_norm": 0.3181435167789459, "learning_rate": 5.332611927280284e-06, "loss": 0.3315, "step": 4441 }, { "epoch": 1.5929954578054029, "grad_norm": 0.32881876826286316, "learning_rate": 5.330529638628181e-06, "loss": 0.3205, "step": 4442 }, { "epoch": 1.5933540521157066, "grad_norm": 0.3431917428970337, "learning_rate": 5.328447292396783e-06, "loss": 0.2891, "step": 4443 }, { "epoch": 1.59371264642601, "grad_norm": 0.3556362986564636, "learning_rate": 5.326364888948847e-06, "loss": 0.3153, "step": 4444 }, { "epoch": 1.5940712407363136, "grad_norm": 0.3460709750652313, "learning_rate": 5.324282428647132e-06, "loss": 0.3433, "step": 4445 }, { "epoch": 1.5944298350466173, "grad_norm": 0.28472188115119934, "learning_rate": 5.322199911854409e-06, "loss": 0.3023, "step": 4446 }, { "epoch": 1.594788429356921, "grad_norm": 0.3817041218280792, "learning_rate": 5.320117338933459e-06, "loss": 0.366, "step": 4447 }, { "epoch": 1.5951470236672245, "grad_norm": 0.34470894932746887, "learning_rate": 5.318034710247072e-06, "loss": 0.3095, "step": 4448 }, { "epoch": 1.595505617977528, "grad_norm": 0.3430670201778412, "learning_rate": 5.3159520261580486e-06, "loss": 0.3161, "step": 4449 }, { "epoch": 1.5958642122878317, "grad_norm": 0.32367461919784546, "learning_rate": 5.3138692870292e-06, "loss": 0.3243, "step": 4450 }, { "epoch": 1.5962228065981354, "grad_norm": 0.33092448115348816, "learning_rate": 5.3117864932233445e-06, "loss": 0.3105, "step": 4451 }, { "epoch": 1.596581400908439, "grad_norm": 0.3019755482673645, "learning_rate": 5.309703645103313e-06, "loss": 0.2997, "step": 4452 }, { "epoch": 1.5969399952187424, "grad_norm": 0.30574581027030945, "learning_rate": 5.307620743031943e-06, "loss": 0.3119, "step": 4453 }, { "epoch": 1.5972985895290461, "grad_norm": 0.3792893886566162, "learning_rate": 5.3055377873720815e-06, "loss": 0.3757, "step": 4454 }, { "epoch": 1.5976571838393498, "grad_norm": 0.32704901695251465, "learning_rate": 5.303454778486587e-06, "loss": 0.3044, "step": 4455 }, { "epoch": 1.5980157781496533, "grad_norm": 0.32499203085899353, "learning_rate": 5.301371716738326e-06, "loss": 0.3318, "step": 4456 }, { "epoch": 1.5983743724599568, "grad_norm": 0.31993550062179565, "learning_rate": 5.299288602490175e-06, "loss": 0.3095, "step": 4457 }, { "epoch": 1.5987329667702606, "grad_norm": 0.32063013315200806, "learning_rate": 5.297205436105017e-06, "loss": 0.3364, "step": 4458 }, { "epoch": 1.5990915610805643, "grad_norm": 0.337297648191452, "learning_rate": 5.2951222179457495e-06, "loss": 0.3419, "step": 4459 }, { "epoch": 1.5994501553908678, "grad_norm": 0.3185509145259857, "learning_rate": 5.2930389483752706e-06, "loss": 0.3227, "step": 4460 }, { "epoch": 1.5998087497011713, "grad_norm": 0.2993617653846741, "learning_rate": 5.290955627756497e-06, "loss": 0.2968, "step": 4461 }, { "epoch": 1.600167344011475, "grad_norm": 0.36879289150238037, "learning_rate": 5.288872256452349e-06, "loss": 0.3588, "step": 4462 }, { "epoch": 1.6005259383217787, "grad_norm": 0.30518919229507446, "learning_rate": 5.286788834825752e-06, "loss": 0.3025, "step": 4463 }, { "epoch": 1.6008845326320822, "grad_norm": 0.34780365228652954, "learning_rate": 5.284705363239651e-06, "loss": 0.3607, "step": 4464 }, { "epoch": 1.6012431269423857, "grad_norm": 0.31483548879623413, "learning_rate": 5.282621842056988e-06, "loss": 0.3084, "step": 4465 }, { "epoch": 1.6016017212526894, "grad_norm": 0.36118143796920776, "learning_rate": 5.280538271640721e-06, "loss": 0.2869, "step": 4466 }, { "epoch": 1.6019603155629931, "grad_norm": 0.3483530580997467, "learning_rate": 5.278454652353814e-06, "loss": 0.3345, "step": 4467 }, { "epoch": 1.6023189098732966, "grad_norm": 0.32351601123809814, "learning_rate": 5.2763709845592406e-06, "loss": 0.315, "step": 4468 }, { "epoch": 1.6026775041836003, "grad_norm": 0.34144818782806396, "learning_rate": 5.274287268619982e-06, "loss": 0.3378, "step": 4469 }, { "epoch": 1.603036098493904, "grad_norm": 0.31661853194236755, "learning_rate": 5.272203504899028e-06, "loss": 0.3244, "step": 4470 }, { "epoch": 1.6033946928042075, "grad_norm": 0.33927637338638306, "learning_rate": 5.270119693759377e-06, "loss": 0.3179, "step": 4471 }, { "epoch": 1.603753287114511, "grad_norm": 0.33999091386795044, "learning_rate": 5.268035835564034e-06, "loss": 0.3177, "step": 4472 }, { "epoch": 1.6041118814248148, "grad_norm": 0.3339446783065796, "learning_rate": 5.265951930676015e-06, "loss": 0.3284, "step": 4473 }, { "epoch": 1.6044704757351185, "grad_norm": 0.3304173946380615, "learning_rate": 5.263867979458341e-06, "loss": 0.3504, "step": 4474 }, { "epoch": 1.604829070045422, "grad_norm": 0.31435737013816833, "learning_rate": 5.261783982274047e-06, "loss": 0.3073, "step": 4475 }, { "epoch": 1.6051876643557255, "grad_norm": 0.34787192940711975, "learning_rate": 5.259699939486167e-06, "loss": 0.3151, "step": 4476 }, { "epoch": 1.6055462586660292, "grad_norm": 0.3801470100879669, "learning_rate": 5.257615851457749e-06, "loss": 0.34, "step": 4477 }, { "epoch": 1.605904852976333, "grad_norm": 0.29811692237854004, "learning_rate": 5.255531718551849e-06, "loss": 0.3121, "step": 4478 }, { "epoch": 1.6062634472866364, "grad_norm": 0.33904901146888733, "learning_rate": 5.253447541131529e-06, "loss": 0.3322, "step": 4479 }, { "epoch": 1.6066220415969399, "grad_norm": 0.33029577136039734, "learning_rate": 5.251363319559858e-06, "loss": 0.3273, "step": 4480 }, { "epoch": 1.6069806359072436, "grad_norm": 0.3415244221687317, "learning_rate": 5.249279054199915e-06, "loss": 0.2892, "step": 4481 }, { "epoch": 1.6073392302175473, "grad_norm": 0.36078232526779175, "learning_rate": 5.2471947454147855e-06, "loss": 0.3086, "step": 4482 }, { "epoch": 1.6076978245278508, "grad_norm": 0.33217522501945496, "learning_rate": 5.245110393567562e-06, "loss": 0.3473, "step": 4483 }, { "epoch": 1.6080564188381543, "grad_norm": 0.3287135064601898, "learning_rate": 5.243025999021344e-06, "loss": 0.3454, "step": 4484 }, { "epoch": 1.608415013148458, "grad_norm": 0.3476663827896118, "learning_rate": 5.240941562139242e-06, "loss": 0.3182, "step": 4485 }, { "epoch": 1.6087736074587617, "grad_norm": 0.3463369905948639, "learning_rate": 5.238857083284371e-06, "loss": 0.3128, "step": 4486 }, { "epoch": 1.6091322017690652, "grad_norm": 0.3173767924308777, "learning_rate": 5.236772562819853e-06, "loss": 0.3023, "step": 4487 }, { "epoch": 1.6094907960793687, "grad_norm": 0.3406693637371063, "learning_rate": 5.234688001108817e-06, "loss": 0.303, "step": 4488 }, { "epoch": 1.6098493903896725, "grad_norm": 0.35893508791923523, "learning_rate": 5.232603398514402e-06, "loss": 0.326, "step": 4489 }, { "epoch": 1.6102079846999762, "grad_norm": 0.3680574297904968, "learning_rate": 5.230518755399749e-06, "loss": 0.3262, "step": 4490 }, { "epoch": 1.6105665790102797, "grad_norm": 0.3635680675506592, "learning_rate": 5.2284340721280135e-06, "loss": 0.3374, "step": 4491 }, { "epoch": 1.6109251733205832, "grad_norm": 0.31679561734199524, "learning_rate": 5.226349349062353e-06, "loss": 0.319, "step": 4492 }, { "epoch": 1.6112837676308869, "grad_norm": 0.35042908787727356, "learning_rate": 5.22426458656593e-06, "loss": 0.3525, "step": 4493 }, { "epoch": 1.6116423619411906, "grad_norm": 0.3393344581127167, "learning_rate": 5.222179785001918e-06, "loss": 0.2987, "step": 4494 }, { "epoch": 1.612000956251494, "grad_norm": 0.34701865911483765, "learning_rate": 5.220094944733498e-06, "loss": 0.3386, "step": 4495 }, { "epoch": 1.6123595505617978, "grad_norm": 0.3631252944469452, "learning_rate": 5.218010066123852e-06, "loss": 0.3356, "step": 4496 }, { "epoch": 1.6127181448721015, "grad_norm": 0.31056705117225647, "learning_rate": 5.215925149536175e-06, "loss": 0.3204, "step": 4497 }, { "epoch": 1.613076739182405, "grad_norm": 0.29122963547706604, "learning_rate": 5.213840195333666e-06, "loss": 0.3136, "step": 4498 }, { "epoch": 1.6134353334927085, "grad_norm": 0.3514558970928192, "learning_rate": 5.211755203879529e-06, "loss": 0.3145, "step": 4499 }, { "epoch": 1.6137939278030122, "grad_norm": 0.3282981812953949, "learning_rate": 5.209670175536976e-06, "loss": 0.3572, "step": 4500 }, { "epoch": 1.614152522113316, "grad_norm": 0.33110788464546204, "learning_rate": 5.207585110669227e-06, "loss": 0.3289, "step": 4501 }, { "epoch": 1.6145111164236194, "grad_norm": 0.34441134333610535, "learning_rate": 5.2055000096395055e-06, "loss": 0.3456, "step": 4502 }, { "epoch": 1.614869710733923, "grad_norm": 0.35283738374710083, "learning_rate": 5.203414872811042e-06, "loss": 0.3115, "step": 4503 }, { "epoch": 1.6152283050442267, "grad_norm": 0.3367399573326111, "learning_rate": 5.201329700547077e-06, "loss": 0.3035, "step": 4504 }, { "epoch": 1.6155868993545304, "grad_norm": 0.35120993852615356, "learning_rate": 5.19924449321085e-06, "loss": 0.3389, "step": 4505 }, { "epoch": 1.6159454936648339, "grad_norm": 0.32219573855400085, "learning_rate": 5.197159251165613e-06, "loss": 0.3087, "step": 4506 }, { "epoch": 1.6163040879751374, "grad_norm": 0.34136778116226196, "learning_rate": 5.19507397477462e-06, "loss": 0.3405, "step": 4507 }, { "epoch": 1.616662682285441, "grad_norm": 0.3274731934070587, "learning_rate": 5.192988664401135e-06, "loss": 0.3359, "step": 4508 }, { "epoch": 1.6170212765957448, "grad_norm": 0.3398153483867645, "learning_rate": 5.190903320408425e-06, "loss": 0.3739, "step": 4509 }, { "epoch": 1.6173798709060483, "grad_norm": 0.3174699544906616, "learning_rate": 5.188817943159762e-06, "loss": 0.3054, "step": 4510 }, { "epoch": 1.6177384652163518, "grad_norm": 0.3073560297489166, "learning_rate": 5.186732533018425e-06, "loss": 0.3291, "step": 4511 }, { "epoch": 1.6180970595266555, "grad_norm": 0.322231650352478, "learning_rate": 5.184647090347701e-06, "loss": 0.3205, "step": 4512 }, { "epoch": 1.6184556538369592, "grad_norm": 0.3262142837047577, "learning_rate": 5.182561615510877e-06, "loss": 0.3275, "step": 4513 }, { "epoch": 1.6188142481472627, "grad_norm": 0.3222876191139221, "learning_rate": 5.180476108871253e-06, "loss": 0.3353, "step": 4514 }, { "epoch": 1.6191728424575662, "grad_norm": 0.3124052882194519, "learning_rate": 5.17839057079213e-06, "loss": 0.3386, "step": 4515 }, { "epoch": 1.61953143676787, "grad_norm": 0.33070123195648193, "learning_rate": 5.176305001636815e-06, "loss": 0.3368, "step": 4516 }, { "epoch": 1.6198900310781736, "grad_norm": 0.31087374687194824, "learning_rate": 5.174219401768619e-06, "loss": 0.3226, "step": 4517 }, { "epoch": 1.6202486253884771, "grad_norm": 0.2815220057964325, "learning_rate": 5.172133771550863e-06, "loss": 0.3061, "step": 4518 }, { "epoch": 1.6206072196987806, "grad_norm": 0.32770001888275146, "learning_rate": 5.170048111346866e-06, "loss": 0.3195, "step": 4519 }, { "epoch": 1.6209658140090843, "grad_norm": 0.3090744614601135, "learning_rate": 5.167962421519958e-06, "loss": 0.3263, "step": 4520 }, { "epoch": 1.621324408319388, "grad_norm": 0.3334694504737854, "learning_rate": 5.165876702433476e-06, "loss": 0.3064, "step": 4521 }, { "epoch": 1.6216830026296916, "grad_norm": 0.3006609082221985, "learning_rate": 5.163790954450758e-06, "loss": 0.3382, "step": 4522 }, { "epoch": 1.622041596939995, "grad_norm": 0.3253108561038971, "learning_rate": 5.1617051779351435e-06, "loss": 0.3321, "step": 4523 }, { "epoch": 1.6224001912502988, "grad_norm": 0.33212488889694214, "learning_rate": 5.159619373249984e-06, "loss": 0.3459, "step": 4524 }, { "epoch": 1.6227587855606025, "grad_norm": 0.34523317217826843, "learning_rate": 5.1575335407586335e-06, "loss": 0.3392, "step": 4525 }, { "epoch": 1.623117379870906, "grad_norm": 0.300839364528656, "learning_rate": 5.15544768082445e-06, "loss": 0.2916, "step": 4526 }, { "epoch": 1.6234759741812097, "grad_norm": 0.3097054064273834, "learning_rate": 5.153361793810797e-06, "loss": 0.3082, "step": 4527 }, { "epoch": 1.6238345684915134, "grad_norm": 0.3440721035003662, "learning_rate": 5.151275880081042e-06, "loss": 0.3276, "step": 4528 }, { "epoch": 1.624193162801817, "grad_norm": 0.3328946530818939, "learning_rate": 5.149189939998559e-06, "loss": 0.3391, "step": 4529 }, { "epoch": 1.6245517571121204, "grad_norm": 0.3119357228279114, "learning_rate": 5.147103973926724e-06, "loss": 0.2776, "step": 4530 }, { "epoch": 1.6249103514224241, "grad_norm": 0.38015010952949524, "learning_rate": 5.145017982228918e-06, "loss": 0.3246, "step": 4531 }, { "epoch": 1.6252689457327278, "grad_norm": 0.34884580969810486, "learning_rate": 5.142931965268529e-06, "loss": 0.3416, "step": 4532 }, { "epoch": 1.6256275400430313, "grad_norm": 0.32420605421066284, "learning_rate": 5.140845923408948e-06, "loss": 0.3072, "step": 4533 }, { "epoch": 1.6259861343533348, "grad_norm": 0.306251585483551, "learning_rate": 5.138759857013568e-06, "loss": 0.307, "step": 4534 }, { "epoch": 1.6263447286636386, "grad_norm": 0.35534870624542236, "learning_rate": 5.13667376644579e-06, "loss": 0.3118, "step": 4535 }, { "epoch": 1.6267033229739423, "grad_norm": 0.3534141480922699, "learning_rate": 5.134587652069015e-06, "loss": 0.3418, "step": 4536 }, { "epoch": 1.6270619172842458, "grad_norm": 0.3351789712905884, "learning_rate": 5.132501514246652e-06, "loss": 0.3215, "step": 4537 }, { "epoch": 1.6274205115945493, "grad_norm": 0.3144701421260834, "learning_rate": 5.130415353342115e-06, "loss": 0.324, "step": 4538 }, { "epoch": 1.627779105904853, "grad_norm": 0.3282017409801483, "learning_rate": 5.1283291697188175e-06, "loss": 0.3121, "step": 4539 }, { "epoch": 1.6281377002151567, "grad_norm": 0.35795822739601135, "learning_rate": 5.126242963740179e-06, "loss": 0.3161, "step": 4540 }, { "epoch": 1.6284962945254602, "grad_norm": 0.3355070650577545, "learning_rate": 5.124156735769623e-06, "loss": 0.3128, "step": 4541 }, { "epoch": 1.6288548888357637, "grad_norm": 0.34128454327583313, "learning_rate": 5.1220704861705775e-06, "loss": 0.3162, "step": 4542 }, { "epoch": 1.6292134831460674, "grad_norm": 0.34883108735084534, "learning_rate": 5.119984215306473e-06, "loss": 0.3209, "step": 4543 }, { "epoch": 1.6295720774563711, "grad_norm": 0.3442445993423462, "learning_rate": 5.117897923540746e-06, "loss": 0.3164, "step": 4544 }, { "epoch": 1.6299306717666746, "grad_norm": 0.32725802063941956, "learning_rate": 5.115811611236833e-06, "loss": 0.2868, "step": 4545 }, { "epoch": 1.630289266076978, "grad_norm": 0.3443431258201599, "learning_rate": 5.113725278758178e-06, "loss": 0.3047, "step": 4546 }, { "epoch": 1.6306478603872818, "grad_norm": 0.3168589472770691, "learning_rate": 5.111638926468226e-06, "loss": 0.3241, "step": 4547 }, { "epoch": 1.6310064546975855, "grad_norm": 0.331572562456131, "learning_rate": 5.109552554730423e-06, "loss": 0.3552, "step": 4548 }, { "epoch": 1.631365049007889, "grad_norm": 0.3275730311870575, "learning_rate": 5.107466163908226e-06, "loss": 0.3495, "step": 4549 }, { "epoch": 1.6317236433181925, "grad_norm": 0.3440254330635071, "learning_rate": 5.105379754365089e-06, "loss": 0.3293, "step": 4550 }, { "epoch": 1.6320822376284962, "grad_norm": 0.32555851340293884, "learning_rate": 5.10329332646447e-06, "loss": 0.32, "step": 4551 }, { "epoch": 1.6324408319388, "grad_norm": 0.3165387511253357, "learning_rate": 5.101206880569834e-06, "loss": 0.2735, "step": 4552 }, { "epoch": 1.6327994262491035, "grad_norm": 0.3240683376789093, "learning_rate": 5.099120417044644e-06, "loss": 0.2822, "step": 4553 }, { "epoch": 1.6331580205594072, "grad_norm": 0.3402480185031891, "learning_rate": 5.097033936252368e-06, "loss": 0.3386, "step": 4554 }, { "epoch": 1.633516614869711, "grad_norm": 0.32404637336730957, "learning_rate": 5.094947438556478e-06, "loss": 0.3344, "step": 4555 }, { "epoch": 1.6338752091800144, "grad_norm": 0.3263510465621948, "learning_rate": 5.09286092432045e-06, "loss": 0.3419, "step": 4556 }, { "epoch": 1.6342338034903179, "grad_norm": 0.3197726607322693, "learning_rate": 5.0907743939077595e-06, "loss": 0.3149, "step": 4557 }, { "epoch": 1.6345923978006216, "grad_norm": 0.30998697876930237, "learning_rate": 5.088687847681888e-06, "loss": 0.2982, "step": 4558 }, { "epoch": 1.6349509921109253, "grad_norm": 0.3419743478298187, "learning_rate": 5.086601286006317e-06, "loss": 0.3979, "step": 4559 }, { "epoch": 1.6353095864212288, "grad_norm": 0.326031893491745, "learning_rate": 5.08451470924453e-06, "loss": 0.3206, "step": 4560 }, { "epoch": 1.6356681807315323, "grad_norm": 0.35143786668777466, "learning_rate": 5.082428117760021e-06, "loss": 0.3039, "step": 4561 }, { "epoch": 1.636026775041836, "grad_norm": 0.325339138507843, "learning_rate": 5.080341511916276e-06, "loss": 0.2957, "step": 4562 }, { "epoch": 1.6363853693521397, "grad_norm": 0.3161524832248688, "learning_rate": 5.078254892076792e-06, "loss": 0.3297, "step": 4563 }, { "epoch": 1.6367439636624432, "grad_norm": 0.33357590436935425, "learning_rate": 5.07616825860506e-06, "loss": 0.3362, "step": 4564 }, { "epoch": 1.6371025579727467, "grad_norm": 0.34647220373153687, "learning_rate": 5.074081611864581e-06, "loss": 0.2974, "step": 4565 }, { "epoch": 1.6374611522830504, "grad_norm": 0.3736247420310974, "learning_rate": 5.071994952218854e-06, "loss": 0.3744, "step": 4566 }, { "epoch": 1.6378197465933542, "grad_norm": 0.3088010251522064, "learning_rate": 5.069908280031383e-06, "loss": 0.2944, "step": 4567 }, { "epoch": 1.6381783409036577, "grad_norm": 0.3134879171848297, "learning_rate": 5.067821595665672e-06, "loss": 0.2978, "step": 4568 }, { "epoch": 1.6385369352139612, "grad_norm": 0.3420461118221283, "learning_rate": 5.065734899485229e-06, "loss": 0.3214, "step": 4569 }, { "epoch": 1.6388955295242649, "grad_norm": 0.3289158046245575, "learning_rate": 5.06364819185356e-06, "loss": 0.3035, "step": 4570 }, { "epoch": 1.6392541238345686, "grad_norm": 0.3532332479953766, "learning_rate": 5.06156147313418e-06, "loss": 0.3258, "step": 4571 }, { "epoch": 1.639612718144872, "grad_norm": 0.3309069275856018, "learning_rate": 5.059474743690598e-06, "loss": 0.3475, "step": 4572 }, { "epoch": 1.6399713124551756, "grad_norm": 0.2999861240386963, "learning_rate": 5.057388003886332e-06, "loss": 0.3193, "step": 4573 }, { "epoch": 1.6403299067654793, "grad_norm": 0.3071732223033905, "learning_rate": 5.055301254084897e-06, "loss": 0.3187, "step": 4574 }, { "epoch": 1.640688501075783, "grad_norm": 0.3135095536708832, "learning_rate": 5.053214494649811e-06, "loss": 0.3175, "step": 4575 }, { "epoch": 1.6410470953860865, "grad_norm": 0.2996574938297272, "learning_rate": 5.051127725944595e-06, "loss": 0.3065, "step": 4576 }, { "epoch": 1.64140568969639, "grad_norm": 0.3193032741546631, "learning_rate": 5.04904094833277e-06, "loss": 0.3154, "step": 4577 }, { "epoch": 1.6417642840066937, "grad_norm": 0.32258471846580505, "learning_rate": 5.046954162177859e-06, "loss": 0.3181, "step": 4578 }, { "epoch": 1.6421228783169974, "grad_norm": 0.3198044002056122, "learning_rate": 5.044867367843387e-06, "loss": 0.3455, "step": 4579 }, { "epoch": 1.642481472627301, "grad_norm": 0.33508116006851196, "learning_rate": 5.042780565692879e-06, "loss": 0.3251, "step": 4580 }, { "epoch": 1.6428400669376044, "grad_norm": 0.3160111606121063, "learning_rate": 5.040693756089865e-06, "loss": 0.3003, "step": 4581 }, { "epoch": 1.6431986612479084, "grad_norm": 0.3528611361980438, "learning_rate": 5.03860693939787e-06, "loss": 0.3502, "step": 4582 }, { "epoch": 1.6435572555582119, "grad_norm": 0.3164597153663635, "learning_rate": 5.036520115980428e-06, "loss": 0.3058, "step": 4583 }, { "epoch": 1.6439158498685154, "grad_norm": 0.32344767451286316, "learning_rate": 5.034433286201067e-06, "loss": 0.2974, "step": 4584 }, { "epoch": 1.644274444178819, "grad_norm": 0.36217811703681946, "learning_rate": 5.03234645042332e-06, "loss": 0.3156, "step": 4585 }, { "epoch": 1.6446330384891228, "grad_norm": 0.3587016761302948, "learning_rate": 5.030259609010723e-06, "loss": 0.3774, "step": 4586 }, { "epoch": 1.6449916327994263, "grad_norm": 0.2951982617378235, "learning_rate": 5.028172762326806e-06, "loss": 0.2712, "step": 4587 }, { "epoch": 1.6453502271097298, "grad_norm": 0.3421509861946106, "learning_rate": 5.026085910735107e-06, "loss": 0.3538, "step": 4588 }, { "epoch": 1.6457088214200335, "grad_norm": 0.3178785443305969, "learning_rate": 5.0239990545991605e-06, "loss": 0.3218, "step": 4589 }, { "epoch": 1.6460674157303372, "grad_norm": 0.34830567240715027, "learning_rate": 5.021912194282504e-06, "loss": 0.3589, "step": 4590 }, { "epoch": 1.6464260100406407, "grad_norm": 0.3156374990940094, "learning_rate": 5.0198253301486754e-06, "loss": 0.3125, "step": 4591 }, { "epoch": 1.6467846043509442, "grad_norm": 0.3294989764690399, "learning_rate": 5.017738462561214e-06, "loss": 0.3419, "step": 4592 }, { "epoch": 1.647143198661248, "grad_norm": 0.3499855399131775, "learning_rate": 5.015651591883656e-06, "loss": 0.3535, "step": 4593 }, { "epoch": 1.6475017929715516, "grad_norm": 0.3551419675350189, "learning_rate": 5.013564718479541e-06, "loss": 0.2973, "step": 4594 }, { "epoch": 1.6478603872818551, "grad_norm": 0.34870457649230957, "learning_rate": 5.01147784271241e-06, "loss": 0.3715, "step": 4595 }, { "epoch": 1.6482189815921586, "grad_norm": 0.31017830967903137, "learning_rate": 5.009390964945805e-06, "loss": 0.2693, "step": 4596 }, { "epoch": 1.6485775759024623, "grad_norm": 0.4017471671104431, "learning_rate": 5.007304085543263e-06, "loss": 0.3487, "step": 4597 }, { "epoch": 1.648936170212766, "grad_norm": 0.32845139503479004, "learning_rate": 5.0052172048683275e-06, "loss": 0.3005, "step": 4598 }, { "epoch": 1.6492947645230696, "grad_norm": 0.3025859594345093, "learning_rate": 5.003130323284538e-06, "loss": 0.297, "step": 4599 }, { "epoch": 1.649653358833373, "grad_norm": 0.3090691864490509, "learning_rate": 5.001043441155437e-06, "loss": 0.3104, "step": 4600 }, { "epoch": 1.6500119531436768, "grad_norm": 0.34744587540626526, "learning_rate": 4.998956558844564e-06, "loss": 0.3288, "step": 4601 }, { "epoch": 1.6503705474539805, "grad_norm": 0.3666223883628845, "learning_rate": 4.9968696767154626e-06, "loss": 0.3101, "step": 4602 }, { "epoch": 1.650729141764284, "grad_norm": 0.33447229862213135, "learning_rate": 4.994782795131674e-06, "loss": 0.3247, "step": 4603 }, { "epoch": 1.6510877360745875, "grad_norm": 0.35039278864860535, "learning_rate": 4.992695914456739e-06, "loss": 0.3336, "step": 4604 }, { "epoch": 1.6514463303848912, "grad_norm": 0.34102579951286316, "learning_rate": 4.990609035054198e-06, "loss": 0.3256, "step": 4605 }, { "epoch": 1.651804924695195, "grad_norm": 0.3341858685016632, "learning_rate": 4.988522157287591e-06, "loss": 0.2999, "step": 4606 }, { "epoch": 1.6521635190054984, "grad_norm": 0.35023587942123413, "learning_rate": 4.98643528152046e-06, "loss": 0.3294, "step": 4607 }, { "epoch": 1.652522113315802, "grad_norm": 0.3756418228149414, "learning_rate": 4.984348408116346e-06, "loss": 0.3193, "step": 4608 }, { "epoch": 1.6528807076261056, "grad_norm": 0.31708425283432007, "learning_rate": 4.982261537438789e-06, "loss": 0.3076, "step": 4609 }, { "epoch": 1.6532393019364093, "grad_norm": 0.3576224446296692, "learning_rate": 4.980174669851326e-06, "loss": 0.334, "step": 4610 }, { "epoch": 1.6535978962467128, "grad_norm": 0.37730520963668823, "learning_rate": 4.978087805717498e-06, "loss": 0.3379, "step": 4611 }, { "epoch": 1.6539564905570165, "grad_norm": 0.3654917776584625, "learning_rate": 4.97600094540084e-06, "loss": 0.3282, "step": 4612 }, { "epoch": 1.6543150848673203, "grad_norm": 0.3236863613128662, "learning_rate": 4.973914089264893e-06, "loss": 0.3242, "step": 4613 }, { "epoch": 1.6546736791776238, "grad_norm": 0.34508609771728516, "learning_rate": 4.9718272376731955e-06, "loss": 0.3176, "step": 4614 }, { "epoch": 1.6550322734879273, "grad_norm": 0.3256690204143524, "learning_rate": 4.969740390989279e-06, "loss": 0.3054, "step": 4615 }, { "epoch": 1.655390867798231, "grad_norm": 0.3474135994911194, "learning_rate": 4.967653549576681e-06, "loss": 0.3259, "step": 4616 }, { "epoch": 1.6557494621085347, "grad_norm": 0.3282634913921356, "learning_rate": 4.965566713798935e-06, "loss": 0.317, "step": 4617 }, { "epoch": 1.6561080564188382, "grad_norm": 0.3520298898220062, "learning_rate": 4.9634798840195735e-06, "loss": 0.3203, "step": 4618 }, { "epoch": 1.6564666507291417, "grad_norm": 0.3159423768520355, "learning_rate": 4.96139306060213e-06, "loss": 0.3196, "step": 4619 }, { "epoch": 1.6568252450394454, "grad_norm": 0.34073716402053833, "learning_rate": 4.959306243910137e-06, "loss": 0.3321, "step": 4620 }, { "epoch": 1.657183839349749, "grad_norm": 0.3532961905002594, "learning_rate": 4.957219434307123e-06, "loss": 0.3056, "step": 4621 }, { "epoch": 1.6575424336600526, "grad_norm": 0.3298320472240448, "learning_rate": 4.955132632156616e-06, "loss": 0.3203, "step": 4622 }, { "epoch": 1.657901027970356, "grad_norm": 0.3392408490180969, "learning_rate": 4.953045837822144e-06, "loss": 0.3332, "step": 4623 }, { "epoch": 1.6582596222806598, "grad_norm": 0.31108200550079346, "learning_rate": 4.950959051667232e-06, "loss": 0.3291, "step": 4624 }, { "epoch": 1.6586182165909635, "grad_norm": 0.30391910672187805, "learning_rate": 4.9488722740554055e-06, "loss": 0.3154, "step": 4625 }, { "epoch": 1.658976810901267, "grad_norm": 0.3241225779056549, "learning_rate": 4.94678550535019e-06, "loss": 0.3253, "step": 4626 }, { "epoch": 1.6593354052115705, "grad_norm": 0.33624356985092163, "learning_rate": 4.944698745915105e-06, "loss": 0.3196, "step": 4627 }, { "epoch": 1.6596939995218742, "grad_norm": 0.36890703439712524, "learning_rate": 4.942611996113671e-06, "loss": 0.3313, "step": 4628 }, { "epoch": 1.660052593832178, "grad_norm": 0.34487485885620117, "learning_rate": 4.940525256309403e-06, "loss": 0.3383, "step": 4629 }, { "epoch": 1.6604111881424815, "grad_norm": 0.3158728778362274, "learning_rate": 4.938438526865822e-06, "loss": 0.3202, "step": 4630 }, { "epoch": 1.660769782452785, "grad_norm": 0.3024800419807434, "learning_rate": 4.9363518081464405e-06, "loss": 0.283, "step": 4631 }, { "epoch": 1.6611283767630887, "grad_norm": 0.373075395822525, "learning_rate": 4.934265100514773e-06, "loss": 0.3435, "step": 4632 }, { "epoch": 1.6614869710733924, "grad_norm": 0.38756969571113586, "learning_rate": 4.93217840433433e-06, "loss": 0.3147, "step": 4633 }, { "epoch": 1.6618455653836959, "grad_norm": 0.32465317845344543, "learning_rate": 4.930091719968619e-06, "loss": 0.3261, "step": 4634 }, { "epoch": 1.6622041596939994, "grad_norm": 0.3426406979560852, "learning_rate": 4.928005047781147e-06, "loss": 0.2957, "step": 4635 }, { "epoch": 1.662562754004303, "grad_norm": 0.39344748854637146, "learning_rate": 4.925918388135421e-06, "loss": 0.341, "step": 4636 }, { "epoch": 1.6629213483146068, "grad_norm": 0.3236566185951233, "learning_rate": 4.923831741394941e-06, "loss": 0.3324, "step": 4637 }, { "epoch": 1.6632799426249103, "grad_norm": 0.34224820137023926, "learning_rate": 4.92174510792321e-06, "loss": 0.363, "step": 4638 }, { "epoch": 1.6636385369352138, "grad_norm": 0.32397907972335815, "learning_rate": 4.919658488083725e-06, "loss": 0.3046, "step": 4639 }, { "epoch": 1.6639971312455177, "grad_norm": 0.2909294366836548, "learning_rate": 4.917571882239981e-06, "loss": 0.3086, "step": 4640 }, { "epoch": 1.6643557255558212, "grad_norm": 0.3620684742927551, "learning_rate": 4.91548529075547e-06, "loss": 0.3412, "step": 4641 }, { "epoch": 1.6647143198661247, "grad_norm": 0.31328603625297546, "learning_rate": 4.913398713993686e-06, "loss": 0.3075, "step": 4642 }, { "epoch": 1.6650729141764284, "grad_norm": 0.3608044981956482, "learning_rate": 4.911312152318114e-06, "loss": 0.3512, "step": 4643 }, { "epoch": 1.6654315084867322, "grad_norm": 0.35252121090888977, "learning_rate": 4.909225606092242e-06, "loss": 0.3093, "step": 4644 }, { "epoch": 1.6657901027970357, "grad_norm": 0.31398382782936096, "learning_rate": 4.907139075679552e-06, "loss": 0.3021, "step": 4645 }, { "epoch": 1.6661486971073391, "grad_norm": 0.3175313472747803, "learning_rate": 4.905052561443524e-06, "loss": 0.3149, "step": 4646 }, { "epoch": 1.6665072914176429, "grad_norm": 0.3297487795352936, "learning_rate": 4.902966063747634e-06, "loss": 0.3225, "step": 4647 }, { "epoch": 1.6668658857279466, "grad_norm": 0.32704928517341614, "learning_rate": 4.900879582955358e-06, "loss": 0.3283, "step": 4648 }, { "epoch": 1.66722448003825, "grad_norm": 0.3155619502067566, "learning_rate": 4.898793119430167e-06, "loss": 0.3115, "step": 4649 }, { "epoch": 1.6675830743485536, "grad_norm": 0.34134167432785034, "learning_rate": 4.8967066735355315e-06, "loss": 0.3366, "step": 4650 }, { "epoch": 1.6679416686588573, "grad_norm": 0.3094240128993988, "learning_rate": 4.894620245634913e-06, "loss": 0.3212, "step": 4651 }, { "epoch": 1.668300262969161, "grad_norm": 0.32103216648101807, "learning_rate": 4.892533836091775e-06, "loss": 0.3298, "step": 4652 }, { "epoch": 1.6686588572794645, "grad_norm": 0.32497337460517883, "learning_rate": 4.890447445269577e-06, "loss": 0.3095, "step": 4653 }, { "epoch": 1.669017451589768, "grad_norm": 0.33413949608802795, "learning_rate": 4.888361073531776e-06, "loss": 0.3424, "step": 4654 }, { "epoch": 1.6693760459000717, "grad_norm": 0.3539927005767822, "learning_rate": 4.886274721241824e-06, "loss": 0.3283, "step": 4655 }, { "epoch": 1.6697346402103754, "grad_norm": 0.355091392993927, "learning_rate": 4.884188388763169e-06, "loss": 0.3131, "step": 4656 }, { "epoch": 1.670093234520679, "grad_norm": 0.3331652581691742, "learning_rate": 4.882102076459257e-06, "loss": 0.3176, "step": 4657 }, { "epoch": 1.6704518288309824, "grad_norm": 0.3081337511539459, "learning_rate": 4.880015784693527e-06, "loss": 0.3062, "step": 4658 }, { "epoch": 1.6708104231412861, "grad_norm": 0.31681716442108154, "learning_rate": 4.877929513829424e-06, "loss": 0.3341, "step": 4659 }, { "epoch": 1.6711690174515899, "grad_norm": 0.3416052758693695, "learning_rate": 4.875843264230378e-06, "loss": 0.3422, "step": 4660 }, { "epoch": 1.6715276117618934, "grad_norm": 0.3373035788536072, "learning_rate": 4.873757036259823e-06, "loss": 0.3444, "step": 4661 }, { "epoch": 1.6718862060721968, "grad_norm": 0.32776910066604614, "learning_rate": 4.871670830281185e-06, "loss": 0.3342, "step": 4662 }, { "epoch": 1.6722448003825006, "grad_norm": 0.30977663397789, "learning_rate": 4.8695846466578875e-06, "loss": 0.2893, "step": 4663 }, { "epoch": 1.6726033946928043, "grad_norm": 0.3503076732158661, "learning_rate": 4.8674984857533484e-06, "loss": 0.3329, "step": 4664 }, { "epoch": 1.6729619890031078, "grad_norm": 0.31672126054763794, "learning_rate": 4.865412347930987e-06, "loss": 0.3189, "step": 4665 }, { "epoch": 1.6733205833134113, "grad_norm": 0.32722291350364685, "learning_rate": 4.863326233554212e-06, "loss": 0.3461, "step": 4666 }, { "epoch": 1.673679177623715, "grad_norm": 0.3139183819293976, "learning_rate": 4.861240142986434e-06, "loss": 0.2915, "step": 4667 }, { "epoch": 1.6740377719340187, "grad_norm": 0.350053608417511, "learning_rate": 4.859154076591054e-06, "loss": 0.3222, "step": 4668 }, { "epoch": 1.6743963662443222, "grad_norm": 0.328975111246109, "learning_rate": 4.857068034731471e-06, "loss": 0.3134, "step": 4669 }, { "epoch": 1.674754960554626, "grad_norm": 0.3218133747577667, "learning_rate": 4.8549820177710825e-06, "loss": 0.3031, "step": 4670 }, { "epoch": 1.6751135548649296, "grad_norm": 0.3633529841899872, "learning_rate": 4.852896026073278e-06, "loss": 0.3395, "step": 4671 }, { "epoch": 1.6754721491752331, "grad_norm": 0.33892497420310974, "learning_rate": 4.850810060001442e-06, "loss": 0.3565, "step": 4672 }, { "epoch": 1.6758307434855366, "grad_norm": 0.30561962723731995, "learning_rate": 4.848724119918959e-06, "loss": 0.2865, "step": 4673 }, { "epoch": 1.6761893377958403, "grad_norm": 0.37473317980766296, "learning_rate": 4.846638206189205e-06, "loss": 0.3275, "step": 4674 }, { "epoch": 1.676547932106144, "grad_norm": 0.3638024926185608, "learning_rate": 4.844552319175551e-06, "loss": 0.3446, "step": 4675 }, { "epoch": 1.6769065264164476, "grad_norm": 0.3156245946884155, "learning_rate": 4.842466459241368e-06, "loss": 0.3019, "step": 4676 }, { "epoch": 1.677265120726751, "grad_norm": 0.33487921953201294, "learning_rate": 4.840380626750017e-06, "loss": 0.3103, "step": 4677 }, { "epoch": 1.6776237150370548, "grad_norm": 0.34685999155044556, "learning_rate": 4.838294822064858e-06, "loss": 0.3582, "step": 4678 }, { "epoch": 1.6779823093473585, "grad_norm": 0.30714911222457886, "learning_rate": 4.836209045549245e-06, "loss": 0.3003, "step": 4679 }, { "epoch": 1.678340903657662, "grad_norm": 0.3769870102405548, "learning_rate": 4.834123297566525e-06, "loss": 0.3226, "step": 4680 }, { "epoch": 1.6786994979679655, "grad_norm": 0.3029564321041107, "learning_rate": 4.832037578480042e-06, "loss": 0.2981, "step": 4681 }, { "epoch": 1.6790580922782692, "grad_norm": 0.31057968735694885, "learning_rate": 4.829951888653136e-06, "loss": 0.3143, "step": 4682 }, { "epoch": 1.679416686588573, "grad_norm": 0.3153983950614929, "learning_rate": 4.82786622844914e-06, "loss": 0.3357, "step": 4683 }, { "epoch": 1.6797752808988764, "grad_norm": 0.335938960313797, "learning_rate": 4.825780598231383e-06, "loss": 0.3306, "step": 4684 }, { "epoch": 1.68013387520918, "grad_norm": 0.3235626816749573, "learning_rate": 4.823694998363187e-06, "loss": 0.3022, "step": 4685 }, { "epoch": 1.6804924695194836, "grad_norm": 0.36780717968940735, "learning_rate": 4.821609429207872e-06, "loss": 0.3556, "step": 4686 }, { "epoch": 1.6808510638297873, "grad_norm": 0.3234381675720215, "learning_rate": 4.819523891128747e-06, "loss": 0.2837, "step": 4687 }, { "epoch": 1.6812096581400908, "grad_norm": 0.32755324244499207, "learning_rate": 4.8174383844891236e-06, "loss": 0.3196, "step": 4688 }, { "epoch": 1.6815682524503943, "grad_norm": 0.3860875964164734, "learning_rate": 4.815352909652301e-06, "loss": 0.3625, "step": 4689 }, { "epoch": 1.681926846760698, "grad_norm": 0.3372691869735718, "learning_rate": 4.813267466981577e-06, "loss": 0.2903, "step": 4690 }, { "epoch": 1.6822854410710018, "grad_norm": 0.3400701582431793, "learning_rate": 4.8111820568402405e-06, "loss": 0.3062, "step": 4691 }, { "epoch": 1.6826440353813052, "grad_norm": 0.3439266085624695, "learning_rate": 4.809096679591576e-06, "loss": 0.3421, "step": 4692 }, { "epoch": 1.6830026296916087, "grad_norm": 0.3639332354068756, "learning_rate": 4.807011335598865e-06, "loss": 0.3089, "step": 4693 }, { "epoch": 1.6833612240019125, "grad_norm": 0.32982292771339417, "learning_rate": 4.8049260252253804e-06, "loss": 0.3598, "step": 4694 }, { "epoch": 1.6837198183122162, "grad_norm": 0.31448936462402344, "learning_rate": 4.8028407488343884e-06, "loss": 0.3002, "step": 4695 }, { "epoch": 1.6840784126225197, "grad_norm": 0.31280526518821716, "learning_rate": 4.800755506789152e-06, "loss": 0.2894, "step": 4696 }, { "epoch": 1.6844370069328232, "grad_norm": 0.3390677571296692, "learning_rate": 4.798670299452926e-06, "loss": 0.3525, "step": 4697 }, { "epoch": 1.684795601243127, "grad_norm": 0.3392046093940735, "learning_rate": 4.796585127188958e-06, "loss": 0.3036, "step": 4698 }, { "epoch": 1.6851541955534306, "grad_norm": 0.29858389496803284, "learning_rate": 4.794499990360496e-06, "loss": 0.2975, "step": 4699 }, { "epoch": 1.685512789863734, "grad_norm": 0.33891645073890686, "learning_rate": 4.792414889330775e-06, "loss": 0.3434, "step": 4700 }, { "epoch": 1.6858713841740378, "grad_norm": 0.2842062711715698, "learning_rate": 4.790329824463025e-06, "loss": 0.2875, "step": 4701 }, { "epoch": 1.6862299784843415, "grad_norm": 0.34642839431762695, "learning_rate": 4.788244796120474e-06, "loss": 0.3287, "step": 4702 }, { "epoch": 1.686588572794645, "grad_norm": 0.35460302233695984, "learning_rate": 4.786159804666337e-06, "loss": 0.3325, "step": 4703 }, { "epoch": 1.6869471671049485, "grad_norm": 0.311735063791275, "learning_rate": 4.784074850463826e-06, "loss": 0.3015, "step": 4704 }, { "epoch": 1.6873057614152522, "grad_norm": 0.31194356083869934, "learning_rate": 4.781989933876149e-06, "loss": 0.2849, "step": 4705 }, { "epoch": 1.687664355725556, "grad_norm": 0.3416978418827057, "learning_rate": 4.779905055266504e-06, "loss": 0.3135, "step": 4706 }, { "epoch": 1.6880229500358594, "grad_norm": 0.305373877286911, "learning_rate": 4.777820214998083e-06, "loss": 0.2996, "step": 4707 }, { "epoch": 1.688381544346163, "grad_norm": 0.3408378064632416, "learning_rate": 4.7757354134340725e-06, "loss": 0.3377, "step": 4708 }, { "epoch": 1.6887401386564667, "grad_norm": 0.3131219148635864, "learning_rate": 4.773650650937648e-06, "loss": 0.3298, "step": 4709 }, { "epoch": 1.6890987329667704, "grad_norm": 0.3402863144874573, "learning_rate": 4.7715659278719865e-06, "loss": 0.3191, "step": 4710 }, { "epoch": 1.6894573272770739, "grad_norm": 0.3440448045730591, "learning_rate": 4.7694812446002516e-06, "loss": 0.3455, "step": 4711 }, { "epoch": 1.6898159215873774, "grad_norm": 0.32195234298706055, "learning_rate": 4.7673966014856e-06, "loss": 0.3085, "step": 4712 }, { "epoch": 1.690174515897681, "grad_norm": 0.3429883122444153, "learning_rate": 4.765311998891184e-06, "loss": 0.3266, "step": 4713 }, { "epoch": 1.6905331102079848, "grad_norm": 0.3340223729610443, "learning_rate": 4.763227437180149e-06, "loss": 0.325, "step": 4714 }, { "epoch": 1.6908917045182883, "grad_norm": 0.33752164244651794, "learning_rate": 4.76114291671563e-06, "loss": 0.3233, "step": 4715 }, { "epoch": 1.6912502988285918, "grad_norm": 0.35116392374038696, "learning_rate": 4.759058437860759e-06, "loss": 0.3473, "step": 4716 }, { "epoch": 1.6916088931388955, "grad_norm": 0.32985180616378784, "learning_rate": 4.756974000978657e-06, "loss": 0.3002, "step": 4717 }, { "epoch": 1.6919674874491992, "grad_norm": 0.359675794839859, "learning_rate": 4.75488960643244e-06, "loss": 0.3557, "step": 4718 }, { "epoch": 1.6923260817595027, "grad_norm": 0.33678188920021057, "learning_rate": 4.752805254585217e-06, "loss": 0.3296, "step": 4719 }, { "epoch": 1.6926846760698062, "grad_norm": 0.3356111943721771, "learning_rate": 4.750720945800087e-06, "loss": 0.3308, "step": 4720 }, { "epoch": 1.69304327038011, "grad_norm": 0.3211020827293396, "learning_rate": 4.748636680440143e-06, "loss": 0.2872, "step": 4721 }, { "epoch": 1.6934018646904136, "grad_norm": 0.3698407709598541, "learning_rate": 4.746552458868473e-06, "loss": 0.3396, "step": 4722 }, { "epoch": 1.6937604590007171, "grad_norm": 0.3274990916252136, "learning_rate": 4.744468281448153e-06, "loss": 0.3071, "step": 4723 }, { "epoch": 1.6941190533110206, "grad_norm": 0.3560289442539215, "learning_rate": 4.742384148542252e-06, "loss": 0.3251, "step": 4724 }, { "epoch": 1.6944776476213244, "grad_norm": 0.3524215519428253, "learning_rate": 4.7403000605138356e-06, "loss": 0.296, "step": 4725 }, { "epoch": 1.694836241931628, "grad_norm": 0.36792662739753723, "learning_rate": 4.738216017725956e-06, "loss": 0.3223, "step": 4726 }, { "epoch": 1.6951948362419316, "grad_norm": 0.3219072222709656, "learning_rate": 4.736132020541658e-06, "loss": 0.2982, "step": 4727 }, { "epoch": 1.6955534305522353, "grad_norm": 0.3804849684238434, "learning_rate": 4.734048069323986e-06, "loss": 0.3144, "step": 4728 }, { "epoch": 1.695912024862539, "grad_norm": 0.3404417634010315, "learning_rate": 4.731964164435968e-06, "loss": 0.3117, "step": 4729 }, { "epoch": 1.6962706191728425, "grad_norm": 0.37103158235549927, "learning_rate": 4.729880306240625e-06, "loss": 0.3316, "step": 4730 }, { "epoch": 1.696629213483146, "grad_norm": 0.34392818808555603, "learning_rate": 4.7277964951009735e-06, "loss": 0.3267, "step": 4731 }, { "epoch": 1.6969878077934497, "grad_norm": 0.31126055121421814, "learning_rate": 4.725712731380018e-06, "loss": 0.286, "step": 4732 }, { "epoch": 1.6973464021037534, "grad_norm": 0.2900109589099884, "learning_rate": 4.72362901544076e-06, "loss": 0.2952, "step": 4733 }, { "epoch": 1.697704996414057, "grad_norm": 0.3318169414997101, "learning_rate": 4.721545347646187e-06, "loss": 0.3193, "step": 4734 }, { "epoch": 1.6980635907243604, "grad_norm": 0.30810776352882385, "learning_rate": 4.71946172835928e-06, "loss": 0.2941, "step": 4735 }, { "epoch": 1.6984221850346641, "grad_norm": 0.3311827778816223, "learning_rate": 4.717378157943014e-06, "loss": 0.3126, "step": 4736 }, { "epoch": 1.6987807793449679, "grad_norm": 0.3237468898296356, "learning_rate": 4.715294636760352e-06, "loss": 0.3075, "step": 4737 }, { "epoch": 1.6991393736552713, "grad_norm": 0.3334652781486511, "learning_rate": 4.713211165174248e-06, "loss": 0.3181, "step": 4738 }, { "epoch": 1.6994979679655748, "grad_norm": 0.35078227519989014, "learning_rate": 4.7111277435476534e-06, "loss": 0.3453, "step": 4739 }, { "epoch": 1.6998565622758786, "grad_norm": 0.32501327991485596, "learning_rate": 4.709044372243504e-06, "loss": 0.3069, "step": 4740 }, { "epoch": 1.7002151565861823, "grad_norm": 0.3070370852947235, "learning_rate": 4.70696105162473e-06, "loss": 0.3105, "step": 4741 }, { "epoch": 1.7005737508964858, "grad_norm": 0.3029398024082184, "learning_rate": 4.704877782054254e-06, "loss": 0.2991, "step": 4742 }, { "epoch": 1.7009323452067893, "grad_norm": 0.3097861409187317, "learning_rate": 4.702794563894985e-06, "loss": 0.3468, "step": 4743 }, { "epoch": 1.701290939517093, "grad_norm": 0.3248051106929779, "learning_rate": 4.700711397509826e-06, "loss": 0.3476, "step": 4744 }, { "epoch": 1.7016495338273967, "grad_norm": 0.2771078646183014, "learning_rate": 4.698628283261675e-06, "loss": 0.2763, "step": 4745 }, { "epoch": 1.7020081281377002, "grad_norm": 0.34783798456192017, "learning_rate": 4.696545221513415e-06, "loss": 0.3554, "step": 4746 }, { "epoch": 1.7023667224480037, "grad_norm": 0.30885422229766846, "learning_rate": 4.69446221262792e-06, "loss": 0.3087, "step": 4747 }, { "epoch": 1.7027253167583074, "grad_norm": 0.3223823606967926, "learning_rate": 4.692379256968059e-06, "loss": 0.34, "step": 4748 }, { "epoch": 1.7030839110686111, "grad_norm": 0.3236197233200073, "learning_rate": 4.690296354896688e-06, "loss": 0.3424, "step": 4749 }, { "epoch": 1.7034425053789146, "grad_norm": 0.300096720457077, "learning_rate": 4.6882135067766555e-06, "loss": 0.3199, "step": 4750 }, { "epoch": 1.7038010996892181, "grad_norm": 0.3055698871612549, "learning_rate": 4.686130712970801e-06, "loss": 0.2857, "step": 4751 }, { "epoch": 1.7041596939995218, "grad_norm": 0.3515763282775879, "learning_rate": 4.684047973841953e-06, "loss": 0.3413, "step": 4752 }, { "epoch": 1.7045182883098255, "grad_norm": 0.3033442497253418, "learning_rate": 4.68196528975293e-06, "loss": 0.3408, "step": 4753 }, { "epoch": 1.704876882620129, "grad_norm": 0.33954206109046936, "learning_rate": 4.679882661066544e-06, "loss": 0.313, "step": 4754 }, { "epoch": 1.7052354769304328, "grad_norm": 0.36560654640197754, "learning_rate": 4.677800088145591e-06, "loss": 0.3316, "step": 4755 }, { "epoch": 1.7055940712407365, "grad_norm": 0.30739617347717285, "learning_rate": 4.675717571352869e-06, "loss": 0.2974, "step": 4756 }, { "epoch": 1.70595266555104, "grad_norm": 0.3246406614780426, "learning_rate": 4.6736351110511545e-06, "loss": 0.3346, "step": 4757 }, { "epoch": 1.7063112598613435, "grad_norm": 0.3038376271724701, "learning_rate": 4.671552707603219e-06, "loss": 0.2824, "step": 4758 }, { "epoch": 1.7066698541716472, "grad_norm": 0.37042349576950073, "learning_rate": 4.6694703613718215e-06, "loss": 0.314, "step": 4759 }, { "epoch": 1.707028448481951, "grad_norm": 0.33628636598587036, "learning_rate": 4.6673880727197175e-06, "loss": 0.3292, "step": 4760 }, { "epoch": 1.7073870427922544, "grad_norm": 0.32586631178855896, "learning_rate": 4.665305842009643e-06, "loss": 0.3339, "step": 4761 }, { "epoch": 1.707745637102558, "grad_norm": 0.3196246027946472, "learning_rate": 4.663223669604335e-06, "loss": 0.3112, "step": 4762 }, { "epoch": 1.7081042314128616, "grad_norm": 0.3468758761882782, "learning_rate": 4.6611415558665115e-06, "loss": 0.2878, "step": 4763 }, { "epoch": 1.7084628257231653, "grad_norm": 0.32531407475471497, "learning_rate": 4.6590595011588826e-06, "loss": 0.3244, "step": 4764 }, { "epoch": 1.7088214200334688, "grad_norm": 0.32282936573028564, "learning_rate": 4.65697750584415e-06, "loss": 0.3044, "step": 4765 }, { "epoch": 1.7091800143437723, "grad_norm": 0.3265041410923004, "learning_rate": 4.654895570285001e-06, "loss": 0.3279, "step": 4766 }, { "epoch": 1.709538608654076, "grad_norm": 0.36055245995521545, "learning_rate": 4.65281369484412e-06, "loss": 0.3211, "step": 4767 }, { "epoch": 1.7098972029643797, "grad_norm": 0.3101968467235565, "learning_rate": 4.650731879884172e-06, "loss": 0.3145, "step": 4768 }, { "epoch": 1.7102557972746832, "grad_norm": 0.31887418031692505, "learning_rate": 4.648650125767819e-06, "loss": 0.3105, "step": 4769 }, { "epoch": 1.7106143915849867, "grad_norm": 0.3257124722003937, "learning_rate": 4.646568432857704e-06, "loss": 0.3129, "step": 4770 }, { "epoch": 1.7109729858952905, "grad_norm": 0.3324878513813019, "learning_rate": 4.64448680151647e-06, "loss": 0.3434, "step": 4771 }, { "epoch": 1.7113315802055942, "grad_norm": 0.308130145072937, "learning_rate": 4.642405232106739e-06, "loss": 0.3041, "step": 4772 }, { "epoch": 1.7116901745158977, "grad_norm": 0.33253777027130127, "learning_rate": 4.640323724991131e-06, "loss": 0.3488, "step": 4773 }, { "epoch": 1.7120487688262012, "grad_norm": 0.3063446879386902, "learning_rate": 4.63824228053225e-06, "loss": 0.2939, "step": 4774 }, { "epoch": 1.7124073631365049, "grad_norm": 0.2996333837509155, "learning_rate": 4.636160899092689e-06, "loss": 0.2912, "step": 4775 }, { "epoch": 1.7127659574468086, "grad_norm": 0.3444596827030182, "learning_rate": 4.634079581035029e-06, "loss": 0.3513, "step": 4776 }, { "epoch": 1.713124551757112, "grad_norm": 0.300304114818573, "learning_rate": 4.6319983267218465e-06, "loss": 0.2958, "step": 4777 }, { "epoch": 1.7134831460674156, "grad_norm": 0.32954108715057373, "learning_rate": 4.629917136515698e-06, "loss": 0.3123, "step": 4778 }, { "epoch": 1.7138417403777193, "grad_norm": 0.3661377727985382, "learning_rate": 4.627836010779137e-06, "loss": 0.3536, "step": 4779 }, { "epoch": 1.714200334688023, "grad_norm": 0.31386011838912964, "learning_rate": 4.625754949874702e-06, "loss": 0.2927, "step": 4780 }, { "epoch": 1.7145589289983265, "grad_norm": 0.32372674345970154, "learning_rate": 4.6236739541649195e-06, "loss": 0.3417, "step": 4781 }, { "epoch": 1.71491752330863, "grad_norm": 0.3356725573539734, "learning_rate": 4.621593024012303e-06, "loss": 0.3403, "step": 4782 }, { "epoch": 1.7152761176189337, "grad_norm": 0.34010010957717896, "learning_rate": 4.61951215977936e-06, "loss": 0.3205, "step": 4783 }, { "epoch": 1.7156347119292374, "grad_norm": 0.3169122040271759, "learning_rate": 4.617431361828581e-06, "loss": 0.3056, "step": 4784 }, { "epoch": 1.715993306239541, "grad_norm": 0.3321836590766907, "learning_rate": 4.6153506305224505e-06, "loss": 0.3057, "step": 4785 }, { "epoch": 1.7163519005498447, "grad_norm": 0.35814860463142395, "learning_rate": 4.613269966223437e-06, "loss": 0.3519, "step": 4786 }, { "epoch": 1.7167104948601484, "grad_norm": 0.33723294734954834, "learning_rate": 4.611189369293999e-06, "loss": 0.3186, "step": 4787 }, { "epoch": 1.7170690891704519, "grad_norm": 0.3019822835922241, "learning_rate": 4.609108840096582e-06, "loss": 0.3053, "step": 4788 }, { "epoch": 1.7174276834807554, "grad_norm": 0.34097039699554443, "learning_rate": 4.607028378993619e-06, "loss": 0.3135, "step": 4789 }, { "epoch": 1.717786277791059, "grad_norm": 0.32607027888298035, "learning_rate": 4.6049479863475376e-06, "loss": 0.3167, "step": 4790 }, { "epoch": 1.7181448721013628, "grad_norm": 0.3616013526916504, "learning_rate": 4.602867662520746e-06, "loss": 0.3493, "step": 4791 }, { "epoch": 1.7185034664116663, "grad_norm": 0.31470492482185364, "learning_rate": 4.600787407875643e-06, "loss": 0.3074, "step": 4792 }, { "epoch": 1.7188620607219698, "grad_norm": 0.32698163390159607, "learning_rate": 4.598707222774614e-06, "loss": 0.3679, "step": 4793 }, { "epoch": 1.7192206550322735, "grad_norm": 0.33186885714530945, "learning_rate": 4.5966271075800375e-06, "loss": 0.3365, "step": 4794 }, { "epoch": 1.7195792493425772, "grad_norm": 0.303782194852829, "learning_rate": 4.594547062654271e-06, "loss": 0.3173, "step": 4795 }, { "epoch": 1.7199378436528807, "grad_norm": 0.3432393968105316, "learning_rate": 4.592467088359669e-06, "loss": 0.3212, "step": 4796 }, { "epoch": 1.7202964379631842, "grad_norm": 0.3441748321056366, "learning_rate": 4.5903871850585695e-06, "loss": 0.3129, "step": 4797 }, { "epoch": 1.720655032273488, "grad_norm": 0.30904626846313477, "learning_rate": 4.588307353113296e-06, "loss": 0.2912, "step": 4798 }, { "epoch": 1.7210136265837916, "grad_norm": 0.3042355179786682, "learning_rate": 4.586227592886162e-06, "loss": 0.2992, "step": 4799 }, { "epoch": 1.7213722208940951, "grad_norm": 0.3680964708328247, "learning_rate": 4.584147904739469e-06, "loss": 0.3242, "step": 4800 }, { "epoch": 1.7217308152043986, "grad_norm": 0.36103108525276184, "learning_rate": 4.582068289035503e-06, "loss": 0.363, "step": 4801 }, { "epoch": 1.7220894095147024, "grad_norm": 0.2699114680290222, "learning_rate": 4.579988746136543e-06, "loss": 0.2722, "step": 4802 }, { "epoch": 1.722448003825006, "grad_norm": 0.35802391171455383, "learning_rate": 4.577909276404851e-06, "loss": 0.3643, "step": 4803 }, { "epoch": 1.7228065981353096, "grad_norm": 0.36079734563827515, "learning_rate": 4.575829880202676e-06, "loss": 0.3582, "step": 4804 }, { "epoch": 1.723165192445613, "grad_norm": 0.30947890877723694, "learning_rate": 4.573750557892255e-06, "loss": 0.288, "step": 4805 }, { "epoch": 1.7235237867559168, "grad_norm": 0.3040592670440674, "learning_rate": 4.571671309835811e-06, "loss": 0.3055, "step": 4806 }, { "epoch": 1.7238823810662205, "grad_norm": 0.31269651651382446, "learning_rate": 4.569592136395559e-06, "loss": 0.3135, "step": 4807 }, { "epoch": 1.724240975376524, "grad_norm": 0.33247822523117065, "learning_rate": 4.567513037933696e-06, "loss": 0.3157, "step": 4808 }, { "epoch": 1.7245995696868275, "grad_norm": 0.33356374502182007, "learning_rate": 4.565434014812409e-06, "loss": 0.2696, "step": 4809 }, { "epoch": 1.7249581639971312, "grad_norm": 0.3476690948009491, "learning_rate": 4.563355067393867e-06, "loss": 0.3737, "step": 4810 }, { "epoch": 1.725316758307435, "grad_norm": 0.3029771149158478, "learning_rate": 4.56127619604023e-06, "loss": 0.2732, "step": 4811 }, { "epoch": 1.7256753526177384, "grad_norm": 0.36520326137542725, "learning_rate": 4.559197401113645e-06, "loss": 0.3135, "step": 4812 }, { "epoch": 1.7260339469280421, "grad_norm": 0.3132314085960388, "learning_rate": 4.557118682976244e-06, "loss": 0.3078, "step": 4813 }, { "epoch": 1.7263925412383458, "grad_norm": 0.327780544757843, "learning_rate": 4.555040041990147e-06, "loss": 0.3282, "step": 4814 }, { "epoch": 1.7267511355486493, "grad_norm": 0.30791351199150085, "learning_rate": 4.5529614785174606e-06, "loss": 0.2963, "step": 4815 }, { "epoch": 1.7271097298589528, "grad_norm": 0.336734801530838, "learning_rate": 4.550882992920273e-06, "loss": 0.3052, "step": 4816 }, { "epoch": 1.7274683241692566, "grad_norm": 0.3168617784976959, "learning_rate": 4.548804585560666e-06, "loss": 0.3299, "step": 4817 }, { "epoch": 1.7278269184795603, "grad_norm": 0.32586535811424255, "learning_rate": 4.546726256800702e-06, "loss": 0.3286, "step": 4818 }, { "epoch": 1.7281855127898638, "grad_norm": 0.3307632505893707, "learning_rate": 4.544648007002438e-06, "loss": 0.3272, "step": 4819 }, { "epoch": 1.7285441071001673, "grad_norm": 0.3191487193107605, "learning_rate": 4.5425698365279054e-06, "loss": 0.3027, "step": 4820 }, { "epoch": 1.728902701410471, "grad_norm": 0.3288640081882477, "learning_rate": 4.540491745739132e-06, "loss": 0.3142, "step": 4821 }, { "epoch": 1.7292612957207747, "grad_norm": 0.3618411719799042, "learning_rate": 4.538413734998125e-06, "loss": 0.3341, "step": 4822 }, { "epoch": 1.7296198900310782, "grad_norm": 0.35100889205932617, "learning_rate": 4.5363358046668825e-06, "loss": 0.3219, "step": 4823 }, { "epoch": 1.7299784843413817, "grad_norm": 0.31394216418266296, "learning_rate": 4.534257955107383e-06, "loss": 0.2957, "step": 4824 }, { "epoch": 1.7303370786516854, "grad_norm": 0.34475231170654297, "learning_rate": 4.532180186681598e-06, "loss": 0.3317, "step": 4825 }, { "epoch": 1.7306956729619891, "grad_norm": 0.336648166179657, "learning_rate": 4.530102499751482e-06, "loss": 0.3001, "step": 4826 }, { "epoch": 1.7310542672722926, "grad_norm": 0.337404727935791, "learning_rate": 4.528024894678971e-06, "loss": 0.3414, "step": 4827 }, { "epoch": 1.731412861582596, "grad_norm": 0.3049179017543793, "learning_rate": 4.52594737182599e-06, "loss": 0.3062, "step": 4828 }, { "epoch": 1.7317714558928998, "grad_norm": 0.44358551502227783, "learning_rate": 4.523869931554453e-06, "loss": 0.3584, "step": 4829 }, { "epoch": 1.7321300502032035, "grad_norm": 0.36593741178512573, "learning_rate": 4.521792574226255e-06, "loss": 0.3292, "step": 4830 }, { "epoch": 1.732488644513507, "grad_norm": 0.3467451333999634, "learning_rate": 4.5197153002032796e-06, "loss": 0.3044, "step": 4831 }, { "epoch": 1.7328472388238105, "grad_norm": 0.33335721492767334, "learning_rate": 4.517638109847393e-06, "loss": 0.2948, "step": 4832 }, { "epoch": 1.7332058331341142, "grad_norm": 0.33248138427734375, "learning_rate": 4.515561003520448e-06, "loss": 0.318, "step": 4833 }, { "epoch": 1.733564427444418, "grad_norm": 0.3710123896598816, "learning_rate": 4.513483981584283e-06, "loss": 0.3368, "step": 4834 }, { "epoch": 1.7339230217547215, "grad_norm": 0.31652235984802246, "learning_rate": 4.5114070444007205e-06, "loss": 0.3081, "step": 4835 }, { "epoch": 1.734281616065025, "grad_norm": 0.33189472556114197, "learning_rate": 4.509330192331572e-06, "loss": 0.3343, "step": 4836 }, { "epoch": 1.7346402103753287, "grad_norm": 0.3245450556278229, "learning_rate": 4.507253425738632e-06, "loss": 0.315, "step": 4837 }, { "epoch": 1.7349988046856324, "grad_norm": 0.3065686821937561, "learning_rate": 4.505176744983678e-06, "loss": 0.3301, "step": 4838 }, { "epoch": 1.7353573989959359, "grad_norm": 0.3475804328918457, "learning_rate": 4.503100150428474e-06, "loss": 0.3226, "step": 4839 }, { "epoch": 1.7357159933062394, "grad_norm": 0.33458879590034485, "learning_rate": 4.50102364243477e-06, "loss": 0.311, "step": 4840 }, { "epoch": 1.736074587616543, "grad_norm": 0.34556815028190613, "learning_rate": 4.498947221364299e-06, "loss": 0.3123, "step": 4841 }, { "epoch": 1.7364331819268468, "grad_norm": 0.31604263186454773, "learning_rate": 4.49687088757878e-06, "loss": 0.3283, "step": 4842 }, { "epoch": 1.7367917762371503, "grad_norm": 0.32676267623901367, "learning_rate": 4.49479464143992e-06, "loss": 0.303, "step": 4843 }, { "epoch": 1.737150370547454, "grad_norm": 0.34016022086143494, "learning_rate": 4.4927184833094055e-06, "loss": 0.3393, "step": 4844 }, { "epoch": 1.7375089648577577, "grad_norm": 0.32282754778862, "learning_rate": 4.490642413548908e-06, "loss": 0.3225, "step": 4845 }, { "epoch": 1.7378675591680612, "grad_norm": 0.3070774972438812, "learning_rate": 4.488566432520087e-06, "loss": 0.2939, "step": 4846 }, { "epoch": 1.7382261534783647, "grad_norm": 0.3196033239364624, "learning_rate": 4.486490540584583e-06, "loss": 0.3415, "step": 4847 }, { "epoch": 1.7385847477886684, "grad_norm": 0.2999747097492218, "learning_rate": 4.484414738104027e-06, "loss": 0.2833, "step": 4848 }, { "epoch": 1.7389433420989722, "grad_norm": 0.3579872250556946, "learning_rate": 4.4823390254400275e-06, "loss": 0.3363, "step": 4849 }, { "epoch": 1.7393019364092757, "grad_norm": 0.3458065390586853, "learning_rate": 4.48026340295418e-06, "loss": 0.3218, "step": 4850 }, { "epoch": 1.7396605307195792, "grad_norm": 0.33525893092155457, "learning_rate": 4.478187871008064e-06, "loss": 0.3489, "step": 4851 }, { "epoch": 1.7400191250298829, "grad_norm": 0.3367781341075897, "learning_rate": 4.4761124299632445e-06, "loss": 0.3245, "step": 4852 }, { "epoch": 1.7403777193401866, "grad_norm": 0.3727831542491913, "learning_rate": 4.474037080181269e-06, "loss": 0.3278, "step": 4853 }, { "epoch": 1.74073631365049, "grad_norm": 0.32661572098731995, "learning_rate": 4.4719618220236715e-06, "loss": 0.3266, "step": 4854 }, { "epoch": 1.7410949079607936, "grad_norm": 0.3776702880859375, "learning_rate": 4.4698866558519675e-06, "loss": 0.3118, "step": 4855 }, { "epoch": 1.7414535022710973, "grad_norm": 0.3158247768878937, "learning_rate": 4.467811582027658e-06, "loss": 0.3412, "step": 4856 }, { "epoch": 1.741812096581401, "grad_norm": 0.3273157477378845, "learning_rate": 4.465736600912225e-06, "loss": 0.2956, "step": 4857 }, { "epoch": 1.7421706908917045, "grad_norm": 0.38139867782592773, "learning_rate": 4.463661712867138e-06, "loss": 0.3255, "step": 4858 }, { "epoch": 1.742529285202008, "grad_norm": 0.3520585894584656, "learning_rate": 4.46158691825385e-06, "loss": 0.3137, "step": 4859 }, { "epoch": 1.7428878795123117, "grad_norm": 0.3186306059360504, "learning_rate": 4.459512217433797e-06, "loss": 0.3177, "step": 4860 }, { "epoch": 1.7432464738226154, "grad_norm": 0.34772351384162903, "learning_rate": 4.457437610768397e-06, "loss": 0.3576, "step": 4861 }, { "epoch": 1.743605068132919, "grad_norm": 0.3310803771018982, "learning_rate": 4.4553630986190535e-06, "loss": 0.3024, "step": 4862 }, { "epoch": 1.7439636624432224, "grad_norm": 0.32962095737457275, "learning_rate": 4.453288681347151e-06, "loss": 0.3019, "step": 4863 }, { "epoch": 1.7443222567535261, "grad_norm": 0.3310961127281189, "learning_rate": 4.451214359314061e-06, "loss": 0.3065, "step": 4864 }, { "epoch": 1.7446808510638299, "grad_norm": 0.34680691361427307, "learning_rate": 4.449140132881138e-06, "loss": 0.3302, "step": 4865 }, { "epoch": 1.7450394453741334, "grad_norm": 0.3411029577255249, "learning_rate": 4.447066002409718e-06, "loss": 0.3179, "step": 4866 }, { "epoch": 1.7453980396844369, "grad_norm": 0.3497675657272339, "learning_rate": 4.444991968261121e-06, "loss": 0.3358, "step": 4867 }, { "epoch": 1.7457566339947406, "grad_norm": 0.3531877100467682, "learning_rate": 4.4429180307966496e-06, "loss": 0.3283, "step": 4868 }, { "epoch": 1.7461152283050443, "grad_norm": 0.33659884333610535, "learning_rate": 4.440844190377588e-06, "loss": 0.3352, "step": 4869 }, { "epoch": 1.7464738226153478, "grad_norm": 0.3082220256328583, "learning_rate": 4.438770447365211e-06, "loss": 0.3017, "step": 4870 }, { "epoch": 1.7468324169256515, "grad_norm": 0.33664679527282715, "learning_rate": 4.436696802120766e-06, "loss": 0.3438, "step": 4871 }, { "epoch": 1.7471910112359552, "grad_norm": 0.32294920086860657, "learning_rate": 4.434623255005493e-06, "loss": 0.3029, "step": 4872 }, { "epoch": 1.7475496055462587, "grad_norm": 0.33958277106285095, "learning_rate": 4.4325498063806074e-06, "loss": 0.3157, "step": 4873 }, { "epoch": 1.7479081998565622, "grad_norm": 0.3259594142436981, "learning_rate": 4.43047645660731e-06, "loss": 0.3419, "step": 4874 }, { "epoch": 1.748266794166866, "grad_norm": 0.32954779267311096, "learning_rate": 4.428403206046785e-06, "loss": 0.3253, "step": 4875 }, { "epoch": 1.7486253884771696, "grad_norm": 0.3981184959411621, "learning_rate": 4.426330055060201e-06, "loss": 0.346, "step": 4876 }, { "epoch": 1.7489839827874731, "grad_norm": 0.31961432099342346, "learning_rate": 4.424257004008706e-06, "loss": 0.3304, "step": 4877 }, { "epoch": 1.7493425770977766, "grad_norm": 0.3141395151615143, "learning_rate": 4.4221840532534325e-06, "loss": 0.3025, "step": 4878 }, { "epoch": 1.7497011714080803, "grad_norm": 0.3224447965621948, "learning_rate": 4.420111203155495e-06, "loss": 0.3465, "step": 4879 }, { "epoch": 1.750059765718384, "grad_norm": 0.30860695242881775, "learning_rate": 4.4180384540759905e-06, "loss": 0.3125, "step": 4880 }, { "epoch": 1.7504183600286876, "grad_norm": 0.325685977935791, "learning_rate": 4.415965806375995e-06, "loss": 0.3278, "step": 4881 }, { "epoch": 1.750776954338991, "grad_norm": 0.32374194264411926, "learning_rate": 4.413893260416574e-06, "loss": 0.314, "step": 4882 }, { "epoch": 1.7511355486492948, "grad_norm": 0.2993296682834625, "learning_rate": 4.411820816558772e-06, "loss": 0.295, "step": 4883 }, { "epoch": 1.7514941429595985, "grad_norm": 0.3512868583202362, "learning_rate": 4.409748475163614e-06, "loss": 0.3537, "step": 4884 }, { "epoch": 1.751852737269902, "grad_norm": 0.32359427213668823, "learning_rate": 4.407676236592107e-06, "loss": 0.3381, "step": 4885 }, { "epoch": 1.7522113315802055, "grad_norm": 0.33730417490005493, "learning_rate": 4.4056041012052405e-06, "loss": 0.3352, "step": 4886 }, { "epoch": 1.7525699258905092, "grad_norm": 0.3073294758796692, "learning_rate": 4.4035320693639925e-06, "loss": 0.2914, "step": 4887 }, { "epoch": 1.752928520200813, "grad_norm": 0.3320930302143097, "learning_rate": 4.401460141429311e-06, "loss": 0.3425, "step": 4888 }, { "epoch": 1.7532871145111164, "grad_norm": 0.3400581181049347, "learning_rate": 4.399388317762137e-06, "loss": 0.3201, "step": 4889 }, { "epoch": 1.75364570882142, "grad_norm": 0.34173333644866943, "learning_rate": 4.397316598723385e-06, "loss": 0.3094, "step": 4890 }, { "epoch": 1.7540043031317236, "grad_norm": 0.36469852924346924, "learning_rate": 4.395244984673958e-06, "loss": 0.3335, "step": 4891 }, { "epoch": 1.7543628974420273, "grad_norm": 0.306065171957016, "learning_rate": 4.393173475974734e-06, "loss": 0.2984, "step": 4892 }, { "epoch": 1.7547214917523308, "grad_norm": 0.3356734812259674, "learning_rate": 4.391102072986581e-06, "loss": 0.3151, "step": 4893 }, { "epoch": 1.7550800860626343, "grad_norm": 0.3583798408508301, "learning_rate": 4.38903077607034e-06, "loss": 0.3408, "step": 4894 }, { "epoch": 1.755438680372938, "grad_norm": 0.34607991576194763, "learning_rate": 4.386959585586839e-06, "loss": 0.3344, "step": 4895 }, { "epoch": 1.7557972746832418, "grad_norm": 0.3641197681427002, "learning_rate": 4.384888501896886e-06, "loss": 0.3305, "step": 4896 }, { "epoch": 1.7561558689935453, "grad_norm": 0.3215930163860321, "learning_rate": 4.3828175253612694e-06, "loss": 0.3052, "step": 4897 }, { "epoch": 1.7565144633038487, "grad_norm": 0.3645997941493988, "learning_rate": 4.380746656340758e-06, "loss": 0.3331, "step": 4898 }, { "epoch": 1.7568730576141525, "grad_norm": 0.36638975143432617, "learning_rate": 4.378675895196108e-06, "loss": 0.3243, "step": 4899 }, { "epoch": 1.7572316519244562, "grad_norm": 0.3552320897579193, "learning_rate": 4.376605242288049e-06, "loss": 0.3073, "step": 4900 }, { "epoch": 1.7575902462347597, "grad_norm": 0.35196271538734436, "learning_rate": 4.374534697977297e-06, "loss": 0.3008, "step": 4901 }, { "epoch": 1.7579488405450634, "grad_norm": 0.34309664368629456, "learning_rate": 4.3724642626245464e-06, "loss": 0.3092, "step": 4902 }, { "epoch": 1.7583074348553671, "grad_norm": 0.3505459725856781, "learning_rate": 4.3703939365904715e-06, "loss": 0.3553, "step": 4903 }, { "epoch": 1.7586660291656706, "grad_norm": 0.3258625864982605, "learning_rate": 4.368323720235735e-06, "loss": 0.3244, "step": 4904 }, { "epoch": 1.759024623475974, "grad_norm": 0.3464969992637634, "learning_rate": 4.366253613920969e-06, "loss": 0.3596, "step": 4905 }, { "epoch": 1.7593832177862778, "grad_norm": 0.3428959846496582, "learning_rate": 4.364183618006798e-06, "loss": 0.3159, "step": 4906 }, { "epoch": 1.7597418120965815, "grad_norm": 0.308167427778244, "learning_rate": 4.362113732853817e-06, "loss": 0.3081, "step": 4907 }, { "epoch": 1.760100406406885, "grad_norm": 0.32760944962501526, "learning_rate": 4.36004395882261e-06, "loss": 0.3025, "step": 4908 }, { "epoch": 1.7604590007171885, "grad_norm": 0.34776628017425537, "learning_rate": 4.357974296273733e-06, "loss": 0.3103, "step": 4909 }, { "epoch": 1.7608175950274922, "grad_norm": 0.33669957518577576, "learning_rate": 4.355904745567733e-06, "loss": 0.3427, "step": 4910 }, { "epoch": 1.761176189337796, "grad_norm": 0.2972956895828247, "learning_rate": 4.35383530706513e-06, "loss": 0.2959, "step": 4911 }, { "epoch": 1.7615347836480995, "grad_norm": 0.3251316547393799, "learning_rate": 4.351765981126428e-06, "loss": 0.329, "step": 4912 }, { "epoch": 1.761893377958403, "grad_norm": 0.32816267013549805, "learning_rate": 4.349696768112108e-06, "loss": 0.3093, "step": 4913 }, { "epoch": 1.7622519722687067, "grad_norm": 0.34188541769981384, "learning_rate": 4.347627668382635e-06, "loss": 0.3332, "step": 4914 }, { "epoch": 1.7626105665790104, "grad_norm": 0.29291436076164246, "learning_rate": 4.34555868229845e-06, "loss": 0.3244, "step": 4915 }, { "epoch": 1.7629691608893139, "grad_norm": 0.361208438873291, "learning_rate": 4.34348981021998e-06, "loss": 0.3412, "step": 4916 }, { "epoch": 1.7633277551996174, "grad_norm": 0.33295536041259766, "learning_rate": 4.341421052507627e-06, "loss": 0.2943, "step": 4917 }, { "epoch": 1.763686349509921, "grad_norm": 0.32740211486816406, "learning_rate": 4.339352409521776e-06, "loss": 0.3048, "step": 4918 }, { "epoch": 1.7640449438202248, "grad_norm": 0.3328720033168793, "learning_rate": 4.3372838816227905e-06, "loss": 0.3233, "step": 4919 }, { "epoch": 1.7644035381305283, "grad_norm": 0.3397323489189148, "learning_rate": 4.335215469171016e-06, "loss": 0.329, "step": 4920 }, { "epoch": 1.7647621324408318, "grad_norm": 0.3625101149082184, "learning_rate": 4.333147172526771e-06, "loss": 0.3334, "step": 4921 }, { "epoch": 1.7651207267511355, "grad_norm": 0.3199448585510254, "learning_rate": 4.331078992050367e-06, "loss": 0.2814, "step": 4922 }, { "epoch": 1.7654793210614392, "grad_norm": 0.2988422214984894, "learning_rate": 4.329010928102082e-06, "loss": 0.2723, "step": 4923 }, { "epoch": 1.7658379153717427, "grad_norm": 0.342793345451355, "learning_rate": 4.326942981042182e-06, "loss": 0.3536, "step": 4924 }, { "epoch": 1.7661965096820462, "grad_norm": 0.3348810076713562, "learning_rate": 4.324875151230909e-06, "loss": 0.3004, "step": 4925 }, { "epoch": 1.76655510399235, "grad_norm": 0.3373590409755707, "learning_rate": 4.322807439028482e-06, "loss": 0.3336, "step": 4926 }, { "epoch": 1.7669136983026537, "grad_norm": 0.34844595193862915, "learning_rate": 4.32073984479511e-06, "loss": 0.3137, "step": 4927 }, { "epoch": 1.7672722926129572, "grad_norm": 0.316464900970459, "learning_rate": 4.318672368890969e-06, "loss": 0.3338, "step": 4928 }, { "epoch": 1.7676308869232609, "grad_norm": 0.335448682308197, "learning_rate": 4.316605011676219e-06, "loss": 0.2932, "step": 4929 }, { "epoch": 1.7679894812335646, "grad_norm": 0.3384028673171997, "learning_rate": 4.314537773511005e-06, "loss": 0.3106, "step": 4930 }, { "epoch": 1.768348075543868, "grad_norm": 0.33738791942596436, "learning_rate": 4.312470654755443e-06, "loss": 0.2944, "step": 4931 }, { "epoch": 1.7687066698541716, "grad_norm": 0.3246981203556061, "learning_rate": 4.310403655769629e-06, "loss": 0.2925, "step": 4932 }, { "epoch": 1.7690652641644753, "grad_norm": 0.3392328917980194, "learning_rate": 4.308336776913647e-06, "loss": 0.3283, "step": 4933 }, { "epoch": 1.769423858474779, "grad_norm": 0.3218151032924652, "learning_rate": 4.30627001854755e-06, "loss": 0.3202, "step": 4934 }, { "epoch": 1.7697824527850825, "grad_norm": 0.3314831256866455, "learning_rate": 4.304203381031373e-06, "loss": 0.3366, "step": 4935 }, { "epoch": 1.770141047095386, "grad_norm": 0.3431839644908905, "learning_rate": 4.302136864725132e-06, "loss": 0.3282, "step": 4936 }, { "epoch": 1.7704996414056897, "grad_norm": 0.3191826641559601, "learning_rate": 4.300070469988821e-06, "loss": 0.3277, "step": 4937 }, { "epoch": 1.7708582357159934, "grad_norm": 0.3125009536743164, "learning_rate": 4.29800419718241e-06, "loss": 0.2987, "step": 4938 }, { "epoch": 1.771216830026297, "grad_norm": 0.3396902084350586, "learning_rate": 4.295938046665853e-06, "loss": 0.3292, "step": 4939 }, { "epoch": 1.7715754243366004, "grad_norm": 0.33774203062057495, "learning_rate": 4.293872018799079e-06, "loss": 0.3235, "step": 4940 }, { "epoch": 1.7719340186469041, "grad_norm": 0.3189641535282135, "learning_rate": 4.291806113941996e-06, "loss": 0.3059, "step": 4941 }, { "epoch": 1.7722926129572079, "grad_norm": 0.34768304228782654, "learning_rate": 4.289740332454492e-06, "loss": 0.3157, "step": 4942 }, { "epoch": 1.7726512072675114, "grad_norm": 0.35583797097206116, "learning_rate": 4.2876746746964295e-06, "loss": 0.3481, "step": 4943 }, { "epoch": 1.7730098015778148, "grad_norm": 0.3672747313976288, "learning_rate": 4.285609141027657e-06, "loss": 0.2805, "step": 4944 }, { "epoch": 1.7733683958881186, "grad_norm": 0.3514081537723541, "learning_rate": 4.283543731807994e-06, "loss": 0.3142, "step": 4945 }, { "epoch": 1.7737269901984223, "grad_norm": 0.3248136341571808, "learning_rate": 4.281478447397242e-06, "loss": 0.3221, "step": 4946 }, { "epoch": 1.7740855845087258, "grad_norm": 0.35036802291870117, "learning_rate": 4.279413288155181e-06, "loss": 0.3142, "step": 4947 }, { "epoch": 1.7744441788190293, "grad_norm": 0.42870649695396423, "learning_rate": 4.277348254441566e-06, "loss": 0.3701, "step": 4948 }, { "epoch": 1.774802773129333, "grad_norm": 0.3731567859649658, "learning_rate": 4.275283346616132e-06, "loss": 0.3193, "step": 4949 }, { "epoch": 1.7751613674396367, "grad_norm": 0.3503645658493042, "learning_rate": 4.273218565038596e-06, "loss": 0.3362, "step": 4950 }, { "epoch": 1.7755199617499402, "grad_norm": 0.3574483394622803, "learning_rate": 4.271153910068647e-06, "loss": 0.3109, "step": 4951 }, { "epoch": 1.7758785560602437, "grad_norm": 0.3416339159011841, "learning_rate": 4.269089382065953e-06, "loss": 0.3292, "step": 4952 }, { "epoch": 1.7762371503705474, "grad_norm": 0.35022979974746704, "learning_rate": 4.2670249813901636e-06, "loss": 0.2825, "step": 4953 }, { "epoch": 1.7765957446808511, "grad_norm": 0.3524635136127472, "learning_rate": 4.264960708400903e-06, "loss": 0.3698, "step": 4954 }, { "epoch": 1.7769543389911546, "grad_norm": 0.32876360416412354, "learning_rate": 4.262896563457772e-06, "loss": 0.3028, "step": 4955 }, { "epoch": 1.7773129333014581, "grad_norm": 0.3362855911254883, "learning_rate": 4.260832546920355e-06, "loss": 0.3245, "step": 4956 }, { "epoch": 1.777671527611762, "grad_norm": 0.3047451674938202, "learning_rate": 4.2587686591482084e-06, "loss": 0.2838, "step": 4957 }, { "epoch": 1.7780301219220656, "grad_norm": 0.3184412717819214, "learning_rate": 4.256704900500866e-06, "loss": 0.2979, "step": 4958 }, { "epoch": 1.778388716232369, "grad_norm": 0.38299596309661865, "learning_rate": 4.254641271337843e-06, "loss": 0.326, "step": 4959 }, { "epoch": 1.7787473105426728, "grad_norm": 0.3015463650226593, "learning_rate": 4.2525777720186305e-06, "loss": 0.3036, "step": 4960 }, { "epoch": 1.7791059048529765, "grad_norm": 0.3453713357448578, "learning_rate": 4.250514402902692e-06, "loss": 0.36, "step": 4961 }, { "epoch": 1.77946449916328, "grad_norm": 0.34435752034187317, "learning_rate": 4.248451164349479e-06, "loss": 0.3186, "step": 4962 }, { "epoch": 1.7798230934735835, "grad_norm": 0.35500234365463257, "learning_rate": 4.246388056718411e-06, "loss": 0.3329, "step": 4963 }, { "epoch": 1.7801816877838872, "grad_norm": 0.3099416196346283, "learning_rate": 4.2443250803688884e-06, "loss": 0.2816, "step": 4964 }, { "epoch": 1.780540282094191, "grad_norm": 0.346737265586853, "learning_rate": 4.242262235660288e-06, "loss": 0.3406, "step": 4965 }, { "epoch": 1.7808988764044944, "grad_norm": 0.326055109500885, "learning_rate": 4.240199522951962e-06, "loss": 0.3153, "step": 4966 }, { "epoch": 1.781257470714798, "grad_norm": 0.33314064145088196, "learning_rate": 4.238136942603245e-06, "loss": 0.2975, "step": 4967 }, { "epoch": 1.7816160650251016, "grad_norm": 0.358700692653656, "learning_rate": 4.2360744949734424e-06, "loss": 0.3433, "step": 4968 }, { "epoch": 1.7819746593354053, "grad_norm": 0.3118727505207062, "learning_rate": 4.234012180421838e-06, "loss": 0.2892, "step": 4969 }, { "epoch": 1.7823332536457088, "grad_norm": 0.3164593577384949, "learning_rate": 4.231949999307697e-06, "loss": 0.347, "step": 4970 }, { "epoch": 1.7826918479560123, "grad_norm": 0.39026904106140137, "learning_rate": 4.229887951990255e-06, "loss": 0.3408, "step": 4971 }, { "epoch": 1.783050442266316, "grad_norm": 0.32257941365242004, "learning_rate": 4.227826038828726e-06, "loss": 0.3139, "step": 4972 }, { "epoch": 1.7834090365766198, "grad_norm": 0.2997569739818573, "learning_rate": 4.225764260182306e-06, "loss": 0.266, "step": 4973 }, { "epoch": 1.7837676308869232, "grad_norm": 0.32685261964797974, "learning_rate": 4.22370261641016e-06, "loss": 0.3117, "step": 4974 }, { "epoch": 1.7841262251972267, "grad_norm": 0.3307839334011078, "learning_rate": 4.221641107871432e-06, "loss": 0.3592, "step": 4975 }, { "epoch": 1.7844848195075305, "grad_norm": 0.33127638697624207, "learning_rate": 4.219579734925246e-06, "loss": 0.3223, "step": 4976 }, { "epoch": 1.7848434138178342, "grad_norm": 0.35685959458351135, "learning_rate": 4.217518497930698e-06, "loss": 0.2862, "step": 4977 }, { "epoch": 1.7852020081281377, "grad_norm": 0.3453713059425354, "learning_rate": 4.21545739724686e-06, "loss": 0.3511, "step": 4978 }, { "epoch": 1.7855606024384412, "grad_norm": 0.2851032316684723, "learning_rate": 4.213396433232787e-06, "loss": 0.2689, "step": 4979 }, { "epoch": 1.7859191967487449, "grad_norm": 0.31502217054367065, "learning_rate": 4.211335606247503e-06, "loss": 0.3163, "step": 4980 }, { "epoch": 1.7862777910590486, "grad_norm": 0.31617864966392517, "learning_rate": 4.209274916650008e-06, "loss": 0.3221, "step": 4981 }, { "epoch": 1.786636385369352, "grad_norm": 0.32212206721305847, "learning_rate": 4.207214364799283e-06, "loss": 0.3144, "step": 4982 }, { "epoch": 1.7869949796796556, "grad_norm": 0.30114153027534485, "learning_rate": 4.205153951054282e-06, "loss": 0.2978, "step": 4983 }, { "epoch": 1.7873535739899593, "grad_norm": 0.32126474380493164, "learning_rate": 4.203093675773936e-06, "loss": 0.3153, "step": 4984 }, { "epoch": 1.787712168300263, "grad_norm": 0.2928643822669983, "learning_rate": 4.201033539317152e-06, "loss": 0.2852, "step": 4985 }, { "epoch": 1.7880707626105665, "grad_norm": 0.31553131341934204, "learning_rate": 4.198973542042811e-06, "loss": 0.3538, "step": 4986 }, { "epoch": 1.7884293569208702, "grad_norm": 0.3589349687099457, "learning_rate": 4.19691368430977e-06, "loss": 0.3232, "step": 4987 }, { "epoch": 1.788787951231174, "grad_norm": 0.3374667167663574, "learning_rate": 4.194853966476864e-06, "loss": 0.3266, "step": 4988 }, { "epoch": 1.7891465455414775, "grad_norm": 0.34156662225723267, "learning_rate": 4.192794388902901e-06, "loss": 0.3144, "step": 4989 }, { "epoch": 1.789505139851781, "grad_norm": 0.30587300658226013, "learning_rate": 4.190734951946668e-06, "loss": 0.3064, "step": 4990 }, { "epoch": 1.7898637341620847, "grad_norm": 0.32255464792251587, "learning_rate": 4.188675655966925e-06, "loss": 0.3118, "step": 4991 }, { "epoch": 1.7902223284723884, "grad_norm": 0.31917014718055725, "learning_rate": 4.1866165013224065e-06, "loss": 0.3164, "step": 4992 }, { "epoch": 1.7905809227826919, "grad_norm": 0.32188937067985535, "learning_rate": 4.184557488371823e-06, "loss": 0.3341, "step": 4993 }, { "epoch": 1.7909395170929954, "grad_norm": 0.3316861093044281, "learning_rate": 4.182498617473863e-06, "loss": 0.3017, "step": 4994 }, { "epoch": 1.791298111403299, "grad_norm": 0.30993446707725525, "learning_rate": 4.180439888987185e-06, "loss": 0.3405, "step": 4995 }, { "epoch": 1.7916567057136028, "grad_norm": 0.3065170347690582, "learning_rate": 4.178381303270431e-06, "loss": 0.3028, "step": 4996 }, { "epoch": 1.7920153000239063, "grad_norm": 0.326749712228775, "learning_rate": 4.176322860682209e-06, "loss": 0.3439, "step": 4997 }, { "epoch": 1.7923738943342098, "grad_norm": 0.34690579771995544, "learning_rate": 4.1742645615811075e-06, "loss": 0.3275, "step": 4998 }, { "epoch": 1.7927324886445135, "grad_norm": 0.30593112111091614, "learning_rate": 4.172206406325689e-06, "loss": 0.3065, "step": 4999 }, { "epoch": 1.7930910829548172, "grad_norm": 0.3124520778656006, "learning_rate": 4.170148395274487e-06, "loss": 0.3101, "step": 5000 }, { "epoch": 1.7934496772651207, "grad_norm": 0.3044942617416382, "learning_rate": 4.16809052878602e-06, "loss": 0.3199, "step": 5001 }, { "epoch": 1.7938082715754242, "grad_norm": 0.30033332109451294, "learning_rate": 4.16603280721877e-06, "loss": 0.3056, "step": 5002 }, { "epoch": 1.794166865885728, "grad_norm": 0.31506288051605225, "learning_rate": 4.1639752309312e-06, "loss": 0.3249, "step": 5003 }, { "epoch": 1.7945254601960317, "grad_norm": 0.3317893445491791, "learning_rate": 4.161917800281744e-06, "loss": 0.336, "step": 5004 }, { "epoch": 1.7948840545063351, "grad_norm": 0.28899845480918884, "learning_rate": 4.159860515628817e-06, "loss": 0.2823, "step": 5005 }, { "epoch": 1.7952426488166386, "grad_norm": 0.3152802586555481, "learning_rate": 4.157803377330798e-06, "loss": 0.3296, "step": 5006 }, { "epoch": 1.7956012431269424, "grad_norm": 0.30935198068618774, "learning_rate": 4.155746385746055e-06, "loss": 0.3122, "step": 5007 }, { "epoch": 1.795959837437246, "grad_norm": 0.31864944100379944, "learning_rate": 4.153689541232916e-06, "loss": 0.3306, "step": 5008 }, { "epoch": 1.7963184317475496, "grad_norm": 0.32197314500808716, "learning_rate": 4.151632844149694e-06, "loss": 0.3237, "step": 5009 }, { "epoch": 1.796677026057853, "grad_norm": 0.32288962602615356, "learning_rate": 4.149576294854668e-06, "loss": 0.346, "step": 5010 }, { "epoch": 1.7970356203681568, "grad_norm": 0.3240530788898468, "learning_rate": 4.147519893706098e-06, "loss": 0.2957, "step": 5011 }, { "epoch": 1.7973942146784605, "grad_norm": 0.3225233256816864, "learning_rate": 4.145463641062214e-06, "loss": 0.3135, "step": 5012 }, { "epoch": 1.797752808988764, "grad_norm": 0.340421199798584, "learning_rate": 4.143407537281223e-06, "loss": 0.3358, "step": 5013 }, { "epoch": 1.7981114032990675, "grad_norm": 0.3132797181606293, "learning_rate": 4.141351582721305e-06, "loss": 0.3039, "step": 5014 }, { "epoch": 1.7984699976093714, "grad_norm": 0.34173670411109924, "learning_rate": 4.1392957777406115e-06, "loss": 0.294, "step": 5015 }, { "epoch": 1.798828591919675, "grad_norm": 0.30984655022621155, "learning_rate": 4.137240122697271e-06, "loss": 0.2992, "step": 5016 }, { "epoch": 1.7991871862299784, "grad_norm": 0.3441812992095947, "learning_rate": 4.135184617949387e-06, "loss": 0.327, "step": 5017 }, { "epoch": 1.7995457805402821, "grad_norm": 0.328016459941864, "learning_rate": 4.13312926385503e-06, "loss": 0.3146, "step": 5018 }, { "epoch": 1.7999043748505859, "grad_norm": 0.3221192955970764, "learning_rate": 4.131074060772255e-06, "loss": 0.2745, "step": 5019 }, { "epoch": 1.8002629691608893, "grad_norm": 0.35604238510131836, "learning_rate": 4.129019009059082e-06, "loss": 0.3275, "step": 5020 }, { "epoch": 1.8006215634711928, "grad_norm": 0.3178648054599762, "learning_rate": 4.126964109073507e-06, "loss": 0.2873, "step": 5021 }, { "epoch": 1.8009801577814966, "grad_norm": 0.32686978578567505, "learning_rate": 4.1249093611735e-06, "loss": 0.3211, "step": 5022 }, { "epoch": 1.8013387520918003, "grad_norm": 0.3056224584579468, "learning_rate": 4.1228547657170025e-06, "loss": 0.2994, "step": 5023 }, { "epoch": 1.8016973464021038, "grad_norm": 0.3319786489009857, "learning_rate": 4.120800323061937e-06, "loss": 0.3617, "step": 5024 }, { "epoch": 1.8020559407124073, "grad_norm": 0.3198208212852478, "learning_rate": 4.11874603356619e-06, "loss": 0.3401, "step": 5025 }, { "epoch": 1.802414535022711, "grad_norm": 0.3316494822502136, "learning_rate": 4.116691897587626e-06, "loss": 0.3162, "step": 5026 }, { "epoch": 1.8027731293330147, "grad_norm": 0.2909908890724182, "learning_rate": 4.11463791548408e-06, "loss": 0.2965, "step": 5027 }, { "epoch": 1.8031317236433182, "grad_norm": 0.3209242820739746, "learning_rate": 4.112584087613366e-06, "loss": 0.3098, "step": 5028 }, { "epoch": 1.8034903179536217, "grad_norm": 0.3428199589252472, "learning_rate": 4.110530414333261e-06, "loss": 0.311, "step": 5029 }, { "epoch": 1.8038489122639254, "grad_norm": 0.29917579889297485, "learning_rate": 4.108476896001529e-06, "loss": 0.3006, "step": 5030 }, { "epoch": 1.8042075065742291, "grad_norm": 0.3190292418003082, "learning_rate": 4.106423532975894e-06, "loss": 0.3274, "step": 5031 }, { "epoch": 1.8045661008845326, "grad_norm": 0.35178127884864807, "learning_rate": 4.104370325614061e-06, "loss": 0.323, "step": 5032 }, { "epoch": 1.8049246951948361, "grad_norm": 0.35404062271118164, "learning_rate": 4.102317274273701e-06, "loss": 0.3257, "step": 5033 }, { "epoch": 1.8052832895051398, "grad_norm": 0.3003898561000824, "learning_rate": 4.100264379312467e-06, "loss": 0.3063, "step": 5034 }, { "epoch": 1.8056418838154435, "grad_norm": 0.3706265091896057, "learning_rate": 4.098211641087975e-06, "loss": 0.3596, "step": 5035 }, { "epoch": 1.806000478125747, "grad_norm": 0.31645894050598145, "learning_rate": 4.096159059957822e-06, "loss": 0.3144, "step": 5036 }, { "epoch": 1.8063590724360505, "grad_norm": 0.313814640045166, "learning_rate": 4.094106636279574e-06, "loss": 0.3014, "step": 5037 }, { "epoch": 1.8067176667463543, "grad_norm": 0.33244577050209045, "learning_rate": 4.092054370410767e-06, "loss": 0.3219, "step": 5038 }, { "epoch": 1.807076261056658, "grad_norm": 0.3243325650691986, "learning_rate": 4.090002262708914e-06, "loss": 0.3333, "step": 5039 }, { "epoch": 1.8074348553669615, "grad_norm": 0.30758339166641235, "learning_rate": 4.087950313531497e-06, "loss": 0.3025, "step": 5040 }, { "epoch": 1.807793449677265, "grad_norm": 0.31800130009651184, "learning_rate": 4.085898523235972e-06, "loss": 0.3254, "step": 5041 }, { "epoch": 1.8081520439875687, "grad_norm": 0.3188425898551941, "learning_rate": 4.083846892179772e-06, "loss": 0.3136, "step": 5042 }, { "epoch": 1.8085106382978724, "grad_norm": 0.3445046544075012, "learning_rate": 4.081795420720292e-06, "loss": 0.3691, "step": 5043 }, { "epoch": 1.808869232608176, "grad_norm": 0.33052486181259155, "learning_rate": 4.079744109214906e-06, "loss": 0.3196, "step": 5044 }, { "epoch": 1.8092278269184796, "grad_norm": 0.30918100476264954, "learning_rate": 4.077692958020958e-06, "loss": 0.285, "step": 5045 }, { "epoch": 1.8095864212287833, "grad_norm": 0.3427303433418274, "learning_rate": 4.075641967495767e-06, "loss": 0.3189, "step": 5046 }, { "epoch": 1.8099450155390868, "grad_norm": 0.3331349492073059, "learning_rate": 4.073591137996621e-06, "loss": 0.3263, "step": 5047 }, { "epoch": 1.8103036098493903, "grad_norm": 0.33262255787849426, "learning_rate": 4.071540469880782e-06, "loss": 0.3349, "step": 5048 }, { "epoch": 1.810662204159694, "grad_norm": 0.3142712712287903, "learning_rate": 4.069489963505482e-06, "loss": 0.2881, "step": 5049 }, { "epoch": 1.8110207984699978, "grad_norm": 0.3102864921092987, "learning_rate": 4.067439619227925e-06, "loss": 0.3022, "step": 5050 }, { "epoch": 1.8113793927803012, "grad_norm": 0.34828758239746094, "learning_rate": 4.065389437405286e-06, "loss": 0.3275, "step": 5051 }, { "epoch": 1.8117379870906047, "grad_norm": 0.3457863926887512, "learning_rate": 4.063339418394714e-06, "loss": 0.3594, "step": 5052 }, { "epoch": 1.8120965814009085, "grad_norm": 0.3247181475162506, "learning_rate": 4.061289562553332e-06, "loss": 0.3087, "step": 5053 }, { "epoch": 1.8124551757112122, "grad_norm": 0.32452622056007385, "learning_rate": 4.059239870238229e-06, "loss": 0.3349, "step": 5054 }, { "epoch": 1.8128137700215157, "grad_norm": 0.3050273358821869, "learning_rate": 4.057190341806467e-06, "loss": 0.3151, "step": 5055 }, { "epoch": 1.8131723643318192, "grad_norm": 0.33043649792671204, "learning_rate": 4.05514097761508e-06, "loss": 0.299, "step": 5056 }, { "epoch": 1.8135309586421229, "grad_norm": 0.3159095048904419, "learning_rate": 4.053091778021076e-06, "loss": 0.3138, "step": 5057 }, { "epoch": 1.8138895529524266, "grad_norm": 0.3194497525691986, "learning_rate": 4.051042743381426e-06, "loss": 0.3097, "step": 5058 }, { "epoch": 1.81424814726273, "grad_norm": 0.31823647022247314, "learning_rate": 4.048993874053086e-06, "loss": 0.3274, "step": 5059 }, { "epoch": 1.8146067415730336, "grad_norm": 0.3388727605342865, "learning_rate": 4.046945170392972e-06, "loss": 0.3305, "step": 5060 }, { "epoch": 1.8149653358833373, "grad_norm": 0.28829121589660645, "learning_rate": 4.044896632757974e-06, "loss": 0.2729, "step": 5061 }, { "epoch": 1.815323930193641, "grad_norm": 0.3424137830734253, "learning_rate": 4.042848261504954e-06, "loss": 0.3353, "step": 5062 }, { "epoch": 1.8156825245039445, "grad_norm": 0.35673975944519043, "learning_rate": 4.040800056990744e-06, "loss": 0.364, "step": 5063 }, { "epoch": 1.816041118814248, "grad_norm": 0.32376861572265625, "learning_rate": 4.038752019572148e-06, "loss": 0.3034, "step": 5064 }, { "epoch": 1.8163997131245517, "grad_norm": 0.32543620467185974, "learning_rate": 4.0367041496059415e-06, "loss": 0.298, "step": 5065 }, { "epoch": 1.8167583074348554, "grad_norm": 0.3447396457195282, "learning_rate": 4.03465644744887e-06, "loss": 0.3368, "step": 5066 }, { "epoch": 1.817116901745159, "grad_norm": 0.3384729027748108, "learning_rate": 4.032608913457648e-06, "loss": 0.352, "step": 5067 }, { "epoch": 1.8174754960554624, "grad_norm": 0.29231470823287964, "learning_rate": 4.030561547988963e-06, "loss": 0.2764, "step": 5068 }, { "epoch": 1.8178340903657662, "grad_norm": 0.32552996277809143, "learning_rate": 4.0285143513994715e-06, "loss": 0.3291, "step": 5069 }, { "epoch": 1.8181926846760699, "grad_norm": 0.32900553941726685, "learning_rate": 4.026467324045802e-06, "loss": 0.331, "step": 5070 }, { "epoch": 1.8185512789863734, "grad_norm": 0.31050965189933777, "learning_rate": 4.024420466284556e-06, "loss": 0.2942, "step": 5071 }, { "epoch": 1.8189098732966769, "grad_norm": 0.33562302589416504, "learning_rate": 4.022373778472299e-06, "loss": 0.3425, "step": 5072 }, { "epoch": 1.8192684676069808, "grad_norm": 0.30441832542419434, "learning_rate": 4.020327260965572e-06, "loss": 0.3352, "step": 5073 }, { "epoch": 1.8196270619172843, "grad_norm": 0.3234856128692627, "learning_rate": 4.018280914120882e-06, "loss": 0.299, "step": 5074 }, { "epoch": 1.8199856562275878, "grad_norm": 0.33595454692840576, "learning_rate": 4.016234738294712e-06, "loss": 0.3056, "step": 5075 }, { "epoch": 1.8203442505378915, "grad_norm": 0.3596133291721344, "learning_rate": 4.014188733843511e-06, "loss": 0.3232, "step": 5076 }, { "epoch": 1.8207028448481952, "grad_norm": 0.32640668749809265, "learning_rate": 4.0121429011237004e-06, "loss": 0.294, "step": 5077 }, { "epoch": 1.8210614391584987, "grad_norm": 0.3220815360546112, "learning_rate": 4.0100972404916705e-06, "loss": 0.3555, "step": 5078 }, { "epoch": 1.8214200334688022, "grad_norm": 0.2835087776184082, "learning_rate": 4.00805175230378e-06, "loss": 0.2906, "step": 5079 }, { "epoch": 1.821778627779106, "grad_norm": 0.33980825543403625, "learning_rate": 4.0060064369163595e-06, "loss": 0.3336, "step": 5080 }, { "epoch": 1.8221372220894096, "grad_norm": 0.3429560661315918, "learning_rate": 4.00396129468571e-06, "loss": 0.3232, "step": 5081 }, { "epoch": 1.8224958163997131, "grad_norm": 0.30627790093421936, "learning_rate": 4.001916325968104e-06, "loss": 0.3065, "step": 5082 }, { "epoch": 1.8228544107100166, "grad_norm": 0.30013060569763184, "learning_rate": 3.999871531119779e-06, "loss": 0.3059, "step": 5083 }, { "epoch": 1.8232130050203204, "grad_norm": 0.36423459649086, "learning_rate": 3.9978269104969446e-06, "loss": 0.3376, "step": 5084 }, { "epoch": 1.823571599330624, "grad_norm": 0.31073489785194397, "learning_rate": 3.995782464455779e-06, "loss": 0.3205, "step": 5085 }, { "epoch": 1.8239301936409276, "grad_norm": 0.3566993474960327, "learning_rate": 3.993738193352432e-06, "loss": 0.333, "step": 5086 }, { "epoch": 1.824288787951231, "grad_norm": 0.3249010145664215, "learning_rate": 3.991694097543024e-06, "loss": 0.3033, "step": 5087 }, { "epoch": 1.8246473822615348, "grad_norm": 0.32304221391677856, "learning_rate": 3.989650177383641e-06, "loss": 0.33, "step": 5088 }, { "epoch": 1.8250059765718385, "grad_norm": 0.2974727749824524, "learning_rate": 3.98760643323034e-06, "loss": 0.3, "step": 5089 }, { "epoch": 1.825364570882142, "grad_norm": 0.3239371180534363, "learning_rate": 3.985562865439149e-06, "loss": 0.3224, "step": 5090 }, { "epoch": 1.8257231651924455, "grad_norm": 0.3357645273208618, "learning_rate": 3.983519474366062e-06, "loss": 0.3255, "step": 5091 }, { "epoch": 1.8260817595027492, "grad_norm": 0.3331914246082306, "learning_rate": 3.9814762603670446e-06, "loss": 0.3125, "step": 5092 }, { "epoch": 1.826440353813053, "grad_norm": 0.3023386299610138, "learning_rate": 3.9794332237980316e-06, "loss": 0.2783, "step": 5093 }, { "epoch": 1.8267989481233564, "grad_norm": 0.33088773488998413, "learning_rate": 3.977390365014927e-06, "loss": 0.355, "step": 5094 }, { "epoch": 1.82715754243366, "grad_norm": 0.3239876925945282, "learning_rate": 3.975347684373603e-06, "loss": 0.3136, "step": 5095 }, { "epoch": 1.8275161367439636, "grad_norm": 0.3083685636520386, "learning_rate": 3.973305182229899e-06, "loss": 0.285, "step": 5096 }, { "epoch": 1.8278747310542673, "grad_norm": 0.2973634600639343, "learning_rate": 3.9712628589396265e-06, "loss": 0.2884, "step": 5097 }, { "epoch": 1.8282333253645708, "grad_norm": 0.33095458149909973, "learning_rate": 3.969220714858565e-06, "loss": 0.3514, "step": 5098 }, { "epoch": 1.8285919196748743, "grad_norm": 0.3566107451915741, "learning_rate": 3.967178750342461e-06, "loss": 0.3173, "step": 5099 }, { "epoch": 1.828950513985178, "grad_norm": 0.32422763109207153, "learning_rate": 3.965136965747034e-06, "loss": 0.3228, "step": 5100 }, { "epoch": 1.8293091082954818, "grad_norm": 0.310639888048172, "learning_rate": 3.963095361427966e-06, "loss": 0.3376, "step": 5101 }, { "epoch": 1.8296677026057853, "grad_norm": 0.3118346929550171, "learning_rate": 3.961053937740912e-06, "loss": 0.3035, "step": 5102 }, { "epoch": 1.830026296916089, "grad_norm": 0.32478222250938416, "learning_rate": 3.959012695041493e-06, "loss": 0.3124, "step": 5103 }, { "epoch": 1.8303848912263927, "grad_norm": 0.32573267817497253, "learning_rate": 3.956971633685303e-06, "loss": 0.307, "step": 5104 }, { "epoch": 1.8307434855366962, "grad_norm": 0.3273395597934723, "learning_rate": 3.954930754027898e-06, "loss": 0.3053, "step": 5105 }, { "epoch": 1.8311020798469997, "grad_norm": 0.3076156973838806, "learning_rate": 3.952890056424809e-06, "loss": 0.3033, "step": 5106 }, { "epoch": 1.8314606741573034, "grad_norm": 0.34468144178390503, "learning_rate": 3.950849541231529e-06, "loss": 0.3423, "step": 5107 }, { "epoch": 1.8318192684676071, "grad_norm": 0.3122490644454956, "learning_rate": 3.948809208803524e-06, "loss": 0.2903, "step": 5108 }, { "epoch": 1.8321778627779106, "grad_norm": 0.31796854734420776, "learning_rate": 3.946769059496223e-06, "loss": 0.3093, "step": 5109 }, { "epoch": 1.832536457088214, "grad_norm": 0.3347013592720032, "learning_rate": 3.944729093665029e-06, "loss": 0.3195, "step": 5110 }, { "epoch": 1.8328950513985178, "grad_norm": 0.3224472999572754, "learning_rate": 3.942689311665312e-06, "loss": 0.3413, "step": 5111 }, { "epoch": 1.8332536457088215, "grad_norm": 0.29596051573753357, "learning_rate": 3.940649713852405e-06, "loss": 0.2918, "step": 5112 }, { "epoch": 1.833612240019125, "grad_norm": 0.33042195439338684, "learning_rate": 3.938610300581615e-06, "loss": 0.3472, "step": 5113 }, { "epoch": 1.8339708343294285, "grad_norm": 0.3209547698497772, "learning_rate": 3.9365710722082115e-06, "loss": 0.3, "step": 5114 }, { "epoch": 1.8343294286397323, "grad_norm": 0.3608379662036896, "learning_rate": 3.934532029087435e-06, "loss": 0.3463, "step": 5115 }, { "epoch": 1.834688022950036, "grad_norm": 0.2972043752670288, "learning_rate": 3.932493171574495e-06, "loss": 0.3144, "step": 5116 }, { "epoch": 1.8350466172603395, "grad_norm": 0.32953038811683655, "learning_rate": 3.930454500024565e-06, "loss": 0.3035, "step": 5117 }, { "epoch": 1.835405211570643, "grad_norm": 0.3296234905719757, "learning_rate": 3.92841601479279e-06, "loss": 0.3263, "step": 5118 }, { "epoch": 1.8357638058809467, "grad_norm": 0.3269011080265045, "learning_rate": 3.9263777162342794e-06, "loss": 0.3088, "step": 5119 }, { "epoch": 1.8361224001912504, "grad_norm": 0.3028678596019745, "learning_rate": 3.924339604704108e-06, "loss": 0.3108, "step": 5120 }, { "epoch": 1.8364809945015539, "grad_norm": 0.34237194061279297, "learning_rate": 3.922301680557326e-06, "loss": 0.3578, "step": 5121 }, { "epoch": 1.8368395888118574, "grad_norm": 0.31420573592185974, "learning_rate": 3.920263944148944e-06, "loss": 0.3075, "step": 5122 }, { "epoch": 1.837198183122161, "grad_norm": 0.33176594972610474, "learning_rate": 3.918226395833943e-06, "loss": 0.3239, "step": 5123 }, { "epoch": 1.8375567774324648, "grad_norm": 0.33330202102661133, "learning_rate": 3.9161890359672684e-06, "loss": 0.3125, "step": 5124 }, { "epoch": 1.8379153717427683, "grad_norm": 0.30788445472717285, "learning_rate": 3.914151864903837e-06, "loss": 0.3042, "step": 5125 }, { "epoch": 1.8382739660530718, "grad_norm": 0.3301326632499695, "learning_rate": 3.912114882998526e-06, "loss": 0.2998, "step": 5126 }, { "epoch": 1.8386325603633755, "grad_norm": 0.32553866505622864, "learning_rate": 3.91007809060619e-06, "loss": 0.3125, "step": 5127 }, { "epoch": 1.8389911546736792, "grad_norm": 0.34139150381088257, "learning_rate": 3.90804148808164e-06, "loss": 0.3231, "step": 5128 }, { "epoch": 1.8393497489839827, "grad_norm": 0.3116137683391571, "learning_rate": 3.906005075779661e-06, "loss": 0.3102, "step": 5129 }, { "epoch": 1.8397083432942865, "grad_norm": 0.345468133687973, "learning_rate": 3.903968854055001e-06, "loss": 0.3479, "step": 5130 }, { "epoch": 1.8400669376045902, "grad_norm": 0.36753955483436584, "learning_rate": 3.901932823262377e-06, "loss": 0.3032, "step": 5131 }, { "epoch": 1.8404255319148937, "grad_norm": 0.32913830876350403, "learning_rate": 3.899896983756469e-06, "loss": 0.3031, "step": 5132 }, { "epoch": 1.8407841262251972, "grad_norm": 0.33809027075767517, "learning_rate": 3.897861335891932e-06, "loss": 0.3086, "step": 5133 }, { "epoch": 1.8411427205355009, "grad_norm": 0.3100724518299103, "learning_rate": 3.895825880023377e-06, "loss": 0.2988, "step": 5134 }, { "epoch": 1.8415013148458046, "grad_norm": 0.3326844871044159, "learning_rate": 3.893790616505391e-06, "loss": 0.3328, "step": 5135 }, { "epoch": 1.841859909156108, "grad_norm": 0.3189273476600647, "learning_rate": 3.891755545692521e-06, "loss": 0.3001, "step": 5136 }, { "epoch": 1.8422185034664116, "grad_norm": 0.3263334631919861, "learning_rate": 3.889720667939281e-06, "loss": 0.332, "step": 5137 }, { "epoch": 1.8425770977767153, "grad_norm": 0.29313427209854126, "learning_rate": 3.8876859836001564e-06, "loss": 0.3058, "step": 5138 }, { "epoch": 1.842935692087019, "grad_norm": 0.32570210099220276, "learning_rate": 3.885651493029594e-06, "loss": 0.3558, "step": 5139 }, { "epoch": 1.8432942863973225, "grad_norm": 0.3270883560180664, "learning_rate": 3.883617196582009e-06, "loss": 0.3185, "step": 5140 }, { "epoch": 1.843652880707626, "grad_norm": 0.3240123391151428, "learning_rate": 3.881583094611781e-06, "loss": 0.3131, "step": 5141 }, { "epoch": 1.8440114750179297, "grad_norm": 0.30955204367637634, "learning_rate": 3.879549187473258e-06, "loss": 0.3048, "step": 5142 }, { "epoch": 1.8443700693282334, "grad_norm": 0.31080061197280884, "learning_rate": 3.877515475520751e-06, "loss": 0.2977, "step": 5143 }, { "epoch": 1.844728663638537, "grad_norm": 0.3466764986515045, "learning_rate": 3.875481959108541e-06, "loss": 0.3282, "step": 5144 }, { "epoch": 1.8450872579488404, "grad_norm": 0.35823455452919006, "learning_rate": 3.873448638590872e-06, "loss": 0.3212, "step": 5145 }, { "epoch": 1.8454458522591441, "grad_norm": 0.2937273383140564, "learning_rate": 3.8714155143219564e-06, "loss": 0.2862, "step": 5146 }, { "epoch": 1.8458044465694479, "grad_norm": 0.314235121011734, "learning_rate": 3.86938258665597e-06, "loss": 0.3304, "step": 5147 }, { "epoch": 1.8461630408797514, "grad_norm": 0.3107534646987915, "learning_rate": 3.8673498559470535e-06, "loss": 0.2978, "step": 5148 }, { "epoch": 1.8465216351900549, "grad_norm": 0.32123738527297974, "learning_rate": 3.8653173225493144e-06, "loss": 0.3084, "step": 5149 }, { "epoch": 1.8468802295003586, "grad_norm": 0.3314676582813263, "learning_rate": 3.863284986816829e-06, "loss": 0.3346, "step": 5150 }, { "epoch": 1.8472388238106623, "grad_norm": 0.3014434576034546, "learning_rate": 3.8612528491036355e-06, "loss": 0.3215, "step": 5151 }, { "epoch": 1.8475974181209658, "grad_norm": 0.31710225343704224, "learning_rate": 3.859220909763739e-06, "loss": 0.3216, "step": 5152 }, { "epoch": 1.8479560124312693, "grad_norm": 0.329281210899353, "learning_rate": 3.85718916915111e-06, "loss": 0.3446, "step": 5153 }, { "epoch": 1.848314606741573, "grad_norm": 0.3068433105945587, "learning_rate": 3.855157627619682e-06, "loss": 0.3327, "step": 5154 }, { "epoch": 1.8486732010518767, "grad_norm": 0.30038967728614807, "learning_rate": 3.853126285523356e-06, "loss": 0.3349, "step": 5155 }, { "epoch": 1.8490317953621802, "grad_norm": 0.31706032156944275, "learning_rate": 3.851095143216001e-06, "loss": 0.3154, "step": 5156 }, { "epoch": 1.8493903896724837, "grad_norm": 0.30227774381637573, "learning_rate": 3.849064201051445e-06, "loss": 0.3051, "step": 5157 }, { "epoch": 1.8497489839827874, "grad_norm": 0.3034617006778717, "learning_rate": 3.847033459383488e-06, "loss": 0.3068, "step": 5158 }, { "epoch": 1.8501075782930911, "grad_norm": 0.31727108359336853, "learning_rate": 3.8450029185658886e-06, "loss": 0.3584, "step": 5159 }, { "epoch": 1.8504661726033946, "grad_norm": 0.3091219961643219, "learning_rate": 3.842972578952372e-06, "loss": 0.2933, "step": 5160 }, { "epoch": 1.8508247669136983, "grad_norm": 0.32084423303604126, "learning_rate": 3.840942440896634e-06, "loss": 0.3182, "step": 5161 }, { "epoch": 1.851183361224002, "grad_norm": 0.31090593338012695, "learning_rate": 3.83891250475233e-06, "loss": 0.2831, "step": 5162 }, { "epoch": 1.8515419555343056, "grad_norm": 0.31353962421417236, "learning_rate": 3.836882770873078e-06, "loss": 0.3215, "step": 5163 }, { "epoch": 1.851900549844609, "grad_norm": 0.33013173937797546, "learning_rate": 3.8348532396124665e-06, "loss": 0.343, "step": 5164 }, { "epoch": 1.8522591441549128, "grad_norm": 0.34168505668640137, "learning_rate": 3.832823911324046e-06, "loss": 0.305, "step": 5165 }, { "epoch": 1.8526177384652165, "grad_norm": 0.30013102293014526, "learning_rate": 3.83079478636133e-06, "loss": 0.3162, "step": 5166 }, { "epoch": 1.85297633277552, "grad_norm": 0.3056851029396057, "learning_rate": 3.8287658650778015e-06, "loss": 0.3232, "step": 5167 }, { "epoch": 1.8533349270858235, "grad_norm": 0.31316763162612915, "learning_rate": 3.826737147826902e-06, "loss": 0.3362, "step": 5168 }, { "epoch": 1.8536935213961272, "grad_norm": 0.3062085509300232, "learning_rate": 3.824708634962043e-06, "loss": 0.3212, "step": 5169 }, { "epoch": 1.854052115706431, "grad_norm": 0.29069846868515015, "learning_rate": 3.822680326836596e-06, "loss": 0.3049, "step": 5170 }, { "epoch": 1.8544107100167344, "grad_norm": 0.3099258244037628, "learning_rate": 3.820652223803899e-06, "loss": 0.3178, "step": 5171 }, { "epoch": 1.854769304327038, "grad_norm": 0.33386069536209106, "learning_rate": 3.818624326217252e-06, "loss": 0.3198, "step": 5172 }, { "epoch": 1.8551278986373416, "grad_norm": 0.31058910489082336, "learning_rate": 3.816596634429925e-06, "loss": 0.3265, "step": 5173 }, { "epoch": 1.8554864929476453, "grad_norm": 0.29829517006874084, "learning_rate": 3.8145691487951443e-06, "loss": 0.2812, "step": 5174 }, { "epoch": 1.8558450872579488, "grad_norm": 0.35488003492355347, "learning_rate": 3.812541869666107e-06, "loss": 0.3452, "step": 5175 }, { "epoch": 1.8562036815682523, "grad_norm": 0.3009920120239258, "learning_rate": 3.810514797395971e-06, "loss": 0.3219, "step": 5176 }, { "epoch": 1.856562275878556, "grad_norm": 0.31915125250816345, "learning_rate": 3.8084879323378555e-06, "loss": 0.3192, "step": 5177 }, { "epoch": 1.8569208701888598, "grad_norm": 0.3266572058200836, "learning_rate": 3.8064612748448516e-06, "loss": 0.3291, "step": 5178 }, { "epoch": 1.8572794644991633, "grad_norm": 0.33014485239982605, "learning_rate": 3.804434825270007e-06, "loss": 0.3226, "step": 5179 }, { "epoch": 1.8576380588094668, "grad_norm": 0.31686756014823914, "learning_rate": 3.802408583966334e-06, "loss": 0.3117, "step": 5180 }, { "epoch": 1.8579966531197705, "grad_norm": 0.32323986291885376, "learning_rate": 3.8003825512868125e-06, "loss": 0.3163, "step": 5181 }, { "epoch": 1.8583552474300742, "grad_norm": 0.2945961654186249, "learning_rate": 3.7983567275843835e-06, "loss": 0.3062, "step": 5182 }, { "epoch": 1.8587138417403777, "grad_norm": 0.3221542239189148, "learning_rate": 3.7963311132119485e-06, "loss": 0.3165, "step": 5183 }, { "epoch": 1.8590724360506812, "grad_norm": 0.3242722749710083, "learning_rate": 3.7943057085223807e-06, "loss": 0.3183, "step": 5184 }, { "epoch": 1.859431030360985, "grad_norm": 0.313179612159729, "learning_rate": 3.792280513868509e-06, "loss": 0.3209, "step": 5185 }, { "epoch": 1.8597896246712886, "grad_norm": 0.329214870929718, "learning_rate": 3.790255529603129e-06, "loss": 0.2971, "step": 5186 }, { "epoch": 1.860148218981592, "grad_norm": 0.3022780418395996, "learning_rate": 3.788230756079e-06, "loss": 0.3097, "step": 5187 }, { "epoch": 1.8605068132918958, "grad_norm": 0.3203171491622925, "learning_rate": 3.7862061936488435e-06, "loss": 0.3158, "step": 5188 }, { "epoch": 1.8608654076021995, "grad_norm": 0.3193347752094269, "learning_rate": 3.784181842665343e-06, "loss": 0.3377, "step": 5189 }, { "epoch": 1.861224001912503, "grad_norm": 0.28298962116241455, "learning_rate": 3.782157703481149e-06, "loss": 0.297, "step": 5190 }, { "epoch": 1.8615825962228065, "grad_norm": 0.3351912498474121, "learning_rate": 3.7801337764488726e-06, "loss": 0.3313, "step": 5191 }, { "epoch": 1.8619411905331102, "grad_norm": 0.3645448684692383, "learning_rate": 3.7781100619210863e-06, "loss": 0.3361, "step": 5192 }, { "epoch": 1.862299784843414, "grad_norm": 0.30478161573410034, "learning_rate": 3.77608656025033e-06, "loss": 0.3343, "step": 5193 }, { "epoch": 1.8626583791537175, "grad_norm": 0.3191460072994232, "learning_rate": 3.774063271789101e-06, "loss": 0.3544, "step": 5194 }, { "epoch": 1.863016973464021, "grad_norm": 0.3074877858161926, "learning_rate": 3.7720401968898655e-06, "loss": 0.3075, "step": 5195 }, { "epoch": 1.8633755677743247, "grad_norm": 0.3121276795864105, "learning_rate": 3.7700173359050486e-06, "loss": 0.3064, "step": 5196 }, { "epoch": 1.8637341620846284, "grad_norm": 0.32923373579978943, "learning_rate": 3.7679946891870366e-06, "loss": 0.2928, "step": 5197 }, { "epoch": 1.8640927563949319, "grad_norm": 0.3327983617782593, "learning_rate": 3.7659722570881847e-06, "loss": 0.3225, "step": 5198 }, { "epoch": 1.8644513507052354, "grad_norm": 0.32094722986221313, "learning_rate": 3.7639500399608047e-06, "loss": 0.3046, "step": 5199 }, { "epoch": 1.864809945015539, "grad_norm": 0.2964801788330078, "learning_rate": 3.761928038157171e-06, "loss": 0.3204, "step": 5200 }, { "epoch": 1.8651685393258428, "grad_norm": 0.3260438144207001, "learning_rate": 3.759906252029527e-06, "loss": 0.3084, "step": 5201 }, { "epoch": 1.8655271336361463, "grad_norm": 0.33241555094718933, "learning_rate": 3.7578846819300725e-06, "loss": 0.3025, "step": 5202 }, { "epoch": 1.8658857279464498, "grad_norm": 0.3272447884082794, "learning_rate": 3.755863328210969e-06, "loss": 0.3344, "step": 5203 }, { "epoch": 1.8662443222567535, "grad_norm": 0.31053709983825684, "learning_rate": 3.7538421912243462e-06, "loss": 0.3013, "step": 5204 }, { "epoch": 1.8666029165670572, "grad_norm": 0.3237230181694031, "learning_rate": 3.7518212713222905e-06, "loss": 0.3129, "step": 5205 }, { "epoch": 1.8669615108773607, "grad_norm": 0.30273792147636414, "learning_rate": 3.7498005688568505e-06, "loss": 0.2885, "step": 5206 }, { "epoch": 1.8673201051876642, "grad_norm": 0.33183902502059937, "learning_rate": 3.7477800841800433e-06, "loss": 0.3638, "step": 5207 }, { "epoch": 1.867678699497968, "grad_norm": 0.3059844970703125, "learning_rate": 3.7457598176438408e-06, "loss": 0.2973, "step": 5208 }, { "epoch": 1.8680372938082717, "grad_norm": 0.323047935962677, "learning_rate": 3.74373976960018e-06, "loss": 0.32, "step": 5209 }, { "epoch": 1.8683958881185752, "grad_norm": 0.36951062083244324, "learning_rate": 3.7417199404009596e-06, "loss": 0.3437, "step": 5210 }, { "epoch": 1.8687544824288786, "grad_norm": 0.32536524534225464, "learning_rate": 3.7397003303980416e-06, "loss": 0.3059, "step": 5211 }, { "epoch": 1.8691130767391824, "grad_norm": 0.31055009365081787, "learning_rate": 3.7376809399432447e-06, "loss": 0.3293, "step": 5212 }, { "epoch": 1.869471671049486, "grad_norm": 0.30015239119529724, "learning_rate": 3.7356617693883568e-06, "loss": 0.3248, "step": 5213 }, { "epoch": 1.8698302653597896, "grad_norm": 0.3061984181404114, "learning_rate": 3.7336428190851227e-06, "loss": 0.3126, "step": 5214 }, { "epoch": 1.870188859670093, "grad_norm": 0.3418022096157074, "learning_rate": 3.7316240893852484e-06, "loss": 0.3525, "step": 5215 }, { "epoch": 1.8705474539803968, "grad_norm": 0.31708255410194397, "learning_rate": 3.7296055806404045e-06, "loss": 0.31, "step": 5216 }, { "epoch": 1.8709060482907005, "grad_norm": 0.3301028609275818, "learning_rate": 3.7275872932022194e-06, "loss": 0.3059, "step": 5217 }, { "epoch": 1.871264642601004, "grad_norm": 0.34256938099861145, "learning_rate": 3.7255692274222877e-06, "loss": 0.3269, "step": 5218 }, { "epoch": 1.8716232369113077, "grad_norm": 0.34860825538635254, "learning_rate": 3.7235513836521626e-06, "loss": 0.3458, "step": 5219 }, { "epoch": 1.8719818312216114, "grad_norm": 0.3361254632472992, "learning_rate": 3.7215337622433565e-06, "loss": 0.2812, "step": 5220 }, { "epoch": 1.872340425531915, "grad_norm": 0.3535362184047699, "learning_rate": 3.719516363547347e-06, "loss": 0.3561, "step": 5221 }, { "epoch": 1.8726990198422184, "grad_norm": 0.30009356141090393, "learning_rate": 3.7174991879155707e-06, "loss": 0.3155, "step": 5222 }, { "epoch": 1.8730576141525221, "grad_norm": 0.32216858863830566, "learning_rate": 3.7154822356994246e-06, "loss": 0.3021, "step": 5223 }, { "epoch": 1.8734162084628259, "grad_norm": 0.3316386044025421, "learning_rate": 3.713465507250271e-06, "loss": 0.3445, "step": 5224 }, { "epoch": 1.8737748027731294, "grad_norm": 0.3202020227909088, "learning_rate": 3.7114490029194287e-06, "loss": 0.3202, "step": 5225 }, { "epoch": 1.8741333970834329, "grad_norm": 0.3176635205745697, "learning_rate": 3.7094327230581795e-06, "loss": 0.3301, "step": 5226 }, { "epoch": 1.8744919913937366, "grad_norm": 0.30681681632995605, "learning_rate": 3.7074166680177637e-06, "loss": 0.3133, "step": 5227 }, { "epoch": 1.8748505857040403, "grad_norm": 0.3334972560405731, "learning_rate": 3.7054008381493865e-06, "loss": 0.3219, "step": 5228 }, { "epoch": 1.8752091800143438, "grad_norm": 0.32869720458984375, "learning_rate": 3.7033852338042096e-06, "loss": 0.3278, "step": 5229 }, { "epoch": 1.8755677743246473, "grad_norm": 0.35147345066070557, "learning_rate": 3.701369855333361e-06, "loss": 0.3421, "step": 5230 }, { "epoch": 1.875926368634951, "grad_norm": 0.327566921710968, "learning_rate": 3.699354703087923e-06, "loss": 0.3196, "step": 5231 }, { "epoch": 1.8762849629452547, "grad_norm": 0.3349957764148712, "learning_rate": 3.6973397774189424e-06, "loss": 0.2915, "step": 5232 }, { "epoch": 1.8766435572555582, "grad_norm": 0.3079647421836853, "learning_rate": 3.6953250786774253e-06, "loss": 0.3148, "step": 5233 }, { "epoch": 1.8770021515658617, "grad_norm": 0.41264888644218445, "learning_rate": 3.693310607214337e-06, "loss": 0.3459, "step": 5234 }, { "epoch": 1.8773607458761654, "grad_norm": 0.3165878653526306, "learning_rate": 3.6912963633806085e-06, "loss": 0.3145, "step": 5235 }, { "epoch": 1.8777193401864691, "grad_norm": 0.30634787678718567, "learning_rate": 3.689282347527125e-06, "loss": 0.2859, "step": 5236 }, { "epoch": 1.8780779344967726, "grad_norm": 0.3273569345474243, "learning_rate": 3.687268560004734e-06, "loss": 0.322, "step": 5237 }, { "epoch": 1.8784365288070761, "grad_norm": 0.3507261276245117, "learning_rate": 3.685255001164244e-06, "loss": 0.3354, "step": 5238 }, { "epoch": 1.8787951231173798, "grad_norm": 0.30651113390922546, "learning_rate": 3.6832416713564232e-06, "loss": 0.3092, "step": 5239 }, { "epoch": 1.8791537174276836, "grad_norm": 0.294804185628891, "learning_rate": 3.6812285709319985e-06, "loss": 0.267, "step": 5240 }, { "epoch": 1.879512311737987, "grad_norm": 0.30985093116760254, "learning_rate": 3.679215700241662e-06, "loss": 0.3377, "step": 5241 }, { "epoch": 1.8798709060482905, "grad_norm": 0.3117198050022125, "learning_rate": 3.6772030596360585e-06, "loss": 0.311, "step": 5242 }, { "epoch": 1.8802295003585943, "grad_norm": 0.32505369186401367, "learning_rate": 3.6751906494657985e-06, "loss": 0.3219, "step": 5243 }, { "epoch": 1.880588094668898, "grad_norm": 0.3077116012573242, "learning_rate": 3.6731784700814476e-06, "loss": 0.3253, "step": 5244 }, { "epoch": 1.8809466889792015, "grad_norm": 0.3279406726360321, "learning_rate": 3.671166521833537e-06, "loss": 0.3401, "step": 5245 }, { "epoch": 1.8813052832895052, "grad_norm": 0.3273342549800873, "learning_rate": 3.669154805072549e-06, "loss": 0.3205, "step": 5246 }, { "epoch": 1.881663877599809, "grad_norm": 0.3022947311401367, "learning_rate": 3.6671433201489376e-06, "loss": 0.3118, "step": 5247 }, { "epoch": 1.8820224719101124, "grad_norm": 0.308189332485199, "learning_rate": 3.6651320674131063e-06, "loss": 0.3302, "step": 5248 }, { "epoch": 1.882381066220416, "grad_norm": 0.3343345820903778, "learning_rate": 3.663121047215422e-06, "loss": 0.3271, "step": 5249 }, { "epoch": 1.8827396605307196, "grad_norm": 0.3202182352542877, "learning_rate": 3.6611102599062092e-06, "loss": 0.3166, "step": 5250 }, { "epoch": 1.8830982548410233, "grad_norm": 0.2985600531101227, "learning_rate": 3.659099705835756e-06, "loss": 0.2962, "step": 5251 }, { "epoch": 1.8834568491513268, "grad_norm": 0.3079124987125397, "learning_rate": 3.6570893853543033e-06, "loss": 0.3301, "step": 5252 }, { "epoch": 1.8838154434616303, "grad_norm": 0.32412493228912354, "learning_rate": 3.6550792988120603e-06, "loss": 0.3119, "step": 5253 }, { "epoch": 1.884174037771934, "grad_norm": 0.31411871314048767, "learning_rate": 3.6530694465591876e-06, "loss": 0.3235, "step": 5254 }, { "epoch": 1.8845326320822378, "grad_norm": 0.3021986484527588, "learning_rate": 3.6510598289458076e-06, "loss": 0.2941, "step": 5255 }, { "epoch": 1.8848912263925413, "grad_norm": 0.3095738887786865, "learning_rate": 3.6490504463220016e-06, "loss": 0.3272, "step": 5256 }, { "epoch": 1.8852498207028447, "grad_norm": 0.29488199949264526, "learning_rate": 3.6470412990378094e-06, "loss": 0.279, "step": 5257 }, { "epoch": 1.8856084150131485, "grad_norm": 0.3105526268482208, "learning_rate": 3.6450323874432347e-06, "loss": 0.3251, "step": 5258 }, { "epoch": 1.8859670093234522, "grad_norm": 0.3118860423564911, "learning_rate": 3.6430237118882333e-06, "loss": 0.3226, "step": 5259 }, { "epoch": 1.8863256036337557, "grad_norm": 0.3142813742160797, "learning_rate": 3.641015272722723e-06, "loss": 0.2986, "step": 5260 }, { "epoch": 1.8866841979440592, "grad_norm": 0.3466244339942932, "learning_rate": 3.6390070702965795e-06, "loss": 0.3379, "step": 5261 }, { "epoch": 1.8870427922543629, "grad_norm": 0.3189569413661957, "learning_rate": 3.63699910495964e-06, "loss": 0.2975, "step": 5262 }, { "epoch": 1.8874013865646666, "grad_norm": 0.30635398626327515, "learning_rate": 3.634991377061694e-06, "loss": 0.2948, "step": 5263 }, { "epoch": 1.88775998087497, "grad_norm": 0.32397326827049255, "learning_rate": 3.6329838869524995e-06, "loss": 0.3269, "step": 5264 }, { "epoch": 1.8881185751852736, "grad_norm": 0.30680933594703674, "learning_rate": 3.6309766349817648e-06, "loss": 0.3261, "step": 5265 }, { "epoch": 1.8884771694955773, "grad_norm": 0.3062726855278015, "learning_rate": 3.62896962149916e-06, "loss": 0.3172, "step": 5266 }, { "epoch": 1.888835763805881, "grad_norm": 0.3141491115093231, "learning_rate": 3.626962846854311e-06, "loss": 0.3585, "step": 5267 }, { "epoch": 1.8891943581161845, "grad_norm": 0.30859896540641785, "learning_rate": 3.6249563113968074e-06, "loss": 0.2806, "step": 5268 }, { "epoch": 1.889552952426488, "grad_norm": 0.3122211992740631, "learning_rate": 3.62295001547619e-06, "loss": 0.3086, "step": 5269 }, { "epoch": 1.8899115467367917, "grad_norm": 0.3352351188659668, "learning_rate": 3.620943959441966e-06, "loss": 0.3106, "step": 5270 }, { "epoch": 1.8902701410470955, "grad_norm": 0.32033607363700867, "learning_rate": 3.618938143643595e-06, "loss": 0.343, "step": 5271 }, { "epoch": 1.890628735357399, "grad_norm": 0.2962038815021515, "learning_rate": 3.6169325684304956e-06, "loss": 0.3175, "step": 5272 }, { "epoch": 1.8909873296677024, "grad_norm": 0.3312377333641052, "learning_rate": 3.614927234152046e-06, "loss": 0.3334, "step": 5273 }, { "epoch": 1.8913459239780062, "grad_norm": 0.34316951036453247, "learning_rate": 3.61292214115758e-06, "loss": 0.3556, "step": 5274 }, { "epoch": 1.8917045182883099, "grad_norm": 0.31116756796836853, "learning_rate": 3.610917289796393e-06, "loss": 0.3105, "step": 5275 }, { "epoch": 1.8920631125986134, "grad_norm": 0.31847065687179565, "learning_rate": 3.6089126804177373e-06, "loss": 0.3332, "step": 5276 }, { "epoch": 1.892421706908917, "grad_norm": 0.2838166058063507, "learning_rate": 3.6069083133708206e-06, "loss": 0.303, "step": 5277 }, { "epoch": 1.8927803012192208, "grad_norm": 0.3009176254272461, "learning_rate": 3.6049041890048084e-06, "loss": 0.3051, "step": 5278 }, { "epoch": 1.8931388955295243, "grad_norm": 0.29779285192489624, "learning_rate": 3.602900307668827e-06, "loss": 0.3128, "step": 5279 }, { "epoch": 1.8934974898398278, "grad_norm": 0.29565665125846863, "learning_rate": 3.600896669711958e-06, "loss": 0.313, "step": 5280 }, { "epoch": 1.8938560841501315, "grad_norm": 0.3179234266281128, "learning_rate": 3.5988932754832435e-06, "loss": 0.3446, "step": 5281 }, { "epoch": 1.8942146784604352, "grad_norm": 0.28148525953292847, "learning_rate": 3.5968901253316798e-06, "loss": 0.2851, "step": 5282 }, { "epoch": 1.8945732727707387, "grad_norm": 0.342604398727417, "learning_rate": 3.594887219606221e-06, "loss": 0.3766, "step": 5283 }, { "epoch": 1.8949318670810422, "grad_norm": 0.30581074953079224, "learning_rate": 3.59288455865578e-06, "loss": 0.3037, "step": 5284 }, { "epoch": 1.895290461391346, "grad_norm": 0.3093906342983246, "learning_rate": 3.5908821428292262e-06, "loss": 0.3031, "step": 5285 }, { "epoch": 1.8956490557016497, "grad_norm": 0.30023685097694397, "learning_rate": 3.5888799724753865e-06, "loss": 0.3313, "step": 5286 }, { "epoch": 1.8960076500119531, "grad_norm": 0.3164438009262085, "learning_rate": 3.586878047943047e-06, "loss": 0.2963, "step": 5287 }, { "epoch": 1.8963662443222566, "grad_norm": 0.3114888370037079, "learning_rate": 3.5848763695809485e-06, "loss": 0.3361, "step": 5288 }, { "epoch": 1.8967248386325604, "grad_norm": 0.3097308576107025, "learning_rate": 3.582874937737789e-06, "loss": 0.3361, "step": 5289 }, { "epoch": 1.897083432942864, "grad_norm": 0.3369371294975281, "learning_rate": 3.5808737527622233e-06, "loss": 0.3165, "step": 5290 }, { "epoch": 1.8974420272531676, "grad_norm": 0.3432331681251526, "learning_rate": 3.578872815002865e-06, "loss": 0.3266, "step": 5291 }, { "epoch": 1.897800621563471, "grad_norm": 0.30110761523246765, "learning_rate": 3.576872124808284e-06, "loss": 0.3091, "step": 5292 }, { "epoch": 1.8981592158737748, "grad_norm": 0.31964975595474243, "learning_rate": 3.574871682527007e-06, "loss": 0.2903, "step": 5293 }, { "epoch": 1.8985178101840785, "grad_norm": 0.30832695960998535, "learning_rate": 3.5728714885075168e-06, "loss": 0.3203, "step": 5294 }, { "epoch": 1.898876404494382, "grad_norm": 0.315154492855072, "learning_rate": 3.570871543098253e-06, "loss": 0.2941, "step": 5295 }, { "epoch": 1.8992349988046855, "grad_norm": 0.3280278146266937, "learning_rate": 3.5688718466476126e-06, "loss": 0.3107, "step": 5296 }, { "epoch": 1.8995935931149892, "grad_norm": 0.3137848973274231, "learning_rate": 3.5668723995039482e-06, "loss": 0.3106, "step": 5297 }, { "epoch": 1.899952187425293, "grad_norm": 0.32256844639778137, "learning_rate": 3.564873202015571e-06, "loss": 0.3219, "step": 5298 }, { "epoch": 1.9003107817355964, "grad_norm": 0.3088282644748688, "learning_rate": 3.562874254530748e-06, "loss": 0.3092, "step": 5299 }, { "epoch": 1.9006693760459, "grad_norm": 0.3121979832649231, "learning_rate": 3.5608755573977004e-06, "loss": 0.3225, "step": 5300 }, { "epoch": 1.9010279703562036, "grad_norm": 0.3378770649433136, "learning_rate": 3.5588771109646093e-06, "loss": 0.3316, "step": 5301 }, { "epoch": 1.9013865646665074, "grad_norm": 0.2939892113208771, "learning_rate": 3.556878915579607e-06, "loss": 0.2699, "step": 5302 }, { "epoch": 1.9017451589768108, "grad_norm": 0.3335823714733124, "learning_rate": 3.5548809715907874e-06, "loss": 0.3482, "step": 5303 }, { "epoch": 1.9021037532871146, "grad_norm": 0.3120954930782318, "learning_rate": 3.552883279346199e-06, "loss": 0.3137, "step": 5304 }, { "epoch": 1.9024623475974183, "grad_norm": 0.31626972556114197, "learning_rate": 3.550885839193846e-06, "loss": 0.3446, "step": 5305 }, { "epoch": 1.9028209419077218, "grad_norm": 0.3350069522857666, "learning_rate": 3.548888651481689e-06, "loss": 0.3002, "step": 5306 }, { "epoch": 1.9031795362180253, "grad_norm": 0.3709125816822052, "learning_rate": 3.546891716557643e-06, "loss": 0.3328, "step": 5307 }, { "epoch": 1.903538130528329, "grad_norm": 0.3117446005344391, "learning_rate": 3.54489503476958e-06, "loss": 0.2799, "step": 5308 }, { "epoch": 1.9038967248386327, "grad_norm": 0.32957780361175537, "learning_rate": 3.5428986064653292e-06, "loss": 0.3418, "step": 5309 }, { "epoch": 1.9042553191489362, "grad_norm": 0.34367096424102783, "learning_rate": 3.540902431992674e-06, "loss": 0.3261, "step": 5310 }, { "epoch": 1.9046139134592397, "grad_norm": 0.3460659086704254, "learning_rate": 3.538906511699356e-06, "loss": 0.3296, "step": 5311 }, { "epoch": 1.9049725077695434, "grad_norm": 0.31995299458503723, "learning_rate": 3.5369108459330697e-06, "loss": 0.3143, "step": 5312 }, { "epoch": 1.9053311020798471, "grad_norm": 0.33887752890586853, "learning_rate": 3.534915435041466e-06, "loss": 0.3498, "step": 5313 }, { "epoch": 1.9056896963901506, "grad_norm": 0.3294277787208557, "learning_rate": 3.532920279372149e-06, "loss": 0.3357, "step": 5314 }, { "epoch": 1.9060482907004541, "grad_norm": 0.33304303884506226, "learning_rate": 3.530925379272685e-06, "loss": 0.3052, "step": 5315 }, { "epoch": 1.9064068850107578, "grad_norm": 0.323147714138031, "learning_rate": 3.5289307350905914e-06, "loss": 0.2989, "step": 5316 }, { "epoch": 1.9067654793210616, "grad_norm": 0.3365870714187622, "learning_rate": 3.526936347173341e-06, "loss": 0.3387, "step": 5317 }, { "epoch": 1.907124073631365, "grad_norm": 0.335673063993454, "learning_rate": 3.524942215868361e-06, "loss": 0.3364, "step": 5318 }, { "epoch": 1.9074826679416685, "grad_norm": 0.30913081765174866, "learning_rate": 3.5229483415230357e-06, "loss": 0.2577, "step": 5319 }, { "epoch": 1.9078412622519723, "grad_norm": 0.33213359117507935, "learning_rate": 3.5209547244847048e-06, "loss": 0.3318, "step": 5320 }, { "epoch": 1.908199856562276, "grad_norm": 0.32453957200050354, "learning_rate": 3.5189613651006623e-06, "loss": 0.3334, "step": 5321 }, { "epoch": 1.9085584508725795, "grad_norm": 0.34137988090515137, "learning_rate": 3.516968263718159e-06, "loss": 0.3514, "step": 5322 }, { "epoch": 1.908917045182883, "grad_norm": 0.3139071464538574, "learning_rate": 3.514975420684398e-06, "loss": 0.3006, "step": 5323 }, { "epoch": 1.9092756394931867, "grad_norm": 0.3149694502353668, "learning_rate": 3.5129828363465386e-06, "loss": 0.3207, "step": 5324 }, { "epoch": 1.9096342338034904, "grad_norm": 0.40317559242248535, "learning_rate": 3.5109905110516943e-06, "loss": 0.3391, "step": 5325 }, { "epoch": 1.909992828113794, "grad_norm": 0.32586950063705444, "learning_rate": 3.508998445146936e-06, "loss": 0.29, "step": 5326 }, { "epoch": 1.9103514224240974, "grad_norm": 0.3089097738265991, "learning_rate": 3.507006638979287e-06, "loss": 0.3151, "step": 5327 }, { "epoch": 1.910710016734401, "grad_norm": 0.3148076832294464, "learning_rate": 3.5050150928957285e-06, "loss": 0.3147, "step": 5328 }, { "epoch": 1.9110686110447048, "grad_norm": 0.3070763051509857, "learning_rate": 3.503023807243191e-06, "loss": 0.318, "step": 5329 }, { "epoch": 1.9114272053550083, "grad_norm": 0.322956383228302, "learning_rate": 3.5010327823685632e-06, "loss": 0.3357, "step": 5330 }, { "epoch": 1.9117857996653118, "grad_norm": 0.3084110617637634, "learning_rate": 3.499042018618687e-06, "loss": 0.296, "step": 5331 }, { "epoch": 1.9121443939756158, "grad_norm": 0.3169831335544586, "learning_rate": 3.497051516340363e-06, "loss": 0.3321, "step": 5332 }, { "epoch": 1.9125029882859192, "grad_norm": 0.3076106905937195, "learning_rate": 3.495061275880339e-06, "loss": 0.2861, "step": 5333 }, { "epoch": 1.9128615825962227, "grad_norm": 0.3560449182987213, "learning_rate": 3.4930712975853248e-06, "loss": 0.3596, "step": 5334 }, { "epoch": 1.9132201769065265, "grad_norm": 0.3112521767616272, "learning_rate": 3.4910815818019783e-06, "loss": 0.3003, "step": 5335 }, { "epoch": 1.9135787712168302, "grad_norm": 0.3499453067779541, "learning_rate": 3.4890921288769154e-06, "loss": 0.3445, "step": 5336 }, { "epoch": 1.9139373655271337, "grad_norm": 0.3085782527923584, "learning_rate": 3.4871029391567017e-06, "loss": 0.3108, "step": 5337 }, { "epoch": 1.9142959598374372, "grad_norm": 0.31139901280403137, "learning_rate": 3.485114012987865e-06, "loss": 0.295, "step": 5338 }, { "epoch": 1.9146545541477409, "grad_norm": 0.3238002061843872, "learning_rate": 3.4831253507168796e-06, "loss": 0.3109, "step": 5339 }, { "epoch": 1.9150131484580446, "grad_norm": 0.3424634337425232, "learning_rate": 3.4811369526901783e-06, "loss": 0.3614, "step": 5340 }, { "epoch": 1.915371742768348, "grad_norm": 0.3261106014251709, "learning_rate": 3.4791488192541455e-06, "loss": 0.3351, "step": 5341 }, { "epoch": 1.9157303370786516, "grad_norm": 0.307796448469162, "learning_rate": 3.4771609507551196e-06, "loss": 0.2988, "step": 5342 }, { "epoch": 1.9160889313889553, "grad_norm": 0.317569375038147, "learning_rate": 3.4751733475393916e-06, "loss": 0.3506, "step": 5343 }, { "epoch": 1.916447525699259, "grad_norm": 0.32935386896133423, "learning_rate": 3.4731860099532124e-06, "loss": 0.2925, "step": 5344 }, { "epoch": 1.9168061200095625, "grad_norm": 0.3469105362892151, "learning_rate": 3.4711989383427808e-06, "loss": 0.3259, "step": 5345 }, { "epoch": 1.917164714319866, "grad_norm": 0.3176804184913635, "learning_rate": 3.4692121330542504e-06, "loss": 0.3105, "step": 5346 }, { "epoch": 1.9175233086301697, "grad_norm": 0.33294036984443665, "learning_rate": 3.4672255944337295e-06, "loss": 0.3184, "step": 5347 }, { "epoch": 1.9178819029404734, "grad_norm": 0.2973614037036896, "learning_rate": 3.465239322827277e-06, "loss": 0.3034, "step": 5348 }, { "epoch": 1.918240497250777, "grad_norm": 0.33771923184394836, "learning_rate": 3.4632533185809102e-06, "loss": 0.3397, "step": 5349 }, { "epoch": 1.9185990915610804, "grad_norm": 0.3197658360004425, "learning_rate": 3.461267582040596e-06, "loss": 0.2949, "step": 5350 }, { "epoch": 1.9189576858713842, "grad_norm": 0.33884337544441223, "learning_rate": 3.4592821135522573e-06, "loss": 0.326, "step": 5351 }, { "epoch": 1.9193162801816879, "grad_norm": 0.3570083677768707, "learning_rate": 3.4572969134617685e-06, "loss": 0.3254, "step": 5352 }, { "epoch": 1.9196748744919914, "grad_norm": 0.32444098591804504, "learning_rate": 3.455311982114956e-06, "loss": 0.3407, "step": 5353 }, { "epoch": 1.9200334688022949, "grad_norm": 0.3017325699329376, "learning_rate": 3.4533273198576007e-06, "loss": 0.3087, "step": 5354 }, { "epoch": 1.9203920631125986, "grad_norm": 0.28026366233825684, "learning_rate": 3.4513429270354397e-06, "loss": 0.3233, "step": 5355 }, { "epoch": 1.9207506574229023, "grad_norm": 0.2939804792404175, "learning_rate": 3.4493588039941585e-06, "loss": 0.3314, "step": 5356 }, { "epoch": 1.9211092517332058, "grad_norm": 0.30432718992233276, "learning_rate": 3.4473749510793984e-06, "loss": 0.3135, "step": 5357 }, { "epoch": 1.9214678460435093, "grad_norm": 0.3233451247215271, "learning_rate": 3.445391368636753e-06, "loss": 0.3352, "step": 5358 }, { "epoch": 1.921826440353813, "grad_norm": 0.29668867588043213, "learning_rate": 3.4434080570117672e-06, "loss": 0.3044, "step": 5359 }, { "epoch": 1.9221850346641167, "grad_norm": 0.32342812418937683, "learning_rate": 3.441425016549939e-06, "loss": 0.3487, "step": 5360 }, { "epoch": 1.9225436289744202, "grad_norm": 0.33538737893104553, "learning_rate": 3.439442247596724e-06, "loss": 0.3386, "step": 5361 }, { "epoch": 1.922902223284724, "grad_norm": 0.29524582624435425, "learning_rate": 3.437459750497524e-06, "loss": 0.309, "step": 5362 }, { "epoch": 1.9232608175950276, "grad_norm": 0.3182678818702698, "learning_rate": 3.435477525597697e-06, "loss": 0.3182, "step": 5363 }, { "epoch": 1.9236194119053311, "grad_norm": 0.345660924911499, "learning_rate": 3.4334955732425527e-06, "loss": 0.3267, "step": 5364 }, { "epoch": 1.9239780062156346, "grad_norm": 0.3244782090187073, "learning_rate": 3.4315138937773535e-06, "loss": 0.2887, "step": 5365 }, { "epoch": 1.9243366005259384, "grad_norm": 0.3318449854850769, "learning_rate": 3.4295324875473113e-06, "loss": 0.3307, "step": 5366 }, { "epoch": 1.924695194836242, "grad_norm": 0.3058112561702728, "learning_rate": 3.4275513548975986e-06, "loss": 0.3129, "step": 5367 }, { "epoch": 1.9250537891465456, "grad_norm": 0.31343933939933777, "learning_rate": 3.4255704961733305e-06, "loss": 0.3091, "step": 5368 }, { "epoch": 1.925412383456849, "grad_norm": 0.3396916091442108, "learning_rate": 3.4235899117195812e-06, "loss": 0.3379, "step": 5369 }, { "epoch": 1.9257709777671528, "grad_norm": 0.3273017406463623, "learning_rate": 3.421609601881374e-06, "loss": 0.2946, "step": 5370 }, { "epoch": 1.9261295720774565, "grad_norm": 0.32918691635131836, "learning_rate": 3.4196295670036834e-06, "loss": 0.2974, "step": 5371 }, { "epoch": 1.92648816638776, "grad_norm": 0.3304256498813629, "learning_rate": 3.41764980743144e-06, "loss": 0.3458, "step": 5372 }, { "epoch": 1.9268467606980635, "grad_norm": 0.29422202706336975, "learning_rate": 3.4156703235095227e-06, "loss": 0.3287, "step": 5373 }, { "epoch": 1.9272053550083672, "grad_norm": 0.30143749713897705, "learning_rate": 3.4136911155827655e-06, "loss": 0.3112, "step": 5374 }, { "epoch": 1.927563949318671, "grad_norm": 0.33192718029022217, "learning_rate": 3.4117121839959512e-06, "loss": 0.3261, "step": 5375 }, { "epoch": 1.9279225436289744, "grad_norm": 0.33677932620048523, "learning_rate": 3.409733529093815e-06, "loss": 0.303, "step": 5376 }, { "epoch": 1.928281137939278, "grad_norm": 0.3229929506778717, "learning_rate": 3.4077551512210437e-06, "loss": 0.3175, "step": 5377 }, { "epoch": 1.9286397322495816, "grad_norm": 0.3389986753463745, "learning_rate": 3.405777050722282e-06, "loss": 0.3654, "step": 5378 }, { "epoch": 1.9289983265598853, "grad_norm": 0.3468207120895386, "learning_rate": 3.403799227942116e-06, "loss": 0.3278, "step": 5379 }, { "epoch": 1.9293569208701888, "grad_norm": 0.3249775767326355, "learning_rate": 3.4018216832250916e-06, "loss": 0.289, "step": 5380 }, { "epoch": 1.9297155151804923, "grad_norm": 0.3156798481941223, "learning_rate": 3.3998444169157026e-06, "loss": 0.339, "step": 5381 }, { "epoch": 1.930074109490796, "grad_norm": 0.3425358235836029, "learning_rate": 3.397867429358394e-06, "loss": 0.3574, "step": 5382 }, { "epoch": 1.9304327038010998, "grad_norm": 0.32995715737342834, "learning_rate": 3.395890720897562e-06, "loss": 0.3264, "step": 5383 }, { "epoch": 1.9307912981114033, "grad_norm": 0.2818794548511505, "learning_rate": 3.3939142918775593e-06, "loss": 0.2983, "step": 5384 }, { "epoch": 1.9311498924217068, "grad_norm": 0.302425742149353, "learning_rate": 3.391938142642682e-06, "loss": 0.3103, "step": 5385 }, { "epoch": 1.9315084867320105, "grad_norm": 0.33113330602645874, "learning_rate": 3.389962273537185e-06, "loss": 0.3256, "step": 5386 }, { "epoch": 1.9318670810423142, "grad_norm": 0.3141104280948639, "learning_rate": 3.3879866849052694e-06, "loss": 0.3132, "step": 5387 }, { "epoch": 1.9322256753526177, "grad_norm": 0.3068496882915497, "learning_rate": 3.386011377091088e-06, "loss": 0.333, "step": 5388 }, { "epoch": 1.9325842696629212, "grad_norm": 0.33589664101600647, "learning_rate": 3.384036350438744e-06, "loss": 0.3441, "step": 5389 }, { "epoch": 1.9329428639732251, "grad_norm": 0.324687123298645, "learning_rate": 3.3820616052922977e-06, "loss": 0.3071, "step": 5390 }, { "epoch": 1.9333014582835286, "grad_norm": 0.3055034279823303, "learning_rate": 3.380087141995752e-06, "loss": 0.2892, "step": 5391 }, { "epoch": 1.9336600525938321, "grad_norm": 0.32552069425582886, "learning_rate": 3.3781129608930674e-06, "loss": 0.3273, "step": 5392 }, { "epoch": 1.9340186469041358, "grad_norm": 0.29318052530288696, "learning_rate": 3.376139062328151e-06, "loss": 0.3032, "step": 5393 }, { "epoch": 1.9343772412144395, "grad_norm": 0.3080149292945862, "learning_rate": 3.3741654466448594e-06, "loss": 0.2843, "step": 5394 }, { "epoch": 1.934735835524743, "grad_norm": 0.3611147701740265, "learning_rate": 3.372192114187008e-06, "loss": 0.3411, "step": 5395 }, { "epoch": 1.9350944298350465, "grad_norm": 0.30673936009407043, "learning_rate": 3.3702190652983546e-06, "loss": 0.2967, "step": 5396 }, { "epoch": 1.9354530241453503, "grad_norm": 0.3093169629573822, "learning_rate": 3.368246300322609e-06, "loss": 0.2995, "step": 5397 }, { "epoch": 1.935811618455654, "grad_norm": 0.3194393515586853, "learning_rate": 3.3662738196034363e-06, "loss": 0.3103, "step": 5398 }, { "epoch": 1.9361702127659575, "grad_norm": 0.31184110045433044, "learning_rate": 3.3643016234844463e-06, "loss": 0.3274, "step": 5399 }, { "epoch": 1.936528807076261, "grad_norm": 0.30942243337631226, "learning_rate": 3.3623297123092007e-06, "loss": 0.3162, "step": 5400 }, { "epoch": 1.9368874013865647, "grad_norm": 0.31153807044029236, "learning_rate": 3.3603580864212166e-06, "loss": 0.3144, "step": 5401 }, { "epoch": 1.9372459956968684, "grad_norm": 0.34134483337402344, "learning_rate": 3.358386746163954e-06, "loss": 0.3261, "step": 5402 }, { "epoch": 1.937604590007172, "grad_norm": 0.3083029091358185, "learning_rate": 3.3564156918808283e-06, "loss": 0.2898, "step": 5403 }, { "epoch": 1.9379631843174754, "grad_norm": 0.3102162778377533, "learning_rate": 3.3544449239152026e-06, "loss": 0.3045, "step": 5404 }, { "epoch": 1.938321778627779, "grad_norm": 0.3338381052017212, "learning_rate": 3.352474442610391e-06, "loss": 0.312, "step": 5405 }, { "epoch": 1.9386803729380828, "grad_norm": 0.3408164381980896, "learning_rate": 3.350504248309655e-06, "loss": 0.3485, "step": 5406 }, { "epoch": 1.9390389672483863, "grad_norm": 0.3213885426521301, "learning_rate": 3.348534341356213e-06, "loss": 0.3131, "step": 5407 }, { "epoch": 1.9393975615586898, "grad_norm": 0.34160423278808594, "learning_rate": 3.3465647220932257e-06, "loss": 0.351, "step": 5408 }, { "epoch": 1.9397561558689935, "grad_norm": 0.3185049295425415, "learning_rate": 3.344595390863809e-06, "loss": 0.3203, "step": 5409 }, { "epoch": 1.9401147501792972, "grad_norm": 0.3225555419921875, "learning_rate": 3.3426263480110257e-06, "loss": 0.3044, "step": 5410 }, { "epoch": 1.9404733444896007, "grad_norm": 0.3137964904308319, "learning_rate": 3.3406575938778864e-06, "loss": 0.321, "step": 5411 }, { "epoch": 1.9408319387999042, "grad_norm": 0.31168603897094727, "learning_rate": 3.3386891288073586e-06, "loss": 0.3336, "step": 5412 }, { "epoch": 1.941190533110208, "grad_norm": 0.2847035229206085, "learning_rate": 3.336720953142354e-06, "loss": 0.2961, "step": 5413 }, { "epoch": 1.9415491274205117, "grad_norm": 0.33168599009513855, "learning_rate": 3.334753067225732e-06, "loss": 0.356, "step": 5414 }, { "epoch": 1.9419077217308152, "grad_norm": 0.30862724781036377, "learning_rate": 3.3327854714003086e-06, "loss": 0.3123, "step": 5415 }, { "epoch": 1.9422663160411187, "grad_norm": 0.31316322088241577, "learning_rate": 3.330818166008843e-06, "loss": 0.3137, "step": 5416 }, { "epoch": 1.9426249103514224, "grad_norm": 0.298825740814209, "learning_rate": 3.3288511513940435e-06, "loss": 0.2913, "step": 5417 }, { "epoch": 1.942983504661726, "grad_norm": 0.3265649676322937, "learning_rate": 3.326884427898575e-06, "loss": 0.3241, "step": 5418 }, { "epoch": 1.9433420989720296, "grad_norm": 0.2997543215751648, "learning_rate": 3.3249179958650446e-06, "loss": 0.3114, "step": 5419 }, { "epoch": 1.9437006932823333, "grad_norm": 0.32228589057922363, "learning_rate": 3.3229518556360097e-06, "loss": 0.3389, "step": 5420 }, { "epoch": 1.944059287592637, "grad_norm": 0.31583458185195923, "learning_rate": 3.32098600755398e-06, "loss": 0.3428, "step": 5421 }, { "epoch": 1.9444178819029405, "grad_norm": 0.3143451511859894, "learning_rate": 3.3190204519614117e-06, "loss": 0.3427, "step": 5422 }, { "epoch": 1.944776476213244, "grad_norm": 0.294744074344635, "learning_rate": 3.3170551892007086e-06, "loss": 0.2875, "step": 5423 }, { "epoch": 1.9451350705235477, "grad_norm": 0.31865978240966797, "learning_rate": 3.315090219614229e-06, "loss": 0.3513, "step": 5424 }, { "epoch": 1.9454936648338514, "grad_norm": 0.29418492317199707, "learning_rate": 3.3131255435442755e-06, "loss": 0.2988, "step": 5425 }, { "epoch": 1.945852259144155, "grad_norm": 0.30205556750297546, "learning_rate": 3.3111611613330997e-06, "loss": 0.3355, "step": 5426 }, { "epoch": 1.9462108534544584, "grad_norm": 0.331879585981369, "learning_rate": 3.309197073322904e-06, "loss": 0.3386, "step": 5427 }, { "epoch": 1.9465694477647622, "grad_norm": 0.33781397342681885, "learning_rate": 3.307233279855837e-06, "loss": 0.337, "step": 5428 }, { "epoch": 1.9469280420750659, "grad_norm": 0.3271535038948059, "learning_rate": 3.3052697812740015e-06, "loss": 0.2892, "step": 5429 }, { "epoch": 1.9472866363853694, "grad_norm": 0.31123998761177063, "learning_rate": 3.303306577919443e-06, "loss": 0.3316, "step": 5430 }, { "epoch": 1.9476452306956729, "grad_norm": 0.3141992688179016, "learning_rate": 3.301343670134155e-06, "loss": 0.305, "step": 5431 }, { "epoch": 1.9480038250059766, "grad_norm": 0.3496764600276947, "learning_rate": 3.2993810582600868e-06, "loss": 0.3574, "step": 5432 }, { "epoch": 1.9483624193162803, "grad_norm": 0.3104877173900604, "learning_rate": 3.2974187426391287e-06, "loss": 0.3125, "step": 5433 }, { "epoch": 1.9487210136265838, "grad_norm": 0.3342352509498596, "learning_rate": 3.29545672361312e-06, "loss": 0.317, "step": 5434 }, { "epoch": 1.9490796079368873, "grad_norm": 0.31185415387153625, "learning_rate": 3.2934950015238563e-06, "loss": 0.2945, "step": 5435 }, { "epoch": 1.949438202247191, "grad_norm": 0.31657934188842773, "learning_rate": 3.2915335767130717e-06, "loss": 0.3404, "step": 5436 }, { "epoch": 1.9497967965574947, "grad_norm": 0.2887706756591797, "learning_rate": 3.289572449522453e-06, "loss": 0.3381, "step": 5437 }, { "epoch": 1.9501553908677982, "grad_norm": 0.3103954792022705, "learning_rate": 3.2876116202936357e-06, "loss": 0.3184, "step": 5438 }, { "epoch": 1.9505139851781017, "grad_norm": 0.32809343934059143, "learning_rate": 3.285651089368202e-06, "loss": 0.3138, "step": 5439 }, { "epoch": 1.9508725794884054, "grad_norm": 0.3122265934944153, "learning_rate": 3.2836908570876804e-06, "loss": 0.2764, "step": 5440 }, { "epoch": 1.9512311737987091, "grad_norm": 0.3374349772930145, "learning_rate": 3.2817309237935525e-06, "loss": 0.3677, "step": 5441 }, { "epoch": 1.9515897681090126, "grad_norm": 0.32923468947410583, "learning_rate": 3.279771289827244e-06, "loss": 0.3448, "step": 5442 }, { "epoch": 1.9519483624193161, "grad_norm": 0.29280513525009155, "learning_rate": 3.277811955530127e-06, "loss": 0.3231, "step": 5443 }, { "epoch": 1.9523069567296198, "grad_norm": 0.298427939414978, "learning_rate": 3.2758529212435275e-06, "loss": 0.3349, "step": 5444 }, { "epoch": 1.9526655510399236, "grad_norm": 0.3510188162326813, "learning_rate": 3.2738941873087125e-06, "loss": 0.3118, "step": 5445 }, { "epoch": 1.953024145350227, "grad_norm": 0.3307933211326599, "learning_rate": 3.2719357540668985e-06, "loss": 0.3211, "step": 5446 }, { "epoch": 1.9533827396605306, "grad_norm": 0.3294332027435303, "learning_rate": 3.2699776218592544e-06, "loss": 0.3214, "step": 5447 }, { "epoch": 1.9537413339708345, "grad_norm": 0.30782532691955566, "learning_rate": 3.2680197910268905e-06, "loss": 0.3288, "step": 5448 }, { "epoch": 1.954099928281138, "grad_norm": 0.297088623046875, "learning_rate": 3.266062261910867e-06, "loss": 0.2789, "step": 5449 }, { "epoch": 1.9544585225914415, "grad_norm": 0.3348884582519531, "learning_rate": 3.2641050348521936e-06, "loss": 0.3185, "step": 5450 }, { "epoch": 1.9548171169017452, "grad_norm": 0.3456557095050812, "learning_rate": 3.262148110191821e-06, "loss": 0.349, "step": 5451 }, { "epoch": 1.955175711212049, "grad_norm": 0.3072223365306854, "learning_rate": 3.2601914882706564e-06, "loss": 0.3095, "step": 5452 }, { "epoch": 1.9555343055223524, "grad_norm": 0.3437521457672119, "learning_rate": 3.2582351694295478e-06, "loss": 0.3439, "step": 5453 }, { "epoch": 1.955892899832656, "grad_norm": 0.3591518998146057, "learning_rate": 3.256279154009292e-06, "loss": 0.3449, "step": 5454 }, { "epoch": 1.9562514941429596, "grad_norm": 0.31763261556625366, "learning_rate": 3.254323442350631e-06, "loss": 0.2811, "step": 5455 }, { "epoch": 1.9566100884532633, "grad_norm": 0.3215171992778778, "learning_rate": 3.252368034794259e-06, "loss": 0.3121, "step": 5456 }, { "epoch": 1.9569686827635668, "grad_norm": 0.3495505452156067, "learning_rate": 3.2504129316808098e-06, "loss": 0.342, "step": 5457 }, { "epoch": 1.9573272770738703, "grad_norm": 0.3233304023742676, "learning_rate": 3.248458133350873e-06, "loss": 0.3046, "step": 5458 }, { "epoch": 1.957685871384174, "grad_norm": 0.34013038873672485, "learning_rate": 3.246503640144979e-06, "loss": 0.3237, "step": 5459 }, { "epoch": 1.9580444656944778, "grad_norm": 0.31668999791145325, "learning_rate": 3.244549452403606e-06, "loss": 0.2856, "step": 5460 }, { "epoch": 1.9584030600047813, "grad_norm": 0.30443328619003296, "learning_rate": 3.24259557046718e-06, "loss": 0.3146, "step": 5461 }, { "epoch": 1.9587616543150848, "grad_norm": 0.30860698223114014, "learning_rate": 3.2406419946760738e-06, "loss": 0.3138, "step": 5462 }, { "epoch": 1.9591202486253885, "grad_norm": 0.32838910818099976, "learning_rate": 3.2386887253706033e-06, "loss": 0.32, "step": 5463 }, { "epoch": 1.9594788429356922, "grad_norm": 0.3135278820991516, "learning_rate": 3.236735762891039e-06, "loss": 0.3185, "step": 5464 }, { "epoch": 1.9598374372459957, "grad_norm": 0.32213661074638367, "learning_rate": 3.2347831075775902e-06, "loss": 0.336, "step": 5465 }, { "epoch": 1.9601960315562992, "grad_norm": 0.3102368712425232, "learning_rate": 3.2328307597704155e-06, "loss": 0.2934, "step": 5466 }, { "epoch": 1.960554625866603, "grad_norm": 0.35012581944465637, "learning_rate": 3.2308787198096215e-06, "loss": 0.3281, "step": 5467 }, { "epoch": 1.9609132201769066, "grad_norm": 0.36392876505851746, "learning_rate": 3.228926988035256e-06, "loss": 0.314, "step": 5468 }, { "epoch": 1.96127181448721, "grad_norm": 0.32515430450439453, "learning_rate": 3.226975564787322e-06, "loss": 0.2898, "step": 5469 }, { "epoch": 1.9616304087975136, "grad_norm": 0.31166040897369385, "learning_rate": 3.2250244504057606e-06, "loss": 0.3102, "step": 5470 }, { "epoch": 1.9619890031078173, "grad_norm": 0.303157240152359, "learning_rate": 3.2230736452304617e-06, "loss": 0.2947, "step": 5471 }, { "epoch": 1.962347597418121, "grad_norm": 0.3128032088279724, "learning_rate": 3.2211231496012616e-06, "loss": 0.3271, "step": 5472 }, { "epoch": 1.9627061917284245, "grad_norm": 0.3327583074569702, "learning_rate": 3.219172963857944e-06, "loss": 0.3433, "step": 5473 }, { "epoch": 1.963064786038728, "grad_norm": 0.30094510316848755, "learning_rate": 3.2172230883402343e-06, "loss": 0.3129, "step": 5474 }, { "epoch": 1.9634233803490317, "grad_norm": 0.3000068664550781, "learning_rate": 3.2152735233878116e-06, "loss": 0.3187, "step": 5475 }, { "epoch": 1.9637819746593355, "grad_norm": 0.31780847907066345, "learning_rate": 3.213324269340294e-06, "loss": 0.345, "step": 5476 }, { "epoch": 1.964140568969639, "grad_norm": 0.29556748270988464, "learning_rate": 3.2113753265372467e-06, "loss": 0.3096, "step": 5477 }, { "epoch": 1.9644991632799427, "grad_norm": 0.3088488280773163, "learning_rate": 3.2094266953181817e-06, "loss": 0.3192, "step": 5478 }, { "epoch": 1.9648577575902464, "grad_norm": 0.3164600133895874, "learning_rate": 3.207478376022558e-06, "loss": 0.3101, "step": 5479 }, { "epoch": 1.9652163519005499, "grad_norm": 0.3246488869190216, "learning_rate": 3.2055303689897755e-06, "loss": 0.3319, "step": 5480 }, { "epoch": 1.9655749462108534, "grad_norm": 0.3184642493724823, "learning_rate": 3.203582674559188e-06, "loss": 0.3171, "step": 5481 }, { "epoch": 1.965933540521157, "grad_norm": 0.312508761882782, "learning_rate": 3.2016352930700877e-06, "loss": 0.2906, "step": 5482 }, { "epoch": 1.9662921348314608, "grad_norm": 0.36691516637802124, "learning_rate": 3.199688224861715e-06, "loss": 0.3748, "step": 5483 }, { "epoch": 1.9666507291417643, "grad_norm": 0.31313398480415344, "learning_rate": 3.197741470273253e-06, "loss": 0.3203, "step": 5484 }, { "epoch": 1.9670093234520678, "grad_norm": 0.32000041007995605, "learning_rate": 3.195795029643835e-06, "loss": 0.3387, "step": 5485 }, { "epoch": 1.9673679177623715, "grad_norm": 0.338771790266037, "learning_rate": 3.1938489033125343e-06, "loss": 0.308, "step": 5486 }, { "epoch": 1.9677265120726752, "grad_norm": 0.3041436970233917, "learning_rate": 3.1919030916183757e-06, "loss": 0.2771, "step": 5487 }, { "epoch": 1.9680851063829787, "grad_norm": 0.32272663712501526, "learning_rate": 3.1899575949003237e-06, "loss": 0.3405, "step": 5488 }, { "epoch": 1.9684437006932822, "grad_norm": 0.32383957505226135, "learning_rate": 3.188012413497289e-06, "loss": 0.3265, "step": 5489 }, { "epoch": 1.968802295003586, "grad_norm": 0.2996688485145569, "learning_rate": 3.186067547748131e-06, "loss": 0.2838, "step": 5490 }, { "epoch": 1.9691608893138897, "grad_norm": 0.32840079069137573, "learning_rate": 3.1841229979916465e-06, "loss": 0.3103, "step": 5491 }, { "epoch": 1.9695194836241932, "grad_norm": 0.305329293012619, "learning_rate": 3.1821787645665882e-06, "loss": 0.3075, "step": 5492 }, { "epoch": 1.9698780779344967, "grad_norm": 0.33623287081718445, "learning_rate": 3.180234847811644e-06, "loss": 0.3483, "step": 5493 }, { "epoch": 1.9702366722448004, "grad_norm": 0.3200933337211609, "learning_rate": 3.1782912480654517e-06, "loss": 0.3105, "step": 5494 }, { "epoch": 1.970595266555104, "grad_norm": 0.29464420676231384, "learning_rate": 3.1763479656665897e-06, "loss": 0.2759, "step": 5495 }, { "epoch": 1.9709538608654076, "grad_norm": 0.30896976590156555, "learning_rate": 3.1744050009535876e-06, "loss": 0.3316, "step": 5496 }, { "epoch": 1.971312455175711, "grad_norm": 0.325474351644516, "learning_rate": 3.1724623542649114e-06, "loss": 0.3118, "step": 5497 }, { "epoch": 1.9716710494860148, "grad_norm": 0.3238166868686676, "learning_rate": 3.1705200259389814e-06, "loss": 0.3005, "step": 5498 }, { "epoch": 1.9720296437963185, "grad_norm": 0.3099847137928009, "learning_rate": 3.1685780163141543e-06, "loss": 0.3149, "step": 5499 }, { "epoch": 1.972388238106622, "grad_norm": 0.33259904384613037, "learning_rate": 3.1666363257287353e-06, "loss": 0.3213, "step": 5500 }, { "epoch": 1.9727468324169255, "grad_norm": 0.3221844434738159, "learning_rate": 3.1646949545209702e-06, "loss": 0.3515, "step": 5501 }, { "epoch": 1.9731054267272292, "grad_norm": 0.3066423237323761, "learning_rate": 3.1627539030290564e-06, "loss": 0.2945, "step": 5502 }, { "epoch": 1.973464021037533, "grad_norm": 0.3496764004230499, "learning_rate": 3.160813171591126e-06, "loss": 0.3112, "step": 5503 }, { "epoch": 1.9738226153478364, "grad_norm": 0.3325982093811035, "learning_rate": 3.158872760545265e-06, "loss": 0.3159, "step": 5504 }, { "epoch": 1.9741812096581401, "grad_norm": 0.310751736164093, "learning_rate": 3.156932670229497e-06, "loss": 0.2944, "step": 5505 }, { "epoch": 1.9745398039684439, "grad_norm": 0.3154454529285431, "learning_rate": 3.1549929009817924e-06, "loss": 0.3314, "step": 5506 }, { "epoch": 1.9748983982787474, "grad_norm": 0.33702585101127625, "learning_rate": 3.1530534531400636e-06, "loss": 0.3279, "step": 5507 }, { "epoch": 1.9752569925890509, "grad_norm": 0.30869346857070923, "learning_rate": 3.151114327042169e-06, "loss": 0.3144, "step": 5508 }, { "epoch": 1.9756155868993546, "grad_norm": 0.3431978225708008, "learning_rate": 3.1491755230259113e-06, "loss": 0.3416, "step": 5509 }, { "epoch": 1.9759741812096583, "grad_norm": 0.3253493309020996, "learning_rate": 3.1472370414290367e-06, "loss": 0.3556, "step": 5510 }, { "epoch": 1.9763327755199618, "grad_norm": 0.33317312598228455, "learning_rate": 3.145298882589233e-06, "loss": 0.3085, "step": 5511 }, { "epoch": 1.9766913698302653, "grad_norm": 0.3486998975276947, "learning_rate": 3.1433610468441354e-06, "loss": 0.2873, "step": 5512 }, { "epoch": 1.977049964140569, "grad_norm": 0.3488939106464386, "learning_rate": 3.141423534531318e-06, "loss": 0.3438, "step": 5513 }, { "epoch": 1.9774085584508727, "grad_norm": 0.29451191425323486, "learning_rate": 3.1394863459883025e-06, "loss": 0.3048, "step": 5514 }, { "epoch": 1.9777671527611762, "grad_norm": 0.31736910343170166, "learning_rate": 3.137549481552556e-06, "loss": 0.3297, "step": 5515 }, { "epoch": 1.9781257470714797, "grad_norm": 0.3392820358276367, "learning_rate": 3.1356129415614845e-06, "loss": 0.3417, "step": 5516 }, { "epoch": 1.9784843413817834, "grad_norm": 0.3592895269393921, "learning_rate": 3.133676726352438e-06, "loss": 0.31, "step": 5517 }, { "epoch": 1.9788429356920871, "grad_norm": 0.34712478518486023, "learning_rate": 3.131740836262713e-06, "loss": 0.3123, "step": 5518 }, { "epoch": 1.9792015300023906, "grad_norm": 0.32756438851356506, "learning_rate": 3.1298052716295456e-06, "loss": 0.3135, "step": 5519 }, { "epoch": 1.9795601243126941, "grad_norm": 0.339952290058136, "learning_rate": 3.1278700327901177e-06, "loss": 0.3205, "step": 5520 }, { "epoch": 1.9799187186229978, "grad_norm": 0.34864506125450134, "learning_rate": 3.1259351200815565e-06, "loss": 0.3118, "step": 5521 }, { "epoch": 1.9802773129333016, "grad_norm": 0.33742111921310425, "learning_rate": 3.1240005338409273e-06, "loss": 0.2886, "step": 5522 }, { "epoch": 1.980635907243605, "grad_norm": 0.3646160960197449, "learning_rate": 3.122066274405242e-06, "loss": 0.2972, "step": 5523 }, { "epoch": 1.9809945015539085, "grad_norm": 0.310554563999176, "learning_rate": 3.1201323421114537e-06, "loss": 0.3093, "step": 5524 }, { "epoch": 1.9813530958642123, "grad_norm": 0.3462277054786682, "learning_rate": 3.11819873729646e-06, "loss": 0.3392, "step": 5525 }, { "epoch": 1.981711690174516, "grad_norm": 0.34205004572868347, "learning_rate": 3.116265460297101e-06, "loss": 0.3126, "step": 5526 }, { "epoch": 1.9820702844848195, "grad_norm": 0.3152782917022705, "learning_rate": 3.1143325114501598e-06, "loss": 0.3135, "step": 5527 }, { "epoch": 1.982428878795123, "grad_norm": 0.34050044417381287, "learning_rate": 3.112399891092362e-06, "loss": 0.3352, "step": 5528 }, { "epoch": 1.9827874731054267, "grad_norm": 0.33996787667274475, "learning_rate": 3.1104675995603763e-06, "loss": 0.293, "step": 5529 }, { "epoch": 1.9831460674157304, "grad_norm": 0.30923357605934143, "learning_rate": 3.1085356371908115e-06, "loss": 0.2886, "step": 5530 }, { "epoch": 1.983504661726034, "grad_norm": 0.3235537111759186, "learning_rate": 3.106604004320224e-06, "loss": 0.335, "step": 5531 }, { "epoch": 1.9838632560363374, "grad_norm": 0.2998218834400177, "learning_rate": 3.1046727012851097e-06, "loss": 0.3064, "step": 5532 }, { "epoch": 1.9842218503466411, "grad_norm": 0.3207104206085205, "learning_rate": 3.1027417284219085e-06, "loss": 0.3211, "step": 5533 }, { "epoch": 1.9845804446569448, "grad_norm": 0.3349582552909851, "learning_rate": 3.1008110860670005e-06, "loss": 0.2915, "step": 5534 }, { "epoch": 1.9849390389672483, "grad_norm": 0.31161466240882874, "learning_rate": 3.0988807745567107e-06, "loss": 0.3008, "step": 5535 }, { "epoch": 1.985297633277552, "grad_norm": 0.3470865786075592, "learning_rate": 3.096950794227303e-06, "loss": 0.3718, "step": 5536 }, { "epoch": 1.9856562275878558, "grad_norm": 0.3094068467617035, "learning_rate": 3.095021145414988e-06, "loss": 0.3135, "step": 5537 }, { "epoch": 1.9860148218981593, "grad_norm": 0.34765467047691345, "learning_rate": 3.0930918284559164e-06, "loss": 0.3179, "step": 5538 }, { "epoch": 1.9863734162084627, "grad_norm": 0.34664085507392883, "learning_rate": 3.091162843686182e-06, "loss": 0.3716, "step": 5539 }, { "epoch": 1.9867320105187665, "grad_norm": 0.31981927156448364, "learning_rate": 3.089234191441818e-06, "loss": 0.3077, "step": 5540 }, { "epoch": 1.9870906048290702, "grad_norm": 0.30282077193260193, "learning_rate": 3.0873058720588033e-06, "loss": 0.3074, "step": 5541 }, { "epoch": 1.9874491991393737, "grad_norm": 0.31916990876197815, "learning_rate": 3.0853778858730553e-06, "loss": 0.2836, "step": 5542 }, { "epoch": 1.9878077934496772, "grad_norm": 0.31964415311813354, "learning_rate": 3.0834502332204357e-06, "loss": 0.3612, "step": 5543 }, { "epoch": 1.988166387759981, "grad_norm": 0.2996757924556732, "learning_rate": 3.0815229144367496e-06, "loss": 0.2721, "step": 5544 }, { "epoch": 1.9885249820702846, "grad_norm": 0.31655028462409973, "learning_rate": 3.0795959298577416e-06, "loss": 0.3086, "step": 5545 }, { "epoch": 1.988883576380588, "grad_norm": 0.30206990242004395, "learning_rate": 3.0776692798190968e-06, "loss": 0.301, "step": 5546 }, { "epoch": 1.9892421706908916, "grad_norm": 0.3428664207458496, "learning_rate": 3.0757429646564434e-06, "loss": 0.3577, "step": 5547 }, { "epoch": 1.9896007650011953, "grad_norm": 0.31455790996551514, "learning_rate": 3.0738169847053513e-06, "loss": 0.3331, "step": 5548 }, { "epoch": 1.989959359311499, "grad_norm": 0.32342177629470825, "learning_rate": 3.071891340301333e-06, "loss": 0.3134, "step": 5549 }, { "epoch": 1.9903179536218025, "grad_norm": 0.3069482147693634, "learning_rate": 3.069966031779844e-06, "loss": 0.2976, "step": 5550 }, { "epoch": 1.990676547932106, "grad_norm": 0.3118830919265747, "learning_rate": 3.0680410594762765e-06, "loss": 0.2956, "step": 5551 }, { "epoch": 1.9910351422424097, "grad_norm": 0.32913094758987427, "learning_rate": 3.066116423725967e-06, "loss": 0.3175, "step": 5552 }, { "epoch": 1.9913937365527135, "grad_norm": 0.3066924810409546, "learning_rate": 3.0641921248641926e-06, "loss": 0.2979, "step": 5553 }, { "epoch": 1.991752330863017, "grad_norm": 0.3232656717300415, "learning_rate": 3.062268163226172e-06, "loss": 0.3166, "step": 5554 }, { "epoch": 1.9921109251733204, "grad_norm": 0.31392768025398254, "learning_rate": 3.0603445391470666e-06, "loss": 0.3073, "step": 5555 }, { "epoch": 1.9924695194836242, "grad_norm": 0.33880820870399475, "learning_rate": 3.0584212529619777e-06, "loss": 0.3337, "step": 5556 }, { "epoch": 1.9928281137939279, "grad_norm": 0.3076532185077667, "learning_rate": 3.056498305005947e-06, "loss": 0.3097, "step": 5557 }, { "epoch": 1.9931867081042314, "grad_norm": 0.3201838433742523, "learning_rate": 3.0545756956139587e-06, "loss": 0.3029, "step": 5558 }, { "epoch": 1.9935453024145349, "grad_norm": 0.32536154985427856, "learning_rate": 3.0526534251209355e-06, "loss": 0.3313, "step": 5559 }, { "epoch": 1.9939038967248386, "grad_norm": 0.3703297972679138, "learning_rate": 3.0507314938617443e-06, "loss": 0.3596, "step": 5560 }, { "epoch": 1.9942624910351423, "grad_norm": 0.3009648323059082, "learning_rate": 3.0488099021711914e-06, "loss": 0.2837, "step": 5561 }, { "epoch": 1.9946210853454458, "grad_norm": 0.33217522501945496, "learning_rate": 3.0468886503840245e-06, "loss": 0.326, "step": 5562 }, { "epoch": 1.9949796796557495, "grad_norm": 0.3138926923274994, "learning_rate": 3.044967738834932e-06, "loss": 0.2933, "step": 5563 }, { "epoch": 1.9953382739660532, "grad_norm": 0.3367607891559601, "learning_rate": 3.0430471678585416e-06, "loss": 0.3286, "step": 5564 }, { "epoch": 1.9956968682763567, "grad_norm": 0.318353533744812, "learning_rate": 3.0411269377894205e-06, "loss": 0.336, "step": 5565 }, { "epoch": 1.9960554625866602, "grad_norm": 0.30874404311180115, "learning_rate": 3.039207048962084e-06, "loss": 0.3096, "step": 5566 }, { "epoch": 1.996414056896964, "grad_norm": 0.3124130964279175, "learning_rate": 3.0372875017109783e-06, "loss": 0.3221, "step": 5567 }, { "epoch": 1.9967726512072677, "grad_norm": 0.33663320541381836, "learning_rate": 3.0353682963704973e-06, "loss": 0.3251, "step": 5568 }, { "epoch": 1.9971312455175712, "grad_norm": 0.30915817618370056, "learning_rate": 3.0334494332749716e-06, "loss": 0.3363, "step": 5569 }, { "epoch": 1.9974898398278746, "grad_norm": 0.3290591835975647, "learning_rate": 3.031530912758673e-06, "loss": 0.2908, "step": 5570 }, { "epoch": 1.9978484341381784, "grad_norm": 0.3350808322429657, "learning_rate": 3.029612735155811e-06, "loss": 0.3317, "step": 5571 }, { "epoch": 1.998207028448482, "grad_norm": 0.3208479583263397, "learning_rate": 3.0276949008005422e-06, "loss": 0.3349, "step": 5572 }, { "epoch": 1.9985656227587856, "grad_norm": 0.29386550188064575, "learning_rate": 3.025777410026959e-06, "loss": 0.3049, "step": 5573 }, { "epoch": 1.998924217069089, "grad_norm": 0.32354044914245605, "learning_rate": 3.0238602631690927e-06, "loss": 0.3293, "step": 5574 }, { "epoch": 1.9992828113793928, "grad_norm": 0.3362058103084564, "learning_rate": 3.021943460560918e-06, "loss": 0.3302, "step": 5575 }, { "epoch": 1.9996414056896965, "grad_norm": 0.31354087591171265, "learning_rate": 3.020027002536345e-06, "loss": 0.33, "step": 5576 }, { "epoch": 2.0001195314367677, "grad_norm": 0.6068454384803772, "learning_rate": 3.0181108894292267e-06, "loss": 0.4941, "step": 5577 }, { "epoch": 2.0004781257470716, "grad_norm": 0.31365877389907837, "learning_rate": 3.0161951215733587e-06, "loss": 0.308, "step": 5578 }, { "epoch": 2.000836720057375, "grad_norm": 0.3443858027458191, "learning_rate": 3.0142796993024727e-06, "loss": 0.2833, "step": 5579 }, { "epoch": 2.0011953143676786, "grad_norm": 0.34751224517822266, "learning_rate": 3.0123646229502417e-06, "loss": 0.2822, "step": 5580 }, { "epoch": 2.001553908677982, "grad_norm": 0.3844909369945526, "learning_rate": 3.0104498928502772e-06, "loss": 0.3224, "step": 5581 }, { "epoch": 2.001912502988286, "grad_norm": 0.329545259475708, "learning_rate": 3.0085355093361302e-06, "loss": 0.3033, "step": 5582 }, { "epoch": 2.0022710972985895, "grad_norm": 0.34266403317451477, "learning_rate": 3.0066214727412933e-06, "loss": 0.2773, "step": 5583 }, { "epoch": 2.002629691608893, "grad_norm": 0.38236236572265625, "learning_rate": 3.004707783399198e-06, "loss": 0.3086, "step": 5584 }, { "epoch": 2.0029882859191965, "grad_norm": 0.3347253203392029, "learning_rate": 3.0027944416432147e-06, "loss": 0.2472, "step": 5585 }, { "epoch": 2.0033468802295005, "grad_norm": 0.3380807340145111, "learning_rate": 3.000881447806654e-06, "loss": 0.2954, "step": 5586 }, { "epoch": 2.003705474539804, "grad_norm": 0.340620219707489, "learning_rate": 2.9989688022227643e-06, "loss": 0.2982, "step": 5587 }, { "epoch": 2.0040640688501075, "grad_norm": 0.3394574522972107, "learning_rate": 2.9970565052247324e-06, "loss": 0.3039, "step": 5588 }, { "epoch": 2.0044226631604114, "grad_norm": 0.3304067552089691, "learning_rate": 2.9951445571456914e-06, "loss": 0.2842, "step": 5589 }, { "epoch": 2.004781257470715, "grad_norm": 0.31234410405158997, "learning_rate": 2.9932329583187054e-06, "loss": 0.2965, "step": 5590 }, { "epoch": 2.0051398517810184, "grad_norm": 0.33780786395072937, "learning_rate": 2.991321709076782e-06, "loss": 0.334, "step": 5591 }, { "epoch": 2.005498446091322, "grad_norm": 0.34181925654411316, "learning_rate": 2.9894108097528664e-06, "loss": 0.2957, "step": 5592 }, { "epoch": 2.005857040401626, "grad_norm": 0.3308338522911072, "learning_rate": 2.9875002606798425e-06, "loss": 0.2869, "step": 5593 }, { "epoch": 2.0062156347119293, "grad_norm": 0.3394169807434082, "learning_rate": 2.9855900621905333e-06, "loss": 0.284, "step": 5594 }, { "epoch": 2.006574229022233, "grad_norm": 0.31799834966659546, "learning_rate": 2.9836802146177034e-06, "loss": 0.2857, "step": 5595 }, { "epoch": 2.0069328233325363, "grad_norm": 0.33247166872024536, "learning_rate": 2.9817707182940533e-06, "loss": 0.304, "step": 5596 }, { "epoch": 2.0072914176428402, "grad_norm": 0.32182419300079346, "learning_rate": 2.9798615735522234e-06, "loss": 0.2577, "step": 5597 }, { "epoch": 2.0076500119531437, "grad_norm": 0.3548755943775177, "learning_rate": 2.9779527807247925e-06, "loss": 0.2946, "step": 5598 }, { "epoch": 2.0080086062634472, "grad_norm": 0.33581194281578064, "learning_rate": 2.9760443401442784e-06, "loss": 0.3171, "step": 5599 }, { "epoch": 2.0083672005737507, "grad_norm": 0.3079228401184082, "learning_rate": 2.974136252143135e-06, "loss": 0.2453, "step": 5600 }, { "epoch": 2.0087257948840547, "grad_norm": 0.3446086347103119, "learning_rate": 2.9722285170537613e-06, "loss": 0.3141, "step": 5601 }, { "epoch": 2.009084389194358, "grad_norm": 0.3256726562976837, "learning_rate": 2.970321135208488e-06, "loss": 0.2661, "step": 5602 }, { "epoch": 2.0094429835046617, "grad_norm": 0.34690138697624207, "learning_rate": 2.9684141069395878e-06, "loss": 0.2823, "step": 5603 }, { "epoch": 2.009801577814965, "grad_norm": 0.3184877336025238, "learning_rate": 2.966507432579272e-06, "loss": 0.3133, "step": 5604 }, { "epoch": 2.010160172125269, "grad_norm": 0.3131295442581177, "learning_rate": 2.9646011124596854e-06, "loss": 0.3055, "step": 5605 }, { "epoch": 2.0105187664355726, "grad_norm": 0.3307993412017822, "learning_rate": 2.9626951469129194e-06, "loss": 0.3008, "step": 5606 }, { "epoch": 2.010877360745876, "grad_norm": 0.3413907587528229, "learning_rate": 2.960789536270996e-06, "loss": 0.2939, "step": 5607 }, { "epoch": 2.0112359550561796, "grad_norm": 0.2942325472831726, "learning_rate": 2.9588842808658814e-06, "loss": 0.2731, "step": 5608 }, { "epoch": 2.0115945493664835, "grad_norm": 0.32295653223991394, "learning_rate": 2.9569793810294745e-06, "loss": 0.3013, "step": 5609 }, { "epoch": 2.011953143676787, "grad_norm": 0.31980934739112854, "learning_rate": 2.955074837093616e-06, "loss": 0.2995, "step": 5610 }, { "epoch": 2.0123117379870905, "grad_norm": 0.3346659541130066, "learning_rate": 2.9531706493900812e-06, "loss": 0.2842, "step": 5611 }, { "epoch": 2.012670332297394, "grad_norm": 0.3530621826648712, "learning_rate": 2.951266818250589e-06, "loss": 0.3061, "step": 5612 }, { "epoch": 2.013028926607698, "grad_norm": 0.31813353300094604, "learning_rate": 2.9493633440067903e-06, "loss": 0.2922, "step": 5613 }, { "epoch": 2.0133875209180014, "grad_norm": 0.31664976477622986, "learning_rate": 2.947460226990278e-06, "loss": 0.2904, "step": 5614 }, { "epoch": 2.013746115228305, "grad_norm": 0.3194781243801117, "learning_rate": 2.94555746753258e-06, "loss": 0.2803, "step": 5615 }, { "epoch": 2.014104709538609, "grad_norm": 0.32514673471450806, "learning_rate": 2.9436550659651625e-06, "loss": 0.3095, "step": 5616 }, { "epoch": 2.0144633038489124, "grad_norm": 0.33287838101387024, "learning_rate": 2.9417530226194284e-06, "loss": 0.3164, "step": 5617 }, { "epoch": 2.014821898159216, "grad_norm": 0.3348199725151062, "learning_rate": 2.9398513378267236e-06, "loss": 0.3164, "step": 5618 }, { "epoch": 2.0151804924695194, "grad_norm": 0.3120361864566803, "learning_rate": 2.9379500119183236e-06, "loss": 0.2736, "step": 5619 }, { "epoch": 2.0155390867798233, "grad_norm": 0.3177831172943115, "learning_rate": 2.9360490452254487e-06, "loss": 0.2857, "step": 5620 }, { "epoch": 2.015897681090127, "grad_norm": 0.3629129230976105, "learning_rate": 2.9341484380792507e-06, "loss": 0.2963, "step": 5621 }, { "epoch": 2.0162562754004303, "grad_norm": 0.3461131751537323, "learning_rate": 2.9322481908108203e-06, "loss": 0.3221, "step": 5622 }, { "epoch": 2.016614869710734, "grad_norm": 0.318516343832016, "learning_rate": 2.9303483037511904e-06, "loss": 0.2995, "step": 5623 }, { "epoch": 2.0169734640210377, "grad_norm": 0.3360503613948822, "learning_rate": 2.928448777231325e-06, "loss": 0.3104, "step": 5624 }, { "epoch": 2.017332058331341, "grad_norm": 0.32437193393707275, "learning_rate": 2.9265496115821267e-06, "loss": 0.2655, "step": 5625 }, { "epoch": 2.0176906526416447, "grad_norm": 0.3424074053764343, "learning_rate": 2.9246508071344378e-06, "loss": 0.2897, "step": 5626 }, { "epoch": 2.018049246951948, "grad_norm": 0.35095837712287903, "learning_rate": 2.922752364219035e-06, "loss": 0.3309, "step": 5627 }, { "epoch": 2.018407841262252, "grad_norm": 0.3408873379230499, "learning_rate": 2.920854283166631e-06, "loss": 0.3116, "step": 5628 }, { "epoch": 2.0187664355725556, "grad_norm": 0.35265466570854187, "learning_rate": 2.91895656430788e-06, "loss": 0.2595, "step": 5629 }, { "epoch": 2.019125029882859, "grad_norm": 0.34259313344955444, "learning_rate": 2.917059207973373e-06, "loss": 0.3121, "step": 5630 }, { "epoch": 2.0194836241931626, "grad_norm": 0.3175087869167328, "learning_rate": 2.9151622144936296e-06, "loss": 0.2968, "step": 5631 }, { "epoch": 2.0198422185034666, "grad_norm": 0.32393988966941833, "learning_rate": 2.913265584199117e-06, "loss": 0.2771, "step": 5632 }, { "epoch": 2.02020081281377, "grad_norm": 0.30473652482032776, "learning_rate": 2.9113693174202297e-06, "loss": 0.2707, "step": 5633 }, { "epoch": 2.0205594071240736, "grad_norm": 0.35838645696640015, "learning_rate": 2.9094734144873037e-06, "loss": 0.3061, "step": 5634 }, { "epoch": 2.020918001434377, "grad_norm": 0.32526686787605286, "learning_rate": 2.9075778757306165e-06, "loss": 0.2918, "step": 5635 }, { "epoch": 2.021276595744681, "grad_norm": 0.3013099730014801, "learning_rate": 2.9056827014803712e-06, "loss": 0.2682, "step": 5636 }, { "epoch": 2.0216351900549845, "grad_norm": 0.3290508985519409, "learning_rate": 2.903787892066718e-06, "loss": 0.3416, "step": 5637 }, { "epoch": 2.021993784365288, "grad_norm": 0.3049080967903137, "learning_rate": 2.9018934478197324e-06, "loss": 0.2805, "step": 5638 }, { "epoch": 2.0223523786755915, "grad_norm": 0.31833216547966003, "learning_rate": 2.899999369069437e-06, "loss": 0.2945, "step": 5639 }, { "epoch": 2.0227109729858954, "grad_norm": 0.29817092418670654, "learning_rate": 2.898105656145784e-06, "loss": 0.2693, "step": 5640 }, { "epoch": 2.023069567296199, "grad_norm": 0.3320254385471344, "learning_rate": 2.896212309378665e-06, "loss": 0.3255, "step": 5641 }, { "epoch": 2.0234281616065024, "grad_norm": 0.31906163692474365, "learning_rate": 2.89431932909791e-06, "loss": 0.2782, "step": 5642 }, { "epoch": 2.023786755916806, "grad_norm": 0.330059677362442, "learning_rate": 2.892426715633276e-06, "loss": 0.2992, "step": 5643 }, { "epoch": 2.02414535022711, "grad_norm": 0.30833983421325684, "learning_rate": 2.8905344693144666e-06, "loss": 0.3047, "step": 5644 }, { "epoch": 2.0245039445374133, "grad_norm": 0.3019346296787262, "learning_rate": 2.8886425904711145e-06, "loss": 0.3017, "step": 5645 }, { "epoch": 2.024862538847717, "grad_norm": 0.331819623708725, "learning_rate": 2.886751079432792e-06, "loss": 0.3049, "step": 5646 }, { "epoch": 2.0252211331580208, "grad_norm": 0.3205018639564514, "learning_rate": 2.8848599365290077e-06, "loss": 0.3157, "step": 5647 }, { "epoch": 2.0255797274683243, "grad_norm": 0.31136050820350647, "learning_rate": 2.882969162089202e-06, "loss": 0.2959, "step": 5648 }, { "epoch": 2.0259383217786278, "grad_norm": 0.3062742352485657, "learning_rate": 2.8810787564427562e-06, "loss": 0.2714, "step": 5649 }, { "epoch": 2.0262969160889313, "grad_norm": 0.32567867636680603, "learning_rate": 2.879188719918981e-06, "loss": 0.3189, "step": 5650 }, { "epoch": 2.026655510399235, "grad_norm": 0.3303687572479248, "learning_rate": 2.8772990528471294e-06, "loss": 0.2927, "step": 5651 }, { "epoch": 2.0270141047095387, "grad_norm": 0.33808672428131104, "learning_rate": 2.875409755556387e-06, "loss": 0.2975, "step": 5652 }, { "epoch": 2.027372699019842, "grad_norm": 0.3226469159126282, "learning_rate": 2.8735208283758754e-06, "loss": 0.2861, "step": 5653 }, { "epoch": 2.0277312933301457, "grad_norm": 0.3033726215362549, "learning_rate": 2.871632271634653e-06, "loss": 0.2704, "step": 5654 }, { "epoch": 2.0280898876404496, "grad_norm": 0.31092649698257446, "learning_rate": 2.869744085661709e-06, "loss": 0.3143, "step": 5655 }, { "epoch": 2.028448481950753, "grad_norm": 0.2854653596878052, "learning_rate": 2.867856270785972e-06, "loss": 0.2738, "step": 5656 }, { "epoch": 2.0288070762610566, "grad_norm": 0.34602200984954834, "learning_rate": 2.8659688273363062e-06, "loss": 0.3328, "step": 5657 }, { "epoch": 2.02916567057136, "grad_norm": 0.3328738808631897, "learning_rate": 2.8640817556415112e-06, "loss": 0.2797, "step": 5658 }, { "epoch": 2.029524264881664, "grad_norm": 0.29772573709487915, "learning_rate": 2.86219505603032e-06, "loss": 0.2737, "step": 5659 }, { "epoch": 2.0298828591919675, "grad_norm": 0.34954896569252014, "learning_rate": 2.8603087288314004e-06, "loss": 0.3203, "step": 5660 }, { "epoch": 2.030241453502271, "grad_norm": 0.31878554821014404, "learning_rate": 2.858422774373359e-06, "loss": 0.259, "step": 5661 }, { "epoch": 2.0306000478125745, "grad_norm": 0.33372071385383606, "learning_rate": 2.8565371929847286e-06, "loss": 0.3034, "step": 5662 }, { "epoch": 2.0309586421228785, "grad_norm": 0.31300678849220276, "learning_rate": 2.85465198499399e-06, "loss": 0.2811, "step": 5663 }, { "epoch": 2.031317236433182, "grad_norm": 0.32900723814964294, "learning_rate": 2.852767150729553e-06, "loss": 0.2967, "step": 5664 }, { "epoch": 2.0316758307434855, "grad_norm": 0.31355470418930054, "learning_rate": 2.8508826905197557e-06, "loss": 0.2551, "step": 5665 }, { "epoch": 2.032034425053789, "grad_norm": 0.33887767791748047, "learning_rate": 2.8489986046928825e-06, "loss": 0.3248, "step": 5666 }, { "epoch": 2.032393019364093, "grad_norm": 0.35317233204841614, "learning_rate": 2.847114893577142e-06, "loss": 0.2885, "step": 5667 }, { "epoch": 2.0327516136743964, "grad_norm": 0.3301369547843933, "learning_rate": 2.845231557500686e-06, "loss": 0.3057, "step": 5668 }, { "epoch": 2.0331102079847, "grad_norm": 0.3227522373199463, "learning_rate": 2.843348596791596e-06, "loss": 0.275, "step": 5669 }, { "epoch": 2.0334688022950034, "grad_norm": 0.34705838561058044, "learning_rate": 2.8414660117778904e-06, "loss": 0.2874, "step": 5670 }, { "epoch": 2.0338273966053073, "grad_norm": 0.3560234308242798, "learning_rate": 2.8395838027875223e-06, "loss": 0.3314, "step": 5671 }, { "epoch": 2.034185990915611, "grad_norm": 0.31129127740859985, "learning_rate": 2.837701970148376e-06, "loss": 0.2645, "step": 5672 }, { "epoch": 2.0345445852259143, "grad_norm": 0.31344687938690186, "learning_rate": 2.8358205141882735e-06, "loss": 0.2821, "step": 5673 }, { "epoch": 2.0349031795362182, "grad_norm": 0.310009241104126, "learning_rate": 2.8339394352349704e-06, "loss": 0.3119, "step": 5674 }, { "epoch": 2.0352617738465217, "grad_norm": 0.3164549767971039, "learning_rate": 2.8320587336161577e-06, "loss": 0.2825, "step": 5675 }, { "epoch": 2.0356203681568252, "grad_norm": 0.3053237199783325, "learning_rate": 2.830178409659459e-06, "loss": 0.3138, "step": 5676 }, { "epoch": 2.0359789624671287, "grad_norm": 0.29143911600112915, "learning_rate": 2.828298463692431e-06, "loss": 0.2904, "step": 5677 }, { "epoch": 2.0363375567774327, "grad_norm": 0.31434422731399536, "learning_rate": 2.8264188960425686e-06, "loss": 0.3082, "step": 5678 }, { "epoch": 2.036696151087736, "grad_norm": 0.31411033868789673, "learning_rate": 2.8245397070372952e-06, "loss": 0.2973, "step": 5679 }, { "epoch": 2.0370547453980397, "grad_norm": 0.3390050530433655, "learning_rate": 2.822660897003973e-06, "loss": 0.294, "step": 5680 }, { "epoch": 2.037413339708343, "grad_norm": 0.3366994559764862, "learning_rate": 2.8207824662698968e-06, "loss": 0.3057, "step": 5681 }, { "epoch": 2.037771934018647, "grad_norm": 0.3101576566696167, "learning_rate": 2.8189044151622947e-06, "loss": 0.2924, "step": 5682 }, { "epoch": 2.0381305283289506, "grad_norm": 0.32495060563087463, "learning_rate": 2.817026744008332e-06, "loss": 0.3169, "step": 5683 }, { "epoch": 2.038489122639254, "grad_norm": 0.3219388723373413, "learning_rate": 2.8151494531350993e-06, "loss": 0.2879, "step": 5684 }, { "epoch": 2.0388477169495576, "grad_norm": 0.3295997679233551, "learning_rate": 2.8132725428696296e-06, "loss": 0.3159, "step": 5685 }, { "epoch": 2.0392063112598615, "grad_norm": 0.34107497334480286, "learning_rate": 2.8113960135388863e-06, "loss": 0.2755, "step": 5686 }, { "epoch": 2.039564905570165, "grad_norm": 0.32349419593811035, "learning_rate": 2.8095198654697665e-06, "loss": 0.2924, "step": 5687 }, { "epoch": 2.0399234998804685, "grad_norm": 0.33891043066978455, "learning_rate": 2.8076440989891025e-06, "loss": 0.3374, "step": 5688 }, { "epoch": 2.040282094190772, "grad_norm": 0.30277010798454285, "learning_rate": 2.8057687144236555e-06, "loss": 0.2594, "step": 5689 }, { "epoch": 2.040640688501076, "grad_norm": 0.3165140450000763, "learning_rate": 2.8038937121001274e-06, "loss": 0.2781, "step": 5690 }, { "epoch": 2.0409992828113794, "grad_norm": 0.306956946849823, "learning_rate": 2.8020190923451433e-06, "loss": 0.2949, "step": 5691 }, { "epoch": 2.041357877121683, "grad_norm": 0.3113008439540863, "learning_rate": 2.8001448554852735e-06, "loss": 0.3066, "step": 5692 }, { "epoch": 2.0417164714319864, "grad_norm": 0.30006125569343567, "learning_rate": 2.798271001847016e-06, "loss": 0.2857, "step": 5693 }, { "epoch": 2.0420750657422904, "grad_norm": 0.3249932527542114, "learning_rate": 2.796397531756798e-06, "loss": 0.3104, "step": 5694 }, { "epoch": 2.042433660052594, "grad_norm": 0.32722437381744385, "learning_rate": 2.7945244455409874e-06, "loss": 0.312, "step": 5695 }, { "epoch": 2.0427922543628974, "grad_norm": 0.3050755262374878, "learning_rate": 2.7926517435258782e-06, "loss": 0.3195, "step": 5696 }, { "epoch": 2.043150848673201, "grad_norm": 0.3045876920223236, "learning_rate": 2.790779426037703e-06, "loss": 0.2481, "step": 5697 }, { "epoch": 2.043509442983505, "grad_norm": 0.33414334058761597, "learning_rate": 2.788907493402625e-06, "loss": 0.304, "step": 5698 }, { "epoch": 2.0438680372938083, "grad_norm": 0.2992382347583771, "learning_rate": 2.78703594594674e-06, "loss": 0.2921, "step": 5699 }, { "epoch": 2.0442266316041118, "grad_norm": 0.3095855116844177, "learning_rate": 2.785164783996081e-06, "loss": 0.3007, "step": 5700 }, { "epoch": 2.0445852259144157, "grad_norm": 0.32317405939102173, "learning_rate": 2.783294007876605e-06, "loss": 0.3084, "step": 5701 }, { "epoch": 2.044943820224719, "grad_norm": 0.3239368796348572, "learning_rate": 2.7814236179142096e-06, "loss": 0.2871, "step": 5702 }, { "epoch": 2.0453024145350227, "grad_norm": 0.3579096794128418, "learning_rate": 2.7795536144347224e-06, "loss": 0.3138, "step": 5703 }, { "epoch": 2.045661008845326, "grad_norm": 0.30595290660858154, "learning_rate": 2.777683997763904e-06, "loss": 0.2774, "step": 5704 }, { "epoch": 2.04601960315563, "grad_norm": 0.3105093240737915, "learning_rate": 2.775814768227448e-06, "loss": 0.2548, "step": 5705 }, { "epoch": 2.0463781974659336, "grad_norm": 0.3393482267856598, "learning_rate": 2.7739459261509783e-06, "loss": 0.2998, "step": 5706 }, { "epoch": 2.046736791776237, "grad_norm": 0.328006774187088, "learning_rate": 2.772077471860056e-06, "loss": 0.2776, "step": 5707 }, { "epoch": 2.0470953860865406, "grad_norm": 0.31613898277282715, "learning_rate": 2.7702094056801652e-06, "loss": 0.276, "step": 5708 }, { "epoch": 2.0474539803968446, "grad_norm": 0.36181187629699707, "learning_rate": 2.768341727936737e-06, "loss": 0.3505, "step": 5709 }, { "epoch": 2.047812574707148, "grad_norm": 0.30746451020240784, "learning_rate": 2.766474438955121e-06, "loss": 0.2663, "step": 5710 }, { "epoch": 2.0481711690174516, "grad_norm": 0.33254048228263855, "learning_rate": 2.7646075390606066e-06, "loss": 0.2884, "step": 5711 }, { "epoch": 2.048529763327755, "grad_norm": 0.3308348059654236, "learning_rate": 2.7627410285784164e-06, "loss": 0.2875, "step": 5712 }, { "epoch": 2.048888357638059, "grad_norm": 0.35102811455726624, "learning_rate": 2.7608749078336962e-06, "loss": 0.3031, "step": 5713 }, { "epoch": 2.0492469519483625, "grad_norm": 0.3370361328125, "learning_rate": 2.759009177151534e-06, "loss": 0.2891, "step": 5714 }, { "epoch": 2.049605546258666, "grad_norm": 0.31897732615470886, "learning_rate": 2.757143836856946e-06, "loss": 0.2745, "step": 5715 }, { "epoch": 2.0499641405689695, "grad_norm": 0.3321336805820465, "learning_rate": 2.755278887274878e-06, "loss": 0.3011, "step": 5716 }, { "epoch": 2.0503227348792734, "grad_norm": 0.31944549083709717, "learning_rate": 2.7534143287302145e-06, "loss": 0.3097, "step": 5717 }, { "epoch": 2.050681329189577, "grad_norm": 0.30635955929756165, "learning_rate": 2.7515501615477626e-06, "loss": 0.2837, "step": 5718 }, { "epoch": 2.0510399234998804, "grad_norm": 0.33162832260131836, "learning_rate": 2.749686386052267e-06, "loss": 0.2946, "step": 5719 }, { "epoch": 2.051398517810184, "grad_norm": 0.3240230977535248, "learning_rate": 2.747823002568405e-06, "loss": 0.2757, "step": 5720 }, { "epoch": 2.051757112120488, "grad_norm": 0.32509884238243103, "learning_rate": 2.745960011420783e-06, "loss": 0.3265, "step": 5721 }, { "epoch": 2.0521157064307913, "grad_norm": 0.3009259104728699, "learning_rate": 2.7440974129339415e-06, "loss": 0.2979, "step": 5722 }, { "epoch": 2.052474300741095, "grad_norm": 0.3078624904155731, "learning_rate": 2.7422352074323465e-06, "loss": 0.2542, "step": 5723 }, { "epoch": 2.0528328950513983, "grad_norm": 0.30870282649993896, "learning_rate": 2.7403733952404055e-06, "loss": 0.2675, "step": 5724 }, { "epoch": 2.0531914893617023, "grad_norm": 0.34390583634376526, "learning_rate": 2.7385119766824442e-06, "loss": 0.3241, "step": 5725 }, { "epoch": 2.0535500836720058, "grad_norm": 0.31183409690856934, "learning_rate": 2.7366509520827377e-06, "loss": 0.2745, "step": 5726 }, { "epoch": 2.0539086779823092, "grad_norm": 0.31265413761138916, "learning_rate": 2.7347903217654743e-06, "loss": 0.2911, "step": 5727 }, { "epoch": 2.0542672722926127, "grad_norm": 0.32728826999664307, "learning_rate": 2.732930086054784e-06, "loss": 0.3329, "step": 5728 }, { "epoch": 2.0546258666029167, "grad_norm": 0.2875693440437317, "learning_rate": 2.7310702452747283e-06, "loss": 0.2574, "step": 5729 }, { "epoch": 2.05498446091322, "grad_norm": 0.3167516887187958, "learning_rate": 2.7292107997492933e-06, "loss": 0.2978, "step": 5730 }, { "epoch": 2.0553430552235237, "grad_norm": 0.32310861349105835, "learning_rate": 2.7273517498024016e-06, "loss": 0.2915, "step": 5731 }, { "epoch": 2.0557016495338276, "grad_norm": 0.32194387912750244, "learning_rate": 2.7254930957579052e-06, "loss": 0.3133, "step": 5732 }, { "epoch": 2.056060243844131, "grad_norm": 0.299385666847229, "learning_rate": 2.7236348379395884e-06, "loss": 0.2985, "step": 5733 }, { "epoch": 2.0564188381544346, "grad_norm": 0.3302931487560272, "learning_rate": 2.721776976671167e-06, "loss": 0.2989, "step": 5734 }, { "epoch": 2.056777432464738, "grad_norm": 0.31818994879722595, "learning_rate": 2.7199195122762823e-06, "loss": 0.289, "step": 5735 }, { "epoch": 2.057136026775042, "grad_norm": 0.329240620136261, "learning_rate": 2.7180624450785143e-06, "loss": 0.3096, "step": 5736 }, { "epoch": 2.0574946210853455, "grad_norm": 0.34556257724761963, "learning_rate": 2.716205775401364e-06, "loss": 0.2806, "step": 5737 }, { "epoch": 2.057853215395649, "grad_norm": 0.33198919892311096, "learning_rate": 2.7143495035682758e-06, "loss": 0.3226, "step": 5738 }, { "epoch": 2.0582118097059525, "grad_norm": 0.3519647419452667, "learning_rate": 2.712493629902614e-06, "loss": 0.3062, "step": 5739 }, { "epoch": 2.0585704040162565, "grad_norm": 0.3302714228630066, "learning_rate": 2.710638154727679e-06, "loss": 0.3022, "step": 5740 }, { "epoch": 2.05892899832656, "grad_norm": 0.31525054574012756, "learning_rate": 2.7087830783667014e-06, "loss": 0.3055, "step": 5741 }, { "epoch": 2.0592875926368635, "grad_norm": 0.3214378356933594, "learning_rate": 2.7069284011428363e-06, "loss": 0.2923, "step": 5742 }, { "epoch": 2.059646186947167, "grad_norm": 0.34144628047943115, "learning_rate": 2.7050741233791823e-06, "loss": 0.2957, "step": 5743 }, { "epoch": 2.060004781257471, "grad_norm": 0.3370274007320404, "learning_rate": 2.7032202453987533e-06, "loss": 0.3136, "step": 5744 }, { "epoch": 2.0603633755677744, "grad_norm": 0.3350156545639038, "learning_rate": 2.701366767524503e-06, "loss": 0.3099, "step": 5745 }, { "epoch": 2.060721969878078, "grad_norm": 0.33500078320503235, "learning_rate": 2.6995136900793162e-06, "loss": 0.3144, "step": 5746 }, { "epoch": 2.0610805641883814, "grad_norm": 0.29325199127197266, "learning_rate": 2.6976610133859992e-06, "loss": 0.2613, "step": 5747 }, { "epoch": 2.0614391584986853, "grad_norm": 0.3315756916999817, "learning_rate": 2.6958087377672966e-06, "loss": 0.2911, "step": 5748 }, { "epoch": 2.061797752808989, "grad_norm": 0.3169020712375641, "learning_rate": 2.69395686354588e-06, "loss": 0.2981, "step": 5749 }, { "epoch": 2.0621563471192923, "grad_norm": 0.30657827854156494, "learning_rate": 2.692105391044354e-06, "loss": 0.2899, "step": 5750 }, { "epoch": 2.062514941429596, "grad_norm": 0.3188519775867462, "learning_rate": 2.6902543205852496e-06, "loss": 0.2971, "step": 5751 }, { "epoch": 2.0628735357398997, "grad_norm": 0.31336283683776855, "learning_rate": 2.688403652491027e-06, "loss": 0.2669, "step": 5752 }, { "epoch": 2.0632321300502032, "grad_norm": 0.31901323795318604, "learning_rate": 2.6865533870840822e-06, "loss": 0.2856, "step": 5753 }, { "epoch": 2.0635907243605067, "grad_norm": 0.3074072003364563, "learning_rate": 2.684703524686731e-06, "loss": 0.2916, "step": 5754 }, { "epoch": 2.06394931867081, "grad_norm": 0.31633445620536804, "learning_rate": 2.6828540656212333e-06, "loss": 0.3127, "step": 5755 }, { "epoch": 2.064307912981114, "grad_norm": 0.3222895860671997, "learning_rate": 2.6810050102097635e-06, "loss": 0.3213, "step": 5756 }, { "epoch": 2.0646665072914177, "grad_norm": 0.28191444277763367, "learning_rate": 2.6791563587744363e-06, "loss": 0.2855, "step": 5757 }, { "epoch": 2.065025101601721, "grad_norm": 0.3096463084220886, "learning_rate": 2.6773081116372933e-06, "loss": 0.2774, "step": 5758 }, { "epoch": 2.0653836959120246, "grad_norm": 0.348519891500473, "learning_rate": 2.675460269120301e-06, "loss": 0.3225, "step": 5759 }, { "epoch": 2.0657422902223286, "grad_norm": 0.3248961269855499, "learning_rate": 2.6736128315453645e-06, "loss": 0.2728, "step": 5760 }, { "epoch": 2.066100884532632, "grad_norm": 0.31531548500061035, "learning_rate": 2.6717657992343093e-06, "loss": 0.3089, "step": 5761 }, { "epoch": 2.0664594788429356, "grad_norm": 0.28834691643714905, "learning_rate": 2.6699191725088954e-06, "loss": 0.2725, "step": 5762 }, { "epoch": 2.0668180731532395, "grad_norm": 0.3224486708641052, "learning_rate": 2.6680729516908123e-06, "loss": 0.3148, "step": 5763 }, { "epoch": 2.067176667463543, "grad_norm": 0.3260754644870758, "learning_rate": 2.6662271371016745e-06, "loss": 0.2987, "step": 5764 }, { "epoch": 2.0675352617738465, "grad_norm": 0.3142983019351959, "learning_rate": 2.6643817290630302e-06, "loss": 0.293, "step": 5765 }, { "epoch": 2.06789385608415, "grad_norm": 0.29086488485336304, "learning_rate": 2.6625367278963555e-06, "loss": 0.2673, "step": 5766 }, { "epoch": 2.068252450394454, "grad_norm": 0.33149173855781555, "learning_rate": 2.660692133923056e-06, "loss": 0.294, "step": 5767 }, { "epoch": 2.0686110447047574, "grad_norm": 0.34154653549194336, "learning_rate": 2.658847947464464e-06, "loss": 0.2859, "step": 5768 }, { "epoch": 2.068969639015061, "grad_norm": 0.32915598154067993, "learning_rate": 2.657004168841843e-06, "loss": 0.2945, "step": 5769 }, { "epoch": 2.0693282333253644, "grad_norm": 0.28034907579421997, "learning_rate": 2.655160798376387e-06, "loss": 0.2919, "step": 5770 }, { "epoch": 2.0696868276356684, "grad_norm": 0.3238065838813782, "learning_rate": 2.6533178363892108e-06, "loss": 0.3314, "step": 5771 }, { "epoch": 2.070045421945972, "grad_norm": 0.30043381452560425, "learning_rate": 2.6514752832013734e-06, "loss": 0.2781, "step": 5772 }, { "epoch": 2.0704040162562753, "grad_norm": 0.3101198375225067, "learning_rate": 2.649633139133846e-06, "loss": 0.2887, "step": 5773 }, { "epoch": 2.070762610566579, "grad_norm": 0.3221406936645508, "learning_rate": 2.647791404507538e-06, "loss": 0.3256, "step": 5774 }, { "epoch": 2.071121204876883, "grad_norm": 0.2851792573928833, "learning_rate": 2.645950079643287e-06, "loss": 0.2679, "step": 5775 }, { "epoch": 2.0714797991871863, "grad_norm": 0.31243517994880676, "learning_rate": 2.6441091648618543e-06, "loss": 0.2937, "step": 5776 }, { "epoch": 2.0718383934974898, "grad_norm": 0.3287814259529114, "learning_rate": 2.6422686604839352e-06, "loss": 0.2882, "step": 5777 }, { "epoch": 2.0721969878077933, "grad_norm": 0.3216042220592499, "learning_rate": 2.6404285668301505e-06, "loss": 0.2852, "step": 5778 }, { "epoch": 2.072555582118097, "grad_norm": 0.3151402771472931, "learning_rate": 2.6385888842210506e-06, "loss": 0.2879, "step": 5779 }, { "epoch": 2.0729141764284007, "grad_norm": 0.33958572149276733, "learning_rate": 2.6367496129771153e-06, "loss": 0.2932, "step": 5780 }, { "epoch": 2.073272770738704, "grad_norm": 0.31128746271133423, "learning_rate": 2.634910753418748e-06, "loss": 0.2726, "step": 5781 }, { "epoch": 2.0736313650490077, "grad_norm": 0.3214646875858307, "learning_rate": 2.633072305866286e-06, "loss": 0.294, "step": 5782 }, { "epoch": 2.0739899593593116, "grad_norm": 0.2965613901615143, "learning_rate": 2.631234270639992e-06, "loss": 0.2963, "step": 5783 }, { "epoch": 2.074348553669615, "grad_norm": 0.3189033269882202, "learning_rate": 2.62939664806006e-06, "loss": 0.3087, "step": 5784 }, { "epoch": 2.0747071479799186, "grad_norm": 0.31587541103363037, "learning_rate": 2.6275594384466054e-06, "loss": 0.2974, "step": 5785 }, { "epoch": 2.075065742290222, "grad_norm": 0.30601394176483154, "learning_rate": 2.6257226421196776e-06, "loss": 0.2799, "step": 5786 }, { "epoch": 2.075424336600526, "grad_norm": 0.3197486102581024, "learning_rate": 2.6238862593992543e-06, "loss": 0.2805, "step": 5787 }, { "epoch": 2.0757829309108295, "grad_norm": 0.3130912780761719, "learning_rate": 2.6220502906052327e-06, "loss": 0.2966, "step": 5788 }, { "epoch": 2.076141525221133, "grad_norm": 0.34717342257499695, "learning_rate": 2.6202147360574526e-06, "loss": 0.3188, "step": 5789 }, { "epoch": 2.0765001195314365, "grad_norm": 0.3031749725341797, "learning_rate": 2.618379596075668e-06, "loss": 0.2619, "step": 5790 }, { "epoch": 2.0768587138417405, "grad_norm": 0.33196118474006653, "learning_rate": 2.616544870979567e-06, "loss": 0.2865, "step": 5791 }, { "epoch": 2.077217308152044, "grad_norm": 0.3564009964466095, "learning_rate": 2.6147105610887676e-06, "loss": 0.2977, "step": 5792 }, { "epoch": 2.0775759024623475, "grad_norm": 0.33011823892593384, "learning_rate": 2.6128766667228065e-06, "loss": 0.2786, "step": 5793 }, { "epoch": 2.0779344967726514, "grad_norm": 0.3188830316066742, "learning_rate": 2.6110431882011577e-06, "loss": 0.2846, "step": 5794 }, { "epoch": 2.078293091082955, "grad_norm": 0.298965185880661, "learning_rate": 2.609210125843217e-06, "loss": 0.2639, "step": 5795 }, { "epoch": 2.0786516853932584, "grad_norm": 0.33187416195869446, "learning_rate": 2.6073774799683136e-06, "loss": 0.3311, "step": 5796 }, { "epoch": 2.079010279703562, "grad_norm": 0.30962178111076355, "learning_rate": 2.6055452508956945e-06, "loss": 0.2973, "step": 5797 }, { "epoch": 2.079368874013866, "grad_norm": 0.3213891386985779, "learning_rate": 2.6037134389445425e-06, "loss": 0.291, "step": 5798 }, { "epoch": 2.0797274683241693, "grad_norm": 0.34672611951828003, "learning_rate": 2.6018820444339643e-06, "loss": 0.3095, "step": 5799 }, { "epoch": 2.080086062634473, "grad_norm": 0.3521893620491028, "learning_rate": 2.600051067682995e-06, "loss": 0.31, "step": 5800 }, { "epoch": 2.0804446569447763, "grad_norm": 0.3052128553390503, "learning_rate": 2.598220509010599e-06, "loss": 0.2752, "step": 5801 }, { "epoch": 2.0808032512550803, "grad_norm": 0.30752870440483093, "learning_rate": 2.59639036873566e-06, "loss": 0.2831, "step": 5802 }, { "epoch": 2.0811618455653838, "grad_norm": 0.3001801073551178, "learning_rate": 2.594560647176997e-06, "loss": 0.2817, "step": 5803 }, { "epoch": 2.0815204398756872, "grad_norm": 0.3300153315067291, "learning_rate": 2.5927313446533554e-06, "loss": 0.3004, "step": 5804 }, { "epoch": 2.0818790341859907, "grad_norm": 0.32150211930274963, "learning_rate": 2.5909024614833987e-06, "loss": 0.2878, "step": 5805 }, { "epoch": 2.0822376284962947, "grad_norm": 0.33495032787323, "learning_rate": 2.589073997985732e-06, "loss": 0.2906, "step": 5806 }, { "epoch": 2.082596222806598, "grad_norm": 0.31110629439353943, "learning_rate": 2.5872459544788743e-06, "loss": 0.2846, "step": 5807 }, { "epoch": 2.0829548171169017, "grad_norm": 0.3441105782985687, "learning_rate": 2.5854183312812776e-06, "loss": 0.3385, "step": 5808 }, { "epoch": 2.083313411427205, "grad_norm": 0.31468045711517334, "learning_rate": 2.5835911287113215e-06, "loss": 0.2703, "step": 5809 }, { "epoch": 2.083672005737509, "grad_norm": 0.4085441529750824, "learning_rate": 2.5817643470873066e-06, "loss": 0.304, "step": 5810 }, { "epoch": 2.0840306000478126, "grad_norm": 0.30785316228866577, "learning_rate": 2.5799379867274666e-06, "loss": 0.2656, "step": 5811 }, { "epoch": 2.084389194358116, "grad_norm": 0.3171161413192749, "learning_rate": 2.5781120479499577e-06, "loss": 0.3086, "step": 5812 }, { "epoch": 2.0847477886684196, "grad_norm": 0.30340102314949036, "learning_rate": 2.576286531072868e-06, "loss": 0.292, "step": 5813 }, { "epoch": 2.0851063829787235, "grad_norm": 0.3244903087615967, "learning_rate": 2.574461436414203e-06, "loss": 0.285, "step": 5814 }, { "epoch": 2.085464977289027, "grad_norm": 0.375501424074173, "learning_rate": 2.5726367642919016e-06, "loss": 0.313, "step": 5815 }, { "epoch": 2.0858235715993305, "grad_norm": 0.33900749683380127, "learning_rate": 2.5708125150238294e-06, "loss": 0.2939, "step": 5816 }, { "epoch": 2.0861821659096345, "grad_norm": 0.2933381199836731, "learning_rate": 2.5689886889277716e-06, "loss": 0.2563, "step": 5817 }, { "epoch": 2.086540760219938, "grad_norm": 0.3227969706058502, "learning_rate": 2.5671652863214514e-06, "loss": 0.3195, "step": 5818 }, { "epoch": 2.0868993545302414, "grad_norm": 0.29277053475379944, "learning_rate": 2.5653423075225046e-06, "loss": 0.2841, "step": 5819 }, { "epoch": 2.087257948840545, "grad_norm": 0.3161381185054779, "learning_rate": 2.5635197528485036e-06, "loss": 0.2846, "step": 5820 }, { "epoch": 2.087616543150849, "grad_norm": 0.3354038596153259, "learning_rate": 2.561697622616943e-06, "loss": 0.3423, "step": 5821 }, { "epoch": 2.0879751374611524, "grad_norm": 0.2987460494041443, "learning_rate": 2.5598759171452387e-06, "loss": 0.267, "step": 5822 }, { "epoch": 2.088333731771456, "grad_norm": 0.34344154596328735, "learning_rate": 2.558054636750745e-06, "loss": 0.3316, "step": 5823 }, { "epoch": 2.0886923260817594, "grad_norm": 0.3165276050567627, "learning_rate": 2.5562337817507298e-06, "loss": 0.2687, "step": 5824 }, { "epoch": 2.0890509203920633, "grad_norm": 0.3194006085395813, "learning_rate": 2.5544133524623942e-06, "loss": 0.291, "step": 5825 }, { "epoch": 2.089409514702367, "grad_norm": 0.3322909474372864, "learning_rate": 2.55259334920286e-06, "loss": 0.2917, "step": 5826 }, { "epoch": 2.0897681090126703, "grad_norm": 0.2958454191684723, "learning_rate": 2.550773772289178e-06, "loss": 0.2966, "step": 5827 }, { "epoch": 2.090126703322974, "grad_norm": 0.30482017993927, "learning_rate": 2.5489546220383244e-06, "loss": 0.2631, "step": 5828 }, { "epoch": 2.0904852976332777, "grad_norm": 0.3336748480796814, "learning_rate": 2.547135898767202e-06, "loss": 0.3303, "step": 5829 }, { "epoch": 2.0908438919435812, "grad_norm": 0.3317694365978241, "learning_rate": 2.545317602792639e-06, "loss": 0.2594, "step": 5830 }, { "epoch": 2.0912024862538847, "grad_norm": 0.35725492238998413, "learning_rate": 2.543499734431385e-06, "loss": 0.3263, "step": 5831 }, { "epoch": 2.091561080564188, "grad_norm": 0.2857055962085724, "learning_rate": 2.5416822940001195e-06, "loss": 0.2552, "step": 5832 }, { "epoch": 2.091919674874492, "grad_norm": 0.3110571801662445, "learning_rate": 2.5398652818154485e-06, "loss": 0.2862, "step": 5833 }, { "epoch": 2.0922782691847956, "grad_norm": 0.30569401383399963, "learning_rate": 2.5380486981938955e-06, "loss": 0.332, "step": 5834 }, { "epoch": 2.092636863495099, "grad_norm": 0.2894768714904785, "learning_rate": 2.536232543451923e-06, "loss": 0.2594, "step": 5835 }, { "epoch": 2.0929954578054026, "grad_norm": 0.32382312417030334, "learning_rate": 2.534416817905905e-06, "loss": 0.2811, "step": 5836 }, { "epoch": 2.0933540521157066, "grad_norm": 0.3243968188762665, "learning_rate": 2.5326015218721488e-06, "loss": 0.2727, "step": 5837 }, { "epoch": 2.09371264642601, "grad_norm": 0.3266359269618988, "learning_rate": 2.530786655666883e-06, "loss": 0.3276, "step": 5838 }, { "epoch": 2.0940712407363136, "grad_norm": 0.29404565691947937, "learning_rate": 2.528972219606262e-06, "loss": 0.2849, "step": 5839 }, { "epoch": 2.094429835046617, "grad_norm": 0.30632486939430237, "learning_rate": 2.527158214006372e-06, "loss": 0.3005, "step": 5840 }, { "epoch": 2.094788429356921, "grad_norm": 0.32117825746536255, "learning_rate": 2.5253446391832124e-06, "loss": 0.2851, "step": 5841 }, { "epoch": 2.0951470236672245, "grad_norm": 0.33053308725357056, "learning_rate": 2.523531495452718e-06, "loss": 0.3085, "step": 5842 }, { "epoch": 2.095505617977528, "grad_norm": 0.31316325068473816, "learning_rate": 2.52171878313074e-06, "loss": 0.2826, "step": 5843 }, { "epoch": 2.0958642122878315, "grad_norm": 0.31619784235954285, "learning_rate": 2.5199065025330597e-06, "loss": 0.3191, "step": 5844 }, { "epoch": 2.0962228065981354, "grad_norm": 0.2930016815662384, "learning_rate": 2.5180946539753835e-06, "loss": 0.2737, "step": 5845 }, { "epoch": 2.096581400908439, "grad_norm": 0.31825125217437744, "learning_rate": 2.5162832377733394e-06, "loss": 0.2917, "step": 5846 }, { "epoch": 2.0969399952187424, "grad_norm": 0.3447604477405548, "learning_rate": 2.514472254242485e-06, "loss": 0.3256, "step": 5847 }, { "epoch": 2.0972985895290464, "grad_norm": 0.31979238986968994, "learning_rate": 2.5126617036982946e-06, "loss": 0.2842, "step": 5848 }, { "epoch": 2.09765718383935, "grad_norm": 0.30908551812171936, "learning_rate": 2.5108515864561746e-06, "loss": 0.2829, "step": 5849 }, { "epoch": 2.0980157781496533, "grad_norm": 0.36351636052131653, "learning_rate": 2.5090419028314537e-06, "loss": 0.3274, "step": 5850 }, { "epoch": 2.098374372459957, "grad_norm": 0.3036632537841797, "learning_rate": 2.5072326531393798e-06, "loss": 0.2862, "step": 5851 }, { "epoch": 2.0987329667702608, "grad_norm": 0.3174853026866913, "learning_rate": 2.505423837695137e-06, "loss": 0.3044, "step": 5852 }, { "epoch": 2.0990915610805643, "grad_norm": 0.30156147480010986, "learning_rate": 2.5036154568138206e-06, "loss": 0.2948, "step": 5853 }, { "epoch": 2.0994501553908678, "grad_norm": 0.32607561349868774, "learning_rate": 2.50180751081046e-06, "loss": 0.317, "step": 5854 }, { "epoch": 2.0998087497011713, "grad_norm": 0.3042302429676056, "learning_rate": 2.5000000000000015e-06, "loss": 0.2904, "step": 5855 }, { "epoch": 2.100167344011475, "grad_norm": 0.3250087797641754, "learning_rate": 2.498192924697321e-06, "loss": 0.2907, "step": 5856 }, { "epoch": 2.1005259383217787, "grad_norm": 0.32012268900871277, "learning_rate": 2.4963862852172166e-06, "loss": 0.2938, "step": 5857 }, { "epoch": 2.100884532632082, "grad_norm": 0.3208240270614624, "learning_rate": 2.49458008187441e-06, "loss": 0.2694, "step": 5858 }, { "epoch": 2.1012431269423857, "grad_norm": 0.34836989641189575, "learning_rate": 2.4927743149835504e-06, "loss": 0.3409, "step": 5859 }, { "epoch": 2.1016017212526896, "grad_norm": 0.3283798098564148, "learning_rate": 2.490968984859204e-06, "loss": 0.2462, "step": 5860 }, { "epoch": 2.101960315562993, "grad_norm": 0.3133801221847534, "learning_rate": 2.489164091815866e-06, "loss": 0.307, "step": 5861 }, { "epoch": 2.1023189098732966, "grad_norm": 0.35444217920303345, "learning_rate": 2.4873596361679546e-06, "loss": 0.3203, "step": 5862 }, { "epoch": 2.1026775041836, "grad_norm": 0.31090328097343445, "learning_rate": 2.4855556182298124e-06, "loss": 0.2944, "step": 5863 }, { "epoch": 2.103036098493904, "grad_norm": 0.3128144145011902, "learning_rate": 2.483752038315706e-06, "loss": 0.3053, "step": 5864 }, { "epoch": 2.1033946928042075, "grad_norm": 0.30195292830467224, "learning_rate": 2.4819488967398213e-06, "loss": 0.2846, "step": 5865 }, { "epoch": 2.103753287114511, "grad_norm": 0.3493706285953522, "learning_rate": 2.4801461938162746e-06, "loss": 0.3297, "step": 5866 }, { "epoch": 2.1041118814248145, "grad_norm": 0.30412593483924866, "learning_rate": 2.478343929859099e-06, "loss": 0.282, "step": 5867 }, { "epoch": 2.1044704757351185, "grad_norm": 0.3054194152355194, "learning_rate": 2.476542105182254e-06, "loss": 0.2814, "step": 5868 }, { "epoch": 2.104829070045422, "grad_norm": 0.3210216760635376, "learning_rate": 2.4747407200996294e-06, "loss": 0.312, "step": 5869 }, { "epoch": 2.1051876643557255, "grad_norm": 0.3246299922466278, "learning_rate": 2.4729397749250258e-06, "loss": 0.2897, "step": 5870 }, { "epoch": 2.105546258666029, "grad_norm": 0.3133496344089508, "learning_rate": 2.471139269972177e-06, "loss": 0.293, "step": 5871 }, { "epoch": 2.105904852976333, "grad_norm": 0.3356991112232208, "learning_rate": 2.4693392055547333e-06, "loss": 0.2743, "step": 5872 }, { "epoch": 2.1062634472866364, "grad_norm": 0.3728862702846527, "learning_rate": 2.4675395819862737e-06, "loss": 0.3532, "step": 5873 }, { "epoch": 2.10662204159694, "grad_norm": 0.33082151412963867, "learning_rate": 2.4657403995802977e-06, "loss": 0.2971, "step": 5874 }, { "epoch": 2.1069806359072434, "grad_norm": 0.3450760841369629, "learning_rate": 2.4639416586502286e-06, "loss": 0.27, "step": 5875 }, { "epoch": 2.1073392302175473, "grad_norm": 0.3240991234779358, "learning_rate": 2.4621433595094145e-06, "loss": 0.2979, "step": 5876 }, { "epoch": 2.107697824527851, "grad_norm": 0.3238527476787567, "learning_rate": 2.460345502471121e-06, "loss": 0.3057, "step": 5877 }, { "epoch": 2.1080564188381543, "grad_norm": 0.33220645785331726, "learning_rate": 2.4585480878485423e-06, "loss": 0.2919, "step": 5878 }, { "epoch": 2.1084150131484583, "grad_norm": 0.337346613407135, "learning_rate": 2.456751115954794e-06, "loss": 0.3089, "step": 5879 }, { "epoch": 2.1087736074587617, "grad_norm": 0.3204178810119629, "learning_rate": 2.4549545871029136e-06, "loss": 0.2822, "step": 5880 }, { "epoch": 2.1091322017690652, "grad_norm": 0.312257319688797, "learning_rate": 2.453158501605864e-06, "loss": 0.3076, "step": 5881 }, { "epoch": 2.1094907960793687, "grad_norm": 0.32465067505836487, "learning_rate": 2.451362859776525e-06, "loss": 0.3018, "step": 5882 }, { "epoch": 2.1098493903896727, "grad_norm": 0.33145296573638916, "learning_rate": 2.449567661927707e-06, "loss": 0.3117, "step": 5883 }, { "epoch": 2.110207984699976, "grad_norm": 0.3185211718082428, "learning_rate": 2.447772908372136e-06, "loss": 0.3061, "step": 5884 }, { "epoch": 2.1105665790102797, "grad_norm": 0.3157852590084076, "learning_rate": 2.445978599422464e-06, "loss": 0.2836, "step": 5885 }, { "epoch": 2.110925173320583, "grad_norm": 0.31320133805274963, "learning_rate": 2.444184735391266e-06, "loss": 0.3128, "step": 5886 }, { "epoch": 2.111283767630887, "grad_norm": 0.3155536949634552, "learning_rate": 2.4423913165910392e-06, "loss": 0.3021, "step": 5887 }, { "epoch": 2.1116423619411906, "grad_norm": 0.3252730071544647, "learning_rate": 2.4405983433342036e-06, "loss": 0.3036, "step": 5888 }, { "epoch": 2.112000956251494, "grad_norm": 0.302094042301178, "learning_rate": 2.438805815933098e-06, "loss": 0.279, "step": 5889 }, { "epoch": 2.1123595505617976, "grad_norm": 0.3489224314689636, "learning_rate": 2.4370137346999865e-06, "loss": 0.3022, "step": 5890 }, { "epoch": 2.1127181448721015, "grad_norm": 0.34675467014312744, "learning_rate": 2.4352220999470576e-06, "loss": 0.3172, "step": 5891 }, { "epoch": 2.113076739182405, "grad_norm": 0.29762572050094604, "learning_rate": 2.433430911986418e-06, "loss": 0.2812, "step": 5892 }, { "epoch": 2.1134353334927085, "grad_norm": 0.32481837272644043, "learning_rate": 2.4316401711301003e-06, "loss": 0.3305, "step": 5893 }, { "epoch": 2.113793927803012, "grad_norm": 0.328334242105484, "learning_rate": 2.429849877690053e-06, "loss": 0.2924, "step": 5894 }, { "epoch": 2.114152522113316, "grad_norm": 0.3366663455963135, "learning_rate": 2.4280600319781562e-06, "loss": 0.2884, "step": 5895 }, { "epoch": 2.1145111164236194, "grad_norm": 0.35257670283317566, "learning_rate": 2.4262706343061994e-06, "loss": 0.3268, "step": 5896 }, { "epoch": 2.114869710733923, "grad_norm": 0.3003575801849365, "learning_rate": 2.4244816849859073e-06, "loss": 0.2879, "step": 5897 }, { "epoch": 2.1152283050442264, "grad_norm": 0.3028564453125, "learning_rate": 2.4226931843289198e-06, "loss": 0.3175, "step": 5898 }, { "epoch": 2.1155868993545304, "grad_norm": 0.3114095628261566, "learning_rate": 2.420905132646797e-06, "loss": 0.286, "step": 5899 }, { "epoch": 2.115945493664834, "grad_norm": 0.3342496454715729, "learning_rate": 2.4191175302510255e-06, "loss": 0.3041, "step": 5900 }, { "epoch": 2.1163040879751374, "grad_norm": 0.3239682614803314, "learning_rate": 2.417330377453008e-06, "loss": 0.292, "step": 5901 }, { "epoch": 2.1166626822854413, "grad_norm": 0.3167209327220917, "learning_rate": 2.415543674564074e-06, "loss": 0.2864, "step": 5902 }, { "epoch": 2.117021276595745, "grad_norm": 0.3219943344593048, "learning_rate": 2.4137574218954723e-06, "loss": 0.2807, "step": 5903 }, { "epoch": 2.1173798709060483, "grad_norm": 0.33630818128585815, "learning_rate": 2.411971619758375e-06, "loss": 0.2861, "step": 5904 }, { "epoch": 2.117738465216352, "grad_norm": 0.3260372579097748, "learning_rate": 2.4101862684638738e-06, "loss": 0.3133, "step": 5905 }, { "epoch": 2.1180970595266557, "grad_norm": 0.3189767599105835, "learning_rate": 2.4084013683229813e-06, "loss": 0.2841, "step": 5906 }, { "epoch": 2.118455653836959, "grad_norm": 0.30832868814468384, "learning_rate": 2.4066169196466326e-06, "loss": 0.2643, "step": 5907 }, { "epoch": 2.1188142481472627, "grad_norm": 0.3126294016838074, "learning_rate": 2.404832922745685e-06, "loss": 0.3124, "step": 5908 }, { "epoch": 2.119172842457566, "grad_norm": 0.32233837246894836, "learning_rate": 2.4030493779309167e-06, "loss": 0.2923, "step": 5909 }, { "epoch": 2.11953143676787, "grad_norm": 0.3101293444633484, "learning_rate": 2.401266285513028e-06, "loss": 0.2983, "step": 5910 }, { "epoch": 2.1198900310781736, "grad_norm": 0.3305850028991699, "learning_rate": 2.399483645802636e-06, "loss": 0.2916, "step": 5911 }, { "epoch": 2.120248625388477, "grad_norm": 0.3080557584762573, "learning_rate": 2.3977014591102848e-06, "loss": 0.275, "step": 5912 }, { "epoch": 2.1206072196987806, "grad_norm": 0.30648770928382874, "learning_rate": 2.3959197257464343e-06, "loss": 0.3015, "step": 5913 }, { "epoch": 2.1209658140090846, "grad_norm": 0.31711649894714355, "learning_rate": 2.394138446021469e-06, "loss": 0.2778, "step": 5914 }, { "epoch": 2.121324408319388, "grad_norm": 0.3365003764629364, "learning_rate": 2.392357620245694e-06, "loss": 0.281, "step": 5915 }, { "epoch": 2.1216830026296916, "grad_norm": 0.3146350383758545, "learning_rate": 2.390577248729334e-06, "loss": 0.3077, "step": 5916 }, { "epoch": 2.122041596939995, "grad_norm": 0.33025676012039185, "learning_rate": 2.3887973317825374e-06, "loss": 0.3064, "step": 5917 }, { "epoch": 2.122400191250299, "grad_norm": 0.3146510422229767, "learning_rate": 2.3870178697153686e-06, "loss": 0.2794, "step": 5918 }, { "epoch": 2.1227587855606025, "grad_norm": 0.32041117548942566, "learning_rate": 2.3852388628378153e-06, "loss": 0.2958, "step": 5919 }, { "epoch": 2.123117379870906, "grad_norm": 0.31027549505233765, "learning_rate": 2.3834603114597875e-06, "loss": 0.3085, "step": 5920 }, { "epoch": 2.1234759741812095, "grad_norm": 0.33083218336105347, "learning_rate": 2.381682215891114e-06, "loss": 0.3196, "step": 5921 }, { "epoch": 2.1238345684915134, "grad_norm": 0.3186880052089691, "learning_rate": 2.3799045764415464e-06, "loss": 0.3186, "step": 5922 }, { "epoch": 2.124193162801817, "grad_norm": 0.3102025091648102, "learning_rate": 2.3781273934207512e-06, "loss": 0.2767, "step": 5923 }, { "epoch": 2.1245517571121204, "grad_norm": 0.30696427822113037, "learning_rate": 2.376350667138323e-06, "loss": 0.2857, "step": 5924 }, { "epoch": 2.124910351422424, "grad_norm": 0.3228246867656708, "learning_rate": 2.374574397903768e-06, "loss": 0.3158, "step": 5925 }, { "epoch": 2.125268945732728, "grad_norm": 0.31244832277297974, "learning_rate": 2.3727985860265224e-06, "loss": 0.2848, "step": 5926 }, { "epoch": 2.1256275400430313, "grad_norm": 0.3004782199859619, "learning_rate": 2.3710232318159387e-06, "loss": 0.2798, "step": 5927 }, { "epoch": 2.125986134353335, "grad_norm": 0.3393665552139282, "learning_rate": 2.369248335581285e-06, "loss": 0.2996, "step": 5928 }, { "epoch": 2.1263447286636383, "grad_norm": 0.3278115689754486, "learning_rate": 2.3674738976317585e-06, "loss": 0.286, "step": 5929 }, { "epoch": 2.1267033229739423, "grad_norm": 0.32174885272979736, "learning_rate": 2.3656999182764673e-06, "loss": 0.3044, "step": 5930 }, { "epoch": 2.1270619172842458, "grad_norm": 0.2970079779624939, "learning_rate": 2.363926397824446e-06, "loss": 0.2784, "step": 5931 }, { "epoch": 2.1274205115945493, "grad_norm": 0.3182488977909088, "learning_rate": 2.3621533365846473e-06, "loss": 0.3117, "step": 5932 }, { "epoch": 2.127779105904853, "grad_norm": 0.3058863878250122, "learning_rate": 2.3603807348659447e-06, "loss": 0.2844, "step": 5933 }, { "epoch": 2.1281377002151567, "grad_norm": 0.3208722174167633, "learning_rate": 2.3586085929771317e-06, "loss": 0.326, "step": 5934 }, { "epoch": 2.12849629452546, "grad_norm": 0.2958495616912842, "learning_rate": 2.356836911226918e-06, "loss": 0.2865, "step": 5935 }, { "epoch": 2.1288548888357637, "grad_norm": 0.32425516843795776, "learning_rate": 2.3550656899239377e-06, "loss": 0.3071, "step": 5936 }, { "epoch": 2.1292134831460676, "grad_norm": 0.3338034749031067, "learning_rate": 2.353294929376743e-06, "loss": 0.2858, "step": 5937 }, { "epoch": 2.129572077456371, "grad_norm": 0.29636064171791077, "learning_rate": 2.351524629893806e-06, "loss": 0.2819, "step": 5938 }, { "epoch": 2.1299306717666746, "grad_norm": 0.3317559063434601, "learning_rate": 2.3497547917835207e-06, "loss": 0.2968, "step": 5939 }, { "epoch": 2.130289266076978, "grad_norm": 0.31572604179382324, "learning_rate": 2.3479854153541935e-06, "loss": 0.2845, "step": 5940 }, { "epoch": 2.130647860387282, "grad_norm": 0.3144468069076538, "learning_rate": 2.34621650091406e-06, "loss": 0.2779, "step": 5941 }, { "epoch": 2.1310064546975855, "grad_norm": 0.34196171164512634, "learning_rate": 2.3444480487712643e-06, "loss": 0.2892, "step": 5942 }, { "epoch": 2.131365049007889, "grad_norm": 0.33727991580963135, "learning_rate": 2.342680059233885e-06, "loss": 0.2851, "step": 5943 }, { "epoch": 2.1317236433181925, "grad_norm": 0.31254708766937256, "learning_rate": 2.340912532609904e-06, "loss": 0.2857, "step": 5944 }, { "epoch": 2.1320822376284965, "grad_norm": 0.322506844997406, "learning_rate": 2.3391454692072337e-06, "loss": 0.3175, "step": 5945 }, { "epoch": 2.1324408319388, "grad_norm": 0.3017715513706207, "learning_rate": 2.3373788693337024e-06, "loss": 0.2814, "step": 5946 }, { "epoch": 2.1327994262491035, "grad_norm": 0.3063904941082001, "learning_rate": 2.3356127332970537e-06, "loss": 0.291, "step": 5947 }, { "epoch": 2.133158020559407, "grad_norm": 0.32479795813560486, "learning_rate": 2.333847061404957e-06, "loss": 0.3057, "step": 5948 }, { "epoch": 2.133516614869711, "grad_norm": 0.32911279797554016, "learning_rate": 2.3320818539649965e-06, "loss": 0.306, "step": 5949 }, { "epoch": 2.1338752091800144, "grad_norm": 0.31811144948005676, "learning_rate": 2.3303171112846775e-06, "loss": 0.3139, "step": 5950 }, { "epoch": 2.134233803490318, "grad_norm": 0.3025299906730652, "learning_rate": 2.328552833671425e-06, "loss": 0.2816, "step": 5951 }, { "epoch": 2.1345923978006214, "grad_norm": 0.30525723099708557, "learning_rate": 2.326789021432579e-06, "loss": 0.2759, "step": 5952 }, { "epoch": 2.1349509921109253, "grad_norm": 0.3464708626270294, "learning_rate": 2.3250256748754026e-06, "loss": 0.3371, "step": 5953 }, { "epoch": 2.135309586421229, "grad_norm": 0.3243806064128876, "learning_rate": 2.323262794307075e-06, "loss": 0.2951, "step": 5954 }, { "epoch": 2.1356681807315323, "grad_norm": 0.3236928880214691, "learning_rate": 2.321500380034697e-06, "loss": 0.314, "step": 5955 }, { "epoch": 2.136026775041836, "grad_norm": 0.3320813775062561, "learning_rate": 2.319738432365288e-06, "loss": 0.2973, "step": 5956 }, { "epoch": 2.1363853693521397, "grad_norm": 0.3073425889015198, "learning_rate": 2.317976951605781e-06, "loss": 0.2767, "step": 5957 }, { "epoch": 2.1367439636624432, "grad_norm": 0.3259352147579193, "learning_rate": 2.316215938063035e-06, "loss": 0.2979, "step": 5958 }, { "epoch": 2.1371025579727467, "grad_norm": 0.3102295696735382, "learning_rate": 2.3144553920438183e-06, "loss": 0.2913, "step": 5959 }, { "epoch": 2.1374611522830502, "grad_norm": 0.30689728260040283, "learning_rate": 2.3126953138548317e-06, "loss": 0.2744, "step": 5960 }, { "epoch": 2.137819746593354, "grad_norm": 0.3447137475013733, "learning_rate": 2.3109357038026804e-06, "loss": 0.3431, "step": 5961 }, { "epoch": 2.1381783409036577, "grad_norm": 0.30496159195899963, "learning_rate": 2.309176562193895e-06, "loss": 0.284, "step": 5962 }, { "epoch": 2.138536935213961, "grad_norm": 0.2855098247528076, "learning_rate": 2.3074178893349263e-06, "loss": 0.2518, "step": 5963 }, { "epoch": 2.138895529524265, "grad_norm": 0.3589498996734619, "learning_rate": 2.3056596855321366e-06, "loss": 0.337, "step": 5964 }, { "epoch": 2.1392541238345686, "grad_norm": 0.2916960120201111, "learning_rate": 2.303901951091812e-06, "loss": 0.2778, "step": 5965 }, { "epoch": 2.139612718144872, "grad_norm": 0.30247074365615845, "learning_rate": 2.3021446863201556e-06, "loss": 0.2793, "step": 5966 }, { "epoch": 2.1399713124551756, "grad_norm": 0.2981411814689636, "learning_rate": 2.3003878915232887e-06, "loss": 0.2884, "step": 5967 }, { "epoch": 2.1403299067654795, "grad_norm": 0.3338834047317505, "learning_rate": 2.2986315670072516e-06, "loss": 0.3176, "step": 5968 }, { "epoch": 2.140688501075783, "grad_norm": 0.31059908866882324, "learning_rate": 2.2968757130779985e-06, "loss": 0.2813, "step": 5969 }, { "epoch": 2.1410470953860865, "grad_norm": 0.30759790539741516, "learning_rate": 2.2951203300414077e-06, "loss": 0.2859, "step": 5970 }, { "epoch": 2.14140568969639, "grad_norm": 0.3393057584762573, "learning_rate": 2.2933654182032668e-06, "loss": 0.308, "step": 5971 }, { "epoch": 2.141764284006694, "grad_norm": 0.32989388704299927, "learning_rate": 2.2916109778692948e-06, "loss": 0.3109, "step": 5972 }, { "epoch": 2.1421228783169974, "grad_norm": 0.3131861686706543, "learning_rate": 2.289857009345115e-06, "loss": 0.2942, "step": 5973 }, { "epoch": 2.142481472627301, "grad_norm": 0.3290354311466217, "learning_rate": 2.2881035129362757e-06, "loss": 0.3023, "step": 5974 }, { "epoch": 2.1428400669376044, "grad_norm": 0.2890737056732178, "learning_rate": 2.2863504889482434e-06, "loss": 0.2592, "step": 5975 }, { "epoch": 2.1431986612479084, "grad_norm": 0.32279911637306213, "learning_rate": 2.2845979376863954e-06, "loss": 0.3143, "step": 5976 }, { "epoch": 2.143557255558212, "grad_norm": 0.3297102749347687, "learning_rate": 2.2828458594560375e-06, "loss": 0.3093, "step": 5977 }, { "epoch": 2.1439158498685154, "grad_norm": 0.3110537827014923, "learning_rate": 2.281094254562383e-06, "loss": 0.289, "step": 5978 }, { "epoch": 2.144274444178819, "grad_norm": 0.3173585832118988, "learning_rate": 2.279343123310569e-06, "loss": 0.3089, "step": 5979 }, { "epoch": 2.144633038489123, "grad_norm": 0.3101155459880829, "learning_rate": 2.2775924660056484e-06, "loss": 0.2765, "step": 5980 }, { "epoch": 2.1449916327994263, "grad_norm": 0.3103584349155426, "learning_rate": 2.275842282952589e-06, "loss": 0.3104, "step": 5981 }, { "epoch": 2.14535022710973, "grad_norm": 0.3216474950313568, "learning_rate": 2.2740925744562788e-06, "loss": 0.3132, "step": 5982 }, { "epoch": 2.1457088214200333, "grad_norm": 0.32007431983947754, "learning_rate": 2.2723433408215228e-06, "loss": 0.2748, "step": 5983 }, { "epoch": 2.146067415730337, "grad_norm": 0.3641224503517151, "learning_rate": 2.270594582353043e-06, "loss": 0.3046, "step": 5984 }, { "epoch": 2.1464260100406407, "grad_norm": 0.31477537751197815, "learning_rate": 2.268846299355481e-06, "loss": 0.2726, "step": 5985 }, { "epoch": 2.146784604350944, "grad_norm": 0.29413819313049316, "learning_rate": 2.267098492133388e-06, "loss": 0.2925, "step": 5986 }, { "epoch": 2.147143198661248, "grad_norm": 0.3050673007965088, "learning_rate": 2.265351160991243e-06, "loss": 0.2907, "step": 5987 }, { "epoch": 2.1475017929715516, "grad_norm": 0.3482457101345062, "learning_rate": 2.2636043062334295e-06, "loss": 0.3224, "step": 5988 }, { "epoch": 2.147860387281855, "grad_norm": 0.29922741651535034, "learning_rate": 2.261857928164263e-06, "loss": 0.2635, "step": 5989 }, { "epoch": 2.1482189815921586, "grad_norm": 0.34937945008277893, "learning_rate": 2.260112027087963e-06, "loss": 0.3498, "step": 5990 }, { "epoch": 2.148577575902462, "grad_norm": 0.30546045303344727, "learning_rate": 2.2583666033086714e-06, "loss": 0.2849, "step": 5991 }, { "epoch": 2.148936170212766, "grad_norm": 0.3164132535457611, "learning_rate": 2.2566216571304497e-06, "loss": 0.2998, "step": 5992 }, { "epoch": 2.1492947645230696, "grad_norm": 0.31450527906417847, "learning_rate": 2.2548771888572664e-06, "loss": 0.2829, "step": 5993 }, { "epoch": 2.149653358833373, "grad_norm": 0.3078412115573883, "learning_rate": 2.253133198793021e-06, "loss": 0.2622, "step": 5994 }, { "epoch": 2.150011953143677, "grad_norm": 0.3170449435710907, "learning_rate": 2.2513896872415163e-06, "loss": 0.285, "step": 5995 }, { "epoch": 2.1503705474539805, "grad_norm": 0.3096064627170563, "learning_rate": 2.24964665450648e-06, "loss": 0.2865, "step": 5996 }, { "epoch": 2.150729141764284, "grad_norm": 0.3347029387950897, "learning_rate": 2.247904100891555e-06, "loss": 0.3381, "step": 5997 }, { "epoch": 2.1510877360745875, "grad_norm": 0.30757591128349304, "learning_rate": 2.246162026700296e-06, "loss": 0.2932, "step": 5998 }, { "epoch": 2.1514463303848914, "grad_norm": 0.31733888387680054, "learning_rate": 2.2444204322361795e-06, "loss": 0.296, "step": 5999 }, { "epoch": 2.151804924695195, "grad_norm": 0.3611571192741394, "learning_rate": 2.2426793178025967e-06, "loss": 0.3194, "step": 6000 }, { "epoch": 2.1521635190054984, "grad_norm": 0.2855796813964844, "learning_rate": 2.240938683702857e-06, "loss": 0.2661, "step": 6001 }, { "epoch": 2.152522113315802, "grad_norm": 0.3174881935119629, "learning_rate": 2.2391985302401813e-06, "loss": 0.3133, "step": 6002 }, { "epoch": 2.152880707626106, "grad_norm": 0.2980683147907257, "learning_rate": 2.237458857717711e-06, "loss": 0.2749, "step": 6003 }, { "epoch": 2.1532393019364093, "grad_norm": 0.32550790905952454, "learning_rate": 2.235719666438504e-06, "loss": 0.2956, "step": 6004 }, { "epoch": 2.153597896246713, "grad_norm": 0.3352324962615967, "learning_rate": 2.2339809567055277e-06, "loss": 0.2769, "step": 6005 }, { "epoch": 2.1539564905570163, "grad_norm": 0.32996055483818054, "learning_rate": 2.2322427288216774e-06, "loss": 0.2898, "step": 6006 }, { "epoch": 2.1543150848673203, "grad_norm": 0.3281393051147461, "learning_rate": 2.2305049830897533e-06, "loss": 0.3195, "step": 6007 }, { "epoch": 2.1546736791776238, "grad_norm": 0.3161008358001709, "learning_rate": 2.228767719812478e-06, "loss": 0.2726, "step": 6008 }, { "epoch": 2.1550322734879273, "grad_norm": 0.31616640090942383, "learning_rate": 2.2270309392924887e-06, "loss": 0.3186, "step": 6009 }, { "epoch": 2.1553908677982307, "grad_norm": 0.34179067611694336, "learning_rate": 2.2252946418323357e-06, "loss": 0.3065, "step": 6010 }, { "epoch": 2.1557494621085347, "grad_norm": 0.33308103680610657, "learning_rate": 2.223558827734489e-06, "loss": 0.3037, "step": 6011 }, { "epoch": 2.156108056418838, "grad_norm": 0.30556392669677734, "learning_rate": 2.2218234973013316e-06, "loss": 0.2859, "step": 6012 }, { "epoch": 2.1564666507291417, "grad_norm": 0.29639896750450134, "learning_rate": 2.220088650835165e-06, "loss": 0.2882, "step": 6013 }, { "epoch": 2.156825245039445, "grad_norm": 0.29716572165489197, "learning_rate": 2.218354288638206e-06, "loss": 0.2992, "step": 6014 }, { "epoch": 2.157183839349749, "grad_norm": 0.3337743580341339, "learning_rate": 2.2166204110125815e-06, "loss": 0.2967, "step": 6015 }, { "epoch": 2.1575424336600526, "grad_norm": 0.3314521014690399, "learning_rate": 2.2148870182603422e-06, "loss": 0.2838, "step": 6016 }, { "epoch": 2.157901027970356, "grad_norm": 0.3371826410293579, "learning_rate": 2.2131541106834485e-06, "loss": 0.3239, "step": 6017 }, { "epoch": 2.15825962228066, "grad_norm": 0.29800546169281006, "learning_rate": 2.2114216885837815e-06, "loss": 0.2701, "step": 6018 }, { "epoch": 2.1586182165909635, "grad_norm": 0.2960995137691498, "learning_rate": 2.2096897522631294e-06, "loss": 0.2842, "step": 6019 }, { "epoch": 2.158976810901267, "grad_norm": 0.30444371700286865, "learning_rate": 2.207958302023204e-06, "loss": 0.2765, "step": 6020 }, { "epoch": 2.1593354052115705, "grad_norm": 0.32683515548706055, "learning_rate": 2.2062273381656314e-06, "loss": 0.2965, "step": 6021 }, { "epoch": 2.159693999521874, "grad_norm": 0.3210570812225342, "learning_rate": 2.2044968609919445e-06, "loss": 0.303, "step": 6022 }, { "epoch": 2.160052593832178, "grad_norm": 0.300322026014328, "learning_rate": 2.202766870803605e-06, "loss": 0.25, "step": 6023 }, { "epoch": 2.1604111881424815, "grad_norm": 0.3319312632083893, "learning_rate": 2.2010373679019773e-06, "loss": 0.3215, "step": 6024 }, { "epoch": 2.160769782452785, "grad_norm": 0.30694255232810974, "learning_rate": 2.1993083525883483e-06, "loss": 0.2887, "step": 6025 }, { "epoch": 2.161128376763089, "grad_norm": 0.3166017532348633, "learning_rate": 2.1975798251639203e-06, "loss": 0.3056, "step": 6026 }, { "epoch": 2.1614869710733924, "grad_norm": 0.46883484721183777, "learning_rate": 2.195851785929803e-06, "loss": 0.3146, "step": 6027 }, { "epoch": 2.161845565383696, "grad_norm": 0.3323955833911896, "learning_rate": 2.194124235187029e-06, "loss": 0.2815, "step": 6028 }, { "epoch": 2.1622041596939994, "grad_norm": 0.31737783551216125, "learning_rate": 2.192397173236542e-06, "loss": 0.282, "step": 6029 }, { "epoch": 2.1625627540043033, "grad_norm": 0.3097688853740692, "learning_rate": 2.1906706003792053e-06, "loss": 0.2709, "step": 6030 }, { "epoch": 2.162921348314607, "grad_norm": 0.33575719594955444, "learning_rate": 2.1889445169157888e-06, "loss": 0.3227, "step": 6031 }, { "epoch": 2.1632799426249103, "grad_norm": 0.336831659078598, "learning_rate": 2.187218923146982e-06, "loss": 0.315, "step": 6032 }, { "epoch": 2.163638536935214, "grad_norm": 0.3096773028373718, "learning_rate": 2.1854938193733905e-06, "loss": 0.2887, "step": 6033 }, { "epoch": 2.1639971312455177, "grad_norm": 0.3305068612098694, "learning_rate": 2.1837692058955325e-06, "loss": 0.3284, "step": 6034 }, { "epoch": 2.1643557255558212, "grad_norm": 0.3030721843242645, "learning_rate": 2.1820450830138416e-06, "loss": 0.2597, "step": 6035 }, { "epoch": 2.1647143198661247, "grad_norm": 0.3170962333679199, "learning_rate": 2.180321451028663e-06, "loss": 0.2827, "step": 6036 }, { "epoch": 2.165072914176428, "grad_norm": 0.31940293312072754, "learning_rate": 2.17859831024026e-06, "loss": 0.2837, "step": 6037 }, { "epoch": 2.165431508486732, "grad_norm": 0.32109251618385315, "learning_rate": 2.1768756609488112e-06, "loss": 0.2891, "step": 6038 }, { "epoch": 2.1657901027970357, "grad_norm": 0.3568204343318939, "learning_rate": 2.1751535034544014e-06, "loss": 0.3241, "step": 6039 }, { "epoch": 2.166148697107339, "grad_norm": 0.32173654437065125, "learning_rate": 2.173431838057044e-06, "loss": 0.2986, "step": 6040 }, { "epoch": 2.1665072914176426, "grad_norm": 0.3290850818157196, "learning_rate": 2.171710665056653e-06, "loss": 0.2971, "step": 6041 }, { "epoch": 2.1668658857279466, "grad_norm": 0.29852303862571716, "learning_rate": 2.1699899847530628e-06, "loss": 0.2767, "step": 6042 }, { "epoch": 2.16722448003825, "grad_norm": 0.32558587193489075, "learning_rate": 2.168269797446024e-06, "loss": 0.2975, "step": 6043 }, { "epoch": 2.1675830743485536, "grad_norm": 0.3745686709880829, "learning_rate": 2.1665501034351938e-06, "loss": 0.336, "step": 6044 }, { "epoch": 2.167941668658857, "grad_norm": 0.2905016243457794, "learning_rate": 2.164830903020152e-06, "loss": 0.2819, "step": 6045 }, { "epoch": 2.168300262969161, "grad_norm": 0.3060871958732605, "learning_rate": 2.163112196500387e-06, "loss": 0.3004, "step": 6046 }, { "epoch": 2.1686588572794645, "grad_norm": 0.3002018332481384, "learning_rate": 2.1613939841753057e-06, "loss": 0.2883, "step": 6047 }, { "epoch": 2.169017451589768, "grad_norm": 0.3096209168434143, "learning_rate": 2.159676266344222e-06, "loss": 0.2729, "step": 6048 }, { "epoch": 2.169376045900072, "grad_norm": 0.31303295493125916, "learning_rate": 2.157959043306369e-06, "loss": 0.2989, "step": 6049 }, { "epoch": 2.1697346402103754, "grad_norm": 0.33470967411994934, "learning_rate": 2.156242315360893e-06, "loss": 0.3063, "step": 6050 }, { "epoch": 2.170093234520679, "grad_norm": 0.3077617287635803, "learning_rate": 2.1545260828068533e-06, "loss": 0.2746, "step": 6051 }, { "epoch": 2.1704518288309824, "grad_norm": 0.29791679978370667, "learning_rate": 2.1528103459432242e-06, "loss": 0.2698, "step": 6052 }, { "epoch": 2.1708104231412864, "grad_norm": 0.3209863305091858, "learning_rate": 2.1510951050688896e-06, "loss": 0.338, "step": 6053 }, { "epoch": 2.17116901745159, "grad_norm": 0.3184432089328766, "learning_rate": 2.14938036048265e-06, "loss": 0.2714, "step": 6054 }, { "epoch": 2.1715276117618934, "grad_norm": 0.3112766146659851, "learning_rate": 2.147666112483223e-06, "loss": 0.294, "step": 6055 }, { "epoch": 2.171886206072197, "grad_norm": 0.3052445948123932, "learning_rate": 2.1459523613692303e-06, "loss": 0.2823, "step": 6056 }, { "epoch": 2.172244800382501, "grad_norm": 0.3259936571121216, "learning_rate": 2.144239107439219e-06, "loss": 0.3066, "step": 6057 }, { "epoch": 2.1726033946928043, "grad_norm": 0.33861857652664185, "learning_rate": 2.142526350991638e-06, "loss": 0.3259, "step": 6058 }, { "epoch": 2.1729619890031078, "grad_norm": 0.2928573489189148, "learning_rate": 2.1408140923248577e-06, "loss": 0.2582, "step": 6059 }, { "epoch": 2.1733205833134113, "grad_norm": 0.2965698540210724, "learning_rate": 2.139102331737157e-06, "loss": 0.2794, "step": 6060 }, { "epoch": 2.173679177623715, "grad_norm": 0.34072503447532654, "learning_rate": 2.137391069526731e-06, "loss": 0.3457, "step": 6061 }, { "epoch": 2.1740377719340187, "grad_norm": 0.3022668659687042, "learning_rate": 2.1356803059916864e-06, "loss": 0.2911, "step": 6062 }, { "epoch": 2.174396366244322, "grad_norm": 0.34142398834228516, "learning_rate": 2.133970041430044e-06, "loss": 0.3064, "step": 6063 }, { "epoch": 2.1747549605546257, "grad_norm": 0.3010367751121521, "learning_rate": 2.1322602761397395e-06, "loss": 0.2916, "step": 6064 }, { "epoch": 2.1751135548649296, "grad_norm": 0.3363165259361267, "learning_rate": 2.1305510104186146e-06, "loss": 0.3102, "step": 6065 }, { "epoch": 2.175472149175233, "grad_norm": 0.32372409105300903, "learning_rate": 2.1288422445644308e-06, "loss": 0.3058, "step": 6066 }, { "epoch": 2.1758307434855366, "grad_norm": 0.3221772015094757, "learning_rate": 2.1271339788748624e-06, "loss": 0.2962, "step": 6067 }, { "epoch": 2.17618933779584, "grad_norm": 0.3022993206977844, "learning_rate": 2.1254262136474895e-06, "loss": 0.2909, "step": 6068 }, { "epoch": 2.176547932106144, "grad_norm": 0.31987616419792175, "learning_rate": 2.1237189491798176e-06, "loss": 0.2998, "step": 6069 }, { "epoch": 2.1769065264164476, "grad_norm": 0.3045942783355713, "learning_rate": 2.122012185769251e-06, "loss": 0.2835, "step": 6070 }, { "epoch": 2.177265120726751, "grad_norm": 0.34039703011512756, "learning_rate": 2.1203059237131156e-06, "loss": 0.3316, "step": 6071 }, { "epoch": 2.1776237150370545, "grad_norm": 0.2963671386241913, "learning_rate": 2.1186001633086497e-06, "loss": 0.2373, "step": 6072 }, { "epoch": 2.1779823093473585, "grad_norm": 0.3227580487728119, "learning_rate": 2.1168949048529962e-06, "loss": 0.2995, "step": 6073 }, { "epoch": 2.178340903657662, "grad_norm": 0.30306071043014526, "learning_rate": 2.1151901486432236e-06, "loss": 0.275, "step": 6074 }, { "epoch": 2.1786994979679655, "grad_norm": 0.3210992217063904, "learning_rate": 2.1134858949763004e-06, "loss": 0.3057, "step": 6075 }, { "epoch": 2.179058092278269, "grad_norm": 0.322057843208313, "learning_rate": 2.1117821441491166e-06, "loss": 0.3117, "step": 6076 }, { "epoch": 2.179416686588573, "grad_norm": 0.3127935230731964, "learning_rate": 2.110078896458467e-06, "loss": 0.2925, "step": 6077 }, { "epoch": 2.1797752808988764, "grad_norm": 0.3232097327709198, "learning_rate": 2.1083761522010648e-06, "loss": 0.2927, "step": 6078 }, { "epoch": 2.18013387520918, "grad_norm": 0.31388401985168457, "learning_rate": 2.1066739116735335e-06, "loss": 0.2745, "step": 6079 }, { "epoch": 2.180492469519484, "grad_norm": 0.3104642331600189, "learning_rate": 2.104972175172408e-06, "loss": 0.2973, "step": 6080 }, { "epoch": 2.1808510638297873, "grad_norm": 0.3282613754272461, "learning_rate": 2.103270942994139e-06, "loss": 0.3122, "step": 6081 }, { "epoch": 2.181209658140091, "grad_norm": 0.3091844320297241, "learning_rate": 2.101570215435082e-06, "loss": 0.2896, "step": 6082 }, { "epoch": 2.1815682524503943, "grad_norm": 0.33470121026039124, "learning_rate": 2.0998699927915108e-06, "loss": 0.337, "step": 6083 }, { "epoch": 2.1819268467606983, "grad_norm": 0.32929113507270813, "learning_rate": 2.0981702753596116e-06, "loss": 0.3173, "step": 6084 }, { "epoch": 2.1822854410710018, "grad_norm": 0.30445536971092224, "learning_rate": 2.0964710634354744e-06, "loss": 0.2604, "step": 6085 }, { "epoch": 2.1826440353813052, "grad_norm": 0.3283555209636688, "learning_rate": 2.0947723573151156e-06, "loss": 0.3034, "step": 6086 }, { "epoch": 2.1830026296916087, "grad_norm": 0.317999929189682, "learning_rate": 2.093074157294449e-06, "loss": 0.2533, "step": 6087 }, { "epoch": 2.1833612240019127, "grad_norm": 0.2971544861793518, "learning_rate": 2.0913764636693096e-06, "loss": 0.3168, "step": 6088 }, { "epoch": 2.183719818312216, "grad_norm": 0.3238101601600647, "learning_rate": 2.089679276735438e-06, "loss": 0.3015, "step": 6089 }, { "epoch": 2.1840784126225197, "grad_norm": 0.32951149344444275, "learning_rate": 2.087982596788491e-06, "loss": 0.2926, "step": 6090 }, { "epoch": 2.184437006932823, "grad_norm": 0.3406866490840912, "learning_rate": 2.086286424124035e-06, "loss": 0.2886, "step": 6091 }, { "epoch": 2.184795601243127, "grad_norm": 0.33764874935150146, "learning_rate": 2.0845907590375493e-06, "loss": 0.2893, "step": 6092 }, { "epoch": 2.1851541955534306, "grad_norm": 0.3123445510864258, "learning_rate": 2.0828956018244252e-06, "loss": 0.2964, "step": 6093 }, { "epoch": 2.185512789863734, "grad_norm": 0.3188720643520355, "learning_rate": 2.0812009527799616e-06, "loss": 0.312, "step": 6094 }, { "epoch": 2.1858713841740376, "grad_norm": 0.30764827132225037, "learning_rate": 2.0795068121993724e-06, "loss": 0.2682, "step": 6095 }, { "epoch": 2.1862299784843415, "grad_norm": 0.3153703808784485, "learning_rate": 2.077813180377783e-06, "loss": 0.2976, "step": 6096 }, { "epoch": 2.186588572794645, "grad_norm": 0.33872294425964355, "learning_rate": 2.0761200576102285e-06, "loss": 0.2786, "step": 6097 }, { "epoch": 2.1869471671049485, "grad_norm": 0.32278022170066833, "learning_rate": 2.0744274441916594e-06, "loss": 0.2983, "step": 6098 }, { "epoch": 2.187305761415252, "grad_norm": 0.2919842004776001, "learning_rate": 2.072735340416929e-06, "loss": 0.282, "step": 6099 }, { "epoch": 2.187664355725556, "grad_norm": 0.3147578835487366, "learning_rate": 2.07104374658081e-06, "loss": 0.2985, "step": 6100 }, { "epoch": 2.1880229500358594, "grad_norm": 0.31663355231285095, "learning_rate": 2.0693526629779843e-06, "loss": 0.275, "step": 6101 }, { "epoch": 2.188381544346163, "grad_norm": 0.32375839352607727, "learning_rate": 2.0676620899030393e-06, "loss": 0.29, "step": 6102 }, { "epoch": 2.188740138656467, "grad_norm": 0.31535574793815613, "learning_rate": 2.065972027650484e-06, "loss": 0.292, "step": 6103 }, { "epoch": 2.1890987329667704, "grad_norm": 0.320071816444397, "learning_rate": 2.0642824765147286e-06, "loss": 0.2935, "step": 6104 }, { "epoch": 2.189457327277074, "grad_norm": 0.31112828850746155, "learning_rate": 2.062593436790101e-06, "loss": 0.2939, "step": 6105 }, { "epoch": 2.1898159215873774, "grad_norm": 0.33126622438430786, "learning_rate": 2.060904908770834e-06, "loss": 0.3239, "step": 6106 }, { "epoch": 2.190174515897681, "grad_norm": 0.29946932196617126, "learning_rate": 2.059216892751075e-06, "loss": 0.291, "step": 6107 }, { "epoch": 2.190533110207985, "grad_norm": 0.3238198459148407, "learning_rate": 2.0575293890248837e-06, "loss": 0.2721, "step": 6108 }, { "epoch": 2.1908917045182883, "grad_norm": 0.33436161279678345, "learning_rate": 2.0558423978862267e-06, "loss": 0.3121, "step": 6109 }, { "epoch": 2.191250298828592, "grad_norm": 0.320168137550354, "learning_rate": 2.0541559196289864e-06, "loss": 0.2991, "step": 6110 }, { "epoch": 2.1916088931388957, "grad_norm": 0.3258512616157532, "learning_rate": 2.0524699545469473e-06, "loss": 0.2895, "step": 6111 }, { "epoch": 2.1919674874491992, "grad_norm": 0.3232389986515045, "learning_rate": 2.0507845029338126e-06, "loss": 0.2995, "step": 6112 }, { "epoch": 2.1923260817595027, "grad_norm": 0.3191869258880615, "learning_rate": 2.049099565083193e-06, "loss": 0.2601, "step": 6113 }, { "epoch": 2.192684676069806, "grad_norm": 0.3459968864917755, "learning_rate": 2.0474151412886102e-06, "loss": 0.2996, "step": 6114 }, { "epoch": 2.19304327038011, "grad_norm": 0.29993805289268494, "learning_rate": 2.0457312318434975e-06, "loss": 0.2819, "step": 6115 }, { "epoch": 2.1934018646904136, "grad_norm": 0.2981569170951843, "learning_rate": 2.0440478370411938e-06, "loss": 0.3124, "step": 6116 }, { "epoch": 2.193760459000717, "grad_norm": 0.29859498143196106, "learning_rate": 2.042364957174955e-06, "loss": 0.2918, "step": 6117 }, { "epoch": 2.1941190533110206, "grad_norm": 0.2882567048072815, "learning_rate": 2.04068259253794e-06, "loss": 0.2769, "step": 6118 }, { "epoch": 2.1944776476213246, "grad_norm": 0.32280439138412476, "learning_rate": 2.0390007434232252e-06, "loss": 0.3098, "step": 6119 }, { "epoch": 2.194836241931628, "grad_norm": 0.3625796437263489, "learning_rate": 2.0373194101237924e-06, "loss": 0.3262, "step": 6120 }, { "epoch": 2.1951948362419316, "grad_norm": 0.31076717376708984, "learning_rate": 2.0356385929325365e-06, "loss": 0.2635, "step": 6121 }, { "epoch": 2.195553430552235, "grad_norm": 0.28500980138778687, "learning_rate": 2.033958292142262e-06, "loss": 0.3069, "step": 6122 }, { "epoch": 2.195912024862539, "grad_norm": 0.2923825681209564, "learning_rate": 2.0322785080456788e-06, "loss": 0.2697, "step": 6123 }, { "epoch": 2.1962706191728425, "grad_norm": 0.34473124146461487, "learning_rate": 2.0305992409354127e-06, "loss": 0.3231, "step": 6124 }, { "epoch": 2.196629213483146, "grad_norm": 0.32925572991371155, "learning_rate": 2.028920491103997e-06, "loss": 0.3, "step": 6125 }, { "epoch": 2.1969878077934495, "grad_norm": 0.3106662333011627, "learning_rate": 2.0272422588438757e-06, "loss": 0.2533, "step": 6126 }, { "epoch": 2.1973464021037534, "grad_norm": 0.31884056329727173, "learning_rate": 2.025564544447403e-06, "loss": 0.3002, "step": 6127 }, { "epoch": 2.197704996414057, "grad_norm": 0.3174198269844055, "learning_rate": 2.0238873482068396e-06, "loss": 0.2852, "step": 6128 }, { "epoch": 2.1980635907243604, "grad_norm": 0.3231554329395294, "learning_rate": 2.0222106704143607e-06, "loss": 0.3205, "step": 6129 }, { "epoch": 2.198422185034664, "grad_norm": 0.30792132019996643, "learning_rate": 2.020534511362044e-06, "loss": 0.2851, "step": 6130 }, { "epoch": 2.198780779344968, "grad_norm": 0.3037363290786743, "learning_rate": 2.0188588713418865e-06, "loss": 0.2734, "step": 6131 }, { "epoch": 2.1991393736552713, "grad_norm": 0.33911818265914917, "learning_rate": 2.0171837506457903e-06, "loss": 0.3218, "step": 6132 }, { "epoch": 2.199497967965575, "grad_norm": 0.3098434805870056, "learning_rate": 2.015509149565563e-06, "loss": 0.2911, "step": 6133 }, { "epoch": 2.199856562275879, "grad_norm": 0.31253960728645325, "learning_rate": 2.013835068392929e-06, "loss": 0.3276, "step": 6134 }, { "epoch": 2.2002151565861823, "grad_norm": 0.28038471937179565, "learning_rate": 2.012161507419515e-06, "loss": 0.2512, "step": 6135 }, { "epoch": 2.2005737508964858, "grad_norm": 0.2947709560394287, "learning_rate": 2.0104884669368624e-06, "loss": 0.3093, "step": 6136 }, { "epoch": 2.2009323452067893, "grad_norm": 0.3132513165473938, "learning_rate": 2.0088159472364193e-06, "loss": 0.2952, "step": 6137 }, { "epoch": 2.2012909395170928, "grad_norm": 0.32297444343566895, "learning_rate": 2.007143948609544e-06, "loss": 0.2821, "step": 6138 }, { "epoch": 2.2016495338273967, "grad_norm": 0.34122419357299805, "learning_rate": 2.0054724713475066e-06, "loss": 0.3263, "step": 6139 }, { "epoch": 2.2020081281377, "grad_norm": 0.3464725911617279, "learning_rate": 2.0038015157414794e-06, "loss": 0.2917, "step": 6140 }, { "epoch": 2.2023667224480037, "grad_norm": 0.32940390706062317, "learning_rate": 2.002131082082549e-06, "loss": 0.2839, "step": 6141 }, { "epoch": 2.2027253167583076, "grad_norm": 0.3079092800617218, "learning_rate": 2.0004611706617116e-06, "loss": 0.2878, "step": 6142 }, { "epoch": 2.203083911068611, "grad_norm": 0.31660473346710205, "learning_rate": 1.9987917817698703e-06, "loss": 0.2944, "step": 6143 }, { "epoch": 2.2034425053789146, "grad_norm": 0.3096756041049957, "learning_rate": 1.99712291569784e-06, "loss": 0.2952, "step": 6144 }, { "epoch": 2.203801099689218, "grad_norm": 0.3235984742641449, "learning_rate": 1.9954545727363378e-06, "loss": 0.3071, "step": 6145 }, { "epoch": 2.204159693999522, "grad_norm": 0.34706616401672363, "learning_rate": 1.993786753175999e-06, "loss": 0.3155, "step": 6146 }, { "epoch": 2.2045182883098255, "grad_norm": 0.2992976903915405, "learning_rate": 1.992119457307357e-06, "loss": 0.2837, "step": 6147 }, { "epoch": 2.204876882620129, "grad_norm": 0.322902649641037, "learning_rate": 1.9904526854208674e-06, "loss": 0.2743, "step": 6148 }, { "epoch": 2.2052354769304325, "grad_norm": 0.30864495038986206, "learning_rate": 1.988786437806881e-06, "loss": 0.2946, "step": 6149 }, { "epoch": 2.2055940712407365, "grad_norm": 0.318514883518219, "learning_rate": 1.987120714755666e-06, "loss": 0.2906, "step": 6150 }, { "epoch": 2.20595266555104, "grad_norm": 0.2983616292476654, "learning_rate": 1.9854555165573975e-06, "loss": 0.3067, "step": 6151 }, { "epoch": 2.2063112598613435, "grad_norm": 0.3129550814628601, "learning_rate": 1.9837908435021553e-06, "loss": 0.2933, "step": 6152 }, { "epoch": 2.206669854171647, "grad_norm": 0.29595792293548584, "learning_rate": 1.9821266958799317e-06, "loss": 0.2969, "step": 6153 }, { "epoch": 2.207028448481951, "grad_norm": 0.2980157732963562, "learning_rate": 1.9804630739806267e-06, "loss": 0.2679, "step": 6154 }, { "epoch": 2.2073870427922544, "grad_norm": 0.3023778200149536, "learning_rate": 1.978799978094049e-06, "loss": 0.2929, "step": 6155 }, { "epoch": 2.207745637102558, "grad_norm": 0.3309318423271179, "learning_rate": 1.9771374085099166e-06, "loss": 0.3003, "step": 6156 }, { "epoch": 2.2081042314128614, "grad_norm": 0.3216823935508728, "learning_rate": 1.97547536551785e-06, "loss": 0.3038, "step": 6157 }, { "epoch": 2.2084628257231653, "grad_norm": 0.3069339394569397, "learning_rate": 1.9738138494073866e-06, "loss": 0.2731, "step": 6158 }, { "epoch": 2.208821420033469, "grad_norm": 0.3389713168144226, "learning_rate": 1.972152860467962e-06, "loss": 0.2933, "step": 6159 }, { "epoch": 2.2091800143437723, "grad_norm": 0.300240159034729, "learning_rate": 1.9704923989889315e-06, "loss": 0.2687, "step": 6160 }, { "epoch": 2.209538608654076, "grad_norm": 0.33478668332099915, "learning_rate": 1.9688324652595513e-06, "loss": 0.2841, "step": 6161 }, { "epoch": 2.2098972029643797, "grad_norm": 0.33061522245407104, "learning_rate": 1.967173059568985e-06, "loss": 0.3065, "step": 6162 }, { "epoch": 2.2102557972746832, "grad_norm": 0.3221679925918579, "learning_rate": 1.965514182206309e-06, "loss": 0.2931, "step": 6163 }, { "epoch": 2.2106143915849867, "grad_norm": 0.32134324312210083, "learning_rate": 1.963855833460501e-06, "loss": 0.3037, "step": 6164 }, { "epoch": 2.2109729858952907, "grad_norm": 0.309937059879303, "learning_rate": 1.9621980136204537e-06, "loss": 0.2688, "step": 6165 }, { "epoch": 2.211331580205594, "grad_norm": 0.3145167827606201, "learning_rate": 1.9605407229749635e-06, "loss": 0.2826, "step": 6166 }, { "epoch": 2.2116901745158977, "grad_norm": 0.3318707346916199, "learning_rate": 1.9588839618127355e-06, "loss": 0.3275, "step": 6167 }, { "epoch": 2.212048768826201, "grad_norm": 0.2941336929798126, "learning_rate": 1.957227730422384e-06, "loss": 0.2611, "step": 6168 }, { "epoch": 2.212407363136505, "grad_norm": 0.32603517174720764, "learning_rate": 1.955572029092427e-06, "loss": 0.3131, "step": 6169 }, { "epoch": 2.2127659574468086, "grad_norm": 0.32555264234542847, "learning_rate": 1.953916858111294e-06, "loss": 0.3042, "step": 6170 }, { "epoch": 2.213124551757112, "grad_norm": 0.3185387849807739, "learning_rate": 1.9522622177673225e-06, "loss": 0.2912, "step": 6171 }, { "epoch": 2.2134831460674156, "grad_norm": 0.308857262134552, "learning_rate": 1.9506081083487533e-06, "loss": 0.2921, "step": 6172 }, { "epoch": 2.2138417403777195, "grad_norm": 0.3348735570907593, "learning_rate": 1.9489545301437414e-06, "loss": 0.2785, "step": 6173 }, { "epoch": 2.214200334688023, "grad_norm": 0.3309650123119354, "learning_rate": 1.94730148344034e-06, "loss": 0.2959, "step": 6174 }, { "epoch": 2.2145589289983265, "grad_norm": 0.32365092635154724, "learning_rate": 1.9456489685265206e-06, "loss": 0.2828, "step": 6175 }, { "epoch": 2.21491752330863, "grad_norm": 0.3234417736530304, "learning_rate": 1.9439969856901496e-06, "loss": 0.3029, "step": 6176 }, { "epoch": 2.215276117618934, "grad_norm": 0.3008962869644165, "learning_rate": 1.9423455352190145e-06, "loss": 0.2927, "step": 6177 }, { "epoch": 2.2156347119292374, "grad_norm": 0.3044247627258301, "learning_rate": 1.940694617400799e-06, "loss": 0.2853, "step": 6178 }, { "epoch": 2.215993306239541, "grad_norm": 0.32300636172294617, "learning_rate": 1.9390442325230987e-06, "loss": 0.288, "step": 6179 }, { "epoch": 2.2163519005498444, "grad_norm": 0.31606876850128174, "learning_rate": 1.937394380873418e-06, "loss": 0.2879, "step": 6180 }, { "epoch": 2.2167104948601484, "grad_norm": 0.33935248851776123, "learning_rate": 1.9357450627391634e-06, "loss": 0.2995, "step": 6181 }, { "epoch": 2.217069089170452, "grad_norm": 0.316118985414505, "learning_rate": 1.9340962784076516e-06, "loss": 0.2772, "step": 6182 }, { "epoch": 2.2174276834807554, "grad_norm": 0.3019518256187439, "learning_rate": 1.9324480281661066e-06, "loss": 0.3078, "step": 6183 }, { "epoch": 2.217786277791059, "grad_norm": 0.30358630418777466, "learning_rate": 1.9308003123016594e-06, "loss": 0.274, "step": 6184 }, { "epoch": 2.218144872101363, "grad_norm": 0.32596534490585327, "learning_rate": 1.929153131101348e-06, "loss": 0.2946, "step": 6185 }, { "epoch": 2.2185034664116663, "grad_norm": 0.31538307666778564, "learning_rate": 1.927506484852113e-06, "loss": 0.2992, "step": 6186 }, { "epoch": 2.21886206072197, "grad_norm": 0.32697197794914246, "learning_rate": 1.925860373840808e-06, "loss": 0.2877, "step": 6187 }, { "epoch": 2.2192206550322733, "grad_norm": 0.30451342463493347, "learning_rate": 1.9242147983541896e-06, "loss": 0.2844, "step": 6188 }, { "epoch": 2.219579249342577, "grad_norm": 0.31850045919418335, "learning_rate": 1.9225697586789234e-06, "loss": 0.2962, "step": 6189 }, { "epoch": 2.2199378436528807, "grad_norm": 0.31957584619522095, "learning_rate": 1.9209252551015808e-06, "loss": 0.3034, "step": 6190 }, { "epoch": 2.220296437963184, "grad_norm": 0.3242148756980896, "learning_rate": 1.9192812879086364e-06, "loss": 0.2902, "step": 6191 }, { "epoch": 2.2206550322734877, "grad_norm": 0.32979896664619446, "learning_rate": 1.9176378573864783e-06, "loss": 0.2856, "step": 6192 }, { "epoch": 2.2210136265837916, "grad_norm": 0.32308781147003174, "learning_rate": 1.915994963821392e-06, "loss": 0.3234, "step": 6193 }, { "epoch": 2.221372220894095, "grad_norm": 0.30692052841186523, "learning_rate": 1.9143526074995813e-06, "loss": 0.2786, "step": 6194 }, { "epoch": 2.2217308152043986, "grad_norm": 0.32566383481025696, "learning_rate": 1.9127107887071443e-06, "loss": 0.2837, "step": 6195 }, { "epoch": 2.2220894095147026, "grad_norm": 0.3443588614463806, "learning_rate": 1.9110695077300935e-06, "loss": 0.2825, "step": 6196 }, { "epoch": 2.222448003825006, "grad_norm": 0.3432336449623108, "learning_rate": 1.909428764854346e-06, "loss": 0.3011, "step": 6197 }, { "epoch": 2.2228065981353096, "grad_norm": 0.32846730947494507, "learning_rate": 1.907788560365721e-06, "loss": 0.3259, "step": 6198 }, { "epoch": 2.223165192445613, "grad_norm": 0.30460038781166077, "learning_rate": 1.9061488945499497e-06, "loss": 0.298, "step": 6199 }, { "epoch": 2.223523786755917, "grad_norm": 0.33525410294532776, "learning_rate": 1.904509767692666e-06, "loss": 0.3489, "step": 6200 }, { "epoch": 2.2238823810662205, "grad_norm": 0.304820716381073, "learning_rate": 1.902871180079412e-06, "loss": 0.2635, "step": 6201 }, { "epoch": 2.224240975376524, "grad_norm": 0.30473312735557556, "learning_rate": 1.9012331319956357e-06, "loss": 0.2687, "step": 6202 }, { "epoch": 2.2245995696868275, "grad_norm": 0.3180418908596039, "learning_rate": 1.8995956237266866e-06, "loss": 0.2933, "step": 6203 }, { "epoch": 2.2249581639971314, "grad_norm": 0.30918028950691223, "learning_rate": 1.8979586555578279e-06, "loss": 0.3017, "step": 6204 }, { "epoch": 2.225316758307435, "grad_norm": 0.29973503947257996, "learning_rate": 1.8963222277742182e-06, "loss": 0.2419, "step": 6205 }, { "epoch": 2.2256753526177384, "grad_norm": 0.328033983707428, "learning_rate": 1.8946863406609367e-06, "loss": 0.295, "step": 6206 }, { "epoch": 2.226033946928042, "grad_norm": 0.32248133420944214, "learning_rate": 1.893050994502954e-06, "loss": 0.2959, "step": 6207 }, { "epoch": 2.226392541238346, "grad_norm": 0.3144599497318268, "learning_rate": 1.891416189585154e-06, "loss": 0.2959, "step": 6208 }, { "epoch": 2.2267511355486493, "grad_norm": 0.3334738314151764, "learning_rate": 1.8897819261923273e-06, "loss": 0.3063, "step": 6209 }, { "epoch": 2.227109729858953, "grad_norm": 0.3131459355354309, "learning_rate": 1.8881482046091615e-06, "loss": 0.2864, "step": 6210 }, { "epoch": 2.2274683241692563, "grad_norm": 0.31014367938041687, "learning_rate": 1.8865150251202641e-06, "loss": 0.2914, "step": 6211 }, { "epoch": 2.2278269184795603, "grad_norm": 0.307700514793396, "learning_rate": 1.8848823880101335e-06, "loss": 0.2976, "step": 6212 }, { "epoch": 2.2281855127898638, "grad_norm": 0.28722453117370605, "learning_rate": 1.883250293563183e-06, "loss": 0.31, "step": 6213 }, { "epoch": 2.2285441071001673, "grad_norm": 0.34076499938964844, "learning_rate": 1.88161874206373e-06, "loss": 0.3282, "step": 6214 }, { "epoch": 2.2289027014104708, "grad_norm": 0.32471317052841187, "learning_rate": 1.879987733795992e-06, "loss": 0.281, "step": 6215 }, { "epoch": 2.2292612957207747, "grad_norm": 0.3260065019130707, "learning_rate": 1.8783572690440976e-06, "loss": 0.3094, "step": 6216 }, { "epoch": 2.229619890031078, "grad_norm": 0.29558393359184265, "learning_rate": 1.876727348092079e-06, "loss": 0.2741, "step": 6217 }, { "epoch": 2.2299784843413817, "grad_norm": 0.3373374342918396, "learning_rate": 1.8750979712238731e-06, "loss": 0.3002, "step": 6218 }, { "epoch": 2.2303370786516856, "grad_norm": 0.31168681383132935, "learning_rate": 1.873469138723325e-06, "loss": 0.2795, "step": 6219 }, { "epoch": 2.230695672961989, "grad_norm": 0.32654067873954773, "learning_rate": 1.8718408508741782e-06, "loss": 0.2987, "step": 6220 }, { "epoch": 2.2310542672722926, "grad_norm": 0.3197932839393616, "learning_rate": 1.8702131079600893e-06, "loss": 0.2805, "step": 6221 }, { "epoch": 2.231412861582596, "grad_norm": 0.3139921724796295, "learning_rate": 1.8685859102646109e-06, "loss": 0.3093, "step": 6222 }, { "epoch": 2.2317714558928996, "grad_norm": 0.30242547392845154, "learning_rate": 1.8669592580712125e-06, "loss": 0.2909, "step": 6223 }, { "epoch": 2.2321300502032035, "grad_norm": 0.29334205389022827, "learning_rate": 1.8653331516632573e-06, "loss": 0.2875, "step": 6224 }, { "epoch": 2.232488644513507, "grad_norm": 0.3348780572414398, "learning_rate": 1.8637075913240193e-06, "loss": 0.3166, "step": 6225 }, { "epoch": 2.2328472388238105, "grad_norm": 0.3207058012485504, "learning_rate": 1.862082577336678e-06, "loss": 0.2992, "step": 6226 }, { "epoch": 2.2332058331341145, "grad_norm": 0.3122115433216095, "learning_rate": 1.8604581099843116e-06, "loss": 0.2886, "step": 6227 }, { "epoch": 2.233564427444418, "grad_norm": 0.3047320544719696, "learning_rate": 1.858834189549913e-06, "loss": 0.2802, "step": 6228 }, { "epoch": 2.2339230217547215, "grad_norm": 0.32443690299987793, "learning_rate": 1.8572108163163698e-06, "loss": 0.2927, "step": 6229 }, { "epoch": 2.234281616065025, "grad_norm": 0.31613749265670776, "learning_rate": 1.8555879905664797e-06, "loss": 0.302, "step": 6230 }, { "epoch": 2.234640210375329, "grad_norm": 0.3121374547481537, "learning_rate": 1.8539657125829468e-06, "loss": 0.3003, "step": 6231 }, { "epoch": 2.2349988046856324, "grad_norm": 0.31465986371040344, "learning_rate": 1.852343982648373e-06, "loss": 0.299, "step": 6232 }, { "epoch": 2.235357398995936, "grad_norm": 0.3214639723300934, "learning_rate": 1.8507228010452698e-06, "loss": 0.2731, "step": 6233 }, { "epoch": 2.2357159933062394, "grad_norm": 0.31817129254341125, "learning_rate": 1.8491021680560529e-06, "loss": 0.2747, "step": 6234 }, { "epoch": 2.2360745876165433, "grad_norm": 0.31252482533454895, "learning_rate": 1.847482083963043e-06, "loss": 0.2785, "step": 6235 }, { "epoch": 2.236433181926847, "grad_norm": 0.30472591519355774, "learning_rate": 1.8458625490484604e-06, "loss": 0.2678, "step": 6236 }, { "epoch": 2.2367917762371503, "grad_norm": 0.3501386046409607, "learning_rate": 1.844243563594435e-06, "loss": 0.3079, "step": 6237 }, { "epoch": 2.237150370547454, "grad_norm": 0.28848132491111755, "learning_rate": 1.8426251278830004e-06, "loss": 0.2803, "step": 6238 }, { "epoch": 2.2375089648577577, "grad_norm": 0.31041666865348816, "learning_rate": 1.8410072421960884e-06, "loss": 0.2969, "step": 6239 }, { "epoch": 2.2378675591680612, "grad_norm": 0.33971309661865234, "learning_rate": 1.8393899068155463e-06, "loss": 0.3097, "step": 6240 }, { "epoch": 2.2382261534783647, "grad_norm": 0.3044355511665344, "learning_rate": 1.8377731220231144e-06, "loss": 0.2841, "step": 6241 }, { "epoch": 2.2385847477886682, "grad_norm": 0.3044903576374054, "learning_rate": 1.8361568881004422e-06, "loss": 0.2783, "step": 6242 }, { "epoch": 2.238943342098972, "grad_norm": 0.32850000262260437, "learning_rate": 1.8345412053290857e-06, "loss": 0.3465, "step": 6243 }, { "epoch": 2.2393019364092757, "grad_norm": 0.29169395565986633, "learning_rate": 1.8329260739904959e-06, "loss": 0.2796, "step": 6244 }, { "epoch": 2.239660530719579, "grad_norm": 0.2962048649787903, "learning_rate": 1.8313114943660403e-06, "loss": 0.2922, "step": 6245 }, { "epoch": 2.2400191250298827, "grad_norm": 0.3094688653945923, "learning_rate": 1.8296974667369794e-06, "loss": 0.3112, "step": 6246 }, { "epoch": 2.2403777193401866, "grad_norm": 0.2878476083278656, "learning_rate": 1.8280839913844833e-06, "loss": 0.2698, "step": 6247 }, { "epoch": 2.24073631365049, "grad_norm": 0.3095506727695465, "learning_rate": 1.8264710685896259e-06, "loss": 0.3058, "step": 6248 }, { "epoch": 2.2410949079607936, "grad_norm": 0.29540756344795227, "learning_rate": 1.8248586986333799e-06, "loss": 0.268, "step": 6249 }, { "epoch": 2.2414535022710975, "grad_norm": 0.33197835087776184, "learning_rate": 1.823246881796627e-06, "loss": 0.2961, "step": 6250 }, { "epoch": 2.241812096581401, "grad_norm": 0.3214990198612213, "learning_rate": 1.8216356183601513e-06, "loss": 0.2945, "step": 6251 }, { "epoch": 2.2421706908917045, "grad_norm": 0.33074063062667847, "learning_rate": 1.8200249086046406e-06, "loss": 0.3106, "step": 6252 }, { "epoch": 2.242529285202008, "grad_norm": 0.2960309684276581, "learning_rate": 1.8184147528106827e-06, "loss": 0.2594, "step": 6253 }, { "epoch": 2.242887879512312, "grad_norm": 0.3116932511329651, "learning_rate": 1.816805151258773e-06, "loss": 0.2788, "step": 6254 }, { "epoch": 2.2432464738226154, "grad_norm": 0.31739798188209534, "learning_rate": 1.8151961042293116e-06, "loss": 0.3214, "step": 6255 }, { "epoch": 2.243605068132919, "grad_norm": 0.32228755950927734, "learning_rate": 1.8135876120025937e-06, "loss": 0.2955, "step": 6256 }, { "epoch": 2.2439636624432224, "grad_norm": 0.3400677740573883, "learning_rate": 1.8119796748588304e-06, "loss": 0.3075, "step": 6257 }, { "epoch": 2.2443222567535264, "grad_norm": 0.3112064301967621, "learning_rate": 1.8103722930781249e-06, "loss": 0.2701, "step": 6258 }, { "epoch": 2.24468085106383, "grad_norm": 0.3246403634548187, "learning_rate": 1.808765466940489e-06, "loss": 0.2949, "step": 6259 }, { "epoch": 2.2450394453741334, "grad_norm": 0.32612162828445435, "learning_rate": 1.8071591967258395e-06, "loss": 0.3281, "step": 6260 }, { "epoch": 2.245398039684437, "grad_norm": 0.30188554525375366, "learning_rate": 1.8055534827139898e-06, "loss": 0.2652, "step": 6261 }, { "epoch": 2.245756633994741, "grad_norm": 0.31161564588546753, "learning_rate": 1.8039483251846617e-06, "loss": 0.2921, "step": 6262 }, { "epoch": 2.2461152283050443, "grad_norm": 0.3310050368309021, "learning_rate": 1.8023437244174785e-06, "loss": 0.3321, "step": 6263 }, { "epoch": 2.246473822615348, "grad_norm": 0.3111637234687805, "learning_rate": 1.800739680691969e-06, "loss": 0.2836, "step": 6264 }, { "epoch": 2.2468324169256513, "grad_norm": 0.32047897577285767, "learning_rate": 1.799136194287559e-06, "loss": 0.2892, "step": 6265 }, { "epoch": 2.247191011235955, "grad_norm": 0.3273777365684509, "learning_rate": 1.797533265483582e-06, "loss": 0.291, "step": 6266 }, { "epoch": 2.2475496055462587, "grad_norm": 0.3251219093799591, "learning_rate": 1.7959308945592741e-06, "loss": 0.3088, "step": 6267 }, { "epoch": 2.247908199856562, "grad_norm": 0.32975220680236816, "learning_rate": 1.7943290817937724e-06, "loss": 0.2965, "step": 6268 }, { "epoch": 2.2482667941668657, "grad_norm": 0.29183825850486755, "learning_rate": 1.7927278274661204e-06, "loss": 0.2624, "step": 6269 }, { "epoch": 2.2486253884771696, "grad_norm": 0.30009785294532776, "learning_rate": 1.791127131855257e-06, "loss": 0.3146, "step": 6270 }, { "epoch": 2.248983982787473, "grad_norm": 0.3068028390407562, "learning_rate": 1.7895269952400303e-06, "loss": 0.3095, "step": 6271 }, { "epoch": 2.2493425770977766, "grad_norm": 0.32667016983032227, "learning_rate": 1.7879274178991919e-06, "loss": 0.3022, "step": 6272 }, { "epoch": 2.24970117140808, "grad_norm": 0.306050181388855, "learning_rate": 1.7863284001113862e-06, "loss": 0.2988, "step": 6273 }, { "epoch": 2.250059765718384, "grad_norm": 0.31292304396629333, "learning_rate": 1.7847299421551756e-06, "loss": 0.2856, "step": 6274 }, { "epoch": 2.2504183600286876, "grad_norm": 0.3215630352497101, "learning_rate": 1.7831320443090107e-06, "loss": 0.3025, "step": 6275 }, { "epoch": 2.250776954338991, "grad_norm": 0.345272034406662, "learning_rate": 1.7815347068512517e-06, "loss": 0.3107, "step": 6276 }, { "epoch": 2.2511355486492945, "grad_norm": 0.3160615861415863, "learning_rate": 1.7799379300601616e-06, "loss": 0.2856, "step": 6277 }, { "epoch": 2.2514941429595985, "grad_norm": 0.3407844603061676, "learning_rate": 1.778341714213901e-06, "loss": 0.3179, "step": 6278 }, { "epoch": 2.251852737269902, "grad_norm": 0.309871107339859, "learning_rate": 1.7767460595905367e-06, "loss": 0.2814, "step": 6279 }, { "epoch": 2.2522113315802055, "grad_norm": 0.30349111557006836, "learning_rate": 1.7751509664680367e-06, "loss": 0.2767, "step": 6280 }, { "epoch": 2.2525699258905094, "grad_norm": 0.316040575504303, "learning_rate": 1.7735564351242734e-06, "loss": 0.2832, "step": 6281 }, { "epoch": 2.252928520200813, "grad_norm": 0.31717512011528015, "learning_rate": 1.771962465837015e-06, "loss": 0.2952, "step": 6282 }, { "epoch": 2.2532871145111164, "grad_norm": 0.3299836218357086, "learning_rate": 1.7703690588839384e-06, "loss": 0.2813, "step": 6283 }, { "epoch": 2.25364570882142, "grad_norm": 0.30864599347114563, "learning_rate": 1.7687762145426197e-06, "loss": 0.2727, "step": 6284 }, { "epoch": 2.2540043031317234, "grad_norm": 0.3465861678123474, "learning_rate": 1.7671839330905377e-06, "loss": 0.3179, "step": 6285 }, { "epoch": 2.2543628974420273, "grad_norm": 0.28833961486816406, "learning_rate": 1.7655922148050737e-06, "loss": 0.2712, "step": 6286 }, { "epoch": 2.254721491752331, "grad_norm": 0.3168106973171234, "learning_rate": 1.7640010599635066e-06, "loss": 0.298, "step": 6287 }, { "epoch": 2.2550800860626343, "grad_norm": 0.30088362097740173, "learning_rate": 1.7624104688430232e-06, "loss": 0.2738, "step": 6288 }, { "epoch": 2.2554386803729383, "grad_norm": 0.3151426613330841, "learning_rate": 1.7608204417207103e-06, "loss": 0.2878, "step": 6289 }, { "epoch": 2.2557972746832418, "grad_norm": 0.3281175196170807, "learning_rate": 1.7592309788735512e-06, "loss": 0.2947, "step": 6290 }, { "epoch": 2.2561558689935453, "grad_norm": 0.34403517842292786, "learning_rate": 1.757642080578441e-06, "loss": 0.2732, "step": 6291 }, { "epoch": 2.2565144633038487, "grad_norm": 0.3046186864376068, "learning_rate": 1.7560537471121663e-06, "loss": 0.2885, "step": 6292 }, { "epoch": 2.2568730576141527, "grad_norm": 0.3178236186504364, "learning_rate": 1.7544659787514234e-06, "loss": 0.3265, "step": 6293 }, { "epoch": 2.257231651924456, "grad_norm": 0.30533984303474426, "learning_rate": 1.7528787757728021e-06, "loss": 0.2867, "step": 6294 }, { "epoch": 2.2575902462347597, "grad_norm": 0.3050362467765808, "learning_rate": 1.7512921384528009e-06, "loss": 0.2947, "step": 6295 }, { "epoch": 2.257948840545063, "grad_norm": 0.3008984923362732, "learning_rate": 1.7497060670678163e-06, "loss": 0.2916, "step": 6296 }, { "epoch": 2.258307434855367, "grad_norm": 0.3220530152320862, "learning_rate": 1.7481205618941472e-06, "loss": 0.2832, "step": 6297 }, { "epoch": 2.2586660291656706, "grad_norm": 0.3519871234893799, "learning_rate": 1.746535623207995e-06, "loss": 0.31, "step": 6298 }, { "epoch": 2.259024623475974, "grad_norm": 0.3196523189544678, "learning_rate": 1.7449512512854576e-06, "loss": 0.2731, "step": 6299 }, { "epoch": 2.2593832177862776, "grad_norm": 0.3161497116088867, "learning_rate": 1.7433674464025397e-06, "loss": 0.2969, "step": 6300 }, { "epoch": 2.2597418120965815, "grad_norm": 0.2940008044242859, "learning_rate": 1.741784208835146e-06, "loss": 0.2821, "step": 6301 }, { "epoch": 2.260100406406885, "grad_norm": 0.3066967725753784, "learning_rate": 1.7402015388590764e-06, "loss": 0.2742, "step": 6302 }, { "epoch": 2.2604590007171885, "grad_norm": 0.3320428729057312, "learning_rate": 1.7386194367500442e-06, "loss": 0.294, "step": 6303 }, { "epoch": 2.2608175950274925, "grad_norm": 0.3316277265548706, "learning_rate": 1.7370379027836509e-06, "loss": 0.2744, "step": 6304 }, { "epoch": 2.261176189337796, "grad_norm": 0.32924678921699524, "learning_rate": 1.7354569372354069e-06, "loss": 0.2982, "step": 6305 }, { "epoch": 2.2615347836480995, "grad_norm": 0.3187037408351898, "learning_rate": 1.7338765403807223e-06, "loss": 0.2947, "step": 6306 }, { "epoch": 2.261893377958403, "grad_norm": 0.30915290117263794, "learning_rate": 1.732296712494903e-06, "loss": 0.2686, "step": 6307 }, { "epoch": 2.2622519722687064, "grad_norm": 0.34205353260040283, "learning_rate": 1.7307174538531658e-06, "loss": 0.3147, "step": 6308 }, { "epoch": 2.2626105665790104, "grad_norm": 0.29357561469078064, "learning_rate": 1.729138764730618e-06, "loss": 0.2722, "step": 6309 }, { "epoch": 2.262969160889314, "grad_norm": 0.3015700578689575, "learning_rate": 1.7275606454022748e-06, "loss": 0.299, "step": 6310 }, { "epoch": 2.2633277551996174, "grad_norm": 0.34778928756713867, "learning_rate": 1.7259830961430469e-06, "loss": 0.3142, "step": 6311 }, { "epoch": 2.2636863495099213, "grad_norm": 0.29828664660453796, "learning_rate": 1.7244061172277498e-06, "loss": 0.2654, "step": 6312 }, { "epoch": 2.264044943820225, "grad_norm": 0.2981330454349518, "learning_rate": 1.722829708931098e-06, "loss": 0.2852, "step": 6313 }, { "epoch": 2.2644035381305283, "grad_norm": 0.30593693256378174, "learning_rate": 1.7212538715277066e-06, "loss": 0.279, "step": 6314 }, { "epoch": 2.264762132440832, "grad_norm": 0.3116174042224884, "learning_rate": 1.7196786052920938e-06, "loss": 0.3133, "step": 6315 }, { "epoch": 2.2651207267511357, "grad_norm": 0.2968517541885376, "learning_rate": 1.7181039104986713e-06, "loss": 0.2855, "step": 6316 }, { "epoch": 2.2654793210614392, "grad_norm": 0.3046603798866272, "learning_rate": 1.7165297874217584e-06, "loss": 0.2977, "step": 6317 }, { "epoch": 2.2658379153717427, "grad_norm": 0.3234066665172577, "learning_rate": 1.7149562363355737e-06, "loss": 0.2596, "step": 6318 }, { "epoch": 2.2661965096820462, "grad_norm": 0.3184663951396942, "learning_rate": 1.7133832575142296e-06, "loss": 0.2857, "step": 6319 }, { "epoch": 2.26655510399235, "grad_norm": 0.33807939291000366, "learning_rate": 1.7118108512317505e-06, "loss": 0.3037, "step": 6320 }, { "epoch": 2.2669136983026537, "grad_norm": 0.28831085562705994, "learning_rate": 1.7102390177620498e-06, "loss": 0.2794, "step": 6321 }, { "epoch": 2.267272292612957, "grad_norm": 0.3096071779727936, "learning_rate": 1.708667757378949e-06, "loss": 0.3058, "step": 6322 }, { "epoch": 2.2676308869232606, "grad_norm": 0.3325149714946747, "learning_rate": 1.7070970703561624e-06, "loss": 0.2918, "step": 6323 }, { "epoch": 2.2679894812335646, "grad_norm": 0.3115173280239105, "learning_rate": 1.7055269569673116e-06, "loss": 0.2739, "step": 6324 }, { "epoch": 2.268348075543868, "grad_norm": 0.29959648847579956, "learning_rate": 1.7039574174859148e-06, "loss": 0.2869, "step": 6325 }, { "epoch": 2.2687066698541716, "grad_norm": 0.3301265835762024, "learning_rate": 1.7023884521853901e-06, "loss": 0.286, "step": 6326 }, { "epoch": 2.269065264164475, "grad_norm": 0.3149356245994568, "learning_rate": 1.7008200613390579e-06, "loss": 0.2955, "step": 6327 }, { "epoch": 2.269423858474779, "grad_norm": 0.2991240620613098, "learning_rate": 1.6992522452201338e-06, "loss": 0.2718, "step": 6328 }, { "epoch": 2.2697824527850825, "grad_norm": 0.30872565507888794, "learning_rate": 1.6976850041017379e-06, "loss": 0.2808, "step": 6329 }, { "epoch": 2.270141047095386, "grad_norm": 0.3118938207626343, "learning_rate": 1.6961183382568886e-06, "loss": 0.2931, "step": 6330 }, { "epoch": 2.2704996414056895, "grad_norm": 0.3080759644508362, "learning_rate": 1.6945522479585037e-06, "loss": 0.3072, "step": 6331 }, { "epoch": 2.2708582357159934, "grad_norm": 0.3041117191314697, "learning_rate": 1.692986733479402e-06, "loss": 0.2668, "step": 6332 }, { "epoch": 2.271216830026297, "grad_norm": 0.3152551054954529, "learning_rate": 1.691421795092299e-06, "loss": 0.3166, "step": 6333 }, { "epoch": 2.2715754243366004, "grad_norm": 0.3073660135269165, "learning_rate": 1.689857433069813e-06, "loss": 0.2953, "step": 6334 }, { "epoch": 2.2719340186469044, "grad_norm": 0.3214683532714844, "learning_rate": 1.6882936476844618e-06, "loss": 0.293, "step": 6335 }, { "epoch": 2.272292612957208, "grad_norm": 0.3214509189128876, "learning_rate": 1.6867304392086575e-06, "loss": 0.2882, "step": 6336 }, { "epoch": 2.2726512072675114, "grad_norm": 0.3131941258907318, "learning_rate": 1.685167807914722e-06, "loss": 0.3008, "step": 6337 }, { "epoch": 2.273009801577815, "grad_norm": 0.3251982033252716, "learning_rate": 1.6836057540748656e-06, "loss": 0.3049, "step": 6338 }, { "epoch": 2.2733683958881183, "grad_norm": 0.31079772114753723, "learning_rate": 1.6820442779612068e-06, "loss": 0.285, "step": 6339 }, { "epoch": 2.2737269901984223, "grad_norm": 0.321410596370697, "learning_rate": 1.6804833798457554e-06, "loss": 0.3099, "step": 6340 }, { "epoch": 2.2740855845087258, "grad_norm": 0.3000636100769043, "learning_rate": 1.6789230600004275e-06, "loss": 0.287, "step": 6341 }, { "epoch": 2.2744441788190293, "grad_norm": 0.32170626521110535, "learning_rate": 1.6773633186970346e-06, "loss": 0.294, "step": 6342 }, { "epoch": 2.274802773129333, "grad_norm": 0.3016500174999237, "learning_rate": 1.6758041562072886e-06, "loss": 0.3046, "step": 6343 }, { "epoch": 2.2751613674396367, "grad_norm": 0.293209046125412, "learning_rate": 1.674245572802803e-06, "loss": 0.2665, "step": 6344 }, { "epoch": 2.27551996174994, "grad_norm": 0.30457764863967896, "learning_rate": 1.6726875687550836e-06, "loss": 0.286, "step": 6345 }, { "epoch": 2.2758785560602437, "grad_norm": 0.3484102785587311, "learning_rate": 1.6711301443355416e-06, "loss": 0.3331, "step": 6346 }, { "epoch": 2.2762371503705476, "grad_norm": 0.3074020445346832, "learning_rate": 1.669573299815485e-06, "loss": 0.2955, "step": 6347 }, { "epoch": 2.276595744680851, "grad_norm": 0.3305183947086334, "learning_rate": 1.6680170354661212e-06, "loss": 0.2935, "step": 6348 }, { "epoch": 2.2769543389911546, "grad_norm": 0.31075558066368103, "learning_rate": 1.6664613515585582e-06, "loss": 0.3121, "step": 6349 }, { "epoch": 2.277312933301458, "grad_norm": 0.2903361916542053, "learning_rate": 1.6649062483637974e-06, "loss": 0.2687, "step": 6350 }, { "epoch": 2.277671527611762, "grad_norm": 0.2965335249900818, "learning_rate": 1.6633517261527455e-06, "loss": 0.2773, "step": 6351 }, { "epoch": 2.2780301219220656, "grad_norm": 0.33753734827041626, "learning_rate": 1.6617977851962019e-06, "loss": 0.3098, "step": 6352 }, { "epoch": 2.278388716232369, "grad_norm": 0.3524932861328125, "learning_rate": 1.6602444257648702e-06, "loss": 0.3199, "step": 6353 }, { "epoch": 2.2787473105426725, "grad_norm": 0.3077675402164459, "learning_rate": 1.6586916481293503e-06, "loss": 0.3034, "step": 6354 }, { "epoch": 2.2791059048529765, "grad_norm": 0.31001150608062744, "learning_rate": 1.6571394525601403e-06, "loss": 0.2807, "step": 6355 }, { "epoch": 2.27946449916328, "grad_norm": 0.3060672879219055, "learning_rate": 1.6555878393276399e-06, "loss": 0.2769, "step": 6356 }, { "epoch": 2.2798230934735835, "grad_norm": 0.2952846586704254, "learning_rate": 1.6540368087021413e-06, "loss": 0.2856, "step": 6357 }, { "epoch": 2.2801816877838874, "grad_norm": 0.32634806632995605, "learning_rate": 1.65248636095384e-06, "loss": 0.3014, "step": 6358 }, { "epoch": 2.280540282094191, "grad_norm": 0.304092139005661, "learning_rate": 1.65093649635283e-06, "loss": 0.2597, "step": 6359 }, { "epoch": 2.2808988764044944, "grad_norm": 0.31183162331581116, "learning_rate": 1.6493872151691016e-06, "loss": 0.3159, "step": 6360 }, { "epoch": 2.281257470714798, "grad_norm": 0.3457096219062805, "learning_rate": 1.6478385176725458e-06, "loss": 0.2888, "step": 6361 }, { "epoch": 2.2816160650251014, "grad_norm": 0.31834807991981506, "learning_rate": 1.6462904041329485e-06, "loss": 0.3158, "step": 6362 }, { "epoch": 2.2819746593354053, "grad_norm": 0.2790471017360687, "learning_rate": 1.6447428748199967e-06, "loss": 0.2647, "step": 6363 }, { "epoch": 2.282333253645709, "grad_norm": 0.3062369227409363, "learning_rate": 1.6431959300032747e-06, "loss": 0.2994, "step": 6364 }, { "epoch": 2.2826918479560123, "grad_norm": 0.30638134479522705, "learning_rate": 1.641649569952265e-06, "loss": 0.2964, "step": 6365 }, { "epoch": 2.2830504422663163, "grad_norm": 0.3335450291633606, "learning_rate": 1.6401037949363509e-06, "loss": 0.3195, "step": 6366 }, { "epoch": 2.2834090365766198, "grad_norm": 0.31523987650871277, "learning_rate": 1.638558605224807e-06, "loss": 0.3113, "step": 6367 }, { "epoch": 2.2837676308869232, "grad_norm": 0.31665199995040894, "learning_rate": 1.6370140010868135e-06, "loss": 0.2737, "step": 6368 }, { "epoch": 2.2841262251972267, "grad_norm": 0.33439064025878906, "learning_rate": 1.6354699827914422e-06, "loss": 0.3294, "step": 6369 }, { "epoch": 2.2844848195075302, "grad_norm": 0.31592410802841187, "learning_rate": 1.633926550607668e-06, "loss": 0.2736, "step": 6370 }, { "epoch": 2.284843413817834, "grad_norm": 0.31345951557159424, "learning_rate": 1.6323837048043611e-06, "loss": 0.3088, "step": 6371 }, { "epoch": 2.2852020081281377, "grad_norm": 0.2938510775566101, "learning_rate": 1.6308414456502897e-06, "loss": 0.2672, "step": 6372 }, { "epoch": 2.285560602438441, "grad_norm": 0.3549934923648834, "learning_rate": 1.6292997734141224e-06, "loss": 0.2934, "step": 6373 }, { "epoch": 2.285919196748745, "grad_norm": 0.3052998185157776, "learning_rate": 1.6277586883644198e-06, "loss": 0.2901, "step": 6374 }, { "epoch": 2.2862777910590486, "grad_norm": 0.3477855324745178, "learning_rate": 1.6262181907696456e-06, "loss": 0.33, "step": 6375 }, { "epoch": 2.286636385369352, "grad_norm": 0.2977287173271179, "learning_rate": 1.6246782808981582e-06, "loss": 0.2552, "step": 6376 }, { "epoch": 2.2869949796796556, "grad_norm": 0.31099337339401245, "learning_rate": 1.6231389590182162e-06, "loss": 0.2854, "step": 6377 }, { "epoch": 2.2873535739899595, "grad_norm": 0.3390917181968689, "learning_rate": 1.6216002253979752e-06, "loss": 0.3148, "step": 6378 }, { "epoch": 2.287712168300263, "grad_norm": 0.30652403831481934, "learning_rate": 1.6200620803054833e-06, "loss": 0.2786, "step": 6379 }, { "epoch": 2.2880707626105665, "grad_norm": 0.3080349564552307, "learning_rate": 1.618524524008695e-06, "loss": 0.2889, "step": 6380 }, { "epoch": 2.28842935692087, "grad_norm": 0.3195173740386963, "learning_rate": 1.6169875567754518e-06, "loss": 0.2607, "step": 6381 }, { "epoch": 2.288787951231174, "grad_norm": 0.32113760709762573, "learning_rate": 1.615451178873504e-06, "loss": 0.3126, "step": 6382 }, { "epoch": 2.2891465455414775, "grad_norm": 0.3504800498485565, "learning_rate": 1.6139153905704897e-06, "loss": 0.323, "step": 6383 }, { "epoch": 2.289505139851781, "grad_norm": 0.3153412342071533, "learning_rate": 1.6123801921339489e-06, "loss": 0.2847, "step": 6384 }, { "epoch": 2.2898637341620844, "grad_norm": 0.29299575090408325, "learning_rate": 1.6108455838313198e-06, "loss": 0.2896, "step": 6385 }, { "epoch": 2.2902223284723884, "grad_norm": 0.34493669867515564, "learning_rate": 1.6093115659299324e-06, "loss": 0.3091, "step": 6386 }, { "epoch": 2.290580922782692, "grad_norm": 0.2977626323699951, "learning_rate": 1.6077781386970192e-06, "loss": 0.2897, "step": 6387 }, { "epoch": 2.2909395170929954, "grad_norm": 0.29322731494903564, "learning_rate": 1.6062453023997083e-06, "loss": 0.2724, "step": 6388 }, { "epoch": 2.2912981114032993, "grad_norm": 0.31173399090766907, "learning_rate": 1.604713057305024e-06, "loss": 0.2731, "step": 6389 }, { "epoch": 2.291656705713603, "grad_norm": 0.28303632140159607, "learning_rate": 1.6031814036798904e-06, "loss": 0.2893, "step": 6390 }, { "epoch": 2.2920153000239063, "grad_norm": 0.3466184437274933, "learning_rate": 1.6016503417911229e-06, "loss": 0.3335, "step": 6391 }, { "epoch": 2.29237389433421, "grad_norm": 0.3027389943599701, "learning_rate": 1.6001198719054383e-06, "loss": 0.2475, "step": 6392 }, { "epoch": 2.2927324886445133, "grad_norm": 0.30602753162384033, "learning_rate": 1.59858999428945e-06, "loss": 0.3017, "step": 6393 }, { "epoch": 2.2930910829548172, "grad_norm": 0.3318288028240204, "learning_rate": 1.597060709209667e-06, "loss": 0.3448, "step": 6394 }, { "epoch": 2.2934496772651207, "grad_norm": 0.2975353002548218, "learning_rate": 1.5955320169324968e-06, "loss": 0.2557, "step": 6395 }, { "epoch": 2.293808271575424, "grad_norm": 0.31980594992637634, "learning_rate": 1.59400391772424e-06, "loss": 0.3174, "step": 6396 }, { "epoch": 2.294166865885728, "grad_norm": 0.32745206356048584, "learning_rate": 1.5924764118510988e-06, "loss": 0.3021, "step": 6397 }, { "epoch": 2.2945254601960317, "grad_norm": 0.3036920130252838, "learning_rate": 1.5909494995791664e-06, "loss": 0.2913, "step": 6398 }, { "epoch": 2.294884054506335, "grad_norm": 0.32792481780052185, "learning_rate": 1.5894231811744376e-06, "loss": 0.2862, "step": 6399 }, { "epoch": 2.2952426488166386, "grad_norm": 0.31182461977005005, "learning_rate": 1.5878974569028016e-06, "loss": 0.2731, "step": 6400 }, { "epoch": 2.295601243126942, "grad_norm": 0.31687286496162415, "learning_rate": 1.5863723270300441e-06, "loss": 0.3003, "step": 6401 }, { "epoch": 2.295959837437246, "grad_norm": 0.30213063955307007, "learning_rate": 1.584847791821849e-06, "loss": 0.287, "step": 6402 }, { "epoch": 2.2963184317475496, "grad_norm": 0.31402459740638733, "learning_rate": 1.5833238515437921e-06, "loss": 0.3048, "step": 6403 }, { "epoch": 2.296677026057853, "grad_norm": 0.29230207204818726, "learning_rate": 1.5818005064613507e-06, "loss": 0.2675, "step": 6404 }, { "epoch": 2.297035620368157, "grad_norm": 0.31391018629074097, "learning_rate": 1.5802777568398953e-06, "loss": 0.2983, "step": 6405 }, { "epoch": 2.2973942146784605, "grad_norm": 0.3064253032207489, "learning_rate": 1.5787556029446939e-06, "loss": 0.3229, "step": 6406 }, { "epoch": 2.297752808988764, "grad_norm": 0.3104316294193268, "learning_rate": 1.5772340450409124e-06, "loss": 0.2969, "step": 6407 }, { "epoch": 2.2981114032990675, "grad_norm": 0.32384079694747925, "learning_rate": 1.5757130833936075e-06, "loss": 0.3066, "step": 6408 }, { "epoch": 2.2984699976093714, "grad_norm": 0.32091987133026123, "learning_rate": 1.5741927182677386e-06, "loss": 0.3038, "step": 6409 }, { "epoch": 2.298828591919675, "grad_norm": 0.34947580099105835, "learning_rate": 1.5726729499281529e-06, "loss": 0.3036, "step": 6410 }, { "epoch": 2.2991871862299784, "grad_norm": 0.3104707598686218, "learning_rate": 1.5711537786396057e-06, "loss": 0.2649, "step": 6411 }, { "epoch": 2.299545780540282, "grad_norm": 0.34283584356307983, "learning_rate": 1.5696352046667369e-06, "loss": 0.3064, "step": 6412 }, { "epoch": 2.299904374850586, "grad_norm": 0.3119279742240906, "learning_rate": 1.5681172282740875e-06, "loss": 0.3111, "step": 6413 }, { "epoch": 2.3002629691608893, "grad_norm": 0.3331277370452881, "learning_rate": 1.5665998497260959e-06, "loss": 0.3216, "step": 6414 }, { "epoch": 2.300621563471193, "grad_norm": 0.3145698010921478, "learning_rate": 1.5650830692870905e-06, "loss": 0.2626, "step": 6415 }, { "epoch": 2.3009801577814963, "grad_norm": 0.3283415138721466, "learning_rate": 1.5635668872213018e-06, "loss": 0.3069, "step": 6416 }, { "epoch": 2.3013387520918003, "grad_norm": 0.31192871928215027, "learning_rate": 1.562051303792852e-06, "loss": 0.2938, "step": 6417 }, { "epoch": 2.3016973464021038, "grad_norm": 0.27855736017227173, "learning_rate": 1.5605363192657613e-06, "loss": 0.2699, "step": 6418 }, { "epoch": 2.3020559407124073, "grad_norm": 0.31461092829704285, "learning_rate": 1.5590219339039464e-06, "loss": 0.2897, "step": 6419 }, { "epoch": 2.302414535022711, "grad_norm": 0.3409656584262848, "learning_rate": 1.557508147971214e-06, "loss": 0.3054, "step": 6420 }, { "epoch": 2.3027731293330147, "grad_norm": 0.3299320340156555, "learning_rate": 1.555994961731273e-06, "loss": 0.2887, "step": 6421 }, { "epoch": 2.303131723643318, "grad_norm": 0.33718141913414, "learning_rate": 1.554482375447724e-06, "loss": 0.3121, "step": 6422 }, { "epoch": 2.3034903179536217, "grad_norm": 0.334774911403656, "learning_rate": 1.5529703893840653e-06, "loss": 0.2723, "step": 6423 }, { "epoch": 2.303848912263925, "grad_norm": 0.34595927596092224, "learning_rate": 1.5514590038036902e-06, "loss": 0.3284, "step": 6424 }, { "epoch": 2.304207506574229, "grad_norm": 0.30923184752464294, "learning_rate": 1.5499482189698845e-06, "loss": 0.3093, "step": 6425 }, { "epoch": 2.3045661008845326, "grad_norm": 0.2966243624687195, "learning_rate": 1.5484380351458344e-06, "loss": 0.2718, "step": 6426 }, { "epoch": 2.304924695194836, "grad_norm": 0.3296680152416229, "learning_rate": 1.5469284525946138e-06, "loss": 0.3124, "step": 6427 }, { "epoch": 2.30528328950514, "grad_norm": 0.32901471853256226, "learning_rate": 1.5454194715792027e-06, "loss": 0.2907, "step": 6428 }, { "epoch": 2.3056418838154435, "grad_norm": 0.30546343326568604, "learning_rate": 1.543911092362466e-06, "loss": 0.3014, "step": 6429 }, { "epoch": 2.306000478125747, "grad_norm": 0.33838340640068054, "learning_rate": 1.54240331520717e-06, "loss": 0.3085, "step": 6430 }, { "epoch": 2.3063590724360505, "grad_norm": 0.29902443289756775, "learning_rate": 1.5408961403759748e-06, "loss": 0.2765, "step": 6431 }, { "epoch": 2.3067176667463545, "grad_norm": 0.31463533639907837, "learning_rate": 1.5393895681314319e-06, "loss": 0.3231, "step": 6432 }, { "epoch": 2.307076261056658, "grad_norm": 0.3195948898792267, "learning_rate": 1.5378835987359936e-06, "loss": 0.2742, "step": 6433 }, { "epoch": 2.3074348553669615, "grad_norm": 0.331917405128479, "learning_rate": 1.5363782324520033e-06, "loss": 0.2726, "step": 6434 }, { "epoch": 2.307793449677265, "grad_norm": 0.3358009159564972, "learning_rate": 1.5348734695417006e-06, "loss": 0.3218, "step": 6435 }, { "epoch": 2.308152043987569, "grad_norm": 0.3069700300693512, "learning_rate": 1.533369310267222e-06, "loss": 0.3017, "step": 6436 }, { "epoch": 2.3085106382978724, "grad_norm": 0.3135002851486206, "learning_rate": 1.5318657548905935e-06, "loss": 0.2759, "step": 6437 }, { "epoch": 2.308869232608176, "grad_norm": 0.33122506737709045, "learning_rate": 1.5303628036737417e-06, "loss": 0.3135, "step": 6438 }, { "epoch": 2.3092278269184794, "grad_norm": 0.2930465638637543, "learning_rate": 1.528860456878481e-06, "loss": 0.2901, "step": 6439 }, { "epoch": 2.3095864212287833, "grad_norm": 0.28791892528533936, "learning_rate": 1.5273587147665315e-06, "loss": 0.2691, "step": 6440 }, { "epoch": 2.309945015539087, "grad_norm": 0.31315678358078003, "learning_rate": 1.525857577599496e-06, "loss": 0.3171, "step": 6441 }, { "epoch": 2.3103036098493903, "grad_norm": 0.3211638629436493, "learning_rate": 1.524357045638879e-06, "loss": 0.3112, "step": 6442 }, { "epoch": 2.310662204159694, "grad_norm": 0.324480265378952, "learning_rate": 1.52285711914608e-06, "loss": 0.3208, "step": 6443 }, { "epoch": 2.3110207984699978, "grad_norm": 0.3235909640789032, "learning_rate": 1.5213577983823852e-06, "loss": 0.3153, "step": 6444 }, { "epoch": 2.3113793927803012, "grad_norm": 0.32242351770401, "learning_rate": 1.5198590836089883e-06, "loss": 0.2972, "step": 6445 }, { "epoch": 2.3117379870906047, "grad_norm": 0.3249562978744507, "learning_rate": 1.5183609750869644e-06, "loss": 0.3089, "step": 6446 }, { "epoch": 2.3120965814009082, "grad_norm": 0.30633142590522766, "learning_rate": 1.5168634730772907e-06, "loss": 0.3127, "step": 6447 }, { "epoch": 2.312455175711212, "grad_norm": 0.2988121211528778, "learning_rate": 1.5153665778408383e-06, "loss": 0.2749, "step": 6448 }, { "epoch": 2.3128137700215157, "grad_norm": 0.3080437481403351, "learning_rate": 1.5138702896383678e-06, "loss": 0.292, "step": 6449 }, { "epoch": 2.313172364331819, "grad_norm": 0.3312663435935974, "learning_rate": 1.5123746087305392e-06, "loss": 0.311, "step": 6450 }, { "epoch": 2.313530958642123, "grad_norm": 0.2925700843334198, "learning_rate": 1.5108795353779037e-06, "loss": 0.2623, "step": 6451 }, { "epoch": 2.3138895529524266, "grad_norm": 0.3353904187679291, "learning_rate": 1.5093850698409085e-06, "loss": 0.3134, "step": 6452 }, { "epoch": 2.31424814726273, "grad_norm": 0.32596296072006226, "learning_rate": 1.507891212379896e-06, "loss": 0.2946, "step": 6453 }, { "epoch": 2.3146067415730336, "grad_norm": 0.3334715962409973, "learning_rate": 1.5063979632550973e-06, "loss": 0.3136, "step": 6454 }, { "epoch": 2.314965335883337, "grad_norm": 0.32626160979270935, "learning_rate": 1.5049053227266435e-06, "loss": 0.2976, "step": 6455 }, { "epoch": 2.315323930193641, "grad_norm": 0.30366435647010803, "learning_rate": 1.5034132910545541e-06, "loss": 0.2739, "step": 6456 }, { "epoch": 2.3156825245039445, "grad_norm": 0.30691763758659363, "learning_rate": 1.5019218684987507e-06, "loss": 0.2857, "step": 6457 }, { "epoch": 2.316041118814248, "grad_norm": 0.35725972056388855, "learning_rate": 1.5004310553190394e-06, "loss": 0.3256, "step": 6458 }, { "epoch": 2.316399713124552, "grad_norm": 0.3140603303909302, "learning_rate": 1.4989408517751258e-06, "loss": 0.2801, "step": 6459 }, { "epoch": 2.3167583074348554, "grad_norm": 0.29859432578086853, "learning_rate": 1.49745125812661e-06, "loss": 0.2822, "step": 6460 }, { "epoch": 2.317116901745159, "grad_norm": 0.3169177770614624, "learning_rate": 1.495962274632979e-06, "loss": 0.3012, "step": 6461 }, { "epoch": 2.3174754960554624, "grad_norm": 0.3354937434196472, "learning_rate": 1.494473901553624e-06, "loss": 0.2882, "step": 6462 }, { "epoch": 2.3178340903657664, "grad_norm": 0.32576221227645874, "learning_rate": 1.4929861391478207e-06, "loss": 0.2799, "step": 6463 }, { "epoch": 2.31819268467607, "grad_norm": 0.30007699131965637, "learning_rate": 1.4914989876747427e-06, "loss": 0.2994, "step": 6464 }, { "epoch": 2.3185512789863734, "grad_norm": 0.34023839235305786, "learning_rate": 1.4900124473934585e-06, "loss": 0.291, "step": 6465 }, { "epoch": 2.318909873296677, "grad_norm": 0.3230481445789337, "learning_rate": 1.4885265185629244e-06, "loss": 0.3037, "step": 6466 }, { "epoch": 2.319268467606981, "grad_norm": 0.2889873683452606, "learning_rate": 1.4870412014419955e-06, "loss": 0.2649, "step": 6467 }, { "epoch": 2.3196270619172843, "grad_norm": 0.32051360607147217, "learning_rate": 1.4855564962894186e-06, "loss": 0.3262, "step": 6468 }, { "epoch": 2.319985656227588, "grad_norm": 0.3237419128417969, "learning_rate": 1.4840724033638359e-06, "loss": 0.2862, "step": 6469 }, { "epoch": 2.3203442505378913, "grad_norm": 0.33265215158462524, "learning_rate": 1.4825889229237778e-06, "loss": 0.2819, "step": 6470 }, { "epoch": 2.3207028448481952, "grad_norm": 0.3283068537712097, "learning_rate": 1.4811060552276724e-06, "loss": 0.2628, "step": 6471 }, { "epoch": 2.3210614391584987, "grad_norm": 0.32966339588165283, "learning_rate": 1.4796238005338414e-06, "loss": 0.3135, "step": 6472 }, { "epoch": 2.321420033468802, "grad_norm": 0.3042256236076355, "learning_rate": 1.4781421591004941e-06, "loss": 0.2689, "step": 6473 }, { "epoch": 2.321778627779106, "grad_norm": 0.3194669187068939, "learning_rate": 1.4766611311857432e-06, "loss": 0.3181, "step": 6474 }, { "epoch": 2.3221372220894096, "grad_norm": 0.3134753406047821, "learning_rate": 1.4751807170475828e-06, "loss": 0.2634, "step": 6475 }, { "epoch": 2.322495816399713, "grad_norm": 0.33122336864471436, "learning_rate": 1.4737009169439082e-06, "loss": 0.2995, "step": 6476 }, { "epoch": 2.3228544107100166, "grad_norm": 0.3232319951057434, "learning_rate": 1.472221731132506e-06, "loss": 0.2476, "step": 6477 }, { "epoch": 2.32321300502032, "grad_norm": 0.334788054227829, "learning_rate": 1.470743159871051e-06, "loss": 0.2976, "step": 6478 }, { "epoch": 2.323571599330624, "grad_norm": 0.31052786111831665, "learning_rate": 1.4692652034171207e-06, "loss": 0.2659, "step": 6479 }, { "epoch": 2.3239301936409276, "grad_norm": 0.32570168375968933, "learning_rate": 1.4677878620281748e-06, "loss": 0.3028, "step": 6480 }, { "epoch": 2.324288787951231, "grad_norm": 0.3442636728286743, "learning_rate": 1.4663111359615733e-06, "loss": 0.2824, "step": 6481 }, { "epoch": 2.324647382261535, "grad_norm": 0.30077841877937317, "learning_rate": 1.4648350254745674e-06, "loss": 0.2638, "step": 6482 }, { "epoch": 2.3250059765718385, "grad_norm": 0.31998613476753235, "learning_rate": 1.4633595308242965e-06, "loss": 0.3033, "step": 6483 }, { "epoch": 2.325364570882142, "grad_norm": 0.31864210963249207, "learning_rate": 1.4618846522677987e-06, "loss": 0.3065, "step": 6484 }, { "epoch": 2.3257231651924455, "grad_norm": 0.29209887981414795, "learning_rate": 1.4604103900620026e-06, "loss": 0.264, "step": 6485 }, { "epoch": 2.326081759502749, "grad_norm": 0.3164184093475342, "learning_rate": 1.4589367444637298e-06, "loss": 0.3155, "step": 6486 }, { "epoch": 2.326440353813053, "grad_norm": 0.32800155878067017, "learning_rate": 1.4574637157296918e-06, "loss": 0.3122, "step": 6487 }, { "epoch": 2.3267989481233564, "grad_norm": 0.2934146523475647, "learning_rate": 1.4559913041164957e-06, "loss": 0.2571, "step": 6488 }, { "epoch": 2.32715754243366, "grad_norm": 0.3364354074001312, "learning_rate": 1.4545195098806419e-06, "loss": 0.2987, "step": 6489 }, { "epoch": 2.327516136743964, "grad_norm": 0.32533618807792664, "learning_rate": 1.4530483332785173e-06, "loss": 0.2915, "step": 6490 }, { "epoch": 2.3278747310542673, "grad_norm": 0.2981909513473511, "learning_rate": 1.4515777745664112e-06, "loss": 0.2677, "step": 6491 }, { "epoch": 2.328233325364571, "grad_norm": 0.33714747428894043, "learning_rate": 1.4501078340004954e-06, "loss": 0.3027, "step": 6492 }, { "epoch": 2.3285919196748743, "grad_norm": 0.3273293673992157, "learning_rate": 1.4486385118368385e-06, "loss": 0.3424, "step": 6493 }, { "epoch": 2.3289505139851783, "grad_norm": 0.2893737256526947, "learning_rate": 1.447169808331404e-06, "loss": 0.2655, "step": 6494 }, { "epoch": 2.3293091082954818, "grad_norm": 0.30255982279777527, "learning_rate": 1.4457017237400406e-06, "loss": 0.2836, "step": 6495 }, { "epoch": 2.3296677026057853, "grad_norm": 0.32215824723243713, "learning_rate": 1.4442342583184948e-06, "loss": 0.3271, "step": 6496 }, { "epoch": 2.3300262969160888, "grad_norm": 0.300370991230011, "learning_rate": 1.442767412322404e-06, "loss": 0.2837, "step": 6497 }, { "epoch": 2.3303848912263927, "grad_norm": 0.286695659160614, "learning_rate": 1.4413011860072984e-06, "loss": 0.2709, "step": 6498 }, { "epoch": 2.330743485536696, "grad_norm": 0.33069121837615967, "learning_rate": 1.4398355796285969e-06, "loss": 0.2796, "step": 6499 }, { "epoch": 2.3311020798469997, "grad_norm": 0.32268160581588745, "learning_rate": 1.4383705934416131e-06, "loss": 0.2883, "step": 6500 }, { "epoch": 2.331460674157303, "grad_norm": 0.3461190462112427, "learning_rate": 1.4369062277015533e-06, "loss": 0.2905, "step": 6501 }, { "epoch": 2.331819268467607, "grad_norm": 0.3240025043487549, "learning_rate": 1.4354424826635137e-06, "loss": 0.2933, "step": 6502 }, { "epoch": 2.3321778627779106, "grad_norm": 0.2945164144039154, "learning_rate": 1.4339793585824857e-06, "loss": 0.2695, "step": 6503 }, { "epoch": 2.332536457088214, "grad_norm": 0.3015609383583069, "learning_rate": 1.4325168557133456e-06, "loss": 0.2896, "step": 6504 }, { "epoch": 2.332895051398518, "grad_norm": 0.29888004064559937, "learning_rate": 1.4310549743108688e-06, "loss": 0.2802, "step": 6505 }, { "epoch": 2.3332536457088215, "grad_norm": 0.3114720582962036, "learning_rate": 1.4295937146297206e-06, "loss": 0.2855, "step": 6506 }, { "epoch": 2.333612240019125, "grad_norm": 0.2866611182689667, "learning_rate": 1.4281330769244522e-06, "loss": 0.2568, "step": 6507 }, { "epoch": 2.3339708343294285, "grad_norm": 0.3129275441169739, "learning_rate": 1.4266730614495179e-06, "loss": 0.2903, "step": 6508 }, { "epoch": 2.334329428639732, "grad_norm": 0.3052823543548584, "learning_rate": 1.4252136684592521e-06, "loss": 0.2813, "step": 6509 }, { "epoch": 2.334688022950036, "grad_norm": 0.3041490912437439, "learning_rate": 1.423754898207887e-06, "loss": 0.2941, "step": 6510 }, { "epoch": 2.3350466172603395, "grad_norm": 0.30652105808258057, "learning_rate": 1.4222967509495462e-06, "loss": 0.2991, "step": 6511 }, { "epoch": 2.335405211570643, "grad_norm": 0.3167301118373871, "learning_rate": 1.420839226938241e-06, "loss": 0.2855, "step": 6512 }, { "epoch": 2.335763805880947, "grad_norm": 0.3217184245586395, "learning_rate": 1.4193823264278772e-06, "loss": 0.307, "step": 6513 }, { "epoch": 2.3361224001912504, "grad_norm": 0.3052130341529846, "learning_rate": 1.417926049672253e-06, "loss": 0.3046, "step": 6514 }, { "epoch": 2.336480994501554, "grad_norm": 0.29919835925102234, "learning_rate": 1.4164703969250564e-06, "loss": 0.2979, "step": 6515 }, { "epoch": 2.3368395888118574, "grad_norm": 0.2994377613067627, "learning_rate": 1.4150153684398643e-06, "loss": 0.287, "step": 6516 }, { "epoch": 2.3371981831221613, "grad_norm": 0.33767011761665344, "learning_rate": 1.4135609644701488e-06, "loss": 0.287, "step": 6517 }, { "epoch": 2.337556777432465, "grad_norm": 0.31866201758384705, "learning_rate": 1.4121071852692708e-06, "loss": 0.2709, "step": 6518 }, { "epoch": 2.3379153717427683, "grad_norm": 0.34485867619514465, "learning_rate": 1.4106540310904838e-06, "loss": 0.3264, "step": 6519 }, { "epoch": 2.338273966053072, "grad_norm": 0.31425726413726807, "learning_rate": 1.4092015021869331e-06, "loss": 0.2856, "step": 6520 }, { "epoch": 2.3386325603633757, "grad_norm": 0.301201730966568, "learning_rate": 1.4077495988116513e-06, "loss": 0.2682, "step": 6521 }, { "epoch": 2.3389911546736792, "grad_norm": 0.31471654772758484, "learning_rate": 1.4062983212175652e-06, "loss": 0.2983, "step": 6522 }, { "epoch": 2.3393497489839827, "grad_norm": 0.32325083017349243, "learning_rate": 1.4048476696574935e-06, "loss": 0.2891, "step": 6523 }, { "epoch": 2.3397083432942862, "grad_norm": 0.30486980080604553, "learning_rate": 1.4033976443841401e-06, "loss": 0.2851, "step": 6524 }, { "epoch": 2.34006693760459, "grad_norm": 0.3035051226615906, "learning_rate": 1.40194824565011e-06, "loss": 0.2863, "step": 6525 }, { "epoch": 2.3404255319148937, "grad_norm": 0.3245609402656555, "learning_rate": 1.4004994737078882e-06, "loss": 0.3034, "step": 6526 }, { "epoch": 2.340784126225197, "grad_norm": 0.30393511056900024, "learning_rate": 1.3990513288098579e-06, "loss": 0.2754, "step": 6527 }, { "epoch": 2.3411427205355007, "grad_norm": 0.3027450144290924, "learning_rate": 1.3976038112082879e-06, "loss": 0.3096, "step": 6528 }, { "epoch": 2.3415013148458046, "grad_norm": 0.29032692313194275, "learning_rate": 1.3961569211553416e-06, "loss": 0.2687, "step": 6529 }, { "epoch": 2.341859909156108, "grad_norm": 0.33207249641418457, "learning_rate": 1.3947106589030719e-06, "loss": 0.3325, "step": 6530 }, { "epoch": 2.3422185034664116, "grad_norm": 0.32581454515457153, "learning_rate": 1.393265024703422e-06, "loss": 0.3258, "step": 6531 }, { "epoch": 2.342577097776715, "grad_norm": 0.29634231328964233, "learning_rate": 1.3918200188082276e-06, "loss": 0.2496, "step": 6532 }, { "epoch": 2.342935692087019, "grad_norm": 0.30373844504356384, "learning_rate": 1.3903756414692104e-06, "loss": 0.307, "step": 6533 }, { "epoch": 2.3432942863973225, "grad_norm": 0.3085157573223114, "learning_rate": 1.3889318929379863e-06, "loss": 0.2693, "step": 6534 }, { "epoch": 2.343652880707626, "grad_norm": 0.3133479356765747, "learning_rate": 1.3874887734660624e-06, "loss": 0.2836, "step": 6535 }, { "epoch": 2.34401147501793, "grad_norm": 0.2907311022281647, "learning_rate": 1.386046283304831e-06, "loss": 0.2694, "step": 6536 }, { "epoch": 2.3443700693282334, "grad_norm": 0.30387556552886963, "learning_rate": 1.3846044227055833e-06, "loss": 0.2759, "step": 6537 }, { "epoch": 2.344728663638537, "grad_norm": 0.31053411960601807, "learning_rate": 1.3831631919194915e-06, "loss": 0.3159, "step": 6538 }, { "epoch": 2.3450872579488404, "grad_norm": 0.30996420979499817, "learning_rate": 1.381722591197625e-06, "loss": 0.274, "step": 6539 }, { "epoch": 2.345445852259144, "grad_norm": 0.3284037411212921, "learning_rate": 1.3802826207909414e-06, "loss": 0.3072, "step": 6540 }, { "epoch": 2.345804446569448, "grad_norm": 0.3064841628074646, "learning_rate": 1.3788432809502832e-06, "loss": 0.2506, "step": 6541 }, { "epoch": 2.3461630408797514, "grad_norm": 0.3127132058143616, "learning_rate": 1.3774045719263956e-06, "loss": 0.2972, "step": 6542 }, { "epoch": 2.346521635190055, "grad_norm": 0.30770212411880493, "learning_rate": 1.3759664939698997e-06, "loss": 0.3131, "step": 6543 }, { "epoch": 2.346880229500359, "grad_norm": 0.3439137041568756, "learning_rate": 1.3745290473313177e-06, "loss": 0.3454, "step": 6544 }, { "epoch": 2.3472388238106623, "grad_norm": 0.31620460748672485, "learning_rate": 1.3730922322610535e-06, "loss": 0.2989, "step": 6545 }, { "epoch": 2.347597418120966, "grad_norm": 0.30851173400878906, "learning_rate": 1.3716560490094061e-06, "loss": 0.2751, "step": 6546 }, { "epoch": 2.3479560124312693, "grad_norm": 0.3040272295475006, "learning_rate": 1.3702204978265637e-06, "loss": 0.28, "step": 6547 }, { "epoch": 2.348314606741573, "grad_norm": 0.3216986656188965, "learning_rate": 1.368785578962603e-06, "loss": 0.2901, "step": 6548 }, { "epoch": 2.3486732010518767, "grad_norm": 0.33191201090812683, "learning_rate": 1.3673512926674937e-06, "loss": 0.3024, "step": 6549 }, { "epoch": 2.34903179536218, "grad_norm": 0.3251016438007355, "learning_rate": 1.3659176391910894e-06, "loss": 0.3027, "step": 6550 }, { "epoch": 2.3493903896724837, "grad_norm": 0.33271175622940063, "learning_rate": 1.364484618783139e-06, "loss": 0.2996, "step": 6551 }, { "epoch": 2.3497489839827876, "grad_norm": 0.340334951877594, "learning_rate": 1.36305223169328e-06, "loss": 0.2831, "step": 6552 }, { "epoch": 2.350107578293091, "grad_norm": 0.3271491527557373, "learning_rate": 1.3616204781710341e-06, "loss": 0.2887, "step": 6553 }, { "epoch": 2.3504661726033946, "grad_norm": 0.3404785394668579, "learning_rate": 1.3601893584658237e-06, "loss": 0.2906, "step": 6554 }, { "epoch": 2.350824766913698, "grad_norm": 0.3218340575695038, "learning_rate": 1.3587588728269501e-06, "loss": 0.2658, "step": 6555 }, { "epoch": 2.351183361224002, "grad_norm": 0.33268529176712036, "learning_rate": 1.3573290215036105e-06, "loss": 0.3005, "step": 6556 }, { "epoch": 2.3515419555343056, "grad_norm": 0.3070424497127533, "learning_rate": 1.3558998047448869e-06, "loss": 0.3014, "step": 6557 }, { "epoch": 2.351900549844609, "grad_norm": 0.32009175419807434, "learning_rate": 1.3544712227997547e-06, "loss": 0.2972, "step": 6558 }, { "epoch": 2.3522591441549126, "grad_norm": 0.30900171399116516, "learning_rate": 1.3530432759170775e-06, "loss": 0.2583, "step": 6559 }, { "epoch": 2.3526177384652165, "grad_norm": 0.33118903636932373, "learning_rate": 1.3516159643456079e-06, "loss": 0.3216, "step": 6560 }, { "epoch": 2.35297633277552, "grad_norm": 0.31196489930152893, "learning_rate": 1.3501892883339897e-06, "loss": 0.3242, "step": 6561 }, { "epoch": 2.3533349270858235, "grad_norm": 0.2908107340335846, "learning_rate": 1.3487632481307512e-06, "loss": 0.2798, "step": 6562 }, { "epoch": 2.353693521396127, "grad_norm": 0.2828187942504883, "learning_rate": 1.3473378439843139e-06, "loss": 0.2503, "step": 6563 }, { "epoch": 2.354052115706431, "grad_norm": 0.31943538784980774, "learning_rate": 1.345913076142989e-06, "loss": 0.3223, "step": 6564 }, { "epoch": 2.3544107100167344, "grad_norm": 0.3265112340450287, "learning_rate": 1.3444889448549742e-06, "loss": 0.2827, "step": 6565 }, { "epoch": 2.354769304327038, "grad_norm": 0.3300071060657501, "learning_rate": 1.34306545036836e-06, "loss": 0.2867, "step": 6566 }, { "epoch": 2.355127898637342, "grad_norm": 0.3274992108345032, "learning_rate": 1.34164259293112e-06, "loss": 0.2712, "step": 6567 }, { "epoch": 2.3554864929476453, "grad_norm": 0.32159003615379333, "learning_rate": 1.3402203727911223e-06, "loss": 0.3064, "step": 6568 }, { "epoch": 2.355845087257949, "grad_norm": 0.31524065136909485, "learning_rate": 1.3387987901961235e-06, "loss": 0.2967, "step": 6569 }, { "epoch": 2.3562036815682523, "grad_norm": 0.3256705701351166, "learning_rate": 1.337377845393763e-06, "loss": 0.32, "step": 6570 }, { "epoch": 2.356562275878556, "grad_norm": 0.30382072925567627, "learning_rate": 1.3359575386315799e-06, "loss": 0.3112, "step": 6571 }, { "epoch": 2.3569208701888598, "grad_norm": 0.3234080970287323, "learning_rate": 1.334537870156991e-06, "loss": 0.2891, "step": 6572 }, { "epoch": 2.3572794644991633, "grad_norm": 0.3155865967273712, "learning_rate": 1.3331188402173111e-06, "loss": 0.2976, "step": 6573 }, { "epoch": 2.3576380588094668, "grad_norm": 0.30823493003845215, "learning_rate": 1.3317004490597363e-06, "loss": 0.2713, "step": 6574 }, { "epoch": 2.3579966531197707, "grad_norm": 0.3190951645374298, "learning_rate": 1.3302826969313554e-06, "loss": 0.2745, "step": 6575 }, { "epoch": 2.358355247430074, "grad_norm": 0.3025375306606293, "learning_rate": 1.3288655840791465e-06, "loss": 0.3098, "step": 6576 }, { "epoch": 2.3587138417403777, "grad_norm": 0.31143179535865784, "learning_rate": 1.3274491107499738e-06, "loss": 0.2872, "step": 6577 }, { "epoch": 2.359072436050681, "grad_norm": 0.30903664231300354, "learning_rate": 1.3260332771905936e-06, "loss": 0.2733, "step": 6578 }, { "epoch": 2.359431030360985, "grad_norm": 0.31079792976379395, "learning_rate": 1.3246180836476457e-06, "loss": 0.3134, "step": 6579 }, { "epoch": 2.3597896246712886, "grad_norm": 0.3134825527667999, "learning_rate": 1.323203530367662e-06, "loss": 0.3041, "step": 6580 }, { "epoch": 2.360148218981592, "grad_norm": 0.33106479048728943, "learning_rate": 1.3217896175970624e-06, "loss": 0.3142, "step": 6581 }, { "epoch": 2.3605068132918956, "grad_norm": 0.2967517077922821, "learning_rate": 1.3203763455821555e-06, "loss": 0.2885, "step": 6582 }, { "epoch": 2.3608654076021995, "grad_norm": 0.30926448106765747, "learning_rate": 1.318963714569138e-06, "loss": 0.2931, "step": 6583 }, { "epoch": 2.361224001912503, "grad_norm": 0.3081001341342926, "learning_rate": 1.3175517248040925e-06, "loss": 0.2816, "step": 6584 }, { "epoch": 2.3615825962228065, "grad_norm": 0.30808237195014954, "learning_rate": 1.316140376532995e-06, "loss": 0.3007, "step": 6585 }, { "epoch": 2.36194119053311, "grad_norm": 0.2983947992324829, "learning_rate": 1.3147296700017037e-06, "loss": 0.2922, "step": 6586 }, { "epoch": 2.362299784843414, "grad_norm": 0.3042120039463043, "learning_rate": 1.3133196054559693e-06, "loss": 0.283, "step": 6587 }, { "epoch": 2.3626583791537175, "grad_norm": 0.30058807134628296, "learning_rate": 1.3119101831414294e-06, "loss": 0.3238, "step": 6588 }, { "epoch": 2.363016973464021, "grad_norm": 0.30883902311325073, "learning_rate": 1.3105014033036102e-06, "loss": 0.2868, "step": 6589 }, { "epoch": 2.363375567774325, "grad_norm": 0.3201278746128082, "learning_rate": 1.3090932661879263e-06, "loss": 0.3056, "step": 6590 }, { "epoch": 2.3637341620846284, "grad_norm": 0.3152300715446472, "learning_rate": 1.3076857720396768e-06, "loss": 0.2613, "step": 6591 }, { "epoch": 2.364092756394932, "grad_norm": 0.3089677095413208, "learning_rate": 1.306278921104054e-06, "loss": 0.2946, "step": 6592 }, { "epoch": 2.3644513507052354, "grad_norm": 0.28475135564804077, "learning_rate": 1.3048727136261342e-06, "loss": 0.2871, "step": 6593 }, { "epoch": 2.364809945015539, "grad_norm": 0.2885894477367401, "learning_rate": 1.3034671498508845e-06, "loss": 0.2816, "step": 6594 }, { "epoch": 2.365168539325843, "grad_norm": 0.3102569878101349, "learning_rate": 1.302062230023159e-06, "loss": 0.2981, "step": 6595 }, { "epoch": 2.3655271336361463, "grad_norm": 0.3208865523338318, "learning_rate": 1.3006579543876963e-06, "loss": 0.3024, "step": 6596 }, { "epoch": 2.36588572794645, "grad_norm": 0.30617231130599976, "learning_rate": 1.299254323189128e-06, "loss": 0.3017, "step": 6597 }, { "epoch": 2.3662443222567537, "grad_norm": 0.29291462898254395, "learning_rate": 1.2978513366719696e-06, "loss": 0.262, "step": 6598 }, { "epoch": 2.3666029165670572, "grad_norm": 0.3204861581325531, "learning_rate": 1.2964489950806265e-06, "loss": 0.325, "step": 6599 }, { "epoch": 2.3669615108773607, "grad_norm": 0.3002999424934387, "learning_rate": 1.2950472986593926e-06, "loss": 0.284, "step": 6600 }, { "epoch": 2.3673201051876642, "grad_norm": 0.3084770441055298, "learning_rate": 1.2936462476524442e-06, "loss": 0.2973, "step": 6601 }, { "epoch": 2.3676786994979677, "grad_norm": 0.30841416120529175, "learning_rate": 1.2922458423038525e-06, "loss": 0.3033, "step": 6602 }, { "epoch": 2.3680372938082717, "grad_norm": 0.3236452639102936, "learning_rate": 1.2908460828575686e-06, "loss": 0.32, "step": 6603 }, { "epoch": 2.368395888118575, "grad_norm": 0.2883043885231018, "learning_rate": 1.2894469695574374e-06, "loss": 0.2584, "step": 6604 }, { "epoch": 2.3687544824288786, "grad_norm": 0.34338444471359253, "learning_rate": 1.2880485026471878e-06, "loss": 0.3124, "step": 6605 }, { "epoch": 2.3691130767391826, "grad_norm": 0.3017657697200775, "learning_rate": 1.2866506823704377e-06, "loss": 0.3013, "step": 6606 }, { "epoch": 2.369471671049486, "grad_norm": 0.3319458067417145, "learning_rate": 1.2852535089706935e-06, "loss": 0.2984, "step": 6607 }, { "epoch": 2.3698302653597896, "grad_norm": 0.31916671991348267, "learning_rate": 1.2838569826913433e-06, "loss": 0.2885, "step": 6608 }, { "epoch": 2.370188859670093, "grad_norm": 0.30518996715545654, "learning_rate": 1.2824611037756686e-06, "loss": 0.2985, "step": 6609 }, { "epoch": 2.370547453980397, "grad_norm": 0.2995670437812805, "learning_rate": 1.2810658724668356e-06, "loss": 0.3002, "step": 6610 }, { "epoch": 2.3709060482907005, "grad_norm": 0.2949444353580475, "learning_rate": 1.2796712890078977e-06, "loss": 0.2734, "step": 6611 }, { "epoch": 2.371264642601004, "grad_norm": 0.29568976163864136, "learning_rate": 1.2782773536417975e-06, "loss": 0.2711, "step": 6612 }, { "epoch": 2.3716232369113075, "grad_norm": 0.3115865886211395, "learning_rate": 1.2768840666113596e-06, "loss": 0.2917, "step": 6613 }, { "epoch": 2.3719818312216114, "grad_norm": 0.31989094614982605, "learning_rate": 1.2754914281593016e-06, "loss": 0.2944, "step": 6614 }, { "epoch": 2.372340425531915, "grad_norm": 0.3014860153198242, "learning_rate": 1.2740994385282223e-06, "loss": 0.2804, "step": 6615 }, { "epoch": 2.3726990198422184, "grad_norm": 0.31124407052993774, "learning_rate": 1.2727080979606154e-06, "loss": 0.3305, "step": 6616 }, { "epoch": 2.373057614152522, "grad_norm": 0.3007162809371948, "learning_rate": 1.2713174066988526e-06, "loss": 0.2606, "step": 6617 }, { "epoch": 2.373416208462826, "grad_norm": 0.3109586834907532, "learning_rate": 1.2699273649851978e-06, "loss": 0.303, "step": 6618 }, { "epoch": 2.3737748027731294, "grad_norm": 0.3019808828830719, "learning_rate": 1.2685379730618025e-06, "loss": 0.3037, "step": 6619 }, { "epoch": 2.374133397083433, "grad_norm": 0.3081531226634979, "learning_rate": 1.2671492311707007e-06, "loss": 0.2743, "step": 6620 }, { "epoch": 2.374491991393737, "grad_norm": 0.33175644278526306, "learning_rate": 1.2657611395538155e-06, "loss": 0.3025, "step": 6621 }, { "epoch": 2.3748505857040403, "grad_norm": 0.31763532757759094, "learning_rate": 1.2643736984529587e-06, "loss": 0.2905, "step": 6622 }, { "epoch": 2.375209180014344, "grad_norm": 0.32809144258499146, "learning_rate": 1.262986908109825e-06, "loss": 0.3173, "step": 6623 }, { "epoch": 2.3755677743246473, "grad_norm": 0.31533363461494446, "learning_rate": 1.2616007687660004e-06, "loss": 0.2851, "step": 6624 }, { "epoch": 2.3759263686349508, "grad_norm": 0.3116956353187561, "learning_rate": 1.2602152806629508e-06, "loss": 0.2781, "step": 6625 }, { "epoch": 2.3762849629452547, "grad_norm": 0.317070871591568, "learning_rate": 1.2588304440420347e-06, "loss": 0.3094, "step": 6626 }, { "epoch": 2.376643557255558, "grad_norm": 0.3089953660964966, "learning_rate": 1.257446259144494e-06, "loss": 0.2758, "step": 6627 }, { "epoch": 2.3770021515658617, "grad_norm": 0.2917500436306, "learning_rate": 1.2560627262114595e-06, "loss": 0.2726, "step": 6628 }, { "epoch": 2.3773607458761656, "grad_norm": 0.31169718503952026, "learning_rate": 1.2546798454839477e-06, "loss": 0.3105, "step": 6629 }, { "epoch": 2.377719340186469, "grad_norm": 0.31672194600105286, "learning_rate": 1.253297617202857e-06, "loss": 0.2744, "step": 6630 }, { "epoch": 2.3780779344967726, "grad_norm": 0.32167306542396545, "learning_rate": 1.2519160416089804e-06, "loss": 0.2748, "step": 6631 }, { "epoch": 2.378436528807076, "grad_norm": 0.31083330512046814, "learning_rate": 1.2505351189429883e-06, "loss": 0.2849, "step": 6632 }, { "epoch": 2.37879512311738, "grad_norm": 0.29779765009880066, "learning_rate": 1.2491548494454437e-06, "loss": 0.3171, "step": 6633 }, { "epoch": 2.3791537174276836, "grad_norm": 0.3040015399456024, "learning_rate": 1.2477752333567938e-06, "loss": 0.2918, "step": 6634 }, { "epoch": 2.379512311737987, "grad_norm": 0.32328587770462036, "learning_rate": 1.2463962709173727e-06, "loss": 0.2973, "step": 6635 }, { "epoch": 2.3798709060482905, "grad_norm": 0.32154929637908936, "learning_rate": 1.2450179623674002e-06, "loss": 0.2833, "step": 6636 }, { "epoch": 2.3802295003585945, "grad_norm": 0.31209906935691833, "learning_rate": 1.2436403079469795e-06, "loss": 0.2784, "step": 6637 }, { "epoch": 2.380588094668898, "grad_norm": 0.31729164719581604, "learning_rate": 1.2422633078961045e-06, "loss": 0.3101, "step": 6638 }, { "epoch": 2.3809466889792015, "grad_norm": 0.2997440993785858, "learning_rate": 1.2408869624546515e-06, "loss": 0.2818, "step": 6639 }, { "epoch": 2.381305283289505, "grad_norm": 0.30340591073036194, "learning_rate": 1.2395112718623853e-06, "loss": 0.2632, "step": 6640 }, { "epoch": 2.381663877599809, "grad_norm": 0.32495078444480896, "learning_rate": 1.2381362363589567e-06, "loss": 0.3223, "step": 6641 }, { "epoch": 2.3820224719101124, "grad_norm": 0.31104010343551636, "learning_rate": 1.236761856183898e-06, "loss": 0.2825, "step": 6642 }, { "epoch": 2.382381066220416, "grad_norm": 0.30840739607810974, "learning_rate": 1.235388131576633e-06, "loss": 0.3046, "step": 6643 }, { "epoch": 2.3827396605307194, "grad_norm": 0.3175647258758545, "learning_rate": 1.2340150627764657e-06, "loss": 0.2715, "step": 6644 }, { "epoch": 2.3830982548410233, "grad_norm": 0.3366505801677704, "learning_rate": 1.2326426500225935e-06, "loss": 0.3096, "step": 6645 }, { "epoch": 2.383456849151327, "grad_norm": 0.3304883539676666, "learning_rate": 1.2312708935540912e-06, "loss": 0.2812, "step": 6646 }, { "epoch": 2.3838154434616303, "grad_norm": 0.34732624888420105, "learning_rate": 1.229899793609925e-06, "loss": 0.3175, "step": 6647 }, { "epoch": 2.384174037771934, "grad_norm": 0.3095701038837433, "learning_rate": 1.2285293504289448e-06, "loss": 0.2921, "step": 6648 }, { "epoch": 2.3845326320822378, "grad_norm": 0.31866511702537537, "learning_rate": 1.2271595642498846e-06, "loss": 0.2743, "step": 6649 }, { "epoch": 2.3848912263925413, "grad_norm": 0.31781235337257385, "learning_rate": 1.2257904353113658e-06, "loss": 0.296, "step": 6650 }, { "epoch": 2.3852498207028447, "grad_norm": 0.34076541662216187, "learning_rate": 1.2244219638518962e-06, "loss": 0.3059, "step": 6651 }, { "epoch": 2.3856084150131487, "grad_norm": 0.28112709522247314, "learning_rate": 1.2230541501098664e-06, "loss": 0.2497, "step": 6652 }, { "epoch": 2.385967009323452, "grad_norm": 0.31644368171691895, "learning_rate": 1.221686994323556e-06, "loss": 0.3273, "step": 6653 }, { "epoch": 2.3863256036337557, "grad_norm": 0.32848894596099854, "learning_rate": 1.2203204967311243e-06, "loss": 0.3017, "step": 6654 }, { "epoch": 2.386684197944059, "grad_norm": 0.3112095594406128, "learning_rate": 1.2189546575706218e-06, "loss": 0.284, "step": 6655 }, { "epoch": 2.3870427922543627, "grad_norm": 0.3177432119846344, "learning_rate": 1.2175894770799811e-06, "loss": 0.2937, "step": 6656 }, { "epoch": 2.3874013865646666, "grad_norm": 0.30850160121917725, "learning_rate": 1.2162249554970219e-06, "loss": 0.2934, "step": 6657 }, { "epoch": 2.38775998087497, "grad_norm": 0.31903836131095886, "learning_rate": 1.2148610930594484e-06, "loss": 0.2923, "step": 6658 }, { "epoch": 2.3881185751852736, "grad_norm": 0.3069155812263489, "learning_rate": 1.2134978900048472e-06, "loss": 0.303, "step": 6659 }, { "epoch": 2.3884771694955775, "grad_norm": 0.3257942795753479, "learning_rate": 1.212135346570696e-06, "loss": 0.2694, "step": 6660 }, { "epoch": 2.388835763805881, "grad_norm": 0.33674561977386475, "learning_rate": 1.2107734629943485e-06, "loss": 0.32, "step": 6661 }, { "epoch": 2.3891943581161845, "grad_norm": 0.3006424307823181, "learning_rate": 1.2094122395130564e-06, "loss": 0.2552, "step": 6662 }, { "epoch": 2.389552952426488, "grad_norm": 0.30573570728302, "learning_rate": 1.2080516763639433e-06, "loss": 0.2874, "step": 6663 }, { "epoch": 2.389911546736792, "grad_norm": 0.32482126355171204, "learning_rate": 1.2066917737840255e-06, "loss": 0.323, "step": 6664 }, { "epoch": 2.3902701410470955, "grad_norm": 0.3205181360244751, "learning_rate": 1.2053325320102044e-06, "loss": 0.2717, "step": 6665 }, { "epoch": 2.390628735357399, "grad_norm": 0.3010980486869812, "learning_rate": 1.2039739512792603e-06, "loss": 0.2854, "step": 6666 }, { "epoch": 2.3909873296677024, "grad_norm": 0.3133790194988251, "learning_rate": 1.2026160318278641e-06, "loss": 0.3062, "step": 6667 }, { "epoch": 2.3913459239780064, "grad_norm": 0.3057747781276703, "learning_rate": 1.2012587738925695e-06, "loss": 0.303, "step": 6668 }, { "epoch": 2.39170451828831, "grad_norm": 0.31451115012168884, "learning_rate": 1.1999021777098152e-06, "loss": 0.2852, "step": 6669 }, { "epoch": 2.3920631125986134, "grad_norm": 0.3293355107307434, "learning_rate": 1.1985462435159256e-06, "loss": 0.2937, "step": 6670 }, { "epoch": 2.392421706908917, "grad_norm": 0.3373132050037384, "learning_rate": 1.197190971547106e-06, "loss": 0.3075, "step": 6671 }, { "epoch": 2.392780301219221, "grad_norm": 0.30240604281425476, "learning_rate": 1.1958363620394504e-06, "loss": 0.3016, "step": 6672 }, { "epoch": 2.3931388955295243, "grad_norm": 0.32295146584510803, "learning_rate": 1.1944824152289359e-06, "loss": 0.288, "step": 6673 }, { "epoch": 2.393497489839828, "grad_norm": 0.34886837005615234, "learning_rate": 1.1931291313514255e-06, "loss": 0.311, "step": 6674 }, { "epoch": 2.3938560841501313, "grad_norm": 0.3000986874103546, "learning_rate": 1.1917765106426633e-06, "loss": 0.2765, "step": 6675 }, { "epoch": 2.3942146784604352, "grad_norm": 0.3201315104961395, "learning_rate": 1.1904245533382803e-06, "loss": 0.3042, "step": 6676 }, { "epoch": 2.3945732727707387, "grad_norm": 0.3116632401943207, "learning_rate": 1.1890732596737942e-06, "loss": 0.2911, "step": 6677 }, { "epoch": 2.394931867081042, "grad_norm": 0.3231832981109619, "learning_rate": 1.1877226298846e-06, "loss": 0.307, "step": 6678 }, { "epoch": 2.3952904613913457, "grad_norm": 0.2821432650089264, "learning_rate": 1.186372664205987e-06, "loss": 0.2717, "step": 6679 }, { "epoch": 2.3956490557016497, "grad_norm": 0.30732932686805725, "learning_rate": 1.1850233628731196e-06, "loss": 0.3164, "step": 6680 }, { "epoch": 2.396007650011953, "grad_norm": 0.3206387162208557, "learning_rate": 1.1836747261210512e-06, "loss": 0.2845, "step": 6681 }, { "epoch": 2.3963662443222566, "grad_norm": 0.3178499639034271, "learning_rate": 1.1823267541847205e-06, "loss": 0.3114, "step": 6682 }, { "epoch": 2.3967248386325606, "grad_norm": 0.3299827575683594, "learning_rate": 1.180979447298945e-06, "loss": 0.3123, "step": 6683 }, { "epoch": 2.397083432942864, "grad_norm": 0.3017628490924835, "learning_rate": 1.1796328056984314e-06, "loss": 0.2742, "step": 6684 }, { "epoch": 2.3974420272531676, "grad_norm": 0.32499879598617554, "learning_rate": 1.178286829617769e-06, "loss": 0.3189, "step": 6685 }, { "epoch": 2.397800621563471, "grad_norm": 0.32794415950775146, "learning_rate": 1.1769415192914309e-06, "loss": 0.3017, "step": 6686 }, { "epoch": 2.3981592158737746, "grad_norm": 0.30408966541290283, "learning_rate": 1.1755968749537755e-06, "loss": 0.2893, "step": 6687 }, { "epoch": 2.3985178101840785, "grad_norm": 0.31008586287498474, "learning_rate": 1.1742528968390422e-06, "loss": 0.2749, "step": 6688 }, { "epoch": 2.398876404494382, "grad_norm": 0.2854694426059723, "learning_rate": 1.1729095851813578e-06, "loss": 0.273, "step": 6689 }, { "epoch": 2.3992349988046855, "grad_norm": 0.3446497917175293, "learning_rate": 1.1715669402147274e-06, "loss": 0.3292, "step": 6690 }, { "epoch": 2.3995935931149894, "grad_norm": 0.299940824508667, "learning_rate": 1.1702249621730498e-06, "loss": 0.2845, "step": 6691 }, { "epoch": 2.399952187425293, "grad_norm": 0.32224029302597046, "learning_rate": 1.1688836512900976e-06, "loss": 0.3213, "step": 6692 }, { "epoch": 2.4003107817355964, "grad_norm": 0.30776453018188477, "learning_rate": 1.1675430077995325e-06, "loss": 0.2849, "step": 6693 }, { "epoch": 2.4006693760459, "grad_norm": 0.31204453110694885, "learning_rate": 1.1662030319349004e-06, "loss": 0.2677, "step": 6694 }, { "epoch": 2.401027970356204, "grad_norm": 0.30206459760665894, "learning_rate": 1.164863723929625e-06, "loss": 0.2901, "step": 6695 }, { "epoch": 2.4013865646665074, "grad_norm": 0.3108510375022888, "learning_rate": 1.1635250840170238e-06, "loss": 0.2861, "step": 6696 }, { "epoch": 2.401745158976811, "grad_norm": 0.32633090019226074, "learning_rate": 1.1621871124302876e-06, "loss": 0.2853, "step": 6697 }, { "epoch": 2.4021037532871143, "grad_norm": 0.3243556618690491, "learning_rate": 1.1608498094024956e-06, "loss": 0.3049, "step": 6698 }, { "epoch": 2.4024623475974183, "grad_norm": 0.3024750053882599, "learning_rate": 1.1595131751666134e-06, "loss": 0.3074, "step": 6699 }, { "epoch": 2.4028209419077218, "grad_norm": 0.3050667643547058, "learning_rate": 1.1581772099554828e-06, "loss": 0.2968, "step": 6700 }, { "epoch": 2.4031795362180253, "grad_norm": 0.3068348467350006, "learning_rate": 1.1568419140018343e-06, "loss": 0.2926, "step": 6701 }, { "epoch": 2.4035381305283288, "grad_norm": 0.302282452583313, "learning_rate": 1.1555072875382817e-06, "loss": 0.2745, "step": 6702 }, { "epoch": 2.4038967248386327, "grad_norm": 0.30700817704200745, "learning_rate": 1.1541733307973218e-06, "loss": 0.2965, "step": 6703 }, { "epoch": 2.404255319148936, "grad_norm": 0.3105202615261078, "learning_rate": 1.1528400440113314e-06, "loss": 0.2771, "step": 6704 }, { "epoch": 2.4046139134592397, "grad_norm": 0.3139185905456543, "learning_rate": 1.1515074274125743e-06, "loss": 0.2581, "step": 6705 }, { "epoch": 2.4049725077695436, "grad_norm": 0.36683157086372375, "learning_rate": 1.1501754812331982e-06, "loss": 0.3355, "step": 6706 }, { "epoch": 2.405331102079847, "grad_norm": 0.3011900782585144, "learning_rate": 1.1488442057052278e-06, "loss": 0.2796, "step": 6707 }, { "epoch": 2.4056896963901506, "grad_norm": 0.3530774712562561, "learning_rate": 1.147513601060582e-06, "loss": 0.3413, "step": 6708 }, { "epoch": 2.406048290700454, "grad_norm": 0.3289220631122589, "learning_rate": 1.1461836675310506e-06, "loss": 0.3043, "step": 6709 }, { "epoch": 2.4064068850107576, "grad_norm": 0.3050328493118286, "learning_rate": 1.144854405348314e-06, "loss": 0.2615, "step": 6710 }, { "epoch": 2.4067654793210616, "grad_norm": 0.3128402829170227, "learning_rate": 1.1435258147439359e-06, "loss": 0.3031, "step": 6711 }, { "epoch": 2.407124073631365, "grad_norm": 0.3003685176372528, "learning_rate": 1.1421978959493557e-06, "loss": 0.3077, "step": 6712 }, { "epoch": 2.4074826679416685, "grad_norm": 0.2959889769554138, "learning_rate": 1.1408706491959076e-06, "loss": 0.2878, "step": 6713 }, { "epoch": 2.4078412622519725, "grad_norm": 0.3202722370624542, "learning_rate": 1.1395440747147974e-06, "loss": 0.2883, "step": 6714 }, { "epoch": 2.408199856562276, "grad_norm": 0.3018799424171448, "learning_rate": 1.13821817273712e-06, "loss": 0.289, "step": 6715 }, { "epoch": 2.4085584508725795, "grad_norm": 0.31432539224624634, "learning_rate": 1.1368929434938524e-06, "loss": 0.308, "step": 6716 }, { "epoch": 2.408917045182883, "grad_norm": 0.3137361705303192, "learning_rate": 1.1355683872158523e-06, "loss": 0.2692, "step": 6717 }, { "epoch": 2.4092756394931865, "grad_norm": 0.31163936853408813, "learning_rate": 1.1342445041338619e-06, "loss": 0.3189, "step": 6718 }, { "epoch": 2.4096342338034904, "grad_norm": 0.31359079480171204, "learning_rate": 1.1329212944785056e-06, "loss": 0.2921, "step": 6719 }, { "epoch": 2.409992828113794, "grad_norm": 0.3160858452320099, "learning_rate": 1.1315987584802923e-06, "loss": 0.2926, "step": 6720 }, { "epoch": 2.4103514224240974, "grad_norm": 0.31643691658973694, "learning_rate": 1.1302768963696092e-06, "loss": 0.3197, "step": 6721 }, { "epoch": 2.4107100167344013, "grad_norm": 0.2891283929347992, "learning_rate": 1.1289557083767306e-06, "loss": 0.2514, "step": 6722 }, { "epoch": 2.411068611044705, "grad_norm": 0.32749491930007935, "learning_rate": 1.127635194731812e-06, "loss": 0.323, "step": 6723 }, { "epoch": 2.4114272053550083, "grad_norm": 0.308747798204422, "learning_rate": 1.1263153556648876e-06, "loss": 0.2965, "step": 6724 }, { "epoch": 2.411785799665312, "grad_norm": 0.31121671199798584, "learning_rate": 1.1249961914058826e-06, "loss": 0.2907, "step": 6725 }, { "epoch": 2.4121443939756158, "grad_norm": 0.2971899211406708, "learning_rate": 1.1236777021845957e-06, "loss": 0.2646, "step": 6726 }, { "epoch": 2.4125029882859192, "grad_norm": 0.29474589228630066, "learning_rate": 1.122359888230713e-06, "loss": 0.3028, "step": 6727 }, { "epoch": 2.4128615825962227, "grad_norm": 0.28925177454948425, "learning_rate": 1.121042749773803e-06, "loss": 0.2663, "step": 6728 }, { "epoch": 2.4132201769065262, "grad_norm": 0.312207967042923, "learning_rate": 1.1197262870433124e-06, "loss": 0.291, "step": 6729 }, { "epoch": 2.41357877121683, "grad_norm": 0.3255046308040619, "learning_rate": 1.1184105002685752e-06, "loss": 0.3171, "step": 6730 }, { "epoch": 2.4139373655271337, "grad_norm": 0.31836986541748047, "learning_rate": 1.117095389678805e-06, "loss": 0.2908, "step": 6731 }, { "epoch": 2.414295959837437, "grad_norm": 0.3140513598918915, "learning_rate": 1.1157809555031001e-06, "loss": 0.2845, "step": 6732 }, { "epoch": 2.4146545541477407, "grad_norm": 0.32169532775878906, "learning_rate": 1.1144671979704357e-06, "loss": 0.2801, "step": 6733 }, { "epoch": 2.4150131484580446, "grad_norm": 0.33228009939193726, "learning_rate": 1.1131541173096739e-06, "loss": 0.3274, "step": 6734 }, { "epoch": 2.415371742768348, "grad_norm": 0.282705694437027, "learning_rate": 1.1118417137495574e-06, "loss": 0.2559, "step": 6735 }, { "epoch": 2.4157303370786516, "grad_norm": 0.3219558596611023, "learning_rate": 1.1105299875187114e-06, "loss": 0.3089, "step": 6736 }, { "epoch": 2.4160889313889555, "grad_norm": 0.30615702271461487, "learning_rate": 1.1092189388456436e-06, "loss": 0.2913, "step": 6737 }, { "epoch": 2.416447525699259, "grad_norm": 0.3006119132041931, "learning_rate": 1.1079085679587404e-06, "loss": 0.3011, "step": 6738 }, { "epoch": 2.4168061200095625, "grad_norm": 0.3152659833431244, "learning_rate": 1.1065988750862732e-06, "loss": 0.2997, "step": 6739 }, { "epoch": 2.417164714319866, "grad_norm": 0.2932075262069702, "learning_rate": 1.1052898604563967e-06, "loss": 0.2525, "step": 6740 }, { "epoch": 2.4175233086301695, "grad_norm": 0.30145636200904846, "learning_rate": 1.1039815242971407e-06, "loss": 0.2814, "step": 6741 }, { "epoch": 2.4178819029404734, "grad_norm": 0.3099713921546936, "learning_rate": 1.1026738668364274e-06, "loss": 0.2867, "step": 6742 }, { "epoch": 2.418240497250777, "grad_norm": 0.2902045249938965, "learning_rate": 1.1013668883020502e-06, "loss": 0.2806, "step": 6743 }, { "epoch": 2.4185990915610804, "grad_norm": 0.3074541389942169, "learning_rate": 1.1000605889216903e-06, "loss": 0.2937, "step": 6744 }, { "epoch": 2.4189576858713844, "grad_norm": 0.31054821610450745, "learning_rate": 1.098754968922911e-06, "loss": 0.3039, "step": 6745 }, { "epoch": 2.419316280181688, "grad_norm": 0.3127462565898895, "learning_rate": 1.0974500285331518e-06, "loss": 0.2855, "step": 6746 }, { "epoch": 2.4196748744919914, "grad_norm": 0.2769920825958252, "learning_rate": 1.0961457679797394e-06, "loss": 0.2537, "step": 6747 }, { "epoch": 2.420033468802295, "grad_norm": 0.3250322639942169, "learning_rate": 1.0948421874898795e-06, "loss": 0.3041, "step": 6748 }, { "epoch": 2.420392063112599, "grad_norm": 0.3158252239227295, "learning_rate": 1.093539287290662e-06, "loss": 0.2651, "step": 6749 }, { "epoch": 2.4207506574229023, "grad_norm": 0.31558966636657715, "learning_rate": 1.0922370676090527e-06, "loss": 0.312, "step": 6750 }, { "epoch": 2.421109251733206, "grad_norm": 0.32099610567092896, "learning_rate": 1.0909355286719042e-06, "loss": 0.3269, "step": 6751 }, { "epoch": 2.4214678460435093, "grad_norm": 0.3254868686199188, "learning_rate": 1.089634670705948e-06, "loss": 0.307, "step": 6752 }, { "epoch": 2.4218264403538132, "grad_norm": 0.3269774913787842, "learning_rate": 1.0883344939377981e-06, "loss": 0.2633, "step": 6753 }, { "epoch": 2.4221850346641167, "grad_norm": 0.3024323880672455, "learning_rate": 1.087034998593951e-06, "loss": 0.303, "step": 6754 }, { "epoch": 2.42254362897442, "grad_norm": 0.2973131537437439, "learning_rate": 1.085736184900779e-06, "loss": 0.2737, "step": 6755 }, { "epoch": 2.4229022232847237, "grad_norm": 0.33458641171455383, "learning_rate": 1.0844380530845422e-06, "loss": 0.3199, "step": 6756 }, { "epoch": 2.4232608175950276, "grad_norm": 0.30149945616722107, "learning_rate": 1.0831406033713798e-06, "loss": 0.2692, "step": 6757 }, { "epoch": 2.423619411905331, "grad_norm": 0.30840638279914856, "learning_rate": 1.0818438359873074e-06, "loss": 0.3132, "step": 6758 }, { "epoch": 2.4239780062156346, "grad_norm": 0.2983228266239166, "learning_rate": 1.0805477511582319e-06, "loss": 0.2992, "step": 6759 }, { "epoch": 2.424336600525938, "grad_norm": 0.3182274401187897, "learning_rate": 1.0792523491099305e-06, "loss": 0.2955, "step": 6760 }, { "epoch": 2.424695194836242, "grad_norm": 0.3033391833305359, "learning_rate": 1.0779576300680694e-06, "loss": 0.3064, "step": 6761 }, { "epoch": 2.4250537891465456, "grad_norm": 0.30457547307014465, "learning_rate": 1.0766635942581905e-06, "loss": 0.296, "step": 6762 }, { "epoch": 2.425412383456849, "grad_norm": 0.3269999623298645, "learning_rate": 1.0753702419057188e-06, "loss": 0.2781, "step": 6763 }, { "epoch": 2.4257709777671526, "grad_norm": 0.2783723771572113, "learning_rate": 1.074077573235962e-06, "loss": 0.2654, "step": 6764 }, { "epoch": 2.4261295720774565, "grad_norm": 0.32032278180122375, "learning_rate": 1.0727855884741057e-06, "loss": 0.2768, "step": 6765 }, { "epoch": 2.42648816638776, "grad_norm": 0.311557412147522, "learning_rate": 1.0714942878452194e-06, "loss": 0.2986, "step": 6766 }, { "epoch": 2.4268467606980635, "grad_norm": 0.3347723186016083, "learning_rate": 1.0702036715742498e-06, "loss": 0.3035, "step": 6767 }, { "epoch": 2.4272053550083674, "grad_norm": 0.3151112198829651, "learning_rate": 1.0689137398860261e-06, "loss": 0.3107, "step": 6768 }, { "epoch": 2.427563949318671, "grad_norm": 0.3135388195514679, "learning_rate": 1.0676244930052609e-06, "loss": 0.3029, "step": 6769 }, { "epoch": 2.4279225436289744, "grad_norm": 0.29491525888442993, "learning_rate": 1.0663359311565409e-06, "loss": 0.2639, "step": 6770 }, { "epoch": 2.428281137939278, "grad_norm": 0.3179062604904175, "learning_rate": 1.065048054564342e-06, "loss": 0.2979, "step": 6771 }, { "epoch": 2.4286397322495814, "grad_norm": 0.3215934932231903, "learning_rate": 1.0637608634530139e-06, "loss": 0.2862, "step": 6772 }, { "epoch": 2.4289983265598853, "grad_norm": 0.3358604609966278, "learning_rate": 1.062474358046789e-06, "loss": 0.3103, "step": 6773 }, { "epoch": 2.429356920870189, "grad_norm": 0.3076743185520172, "learning_rate": 1.0611885385697828e-06, "loss": 0.2608, "step": 6774 }, { "epoch": 2.4297155151804923, "grad_norm": 0.3136045038700104, "learning_rate": 1.059903405245985e-06, "loss": 0.3025, "step": 6775 }, { "epoch": 2.4300741094907963, "grad_norm": 0.3076859712600708, "learning_rate": 1.058618958299275e-06, "loss": 0.2897, "step": 6776 }, { "epoch": 2.4304327038010998, "grad_norm": 0.30833694338798523, "learning_rate": 1.0573351979534035e-06, "loss": 0.2799, "step": 6777 }, { "epoch": 2.4307912981114033, "grad_norm": 0.3305611312389374, "learning_rate": 1.0560521244320083e-06, "loss": 0.3046, "step": 6778 }, { "epoch": 2.4311498924217068, "grad_norm": 0.30977824330329895, "learning_rate": 1.0547697379586014e-06, "loss": 0.2655, "step": 6779 }, { "epoch": 2.4315084867320107, "grad_norm": 0.3186652958393097, "learning_rate": 1.053488038756581e-06, "loss": 0.3299, "step": 6780 }, { "epoch": 2.431867081042314, "grad_norm": 0.3115614950656891, "learning_rate": 1.0522070270492224e-06, "loss": 0.2943, "step": 6781 }, { "epoch": 2.4322256753526177, "grad_norm": 0.32358217239379883, "learning_rate": 1.0509267030596821e-06, "loss": 0.2914, "step": 6782 }, { "epoch": 2.432584269662921, "grad_norm": 0.3355426490306854, "learning_rate": 1.0496470670109977e-06, "loss": 0.3159, "step": 6783 }, { "epoch": 2.432942863973225, "grad_norm": 0.32753410935401917, "learning_rate": 1.0483681191260824e-06, "loss": 0.3075, "step": 6784 }, { "epoch": 2.4333014582835286, "grad_norm": 0.30117565393447876, "learning_rate": 1.0470898596277358e-06, "loss": 0.2966, "step": 6785 }, { "epoch": 2.433660052593832, "grad_norm": 0.3140029311180115, "learning_rate": 1.045812288738635e-06, "loss": 0.2823, "step": 6786 }, { "epoch": 2.4340186469041356, "grad_norm": 0.31755101680755615, "learning_rate": 1.044535406681333e-06, "loss": 0.3037, "step": 6787 }, { "epoch": 2.4343772412144395, "grad_norm": 0.325340211391449, "learning_rate": 1.043259213678272e-06, "loss": 0.3056, "step": 6788 }, { "epoch": 2.434735835524743, "grad_norm": 0.30154961347579956, "learning_rate": 1.041983709951765e-06, "loss": 0.2517, "step": 6789 }, { "epoch": 2.4350944298350465, "grad_norm": 0.31542113423347473, "learning_rate": 1.0407088957240108e-06, "loss": 0.2772, "step": 6790 }, { "epoch": 2.4354530241453505, "grad_norm": 0.37543442845344543, "learning_rate": 1.0394347712170837e-06, "loss": 0.3133, "step": 6791 }, { "epoch": 2.435811618455654, "grad_norm": 0.308933824300766, "learning_rate": 1.0381613366529402e-06, "loss": 0.2642, "step": 6792 }, { "epoch": 2.4361702127659575, "grad_norm": 0.31683671474456787, "learning_rate": 1.0368885922534205e-06, "loss": 0.3277, "step": 6793 }, { "epoch": 2.436528807076261, "grad_norm": 0.3209295868873596, "learning_rate": 1.0356165382402367e-06, "loss": 0.2548, "step": 6794 }, { "epoch": 2.4368874013865645, "grad_norm": 0.3156616985797882, "learning_rate": 1.0343451748349875e-06, "loss": 0.2813, "step": 6795 }, { "epoch": 2.4372459956968684, "grad_norm": 0.3068510890007019, "learning_rate": 1.0330745022591448e-06, "loss": 0.2891, "step": 6796 }, { "epoch": 2.437604590007172, "grad_norm": 0.30281126499176025, "learning_rate": 1.031804520734066e-06, "loss": 0.3117, "step": 6797 }, { "epoch": 2.4379631843174754, "grad_norm": 0.3109114170074463, "learning_rate": 1.0305352304809851e-06, "loss": 0.2869, "step": 6798 }, { "epoch": 2.4383217786277793, "grad_norm": 0.332771897315979, "learning_rate": 1.0292666317210171e-06, "loss": 0.3349, "step": 6799 }, { "epoch": 2.438680372938083, "grad_norm": 0.29579055309295654, "learning_rate": 1.0279987246751566e-06, "loss": 0.2926, "step": 6800 }, { "epoch": 2.4390389672483863, "grad_norm": 0.2880728542804718, "learning_rate": 1.0267315095642744e-06, "loss": 0.265, "step": 6801 }, { "epoch": 2.43939756155869, "grad_norm": 0.34628981351852417, "learning_rate": 1.025464986609125e-06, "loss": 0.3164, "step": 6802 }, { "epoch": 2.4397561558689933, "grad_norm": 0.3191331923007965, "learning_rate": 1.0241991560303415e-06, "loss": 0.2764, "step": 6803 }, { "epoch": 2.4401147501792972, "grad_norm": 0.2907700836658478, "learning_rate": 1.022934018048432e-06, "loss": 0.2785, "step": 6804 }, { "epoch": 2.4404733444896007, "grad_norm": 0.32063278555870056, "learning_rate": 1.0216695728837927e-06, "loss": 0.3007, "step": 6805 }, { "epoch": 2.4408319387999042, "grad_norm": 0.32650646567344666, "learning_rate": 1.0204058207566896e-06, "loss": 0.2967, "step": 6806 }, { "epoch": 2.441190533110208, "grad_norm": 0.30965057015419006, "learning_rate": 1.0191427618872752e-06, "loss": 0.2781, "step": 6807 }, { "epoch": 2.4415491274205117, "grad_norm": 0.3209812343120575, "learning_rate": 1.0178803964955758e-06, "loss": 0.3266, "step": 6808 }, { "epoch": 2.441907721730815, "grad_norm": 0.305717796087265, "learning_rate": 1.0166187248014997e-06, "loss": 0.3033, "step": 6809 }, { "epoch": 2.4422663160411187, "grad_norm": 0.3103388547897339, "learning_rate": 1.015357747024835e-06, "loss": 0.2835, "step": 6810 }, { "epoch": 2.4426249103514226, "grad_norm": 0.3039274215698242, "learning_rate": 1.014097463385248e-06, "loss": 0.2792, "step": 6811 }, { "epoch": 2.442983504661726, "grad_norm": 0.3125419318675995, "learning_rate": 1.0128378741022848e-06, "loss": 0.2789, "step": 6812 }, { "epoch": 2.4433420989720296, "grad_norm": 0.31887030601501465, "learning_rate": 1.0115789793953673e-06, "loss": 0.3534, "step": 6813 }, { "epoch": 2.443700693282333, "grad_norm": 0.28692591190338135, "learning_rate": 1.0103207794838004e-06, "loss": 0.2815, "step": 6814 }, { "epoch": 2.444059287592637, "grad_norm": 0.30167582631111145, "learning_rate": 1.0090632745867662e-06, "loss": 0.2802, "step": 6815 }, { "epoch": 2.4444178819029405, "grad_norm": 0.3327305316925049, "learning_rate": 1.0078064649233265e-06, "loss": 0.2935, "step": 6816 }, { "epoch": 2.444776476213244, "grad_norm": 0.31260719895362854, "learning_rate": 1.006550350712422e-06, "loss": 0.2871, "step": 6817 }, { "epoch": 2.4451350705235475, "grad_norm": 0.3016909658908844, "learning_rate": 1.0052949321728694e-06, "loss": 0.2732, "step": 6818 }, { "epoch": 2.4454936648338514, "grad_norm": 0.3058083653450012, "learning_rate": 1.0040402095233693e-06, "loss": 0.2862, "step": 6819 }, { "epoch": 2.445852259144155, "grad_norm": 0.28867411613464355, "learning_rate": 1.0027861829824953e-06, "loss": 0.2655, "step": 6820 }, { "epoch": 2.4462108534544584, "grad_norm": 0.31977006793022156, "learning_rate": 1.001532852768703e-06, "loss": 0.2714, "step": 6821 }, { "epoch": 2.4465694477647624, "grad_norm": 0.34362921118736267, "learning_rate": 1.0002802191003303e-06, "loss": 0.3304, "step": 6822 }, { "epoch": 2.446928042075066, "grad_norm": 0.30338793992996216, "learning_rate": 9.99028282195586e-07, "loss": 0.3036, "step": 6823 }, { "epoch": 2.4472866363853694, "grad_norm": 0.32139575481414795, "learning_rate": 9.977770422725641e-07, "loss": 0.2992, "step": 6824 }, { "epoch": 2.447645230695673, "grad_norm": 0.3298284709453583, "learning_rate": 9.965264995492318e-07, "loss": 0.2908, "step": 6825 }, { "epoch": 2.4480038250059764, "grad_norm": 0.3076830506324768, "learning_rate": 9.952766542434388e-07, "loss": 0.2611, "step": 6826 }, { "epoch": 2.4483624193162803, "grad_norm": 0.3286014497280121, "learning_rate": 9.940275065729117e-07, "loss": 0.3198, "step": 6827 }, { "epoch": 2.448721013626584, "grad_norm": 0.30758169293403625, "learning_rate": 9.927790567552564e-07, "loss": 0.2671, "step": 6828 }, { "epoch": 2.4490796079368873, "grad_norm": 0.3072111904621124, "learning_rate": 9.915313050079578e-07, "loss": 0.2841, "step": 6829 }, { "epoch": 2.449438202247191, "grad_norm": 0.30966028571128845, "learning_rate": 9.902842515483763e-07, "loss": 0.2819, "step": 6830 }, { "epoch": 2.4497967965574947, "grad_norm": 0.3301747441291809, "learning_rate": 9.890378965937525e-07, "loss": 0.3174, "step": 6831 }, { "epoch": 2.450155390867798, "grad_norm": 0.33581581711769104, "learning_rate": 9.877922403612057e-07, "loss": 0.3077, "step": 6832 }, { "epoch": 2.4505139851781017, "grad_norm": 0.2959488034248352, "learning_rate": 9.865472830677335e-07, "loss": 0.2727, "step": 6833 }, { "epoch": 2.4508725794884056, "grad_norm": 0.32128289341926575, "learning_rate": 9.853030249302125e-07, "loss": 0.3073, "step": 6834 }, { "epoch": 2.451231173798709, "grad_norm": 0.3108704388141632, "learning_rate": 9.840594661653941e-07, "loss": 0.2825, "step": 6835 }, { "epoch": 2.4515897681090126, "grad_norm": 0.2956339418888092, "learning_rate": 9.82816606989912e-07, "loss": 0.2801, "step": 6836 }, { "epoch": 2.451948362419316, "grad_norm": 0.3304259777069092, "learning_rate": 9.815744476202738e-07, "loss": 0.3284, "step": 6837 }, { "epoch": 2.45230695672962, "grad_norm": 0.3391055464744568, "learning_rate": 9.80332988272869e-07, "loss": 0.3062, "step": 6838 }, { "epoch": 2.4526655510399236, "grad_norm": 0.2806563377380371, "learning_rate": 9.79092229163963e-07, "loss": 0.2645, "step": 6839 }, { "epoch": 2.453024145350227, "grad_norm": 0.3167169690132141, "learning_rate": 9.778521705097011e-07, "loss": 0.2855, "step": 6840 }, { "epoch": 2.4533827396605306, "grad_norm": 0.31138861179351807, "learning_rate": 9.76612812526106e-07, "loss": 0.3138, "step": 6841 }, { "epoch": 2.4537413339708345, "grad_norm": 0.2890835702419281, "learning_rate": 9.753741554290747e-07, "loss": 0.2548, "step": 6842 }, { "epoch": 2.454099928281138, "grad_norm": 0.3158906102180481, "learning_rate": 9.741361994343867e-07, "loss": 0.3218, "step": 6843 }, { "epoch": 2.4544585225914415, "grad_norm": 0.30571308732032776, "learning_rate": 9.728989447576987e-07, "loss": 0.3012, "step": 6844 }, { "epoch": 2.454817116901745, "grad_norm": 0.29147210717201233, "learning_rate": 9.71662391614543e-07, "loss": 0.3128, "step": 6845 }, { "epoch": 2.455175711212049, "grad_norm": 0.3084321618080139, "learning_rate": 9.704265402203327e-07, "loss": 0.2852, "step": 6846 }, { "epoch": 2.4555343055223524, "grad_norm": 0.3103647232055664, "learning_rate": 9.691913907903549e-07, "loss": 0.2868, "step": 6847 }, { "epoch": 2.455892899832656, "grad_norm": 0.310335636138916, "learning_rate": 9.679569435397784e-07, "loss": 0.2733, "step": 6848 }, { "epoch": 2.4562514941429594, "grad_norm": 0.3134292960166931, "learning_rate": 9.667231986836446e-07, "loss": 0.3097, "step": 6849 }, { "epoch": 2.4566100884532633, "grad_norm": 0.3033094108104706, "learning_rate": 9.6549015643688e-07, "loss": 0.2786, "step": 6850 }, { "epoch": 2.456968682763567, "grad_norm": 0.3136123716831207, "learning_rate": 9.642578170142814e-07, "loss": 0.2874, "step": 6851 }, { "epoch": 2.4573272770738703, "grad_norm": 0.32028961181640625, "learning_rate": 9.630261806305263e-07, "loss": 0.287, "step": 6852 }, { "epoch": 2.4576858713841743, "grad_norm": 0.3267771601676941, "learning_rate": 9.617952475001718e-07, "loss": 0.3172, "step": 6853 }, { "epoch": 2.4580444656944778, "grad_norm": 0.3142220377922058, "learning_rate": 9.60565017837648e-07, "loss": 0.3098, "step": 6854 }, { "epoch": 2.4584030600047813, "grad_norm": 0.31796151399612427, "learning_rate": 9.59335491857265e-07, "loss": 0.2984, "step": 6855 }, { "epoch": 2.4587616543150848, "grad_norm": 0.3355769217014313, "learning_rate": 9.581066697732106e-07, "loss": 0.306, "step": 6856 }, { "epoch": 2.4591202486253882, "grad_norm": 0.322704941034317, "learning_rate": 9.568785517995495e-07, "loss": 0.2908, "step": 6857 }, { "epoch": 2.459478842935692, "grad_norm": 0.304050475358963, "learning_rate": 9.556511381502243e-07, "loss": 0.2887, "step": 6858 }, { "epoch": 2.4598374372459957, "grad_norm": 0.2939458191394806, "learning_rate": 9.544244290390526e-07, "loss": 0.3032, "step": 6859 }, { "epoch": 2.460196031556299, "grad_norm": 0.3080550730228424, "learning_rate": 9.531984246797321e-07, "loss": 0.2869, "step": 6860 }, { "epoch": 2.460554625866603, "grad_norm": 0.32717299461364746, "learning_rate": 9.519731252858361e-07, "loss": 0.2933, "step": 6861 }, { "epoch": 2.4609132201769066, "grad_norm": 0.3097947835922241, "learning_rate": 9.507485310708159e-07, "loss": 0.2832, "step": 6862 }, { "epoch": 2.46127181448721, "grad_norm": 0.325737327337265, "learning_rate": 9.495246422480009e-07, "loss": 0.3236, "step": 6863 }, { "epoch": 2.4616304087975136, "grad_norm": 0.29199928045272827, "learning_rate": 9.483014590305933e-07, "loss": 0.2731, "step": 6864 }, { "epoch": 2.4619890031078175, "grad_norm": 0.2954469323158264, "learning_rate": 9.470789816316783e-07, "loss": 0.3147, "step": 6865 }, { "epoch": 2.462347597418121, "grad_norm": 0.27476274967193604, "learning_rate": 9.458572102642133e-07, "loss": 0.2547, "step": 6866 }, { "epoch": 2.4627061917284245, "grad_norm": 0.304234117269516, "learning_rate": 9.446361451410352e-07, "loss": 0.2889, "step": 6867 }, { "epoch": 2.463064786038728, "grad_norm": 0.31114432215690613, "learning_rate": 9.434157864748578e-07, "loss": 0.2989, "step": 6868 }, { "epoch": 2.463423380349032, "grad_norm": 0.2984673082828522, "learning_rate": 9.421961344782709e-07, "loss": 0.2931, "step": 6869 }, { "epoch": 2.4637819746593355, "grad_norm": 0.3417685329914093, "learning_rate": 9.409771893637432e-07, "loss": 0.3021, "step": 6870 }, { "epoch": 2.464140568969639, "grad_norm": 0.30148592591285706, "learning_rate": 9.397589513436161e-07, "loss": 0.2912, "step": 6871 }, { "epoch": 2.4644991632799425, "grad_norm": 0.30267250537872314, "learning_rate": 9.385414206301119e-07, "loss": 0.2828, "step": 6872 }, { "epoch": 2.4648577575902464, "grad_norm": 0.28535574674606323, "learning_rate": 9.373245974353284e-07, "loss": 0.2797, "step": 6873 }, { "epoch": 2.46521635190055, "grad_norm": 0.2989966869354248, "learning_rate": 9.361084819712391e-07, "loss": 0.2827, "step": 6874 }, { "epoch": 2.4655749462108534, "grad_norm": 0.3093228042125702, "learning_rate": 9.348930744496975e-07, "loss": 0.3056, "step": 6875 }, { "epoch": 2.465933540521157, "grad_norm": 0.2993360161781311, "learning_rate": 9.33678375082428e-07, "loss": 0.2766, "step": 6876 }, { "epoch": 2.466292134831461, "grad_norm": 0.335212767124176, "learning_rate": 9.324643840810382e-07, "loss": 0.2909, "step": 6877 }, { "epoch": 2.4666507291417643, "grad_norm": 0.29288604855537415, "learning_rate": 9.312511016570048e-07, "loss": 0.2644, "step": 6878 }, { "epoch": 2.467009323452068, "grad_norm": 0.3034406304359436, "learning_rate": 9.300385280216911e-07, "loss": 0.2879, "step": 6879 }, { "epoch": 2.4673679177623713, "grad_norm": 0.3078252673149109, "learning_rate": 9.288266633863263e-07, "loss": 0.2969, "step": 6880 }, { "epoch": 2.4677265120726752, "grad_norm": 0.33252617716789246, "learning_rate": 9.27615507962023e-07, "loss": 0.3258, "step": 6881 }, { "epoch": 2.4680851063829787, "grad_norm": 0.29329171776771545, "learning_rate": 9.264050619597697e-07, "loss": 0.2639, "step": 6882 }, { "epoch": 2.4684437006932822, "grad_norm": 0.30469831824302673, "learning_rate": 9.251953255904267e-07, "loss": 0.2971, "step": 6883 }, { "epoch": 2.468802295003586, "grad_norm": 0.32218077778816223, "learning_rate": 9.239862990647358e-07, "loss": 0.312, "step": 6884 }, { "epoch": 2.4691608893138897, "grad_norm": 0.30721408128738403, "learning_rate": 9.227779825933125e-07, "loss": 0.2953, "step": 6885 }, { "epoch": 2.469519483624193, "grad_norm": 0.28270724415779114, "learning_rate": 9.215703763866496e-07, "loss": 0.2527, "step": 6886 }, { "epoch": 2.4698780779344967, "grad_norm": 0.3040730059146881, "learning_rate": 9.203634806551171e-07, "loss": 0.3205, "step": 6887 }, { "epoch": 2.4702366722448, "grad_norm": 0.3035781979560852, "learning_rate": 9.191572956089573e-07, "loss": 0.3069, "step": 6888 }, { "epoch": 2.470595266555104, "grad_norm": 0.3216620683670044, "learning_rate": 9.179518214582933e-07, "loss": 0.2966, "step": 6889 }, { "epoch": 2.4709538608654076, "grad_norm": 0.333138108253479, "learning_rate": 9.167470584131217e-07, "loss": 0.2894, "step": 6890 }, { "epoch": 2.471312455175711, "grad_norm": 0.3222096562385559, "learning_rate": 9.15543006683316e-07, "loss": 0.3033, "step": 6891 }, { "epoch": 2.471671049486015, "grad_norm": 0.3071329593658447, "learning_rate": 9.143396664786275e-07, "loss": 0.2795, "step": 6892 }, { "epoch": 2.4720296437963185, "grad_norm": 0.33659324049949646, "learning_rate": 9.13137038008679e-07, "loss": 0.2901, "step": 6893 }, { "epoch": 2.472388238106622, "grad_norm": 0.30026975274086, "learning_rate": 9.119351214829747e-07, "loss": 0.2907, "step": 6894 }, { "epoch": 2.4727468324169255, "grad_norm": 0.3292473256587982, "learning_rate": 9.107339171108887e-07, "loss": 0.2894, "step": 6895 }, { "epoch": 2.4731054267272294, "grad_norm": 0.3402252495288849, "learning_rate": 9.095334251016791e-07, "loss": 0.2901, "step": 6896 }, { "epoch": 2.473464021037533, "grad_norm": 0.31223422288894653, "learning_rate": 9.083336456644721e-07, "loss": 0.2898, "step": 6897 }, { "epoch": 2.4738226153478364, "grad_norm": 0.32671505212783813, "learning_rate": 9.071345790082731e-07, "loss": 0.2898, "step": 6898 }, { "epoch": 2.47418120965814, "grad_norm": 0.3143922984600067, "learning_rate": 9.059362253419657e-07, "loss": 0.2918, "step": 6899 }, { "epoch": 2.474539803968444, "grad_norm": 0.32286614179611206, "learning_rate": 9.04738584874304e-07, "loss": 0.3129, "step": 6900 }, { "epoch": 2.4748983982787474, "grad_norm": 0.3198910653591156, "learning_rate": 9.035416578139222e-07, "loss": 0.2773, "step": 6901 }, { "epoch": 2.475256992589051, "grad_norm": 0.29522180557250977, "learning_rate": 9.02345444369328e-07, "loss": 0.2722, "step": 6902 }, { "epoch": 2.4756155868993543, "grad_norm": 0.3037909269332886, "learning_rate": 9.011499447489064e-07, "loss": 0.3197, "step": 6903 }, { "epoch": 2.4759741812096583, "grad_norm": 0.3152028024196625, "learning_rate": 8.999551591609173e-07, "loss": 0.2822, "step": 6904 }, { "epoch": 2.476332775519962, "grad_norm": 0.3099863529205322, "learning_rate": 8.987610878134944e-07, "loss": 0.271, "step": 6905 }, { "epoch": 2.4766913698302653, "grad_norm": 0.3231039047241211, "learning_rate": 8.975677309146502e-07, "loss": 0.3392, "step": 6906 }, { "epoch": 2.477049964140569, "grad_norm": 0.31139707565307617, "learning_rate": 8.9637508867227e-07, "loss": 0.2886, "step": 6907 }, { "epoch": 2.4774085584508727, "grad_norm": 0.32002413272857666, "learning_rate": 8.95183161294118e-07, "loss": 0.2666, "step": 6908 }, { "epoch": 2.477767152761176, "grad_norm": 0.31765279173851013, "learning_rate": 8.939919489878285e-07, "loss": 0.2878, "step": 6909 }, { "epoch": 2.4781257470714797, "grad_norm": 0.3006434738636017, "learning_rate": 8.928014519609162e-07, "loss": 0.2591, "step": 6910 }, { "epoch": 2.478484341381783, "grad_norm": 0.3153288960456848, "learning_rate": 8.916116704207711e-07, "loss": 0.2973, "step": 6911 }, { "epoch": 2.478842935692087, "grad_norm": 0.30644047260284424, "learning_rate": 8.904226045746516e-07, "loss": 0.3165, "step": 6912 }, { "epoch": 2.4792015300023906, "grad_norm": 0.3061663508415222, "learning_rate": 8.892342546297028e-07, "loss": 0.2692, "step": 6913 }, { "epoch": 2.479560124312694, "grad_norm": 0.3119388520717621, "learning_rate": 8.88046620792935e-07, "loss": 0.3009, "step": 6914 }, { "epoch": 2.479918718622998, "grad_norm": 0.306680291891098, "learning_rate": 8.86859703271239e-07, "loss": 0.2742, "step": 6915 }, { "epoch": 2.4802773129333016, "grad_norm": 0.3165726959705353, "learning_rate": 8.856735022713803e-07, "loss": 0.2825, "step": 6916 }, { "epoch": 2.480635907243605, "grad_norm": 0.3396311104297638, "learning_rate": 8.844880179999965e-07, "loss": 0.3208, "step": 6917 }, { "epoch": 2.4809945015539085, "grad_norm": 0.2986660301685333, "learning_rate": 8.833032506636047e-07, "loss": 0.3083, "step": 6918 }, { "epoch": 2.481353095864212, "grad_norm": 0.2879549264907837, "learning_rate": 8.821192004685942e-07, "loss": 0.2763, "step": 6919 }, { "epoch": 2.481711690174516, "grad_norm": 0.3122788369655609, "learning_rate": 8.809358676212298e-07, "loss": 0.29, "step": 6920 }, { "epoch": 2.4820702844848195, "grad_norm": 0.2977747917175293, "learning_rate": 8.797532523276542e-07, "loss": 0.2901, "step": 6921 }, { "epoch": 2.482428878795123, "grad_norm": 0.3024349808692932, "learning_rate": 8.785713547938795e-07, "loss": 0.3155, "step": 6922 }, { "epoch": 2.482787473105427, "grad_norm": 0.3221292495727539, "learning_rate": 8.773901752257985e-07, "loss": 0.3098, "step": 6923 }, { "epoch": 2.4831460674157304, "grad_norm": 0.29430559277534485, "learning_rate": 8.762097138291731e-07, "loss": 0.2777, "step": 6924 }, { "epoch": 2.483504661726034, "grad_norm": 0.2826709747314453, "learning_rate": 8.750299708096471e-07, "loss": 0.2752, "step": 6925 }, { "epoch": 2.4838632560363374, "grad_norm": 0.3111109733581543, "learning_rate": 8.73850946372733e-07, "loss": 0.3098, "step": 6926 }, { "epoch": 2.4842218503466413, "grad_norm": 0.31146126985549927, "learning_rate": 8.726726407238212e-07, "loss": 0.2898, "step": 6927 }, { "epoch": 2.484580444656945, "grad_norm": 0.30040475726127625, "learning_rate": 8.714950540681777e-07, "loss": 0.2923, "step": 6928 }, { "epoch": 2.4849390389672483, "grad_norm": 0.33938634395599365, "learning_rate": 8.703181866109373e-07, "loss": 0.3254, "step": 6929 }, { "epoch": 2.485297633277552, "grad_norm": 0.30801230669021606, "learning_rate": 8.691420385571197e-07, "loss": 0.2701, "step": 6930 }, { "epoch": 2.4856562275878558, "grad_norm": 0.2910146117210388, "learning_rate": 8.679666101116091e-07, "loss": 0.2518, "step": 6931 }, { "epoch": 2.4860148218981593, "grad_norm": 0.3333219587802887, "learning_rate": 8.667919014791709e-07, "loss": 0.2879, "step": 6932 }, { "epoch": 2.4863734162084627, "grad_norm": 0.30850306153297424, "learning_rate": 8.656179128644443e-07, "loss": 0.2714, "step": 6933 }, { "epoch": 2.4867320105187662, "grad_norm": 0.32976529002189636, "learning_rate": 8.644446444719385e-07, "loss": 0.3011, "step": 6934 }, { "epoch": 2.48709060482907, "grad_norm": 0.2842040956020355, "learning_rate": 8.632720965060415e-07, "loss": 0.2585, "step": 6935 }, { "epoch": 2.4874491991393737, "grad_norm": 0.3014993667602539, "learning_rate": 8.621002691710162e-07, "loss": 0.2928, "step": 6936 }, { "epoch": 2.487807793449677, "grad_norm": 0.31211021542549133, "learning_rate": 8.609291626709981e-07, "loss": 0.2769, "step": 6937 }, { "epoch": 2.488166387759981, "grad_norm": 0.28465819358825684, "learning_rate": 8.597587772099969e-07, "loss": 0.2901, "step": 6938 }, { "epoch": 2.4885249820702846, "grad_norm": 0.30865976214408875, "learning_rate": 8.585891129918972e-07, "loss": 0.3236, "step": 6939 }, { "epoch": 2.488883576380588, "grad_norm": 0.30108126997947693, "learning_rate": 8.574201702204604e-07, "loss": 0.2942, "step": 6940 }, { "epoch": 2.4892421706908916, "grad_norm": 0.2897690534591675, "learning_rate": 8.562519490993154e-07, "loss": 0.2647, "step": 6941 }, { "epoch": 2.489600765001195, "grad_norm": 0.3037702441215515, "learning_rate": 8.550844498319755e-07, "loss": 0.2865, "step": 6942 }, { "epoch": 2.489959359311499, "grad_norm": 0.30415597558021545, "learning_rate": 8.539176726218185e-07, "loss": 0.2947, "step": 6943 }, { "epoch": 2.4903179536218025, "grad_norm": 0.3069254457950592, "learning_rate": 8.527516176721017e-07, "loss": 0.3119, "step": 6944 }, { "epoch": 2.490676547932106, "grad_norm": 0.3072342574596405, "learning_rate": 8.51586285185958e-07, "loss": 0.2859, "step": 6945 }, { "epoch": 2.49103514224241, "grad_norm": 0.30769458413124084, "learning_rate": 8.504216753663874e-07, "loss": 0.3102, "step": 6946 }, { "epoch": 2.4913937365527135, "grad_norm": 0.29612085223197937, "learning_rate": 8.492577884162728e-07, "loss": 0.2822, "step": 6947 }, { "epoch": 2.491752330863017, "grad_norm": 0.33087027072906494, "learning_rate": 8.480946245383643e-07, "loss": 0.2992, "step": 6948 }, { "epoch": 2.4921109251733204, "grad_norm": 0.3124939203262329, "learning_rate": 8.469321839352895e-07, "loss": 0.2944, "step": 6949 }, { "epoch": 2.4924695194836244, "grad_norm": 0.3358946144580841, "learning_rate": 8.457704668095507e-07, "loss": 0.2811, "step": 6950 }, { "epoch": 2.492828113793928, "grad_norm": 0.3149135112762451, "learning_rate": 8.446094733635196e-07, "loss": 0.2942, "step": 6951 }, { "epoch": 2.4931867081042314, "grad_norm": 0.30393317341804504, "learning_rate": 8.434492037994462e-07, "loss": 0.3002, "step": 6952 }, { "epoch": 2.493545302414535, "grad_norm": 0.31979891657829285, "learning_rate": 8.42289658319454e-07, "loss": 0.318, "step": 6953 }, { "epoch": 2.493903896724839, "grad_norm": 0.29645076394081116, "learning_rate": 8.411308371255389e-07, "loss": 0.2886, "step": 6954 }, { "epoch": 2.4942624910351423, "grad_norm": 0.29979103803634644, "learning_rate": 8.399727404195707e-07, "loss": 0.2984, "step": 6955 }, { "epoch": 2.494621085345446, "grad_norm": 0.3179466426372528, "learning_rate": 8.388153684032929e-07, "loss": 0.2802, "step": 6956 }, { "epoch": 2.4949796796557493, "grad_norm": 0.3322107791900635, "learning_rate": 8.37658721278326e-07, "loss": 0.3124, "step": 6957 }, { "epoch": 2.4953382739660532, "grad_norm": 0.3325168490409851, "learning_rate": 8.365027992461566e-07, "loss": 0.2981, "step": 6958 }, { "epoch": 2.4956968682763567, "grad_norm": 0.3173171579837799, "learning_rate": 8.353476025081559e-07, "loss": 0.2799, "step": 6959 }, { "epoch": 2.4960554625866602, "grad_norm": 0.29800596833229065, "learning_rate": 8.341931312655582e-07, "loss": 0.2685, "step": 6960 }, { "epoch": 2.4964140568969637, "grad_norm": 0.2959648668766022, "learning_rate": 8.33039385719478e-07, "loss": 0.2888, "step": 6961 }, { "epoch": 2.4967726512072677, "grad_norm": 0.31712573766708374, "learning_rate": 8.31886366070902e-07, "loss": 0.3205, "step": 6962 }, { "epoch": 2.497131245517571, "grad_norm": 0.29602062702178955, "learning_rate": 8.307340725206875e-07, "loss": 0.2722, "step": 6963 }, { "epoch": 2.4974898398278746, "grad_norm": 0.30850329995155334, "learning_rate": 8.295825052695689e-07, "loss": 0.2975, "step": 6964 }, { "epoch": 2.497848434138178, "grad_norm": 0.30844739079475403, "learning_rate": 8.284316645181523e-07, "loss": 0.3013, "step": 6965 }, { "epoch": 2.498207028448482, "grad_norm": 0.3050486147403717, "learning_rate": 8.272815504669201e-07, "loss": 0.3001, "step": 6966 }, { "epoch": 2.4985656227587856, "grad_norm": 0.30882441997528076, "learning_rate": 8.261321633162223e-07, "loss": 0.3129, "step": 6967 }, { "epoch": 2.498924217069089, "grad_norm": 0.3036520779132843, "learning_rate": 8.249835032662878e-07, "loss": 0.2739, "step": 6968 }, { "epoch": 2.499282811379393, "grad_norm": 0.33983638882637024, "learning_rate": 8.238355705172157e-07, "loss": 0.3016, "step": 6969 }, { "epoch": 2.4996414056896965, "grad_norm": 0.3029194176197052, "learning_rate": 8.226883652689804e-07, "loss": 0.2757, "step": 6970 }, { "epoch": 2.5, "grad_norm": 0.3234866261482239, "learning_rate": 8.215418877214293e-07, "loss": 0.2896, "step": 6971 }, { "epoch": 2.5003585943103035, "grad_norm": 0.31733453273773193, "learning_rate": 8.203961380742798e-07, "loss": 0.2809, "step": 6972 }, { "epoch": 2.500717188620607, "grad_norm": 0.30729958415031433, "learning_rate": 8.192511165271267e-07, "loss": 0.2821, "step": 6973 }, { "epoch": 2.501075782930911, "grad_norm": 0.28610342741012573, "learning_rate": 8.181068232794376e-07, "loss": 0.2848, "step": 6974 }, { "epoch": 2.5014343772412144, "grad_norm": 0.30315378308296204, "learning_rate": 8.169632585305482e-07, "loss": 0.2953, "step": 6975 }, { "epoch": 2.501792971551518, "grad_norm": 0.31356674432754517, "learning_rate": 8.15820422479675e-07, "loss": 0.2941, "step": 6976 }, { "epoch": 2.502151565861822, "grad_norm": 0.311095654964447, "learning_rate": 8.146783153259013e-07, "loss": 0.3006, "step": 6977 }, { "epoch": 2.5025101601721254, "grad_norm": 0.3479764461517334, "learning_rate": 8.135369372681862e-07, "loss": 0.313, "step": 6978 }, { "epoch": 2.502868754482429, "grad_norm": 0.30634576082229614, "learning_rate": 8.123962885053627e-07, "loss": 0.2735, "step": 6979 }, { "epoch": 2.5032273487927323, "grad_norm": 0.33980125188827515, "learning_rate": 8.112563692361331e-07, "loss": 0.3029, "step": 6980 }, { "epoch": 2.503585943103036, "grad_norm": 0.28604763746261597, "learning_rate": 8.101171796590756e-07, "loss": 0.2815, "step": 6981 }, { "epoch": 2.5039445374133398, "grad_norm": 0.3261112570762634, "learning_rate": 8.089787199726406e-07, "loss": 0.3105, "step": 6982 }, { "epoch": 2.5043031317236433, "grad_norm": 0.3138201832771301, "learning_rate": 8.078409903751533e-07, "loss": 0.2927, "step": 6983 }, { "epoch": 2.5046617260339468, "grad_norm": 0.30225300788879395, "learning_rate": 8.067039910648067e-07, "loss": 0.2851, "step": 6984 }, { "epoch": 2.5050203203442507, "grad_norm": 0.29600414633750916, "learning_rate": 8.055677222396712e-07, "loss": 0.2833, "step": 6985 }, { "epoch": 2.505378914654554, "grad_norm": 0.30365070700645447, "learning_rate": 8.044321840976876e-07, "loss": 0.3063, "step": 6986 }, { "epoch": 2.5057375089648577, "grad_norm": 0.3558433949947357, "learning_rate": 8.032973768366709e-07, "loss": 0.3366, "step": 6987 }, { "epoch": 2.506096103275161, "grad_norm": 0.29787734150886536, "learning_rate": 8.021633006543094e-07, "loss": 0.2688, "step": 6988 }, { "epoch": 2.506454697585465, "grad_norm": 0.3427428603172302, "learning_rate": 8.010299557481594e-07, "loss": 0.2992, "step": 6989 }, { "epoch": 2.5068132918957686, "grad_norm": 0.29398608207702637, "learning_rate": 7.998973423156554e-07, "loss": 0.2928, "step": 6990 }, { "epoch": 2.507171886206072, "grad_norm": 0.30682572722435, "learning_rate": 7.987654605541023e-07, "loss": 0.2592, "step": 6991 }, { "epoch": 2.507530480516376, "grad_norm": 0.30148229002952576, "learning_rate": 7.976343106606743e-07, "loss": 0.3054, "step": 6992 }, { "epoch": 2.5078890748266796, "grad_norm": 0.3122480511665344, "learning_rate": 7.965038928324264e-07, "loss": 0.308, "step": 6993 }, { "epoch": 2.508247669136983, "grad_norm": 0.32572582364082336, "learning_rate": 7.953742072662767e-07, "loss": 0.2888, "step": 6994 }, { "epoch": 2.5086062634472865, "grad_norm": 0.28499335050582886, "learning_rate": 7.942452541590234e-07, "loss": 0.2593, "step": 6995 }, { "epoch": 2.50896485775759, "grad_norm": 0.3058343231678009, "learning_rate": 7.931170337073296e-07, "loss": 0.3104, "step": 6996 }, { "epoch": 2.509323452067894, "grad_norm": 0.32743221521377563, "learning_rate": 7.919895461077371e-07, "loss": 0.3315, "step": 6997 }, { "epoch": 2.5096820463781975, "grad_norm": 0.3248104751110077, "learning_rate": 7.90862791556658e-07, "loss": 0.2715, "step": 6998 }, { "epoch": 2.510040640688501, "grad_norm": 0.28802117705345154, "learning_rate": 7.897367702503755e-07, "loss": 0.2559, "step": 6999 }, { "epoch": 2.510399234998805, "grad_norm": 0.3055715262889862, "learning_rate": 7.886114823850477e-07, "loss": 0.3048, "step": 7000 }, { "epoch": 2.5107578293091084, "grad_norm": 0.3246372938156128, "learning_rate": 7.87486928156701e-07, "loss": 0.2682, "step": 7001 }, { "epoch": 2.511116423619412, "grad_norm": 0.3049505650997162, "learning_rate": 7.863631077612372e-07, "loss": 0.3019, "step": 7002 }, { "epoch": 2.5114750179297154, "grad_norm": 0.2910998463630676, "learning_rate": 7.852400213944295e-07, "loss": 0.2837, "step": 7003 }, { "epoch": 2.511833612240019, "grad_norm": 0.3019599914550781, "learning_rate": 7.841176692519231e-07, "loss": 0.2933, "step": 7004 }, { "epoch": 2.512192206550323, "grad_norm": 0.32131093740463257, "learning_rate": 7.829960515292362e-07, "loss": 0.3191, "step": 7005 }, { "epoch": 2.5125508008606263, "grad_norm": 0.3314855694770813, "learning_rate": 7.818751684217552e-07, "loss": 0.2899, "step": 7006 }, { "epoch": 2.51290939517093, "grad_norm": 0.30334803462028503, "learning_rate": 7.807550201247438e-07, "loss": 0.2773, "step": 7007 }, { "epoch": 2.5132679894812338, "grad_norm": 0.31186407804489136, "learning_rate": 7.79635606833336e-07, "loss": 0.3128, "step": 7008 }, { "epoch": 2.5136265837915373, "grad_norm": 0.3071598410606384, "learning_rate": 7.785169287425332e-07, "loss": 0.2659, "step": 7009 }, { "epoch": 2.5139851781018407, "grad_norm": 0.30032116174697876, "learning_rate": 7.773989860472175e-07, "loss": 0.3029, "step": 7010 }, { "epoch": 2.5143437724121442, "grad_norm": 0.3053583800792694, "learning_rate": 7.762817789421345e-07, "loss": 0.2943, "step": 7011 }, { "epoch": 2.5147023667224477, "grad_norm": 0.31750890612602234, "learning_rate": 7.751653076219073e-07, "loss": 0.3034, "step": 7012 }, { "epoch": 2.5150609610327517, "grad_norm": 0.30687445402145386, "learning_rate": 7.740495722810271e-07, "loss": 0.2805, "step": 7013 }, { "epoch": 2.515419555343055, "grad_norm": 0.31280776858329773, "learning_rate": 7.729345731138588e-07, "loss": 0.2929, "step": 7014 }, { "epoch": 2.515778149653359, "grad_norm": 0.2957322299480438, "learning_rate": 7.718203103146388e-07, "loss": 0.2927, "step": 7015 }, { "epoch": 2.5161367439636626, "grad_norm": 0.30092790722846985, "learning_rate": 7.707067840774757e-07, "loss": 0.3191, "step": 7016 }, { "epoch": 2.516495338273966, "grad_norm": 0.30272433161735535, "learning_rate": 7.695939945963499e-07, "loss": 0.292, "step": 7017 }, { "epoch": 2.5168539325842696, "grad_norm": 0.30845361948013306, "learning_rate": 7.684819420651107e-07, "loss": 0.3099, "step": 7018 }, { "epoch": 2.517212526894573, "grad_norm": 0.28051871061325073, "learning_rate": 7.673706266774822e-07, "loss": 0.2621, "step": 7019 }, { "epoch": 2.517571121204877, "grad_norm": 0.31803271174430847, "learning_rate": 7.6626004862706e-07, "loss": 0.2951, "step": 7020 }, { "epoch": 2.5179297155151805, "grad_norm": 0.2967832088470459, "learning_rate": 7.65150208107307e-07, "loss": 0.3182, "step": 7021 }, { "epoch": 2.518288309825484, "grad_norm": 0.29032814502716064, "learning_rate": 7.640411053115654e-07, "loss": 0.3007, "step": 7022 }, { "epoch": 2.518646904135788, "grad_norm": 0.2915706932544708, "learning_rate": 7.629327404330412e-07, "loss": 0.2777, "step": 7023 }, { "epoch": 2.5190054984460915, "grad_norm": 0.31238746643066406, "learning_rate": 7.618251136648164e-07, "loss": 0.2994, "step": 7024 }, { "epoch": 2.519364092756395, "grad_norm": 0.2991020679473877, "learning_rate": 7.607182251998419e-07, "loss": 0.2894, "step": 7025 }, { "epoch": 2.5197226870666984, "grad_norm": 0.29550692439079285, "learning_rate": 7.596120752309405e-07, "loss": 0.2798, "step": 7026 }, { "epoch": 2.520081281377002, "grad_norm": 0.31029951572418213, "learning_rate": 7.585066639508104e-07, "loss": 0.3242, "step": 7027 }, { "epoch": 2.520439875687306, "grad_norm": 0.2985133230686188, "learning_rate": 7.574019915520142e-07, "loss": 0.2657, "step": 7028 }, { "epoch": 2.5207984699976094, "grad_norm": 0.311696320772171, "learning_rate": 7.562980582269918e-07, "loss": 0.2985, "step": 7029 }, { "epoch": 2.521157064307913, "grad_norm": 0.3129951059818268, "learning_rate": 7.551948641680495e-07, "loss": 0.3186, "step": 7030 }, { "epoch": 2.521515658618217, "grad_norm": 0.29347550868988037, "learning_rate": 7.540924095673685e-07, "loss": 0.2895, "step": 7031 }, { "epoch": 2.5218742529285203, "grad_norm": 0.2920108437538147, "learning_rate": 7.529906946169991e-07, "loss": 0.2769, "step": 7032 }, { "epoch": 2.522232847238824, "grad_norm": 0.29594865441322327, "learning_rate": 7.518897195088637e-07, "loss": 0.2835, "step": 7033 }, { "epoch": 2.5225914415491273, "grad_norm": 0.2812816798686981, "learning_rate": 7.507894844347574e-07, "loss": 0.2872, "step": 7034 }, { "epoch": 2.522950035859431, "grad_norm": 0.3121573030948639, "learning_rate": 7.496899895863418e-07, "loss": 0.3214, "step": 7035 }, { "epoch": 2.5233086301697347, "grad_norm": 0.2963307201862335, "learning_rate": 7.485912351551533e-07, "loss": 0.2783, "step": 7036 }, { "epoch": 2.523667224480038, "grad_norm": 0.3260645866394043, "learning_rate": 7.474932213325997e-07, "loss": 0.2873, "step": 7037 }, { "epoch": 2.5240258187903417, "grad_norm": 0.32029542326927185, "learning_rate": 7.463959483099547e-07, "loss": 0.2843, "step": 7038 }, { "epoch": 2.5243844131006457, "grad_norm": 0.3173942565917969, "learning_rate": 7.452994162783716e-07, "loss": 0.2967, "step": 7039 }, { "epoch": 2.524743007410949, "grad_norm": 0.31507164239883423, "learning_rate": 7.442036254288665e-07, "loss": 0.2634, "step": 7040 }, { "epoch": 2.5251016017212526, "grad_norm": 0.34502264857292175, "learning_rate": 7.431085759523315e-07, "loss": 0.3041, "step": 7041 }, { "epoch": 2.525460196031556, "grad_norm": 0.3048897981643677, "learning_rate": 7.420142680395253e-07, "loss": 0.2773, "step": 7042 }, { "epoch": 2.5258187903418596, "grad_norm": 0.2929920554161072, "learning_rate": 7.40920701881081e-07, "loss": 0.2483, "step": 7043 }, { "epoch": 2.5261773846521636, "grad_norm": 0.33498430252075195, "learning_rate": 7.398278776675005e-07, "loss": 0.3105, "step": 7044 }, { "epoch": 2.526535978962467, "grad_norm": 0.3142412602901459, "learning_rate": 7.387357955891589e-07, "loss": 0.2632, "step": 7045 }, { "epoch": 2.526894573272771, "grad_norm": 0.31583184003829956, "learning_rate": 7.376444558362999e-07, "loss": 0.3278, "step": 7046 }, { "epoch": 2.5272531675830745, "grad_norm": 0.30151617527008057, "learning_rate": 7.365538585990362e-07, "loss": 0.2409, "step": 7047 }, { "epoch": 2.527611761893378, "grad_norm": 0.3294817805290222, "learning_rate": 7.35464004067355e-07, "loss": 0.3059, "step": 7048 }, { "epoch": 2.5279703562036815, "grad_norm": 0.3112834095954895, "learning_rate": 7.343748924311122e-07, "loss": 0.3136, "step": 7049 }, { "epoch": 2.528328950513985, "grad_norm": 0.32579559087753296, "learning_rate": 7.33286523880034e-07, "loss": 0.3167, "step": 7050 }, { "epoch": 2.528687544824289, "grad_norm": 0.29442623257637024, "learning_rate": 7.321988986037193e-07, "loss": 0.2799, "step": 7051 }, { "epoch": 2.5290461391345924, "grad_norm": 0.2962840795516968, "learning_rate": 7.311120167916325e-07, "loss": 0.2958, "step": 7052 }, { "epoch": 2.529404733444896, "grad_norm": 0.3168041408061981, "learning_rate": 7.300258786331155e-07, "loss": 0.3409, "step": 7053 }, { "epoch": 2.5297633277552, "grad_norm": 0.32287031412124634, "learning_rate": 7.289404843173736e-07, "loss": 0.2987, "step": 7054 }, { "epoch": 2.5301219220655033, "grad_norm": 0.30927637219429016, "learning_rate": 7.278558340334862e-07, "loss": 0.2868, "step": 7055 }, { "epoch": 2.530480516375807, "grad_norm": 0.30110156536102295, "learning_rate": 7.267719279704067e-07, "loss": 0.2858, "step": 7056 }, { "epoch": 2.5308391106861103, "grad_norm": 0.2986203134059906, "learning_rate": 7.256887663169504e-07, "loss": 0.2919, "step": 7057 }, { "epoch": 2.531197704996414, "grad_norm": 0.3001232147216797, "learning_rate": 7.246063492618105e-07, "loss": 0.2944, "step": 7058 }, { "epoch": 2.5315562993067178, "grad_norm": 0.3153547942638397, "learning_rate": 7.235246769935444e-07, "loss": 0.2892, "step": 7059 }, { "epoch": 2.5319148936170213, "grad_norm": 0.34742602705955505, "learning_rate": 7.224437497005854e-07, "loss": 0.3353, "step": 7060 }, { "epoch": 2.5322734879273248, "grad_norm": 0.31705448031425476, "learning_rate": 7.213635675712327e-07, "loss": 0.3215, "step": 7061 }, { "epoch": 2.5326320822376287, "grad_norm": 0.3091074526309967, "learning_rate": 7.202841307936587e-07, "loss": 0.2599, "step": 7062 }, { "epoch": 2.532990676547932, "grad_norm": 0.30236056447029114, "learning_rate": 7.192054395559045e-07, "loss": 0.2916, "step": 7063 }, { "epoch": 2.5333492708582357, "grad_norm": 0.30202606320381165, "learning_rate": 7.181274940458804e-07, "loss": 0.2991, "step": 7064 }, { "epoch": 2.533707865168539, "grad_norm": 0.32036691904067993, "learning_rate": 7.170502944513685e-07, "loss": 0.3279, "step": 7065 }, { "epoch": 2.5340664594788427, "grad_norm": 0.30153587460517883, "learning_rate": 7.159738409600203e-07, "loss": 0.3086, "step": 7066 }, { "epoch": 2.5344250537891466, "grad_norm": 0.3011743426322937, "learning_rate": 7.148981337593569e-07, "loss": 0.2556, "step": 7067 }, { "epoch": 2.53478364809945, "grad_norm": 0.3220727741718292, "learning_rate": 7.138231730367718e-07, "loss": 0.3208, "step": 7068 }, { "epoch": 2.5351422424097536, "grad_norm": 0.30737173557281494, "learning_rate": 7.127489589795239e-07, "loss": 0.2684, "step": 7069 }, { "epoch": 2.5355008367200575, "grad_norm": 0.2964664697647095, "learning_rate": 7.116754917747471e-07, "loss": 0.2917, "step": 7070 }, { "epoch": 2.535859431030361, "grad_norm": 0.3205757439136505, "learning_rate": 7.106027716094399e-07, "loss": 0.285, "step": 7071 }, { "epoch": 2.5362180253406645, "grad_norm": 0.2927936911582947, "learning_rate": 7.095307986704752e-07, "loss": 0.2711, "step": 7072 }, { "epoch": 2.536576619650968, "grad_norm": 0.32105061411857605, "learning_rate": 7.084595731445937e-07, "loss": 0.3301, "step": 7073 }, { "epoch": 2.536935213961272, "grad_norm": 0.30026280879974365, "learning_rate": 7.073890952184059e-07, "loss": 0.2863, "step": 7074 }, { "epoch": 2.5372938082715755, "grad_norm": 0.2919066846370697, "learning_rate": 7.063193650783944e-07, "loss": 0.3011, "step": 7075 }, { "epoch": 2.537652402581879, "grad_norm": 0.29715049266815186, "learning_rate": 7.05250382910907e-07, "loss": 0.2857, "step": 7076 }, { "epoch": 2.538010996892183, "grad_norm": 0.31406062841415405, "learning_rate": 7.041821489021639e-07, "loss": 0.2826, "step": 7077 }, { "epoch": 2.5383695912024864, "grad_norm": 0.3207923173904419, "learning_rate": 7.031146632382563e-07, "loss": 0.2896, "step": 7078 }, { "epoch": 2.53872818551279, "grad_norm": 0.3257703483104706, "learning_rate": 7.020479261051422e-07, "loss": 0.3178, "step": 7079 }, { "epoch": 2.5390867798230934, "grad_norm": 0.29831403493881226, "learning_rate": 7.009819376886524e-07, "loss": 0.2852, "step": 7080 }, { "epoch": 2.539445374133397, "grad_norm": 0.3011532723903656, "learning_rate": 6.999166981744826e-07, "loss": 0.2985, "step": 7081 }, { "epoch": 2.539803968443701, "grad_norm": 0.305363267660141, "learning_rate": 6.988522077482035e-07, "loss": 0.2425, "step": 7082 }, { "epoch": 2.5401625627540043, "grad_norm": 0.3212531507015228, "learning_rate": 6.977884665952494e-07, "loss": 0.297, "step": 7083 }, { "epoch": 2.540521157064308, "grad_norm": 0.3258141577243805, "learning_rate": 6.967254749009305e-07, "loss": 0.2941, "step": 7084 }, { "epoch": 2.5408797513746118, "grad_norm": 0.3247663378715515, "learning_rate": 6.956632328504232e-07, "loss": 0.302, "step": 7085 }, { "epoch": 2.5412383456849152, "grad_norm": 0.29694652557373047, "learning_rate": 6.946017406287713e-07, "loss": 0.2687, "step": 7086 }, { "epoch": 2.5415969399952187, "grad_norm": 0.3220924735069275, "learning_rate": 6.935409984208918e-07, "loss": 0.295, "step": 7087 }, { "epoch": 2.5419555343055222, "grad_norm": 0.30164262652397156, "learning_rate": 6.924810064115678e-07, "loss": 0.2865, "step": 7088 }, { "epoch": 2.5423141286158257, "grad_norm": 0.29514187574386597, "learning_rate": 6.914217647854538e-07, "loss": 0.2866, "step": 7089 }, { "epoch": 2.5426727229261297, "grad_norm": 0.31070077419281006, "learning_rate": 6.903632737270732e-07, "loss": 0.2784, "step": 7090 }, { "epoch": 2.543031317236433, "grad_norm": 0.31745174527168274, "learning_rate": 6.893055334208182e-07, "loss": 0.3046, "step": 7091 }, { "epoch": 2.5433899115467367, "grad_norm": 0.2996663451194763, "learning_rate": 6.882485440509517e-07, "loss": 0.2962, "step": 7092 }, { "epoch": 2.5437485058570406, "grad_norm": 0.31306666135787964, "learning_rate": 6.871923058016028e-07, "loss": 0.3029, "step": 7093 }, { "epoch": 2.544107100167344, "grad_norm": 0.3123808205127716, "learning_rate": 6.861368188567713e-07, "loss": 0.3055, "step": 7094 }, { "epoch": 2.5444656944776476, "grad_norm": 0.29807260632514954, "learning_rate": 6.85082083400328e-07, "loss": 0.26, "step": 7095 }, { "epoch": 2.544824288787951, "grad_norm": 0.32147181034088135, "learning_rate": 6.840280996160098e-07, "loss": 0.312, "step": 7096 }, { "epoch": 2.5451828830982546, "grad_norm": 0.3197120726108551, "learning_rate": 6.829748676874254e-07, "loss": 0.3117, "step": 7097 }, { "epoch": 2.5455414774085585, "grad_norm": 0.31929999589920044, "learning_rate": 6.819223877980491e-07, "loss": 0.3143, "step": 7098 }, { "epoch": 2.545900071718862, "grad_norm": 0.30360278487205505, "learning_rate": 6.808706601312287e-07, "loss": 0.2811, "step": 7099 }, { "epoch": 2.5462586660291655, "grad_norm": 0.29803863167762756, "learning_rate": 6.798196848701738e-07, "loss": 0.2799, "step": 7100 }, { "epoch": 2.5466172603394694, "grad_norm": 0.32091036438941956, "learning_rate": 6.787694621979735e-07, "loss": 0.3212, "step": 7101 }, { "epoch": 2.546975854649773, "grad_norm": 0.30801844596862793, "learning_rate": 6.777199922975758e-07, "loss": 0.2856, "step": 7102 }, { "epoch": 2.5473344489600764, "grad_norm": 0.3394817113876343, "learning_rate": 6.766712753518029e-07, "loss": 0.3146, "step": 7103 }, { "epoch": 2.54769304327038, "grad_norm": 0.2833341956138611, "learning_rate": 6.756233115433453e-07, "loss": 0.2647, "step": 7104 }, { "epoch": 2.548051637580684, "grad_norm": 0.3167246878147125, "learning_rate": 6.745761010547597e-07, "loss": 0.3284, "step": 7105 }, { "epoch": 2.5484102318909874, "grad_norm": 0.29303327202796936, "learning_rate": 6.735296440684746e-07, "loss": 0.3109, "step": 7106 }, { "epoch": 2.548768826201291, "grad_norm": 0.281008243560791, "learning_rate": 6.724839407667855e-07, "loss": 0.2552, "step": 7107 }, { "epoch": 2.549127420511595, "grad_norm": 0.32010096311569214, "learning_rate": 6.714389913318581e-07, "loss": 0.3334, "step": 7108 }, { "epoch": 2.5494860148218983, "grad_norm": 0.3145306408405304, "learning_rate": 6.703947959457263e-07, "loss": 0.2983, "step": 7109 }, { "epoch": 2.549844609132202, "grad_norm": 0.2990857660770416, "learning_rate": 6.693513547902902e-07, "loss": 0.2906, "step": 7110 }, { "epoch": 2.5502032034425053, "grad_norm": 0.32124754786491394, "learning_rate": 6.683086680473222e-07, "loss": 0.3089, "step": 7111 }, { "epoch": 2.550561797752809, "grad_norm": 0.2870921790599823, "learning_rate": 6.672667358984592e-07, "loss": 0.2777, "step": 7112 }, { "epoch": 2.5509203920631127, "grad_norm": 0.3044428527355194, "learning_rate": 6.662255585252114e-07, "loss": 0.3005, "step": 7113 }, { "epoch": 2.551278986373416, "grad_norm": 0.3161366283893585, "learning_rate": 6.651851361089556e-07, "loss": 0.3075, "step": 7114 }, { "epoch": 2.5516375806837197, "grad_norm": 0.2982095181941986, "learning_rate": 6.641454688309346e-07, "loss": 0.2761, "step": 7115 }, { "epoch": 2.5519961749940236, "grad_norm": 0.3248668313026428, "learning_rate": 6.631065568722633e-07, "loss": 0.298, "step": 7116 }, { "epoch": 2.552354769304327, "grad_norm": 0.3156689405441284, "learning_rate": 6.620684004139216e-07, "loss": 0.288, "step": 7117 }, { "epoch": 2.5527133636146306, "grad_norm": 0.3144835829734802, "learning_rate": 6.610309996367608e-07, "loss": 0.3092, "step": 7118 }, { "epoch": 2.553071957924934, "grad_norm": 0.2980206310749054, "learning_rate": 6.599943547214993e-07, "loss": 0.261, "step": 7119 }, { "epoch": 2.5534305522352376, "grad_norm": 0.3419985771179199, "learning_rate": 6.589584658487236e-07, "loss": 0.3419, "step": 7120 }, { "epoch": 2.5537891465455416, "grad_norm": 0.312059223651886, "learning_rate": 6.579233331988899e-07, "loss": 0.2892, "step": 7121 }, { "epoch": 2.554147740855845, "grad_norm": 0.3144174814224243, "learning_rate": 6.568889569523195e-07, "loss": 0.2746, "step": 7122 }, { "epoch": 2.5545063351661486, "grad_norm": 0.30304384231567383, "learning_rate": 6.558553372892051e-07, "loss": 0.3032, "step": 7123 }, { "epoch": 2.5548649294764525, "grad_norm": 0.3475005626678467, "learning_rate": 6.548224743896064e-07, "loss": 0.3099, "step": 7124 }, { "epoch": 2.555223523786756, "grad_norm": 0.29719915986061096, "learning_rate": 6.537903684334507e-07, "loss": 0.2969, "step": 7125 }, { "epoch": 2.5555821180970595, "grad_norm": 0.2996123731136322, "learning_rate": 6.527590196005362e-07, "loss": 0.297, "step": 7126 }, { "epoch": 2.555940712407363, "grad_norm": 0.3267216384410858, "learning_rate": 6.517284280705244e-07, "loss": 0.2935, "step": 7127 }, { "epoch": 2.5562993067176665, "grad_norm": 0.3230208456516266, "learning_rate": 6.506985940229499e-07, "loss": 0.2858, "step": 7128 }, { "epoch": 2.5566579010279704, "grad_norm": 0.3128730356693268, "learning_rate": 6.496695176372092e-07, "loss": 0.3031, "step": 7129 }, { "epoch": 2.557016495338274, "grad_norm": 0.2911819517612457, "learning_rate": 6.486411990925756e-07, "loss": 0.274, "step": 7130 }, { "epoch": 2.557375089648578, "grad_norm": 0.32717716693878174, "learning_rate": 6.47613638568182e-07, "loss": 0.3242, "step": 7131 }, { "epoch": 2.5577336839588813, "grad_norm": 0.2992275655269623, "learning_rate": 6.465868362430333e-07, "loss": 0.274, "step": 7132 }, { "epoch": 2.558092278269185, "grad_norm": 0.3022635579109192, "learning_rate": 6.455607922960034e-07, "loss": 0.2885, "step": 7133 }, { "epoch": 2.5584508725794883, "grad_norm": 0.2894534170627594, "learning_rate": 6.445355069058295e-07, "loss": 0.2713, "step": 7134 }, { "epoch": 2.558809466889792, "grad_norm": 0.29059723019599915, "learning_rate": 6.435109802511213e-07, "loss": 0.2885, "step": 7135 }, { "epoch": 2.5591680612000958, "grad_norm": 0.3346426486968994, "learning_rate": 6.424872125103537e-07, "loss": 0.3437, "step": 7136 }, { "epoch": 2.5595266555103993, "grad_norm": 0.2848133146762848, "learning_rate": 6.414642038618712e-07, "loss": 0.2696, "step": 7137 }, { "epoch": 2.5598852498207028, "grad_norm": 0.3268906772136688, "learning_rate": 6.404419544838852e-07, "loss": 0.3053, "step": 7138 }, { "epoch": 2.5602438441310067, "grad_norm": 0.30551770329475403, "learning_rate": 6.394204645544733e-07, "loss": 0.2609, "step": 7139 }, { "epoch": 2.56060243844131, "grad_norm": 0.31485170125961304, "learning_rate": 6.38399734251583e-07, "loss": 0.2981, "step": 7140 }, { "epoch": 2.5609610327516137, "grad_norm": 0.2962020933628082, "learning_rate": 6.373797637530283e-07, "loss": 0.2991, "step": 7141 }, { "epoch": 2.561319627061917, "grad_norm": 0.31132426857948303, "learning_rate": 6.363605532364931e-07, "loss": 0.3025, "step": 7142 }, { "epoch": 2.5616782213722207, "grad_norm": 0.33586010336875916, "learning_rate": 6.353421028795242e-07, "loss": 0.2968, "step": 7143 }, { "epoch": 2.5620368156825246, "grad_norm": 0.34558114409446716, "learning_rate": 6.343244128595405e-07, "loss": 0.3406, "step": 7144 }, { "epoch": 2.562395409992828, "grad_norm": 0.3023865818977356, "learning_rate": 6.333074833538272e-07, "loss": 0.2786, "step": 7145 }, { "epoch": 2.5627540043031316, "grad_norm": 0.29799124598503113, "learning_rate": 6.322913145395343e-07, "loss": 0.2596, "step": 7146 }, { "epoch": 2.5631125986134355, "grad_norm": 0.3119080662727356, "learning_rate": 6.312759065936847e-07, "loss": 0.3174, "step": 7147 }, { "epoch": 2.563471192923739, "grad_norm": 0.3038700819015503, "learning_rate": 6.302612596931635e-07, "loss": 0.2913, "step": 7148 }, { "epoch": 2.5638297872340425, "grad_norm": 0.2835237979888916, "learning_rate": 6.292473740147254e-07, "loss": 0.2922, "step": 7149 }, { "epoch": 2.564188381544346, "grad_norm": 0.30588850378990173, "learning_rate": 6.282342497349941e-07, "loss": 0.3204, "step": 7150 }, { "epoch": 2.5645469758546495, "grad_norm": 0.282735139131546, "learning_rate": 6.27221887030457e-07, "loss": 0.2788, "step": 7151 }, { "epoch": 2.5649055701649535, "grad_norm": 0.29362839460372925, "learning_rate": 6.262102860774716e-07, "loss": 0.3095, "step": 7152 }, { "epoch": 2.565264164475257, "grad_norm": 0.31299713253974915, "learning_rate": 6.251994470522616e-07, "loss": 0.3044, "step": 7153 }, { "epoch": 2.5656227587855605, "grad_norm": 0.2997707724571228, "learning_rate": 6.241893701309193e-07, "loss": 0.2801, "step": 7154 }, { "epoch": 2.5659813530958644, "grad_norm": 0.3238390386104584, "learning_rate": 6.231800554894029e-07, "loss": 0.3137, "step": 7155 }, { "epoch": 2.566339947406168, "grad_norm": 0.3018217980861664, "learning_rate": 6.221715033035375e-07, "loss": 0.2812, "step": 7156 }, { "epoch": 2.5666985417164714, "grad_norm": 0.2807498574256897, "learning_rate": 6.211637137490173e-07, "loss": 0.2479, "step": 7157 }, { "epoch": 2.567057136026775, "grad_norm": 0.30351459980010986, "learning_rate": 6.201566870013997e-07, "loss": 0.2683, "step": 7158 }, { "epoch": 2.567415730337079, "grad_norm": 0.3129338324069977, "learning_rate": 6.191504232361156e-07, "loss": 0.3143, "step": 7159 }, { "epoch": 2.5677743246473823, "grad_norm": 0.30539241433143616, "learning_rate": 6.181449226284564e-07, "loss": 0.2763, "step": 7160 }, { "epoch": 2.568132918957686, "grad_norm": 0.3115142285823822, "learning_rate": 6.171401853535841e-07, "loss": 0.3287, "step": 7161 }, { "epoch": 2.5684915132679897, "grad_norm": 0.27129456400871277, "learning_rate": 6.161362115865283e-07, "loss": 0.2682, "step": 7162 }, { "epoch": 2.5688501075782932, "grad_norm": 0.30441173911094666, "learning_rate": 6.151330015021817e-07, "loss": 0.2986, "step": 7163 }, { "epoch": 2.5692087018885967, "grad_norm": 0.31601351499557495, "learning_rate": 6.141305552753102e-07, "loss": 0.3094, "step": 7164 }, { "epoch": 2.5695672961989002, "grad_norm": 0.29688799381256104, "learning_rate": 6.131288730805407e-07, "loss": 0.2968, "step": 7165 }, { "epoch": 2.5699258905092037, "grad_norm": 0.30334919691085815, "learning_rate": 6.121279550923693e-07, "loss": 0.2866, "step": 7166 }, { "epoch": 2.5702844848195077, "grad_norm": 0.3099946081638336, "learning_rate": 6.111278014851607e-07, "loss": 0.2937, "step": 7167 }, { "epoch": 2.570643079129811, "grad_norm": 0.2977554500102997, "learning_rate": 6.101284124331425e-07, "loss": 0.2819, "step": 7168 }, { "epoch": 2.5710016734401147, "grad_norm": 0.3117554187774658, "learning_rate": 6.091297881104124e-07, "loss": 0.2923, "step": 7169 }, { "epoch": 2.5713602677504186, "grad_norm": 0.30092984437942505, "learning_rate": 6.081319286909343e-07, "loss": 0.2946, "step": 7170 }, { "epoch": 2.571718862060722, "grad_norm": 0.3051036596298218, "learning_rate": 6.071348343485395e-07, "loss": 0.2867, "step": 7171 }, { "epoch": 2.5720774563710256, "grad_norm": 0.32072851061820984, "learning_rate": 6.061385052569219e-07, "loss": 0.3032, "step": 7172 }, { "epoch": 2.572436050681329, "grad_norm": 0.30942586064338684, "learning_rate": 6.05142941589647e-07, "loss": 0.3044, "step": 7173 }, { "epoch": 2.5727946449916326, "grad_norm": 0.2914445400238037, "learning_rate": 6.041481435201457e-07, "loss": 0.2567, "step": 7174 }, { "epoch": 2.5731532393019365, "grad_norm": 0.33416658639907837, "learning_rate": 6.031541112217121e-07, "loss": 0.3288, "step": 7175 }, { "epoch": 2.57351183361224, "grad_norm": 0.31169524788856506, "learning_rate": 6.021608448675137e-07, "loss": 0.3078, "step": 7176 }, { "epoch": 2.5738704279225435, "grad_norm": 0.2888971269130707, "learning_rate": 6.011683446305777e-07, "loss": 0.2623, "step": 7177 }, { "epoch": 2.5742290222328474, "grad_norm": 0.31683024764060974, "learning_rate": 6.001766106838014e-07, "loss": 0.3031, "step": 7178 }, { "epoch": 2.574587616543151, "grad_norm": 0.29926902055740356, "learning_rate": 5.991856431999494e-07, "loss": 0.3119, "step": 7179 }, { "epoch": 2.5749462108534544, "grad_norm": 0.32003483176231384, "learning_rate": 5.981954423516479e-07, "loss": 0.2822, "step": 7180 }, { "epoch": 2.575304805163758, "grad_norm": 0.29426106810569763, "learning_rate": 5.972060083113973e-07, "loss": 0.2941, "step": 7181 }, { "epoch": 2.5756633994740614, "grad_norm": 0.31808018684387207, "learning_rate": 5.96217341251557e-07, "loss": 0.2826, "step": 7182 }, { "epoch": 2.5760219937843654, "grad_norm": 0.3082408308982849, "learning_rate": 5.952294413443566e-07, "loss": 0.2586, "step": 7183 }, { "epoch": 2.576380588094669, "grad_norm": 0.2915023863315582, "learning_rate": 5.942423087618926e-07, "loss": 0.2822, "step": 7184 }, { "epoch": 2.5767391824049724, "grad_norm": 0.30228516459465027, "learning_rate": 5.932559436761243e-07, "loss": 0.2847, "step": 7185 }, { "epoch": 2.5770977767152763, "grad_norm": 0.31280383467674255, "learning_rate": 5.922703462588808e-07, "loss": 0.3158, "step": 7186 }, { "epoch": 2.57745637102558, "grad_norm": 0.31214210391044617, "learning_rate": 5.912855166818565e-07, "loss": 0.2852, "step": 7187 }, { "epoch": 2.5778149653358833, "grad_norm": 0.2985147535800934, "learning_rate": 5.903014551166125e-07, "loss": 0.2965, "step": 7188 }, { "epoch": 2.5781735596461868, "grad_norm": 0.3044123947620392, "learning_rate": 5.893181617345734e-07, "loss": 0.2691, "step": 7189 }, { "epoch": 2.5785321539564907, "grad_norm": 0.30296745896339417, "learning_rate": 5.883356367070325e-07, "loss": 0.2863, "step": 7190 }, { "epoch": 2.578890748266794, "grad_norm": 0.29885441064834595, "learning_rate": 5.873538802051504e-07, "loss": 0.2736, "step": 7191 }, { "epoch": 2.5792493425770977, "grad_norm": 0.32068362832069397, "learning_rate": 5.86372892399949e-07, "loss": 0.3461, "step": 7192 }, { "epoch": 2.5796079368874016, "grad_norm": 0.2970680892467499, "learning_rate": 5.853926734623233e-07, "loss": 0.2756, "step": 7193 }, { "epoch": 2.579966531197705, "grad_norm": 0.31494492292404175, "learning_rate": 5.844132235630273e-07, "loss": 0.3157, "step": 7194 }, { "epoch": 2.5803251255080086, "grad_norm": 0.3058414161205292, "learning_rate": 5.834345428726856e-07, "loss": 0.3015, "step": 7195 }, { "epoch": 2.580683719818312, "grad_norm": 0.2924179136753082, "learning_rate": 5.82456631561788e-07, "loss": 0.2729, "step": 7196 }, { "epoch": 2.5810423141286156, "grad_norm": 0.3021978735923767, "learning_rate": 5.814794898006882e-07, "loss": 0.307, "step": 7197 }, { "epoch": 2.5814009084389196, "grad_norm": 0.32573363184928894, "learning_rate": 5.805031177596082e-07, "loss": 0.3204, "step": 7198 }, { "epoch": 2.581759502749223, "grad_norm": 0.33813342452049255, "learning_rate": 5.79527515608635e-07, "loss": 0.3042, "step": 7199 }, { "epoch": 2.5821180970595266, "grad_norm": 0.31176820397377014, "learning_rate": 5.785526835177218e-07, "loss": 0.2844, "step": 7200 }, { "epoch": 2.5824766913698305, "grad_norm": 0.2830825448036194, "learning_rate": 5.775786216566865e-07, "loss": 0.3322, "step": 7201 }, { "epoch": 2.582835285680134, "grad_norm": 0.3022303879261017, "learning_rate": 5.766053301952146e-07, "loss": 0.2908, "step": 7202 }, { "epoch": 2.5831938799904375, "grad_norm": 0.30194321274757385, "learning_rate": 5.756328093028557e-07, "loss": 0.2939, "step": 7203 }, { "epoch": 2.583552474300741, "grad_norm": 0.308607280254364, "learning_rate": 5.746610591490265e-07, "loss": 0.3073, "step": 7204 }, { "epoch": 2.5839110686110445, "grad_norm": 0.31069183349609375, "learning_rate": 5.736900799030092e-07, "loss": 0.3005, "step": 7205 }, { "epoch": 2.5842696629213484, "grad_norm": 0.3512197732925415, "learning_rate": 5.727198717339511e-07, "loss": 0.3144, "step": 7206 }, { "epoch": 2.584628257231652, "grad_norm": 0.3114745318889618, "learning_rate": 5.717504348108649e-07, "loss": 0.2717, "step": 7207 }, { "epoch": 2.5849868515419554, "grad_norm": 0.30957934260368347, "learning_rate": 5.707817693026307e-07, "loss": 0.285, "step": 7208 }, { "epoch": 2.5853454458522593, "grad_norm": 0.2978377938270569, "learning_rate": 5.698138753779909e-07, "loss": 0.298, "step": 7209 }, { "epoch": 2.585704040162563, "grad_norm": 0.2940661907196045, "learning_rate": 5.688467532055586e-07, "loss": 0.2549, "step": 7210 }, { "epoch": 2.5860626344728663, "grad_norm": 0.3226463794708252, "learning_rate": 5.678804029538071e-07, "loss": 0.2865, "step": 7211 }, { "epoch": 2.58642122878317, "grad_norm": 0.30728426575660706, "learning_rate": 5.669148247910788e-07, "loss": 0.3111, "step": 7212 }, { "epoch": 2.5867798230934733, "grad_norm": 0.3206135034561157, "learning_rate": 5.65950018885581e-07, "loss": 0.2997, "step": 7213 }, { "epoch": 2.5871384174037773, "grad_norm": 0.298002690076828, "learning_rate": 5.649859854053841e-07, "loss": 0.2831, "step": 7214 }, { "epoch": 2.5874970117140808, "grad_norm": 0.291459858417511, "learning_rate": 5.640227245184266e-07, "loss": 0.294, "step": 7215 }, { "epoch": 2.5878556060243842, "grad_norm": 0.3138715922832489, "learning_rate": 5.630602363925119e-07, "loss": 0.3304, "step": 7216 }, { "epoch": 2.588214200334688, "grad_norm": 0.30294543504714966, "learning_rate": 5.620985211953083e-07, "loss": 0.2623, "step": 7217 }, { "epoch": 2.5885727946449917, "grad_norm": 0.3078239858150482, "learning_rate": 5.611375790943491e-07, "loss": 0.2722, "step": 7218 }, { "epoch": 2.588931388955295, "grad_norm": 0.30629169940948486, "learning_rate": 5.601774102570335e-07, "loss": 0.309, "step": 7219 }, { "epoch": 2.5892899832655987, "grad_norm": 0.3088425397872925, "learning_rate": 5.592180148506266e-07, "loss": 0.2761, "step": 7220 }, { "epoch": 2.5896485775759026, "grad_norm": 0.31715157628059387, "learning_rate": 5.58259393042257e-07, "loss": 0.3042, "step": 7221 }, { "epoch": 2.590007171886206, "grad_norm": 0.28993096947669983, "learning_rate": 5.573015449989211e-07, "loss": 0.2705, "step": 7222 }, { "epoch": 2.5903657661965096, "grad_norm": 0.2937827408313751, "learning_rate": 5.563444708874771e-07, "loss": 0.2884, "step": 7223 }, { "epoch": 2.5907243605068135, "grad_norm": 0.2864077091217041, "learning_rate": 5.553881708746517e-07, "loss": 0.2912, "step": 7224 }, { "epoch": 2.591082954817117, "grad_norm": 0.3093382716178894, "learning_rate": 5.544326451270354e-07, "loss": 0.2887, "step": 7225 }, { "epoch": 2.5914415491274205, "grad_norm": 0.29007676243782043, "learning_rate": 5.534778938110818e-07, "loss": 0.277, "step": 7226 }, { "epoch": 2.591800143437724, "grad_norm": 0.332256942987442, "learning_rate": 5.525239170931146e-07, "loss": 0.3182, "step": 7227 }, { "epoch": 2.5921587377480275, "grad_norm": 0.2983640432357788, "learning_rate": 5.515707151393173e-07, "loss": 0.304, "step": 7228 }, { "epoch": 2.5925173320583315, "grad_norm": 0.28523704409599304, "learning_rate": 5.506182881157418e-07, "loss": 0.2515, "step": 7229 }, { "epoch": 2.592875926368635, "grad_norm": 0.32258838415145874, "learning_rate": 5.496666361883029e-07, "loss": 0.3005, "step": 7230 }, { "epoch": 2.5932345206789384, "grad_norm": 0.29815909266471863, "learning_rate": 5.48715759522781e-07, "loss": 0.2744, "step": 7231 }, { "epoch": 2.5935931149892424, "grad_norm": 0.314421147108078, "learning_rate": 5.477656582848234e-07, "loss": 0.2976, "step": 7232 }, { "epoch": 2.593951709299546, "grad_norm": 0.29960572719573975, "learning_rate": 5.46816332639939e-07, "loss": 0.3145, "step": 7233 }, { "epoch": 2.5943103036098494, "grad_norm": 0.3022978603839874, "learning_rate": 5.458677827535059e-07, "loss": 0.3122, "step": 7234 }, { "epoch": 2.594668897920153, "grad_norm": 0.29676204919815063, "learning_rate": 5.449200087907608e-07, "loss": 0.2991, "step": 7235 }, { "epoch": 2.5950274922304564, "grad_norm": 0.3074590265750885, "learning_rate": 5.439730109168112e-07, "loss": 0.2909, "step": 7236 }, { "epoch": 2.5953860865407603, "grad_norm": 0.29596462845802307, "learning_rate": 5.430267892966262e-07, "loss": 0.3052, "step": 7237 }, { "epoch": 2.595744680851064, "grad_norm": 0.3118572533130646, "learning_rate": 5.420813440950418e-07, "loss": 0.2972, "step": 7238 }, { "epoch": 2.5961032751613673, "grad_norm": 0.3189474642276764, "learning_rate": 5.411366754767567e-07, "loss": 0.2645, "step": 7239 }, { "epoch": 2.5964618694716712, "grad_norm": 0.3321346640586853, "learning_rate": 5.401927836063348e-07, "loss": 0.3093, "step": 7240 }, { "epoch": 2.5968204637819747, "grad_norm": 0.3163914382457733, "learning_rate": 5.392496686482046e-07, "loss": 0.2796, "step": 7241 }, { "epoch": 2.5971790580922782, "grad_norm": 0.2916455566883087, "learning_rate": 5.383073307666614e-07, "loss": 0.2985, "step": 7242 }, { "epoch": 2.5975376524025817, "grad_norm": 0.2997249364852905, "learning_rate": 5.373657701258606e-07, "loss": 0.2872, "step": 7243 }, { "epoch": 2.597896246712885, "grad_norm": 0.29937636852264404, "learning_rate": 5.364249868898286e-07, "loss": 0.2552, "step": 7244 }, { "epoch": 2.598254841023189, "grad_norm": 0.3193155825138092, "learning_rate": 5.3548498122245e-07, "loss": 0.2814, "step": 7245 }, { "epoch": 2.5986134353334926, "grad_norm": 0.2889293432235718, "learning_rate": 5.345457532874782e-07, "loss": 0.2701, "step": 7246 }, { "epoch": 2.5989720296437966, "grad_norm": 0.30757251381874084, "learning_rate": 5.336073032485283e-07, "loss": 0.283, "step": 7247 }, { "epoch": 2.5993306239541, "grad_norm": 0.32587456703186035, "learning_rate": 5.326696312690816e-07, "loss": 0.3004, "step": 7248 }, { "epoch": 2.5996892182644036, "grad_norm": 0.29920607805252075, "learning_rate": 5.317327375124842e-07, "loss": 0.2839, "step": 7249 }, { "epoch": 2.600047812574707, "grad_norm": 0.3147262930870056, "learning_rate": 5.307966221419452e-07, "loss": 0.3421, "step": 7250 }, { "epoch": 2.6004064068850106, "grad_norm": 0.3022937476634979, "learning_rate": 5.298612853205398e-07, "loss": 0.2788, "step": 7251 }, { "epoch": 2.6007650011953145, "grad_norm": 0.3029651641845703, "learning_rate": 5.289267272112053e-07, "loss": 0.2881, "step": 7252 }, { "epoch": 2.601123595505618, "grad_norm": 0.30269235372543335, "learning_rate": 5.279929479767454e-07, "loss": 0.2976, "step": 7253 }, { "epoch": 2.6014821898159215, "grad_norm": 0.3040488064289093, "learning_rate": 5.270599477798283e-07, "loss": 0.2601, "step": 7254 }, { "epoch": 2.6018407841262254, "grad_norm": 0.30414775013923645, "learning_rate": 5.261277267829817e-07, "loss": 0.2907, "step": 7255 }, { "epoch": 2.602199378436529, "grad_norm": 0.310899943113327, "learning_rate": 5.251962851486065e-07, "loss": 0.3149, "step": 7256 }, { "epoch": 2.6025579727468324, "grad_norm": 0.28969526290893555, "learning_rate": 5.242656230389592e-07, "loss": 0.2927, "step": 7257 }, { "epoch": 2.602916567057136, "grad_norm": 0.3018651306629181, "learning_rate": 5.233357406161666e-07, "loss": 0.3091, "step": 7258 }, { "epoch": 2.6032751613674394, "grad_norm": 0.3044489026069641, "learning_rate": 5.224066380422143e-07, "loss": 0.2926, "step": 7259 }, { "epoch": 2.6036337556777434, "grad_norm": 0.28843945264816284, "learning_rate": 5.214783154789554e-07, "loss": 0.2744, "step": 7260 }, { "epoch": 2.603992349988047, "grad_norm": 0.3077356517314911, "learning_rate": 5.205507730881093e-07, "loss": 0.3134, "step": 7261 }, { "epoch": 2.6043509442983503, "grad_norm": 0.2995436489582062, "learning_rate": 5.19624011031254e-07, "loss": 0.3078, "step": 7262 }, { "epoch": 2.6047095386086543, "grad_norm": 0.3051605522632599, "learning_rate": 5.186980294698357e-07, "loss": 0.3056, "step": 7263 }, { "epoch": 2.605068132918958, "grad_norm": 0.30692964792251587, "learning_rate": 5.177728285651624e-07, "loss": 0.2798, "step": 7264 }, { "epoch": 2.6054267272292613, "grad_norm": 0.3240710198879242, "learning_rate": 5.16848408478407e-07, "loss": 0.3054, "step": 7265 }, { "epoch": 2.6057853215395648, "grad_norm": 0.3016199469566345, "learning_rate": 5.159247693706071e-07, "loss": 0.2874, "step": 7266 }, { "epoch": 2.6061439158498683, "grad_norm": 0.29405197501182556, "learning_rate": 5.150019114026627e-07, "loss": 0.2525, "step": 7267 }, { "epoch": 2.606502510160172, "grad_norm": 0.31382372975349426, "learning_rate": 5.140798347353404e-07, "loss": 0.3303, "step": 7268 }, { "epoch": 2.6068611044704757, "grad_norm": 0.27411985397338867, "learning_rate": 5.13158539529266e-07, "loss": 0.2799, "step": 7269 }, { "epoch": 2.607219698780779, "grad_norm": 0.2830888330936432, "learning_rate": 5.122380259449339e-07, "loss": 0.2961, "step": 7270 }, { "epoch": 2.607578293091083, "grad_norm": 0.2931675910949707, "learning_rate": 5.113182941427003e-07, "loss": 0.2813, "step": 7271 }, { "epoch": 2.6079368874013866, "grad_norm": 0.2802322804927826, "learning_rate": 5.103993442827832e-07, "loss": 0.2814, "step": 7272 }, { "epoch": 2.60829548171169, "grad_norm": 0.2885078191757202, "learning_rate": 5.094811765252705e-07, "loss": 0.312, "step": 7273 }, { "epoch": 2.6086540760219936, "grad_norm": 0.3196970522403717, "learning_rate": 5.085637910301061e-07, "loss": 0.3359, "step": 7274 }, { "epoch": 2.6090126703322976, "grad_norm": 0.2911653220653534, "learning_rate": 5.076471879571043e-07, "loss": 0.2793, "step": 7275 }, { "epoch": 2.609371264642601, "grad_norm": 0.29981744289398193, "learning_rate": 5.06731367465938e-07, "loss": 0.2983, "step": 7276 }, { "epoch": 2.6097298589529045, "grad_norm": 0.29209768772125244, "learning_rate": 5.058163297161467e-07, "loss": 0.2705, "step": 7277 }, { "epoch": 2.6100884532632085, "grad_norm": 0.2982546091079712, "learning_rate": 5.04902074867133e-07, "loss": 0.2948, "step": 7278 }, { "epoch": 2.610447047573512, "grad_norm": 0.3032509982585907, "learning_rate": 5.039886030781626e-07, "loss": 0.271, "step": 7279 }, { "epoch": 2.6108056418838155, "grad_norm": 0.30205652117729187, "learning_rate": 5.030759145083669e-07, "loss": 0.3004, "step": 7280 }, { "epoch": 2.611164236194119, "grad_norm": 0.2975291311740875, "learning_rate": 5.021640093167362e-07, "loss": 0.2834, "step": 7281 }, { "epoch": 2.6115228305044225, "grad_norm": 0.3015792965888977, "learning_rate": 5.012528876621292e-07, "loss": 0.2846, "step": 7282 }, { "epoch": 2.6118814248147264, "grad_norm": 0.31911298632621765, "learning_rate": 5.003425497032649e-07, "loss": 0.3064, "step": 7283 }, { "epoch": 2.61224001912503, "grad_norm": 0.32919615507125854, "learning_rate": 4.994329955987282e-07, "loss": 0.3125, "step": 7284 }, { "epoch": 2.6125986134353334, "grad_norm": 0.32579830288887024, "learning_rate": 4.985242255069661e-07, "loss": 0.2973, "step": 7285 }, { "epoch": 2.6129572077456373, "grad_norm": 0.3053910732269287, "learning_rate": 4.976162395862882e-07, "loss": 0.3109, "step": 7286 }, { "epoch": 2.613315802055941, "grad_norm": 0.294355183839798, "learning_rate": 4.967090379948703e-07, "loss": 0.2785, "step": 7287 }, { "epoch": 2.6136743963662443, "grad_norm": 0.30689162015914917, "learning_rate": 4.958026208907474e-07, "loss": 0.2796, "step": 7288 }, { "epoch": 2.614032990676548, "grad_norm": 0.31707602739334106, "learning_rate": 4.9489698843182e-07, "loss": 0.3004, "step": 7289 }, { "epoch": 2.6143915849868513, "grad_norm": 0.2901241183280945, "learning_rate": 4.939921407758557e-07, "loss": 0.2859, "step": 7290 }, { "epoch": 2.6147501792971553, "grad_norm": 0.31188538670539856, "learning_rate": 4.930880780804787e-07, "loss": 0.2994, "step": 7291 }, { "epoch": 2.6151087736074587, "grad_norm": 0.29217609763145447, "learning_rate": 4.921848005031809e-07, "loss": 0.2843, "step": 7292 }, { "epoch": 2.6154673679177622, "grad_norm": 0.32150185108184814, "learning_rate": 4.912823082013141e-07, "loss": 0.3293, "step": 7293 }, { "epoch": 2.615825962228066, "grad_norm": 0.30044832825660706, "learning_rate": 4.903806013320972e-07, "loss": 0.2772, "step": 7294 }, { "epoch": 2.6161845565383697, "grad_norm": 0.28470030426979065, "learning_rate": 4.894796800526097e-07, "loss": 0.2587, "step": 7295 }, { "epoch": 2.616543150848673, "grad_norm": 0.29719269275665283, "learning_rate": 4.88579544519795e-07, "loss": 0.3022, "step": 7296 }, { "epoch": 2.6169017451589767, "grad_norm": 0.3047693073749542, "learning_rate": 4.876801948904603e-07, "loss": 0.298, "step": 7297 }, { "epoch": 2.61726033946928, "grad_norm": 0.310275673866272, "learning_rate": 4.867816313212731e-07, "loss": 0.285, "step": 7298 }, { "epoch": 2.617618933779584, "grad_norm": 0.2952953279018402, "learning_rate": 4.858838539687677e-07, "loss": 0.2788, "step": 7299 }, { "epoch": 2.6179775280898876, "grad_norm": 0.3035371005535126, "learning_rate": 4.849868629893389e-07, "loss": 0.2816, "step": 7300 }, { "epoch": 2.618336122400191, "grad_norm": 0.30844900012016296, "learning_rate": 4.840906585392451e-07, "loss": 0.2844, "step": 7301 }, { "epoch": 2.618694716710495, "grad_norm": 0.3086059093475342, "learning_rate": 4.831952407746099e-07, "loss": 0.2976, "step": 7302 }, { "epoch": 2.6190533110207985, "grad_norm": 0.32647985219955444, "learning_rate": 4.823006098514155e-07, "loss": 0.3218, "step": 7303 }, { "epoch": 2.619411905331102, "grad_norm": 0.3210216760635376, "learning_rate": 4.814067659255106e-07, "loss": 0.3002, "step": 7304 }, { "epoch": 2.6197704996414055, "grad_norm": 0.3027578294277191, "learning_rate": 4.805137091526047e-07, "loss": 0.2729, "step": 7305 }, { "epoch": 2.6201290939517095, "grad_norm": 0.3155220150947571, "learning_rate": 4.796214396882714e-07, "loss": 0.2986, "step": 7306 }, { "epoch": 2.620487688262013, "grad_norm": 0.313044935464859, "learning_rate": 4.787299576879472e-07, "loss": 0.2877, "step": 7307 }, { "epoch": 2.6208462825723164, "grad_norm": 0.30508482456207275, "learning_rate": 4.778392633069307e-07, "loss": 0.2968, "step": 7308 }, { "epoch": 2.6212048768826204, "grad_norm": 0.2951241731643677, "learning_rate": 4.769493567003847e-07, "loss": 0.2962, "step": 7309 }, { "epoch": 2.621563471192924, "grad_norm": 0.29216256737709045, "learning_rate": 4.76060238023332e-07, "loss": 0.2823, "step": 7310 }, { "epoch": 2.6219220655032274, "grad_norm": 0.30015212297439575, "learning_rate": 4.751719074306604e-07, "loss": 0.2751, "step": 7311 }, { "epoch": 2.622280659813531, "grad_norm": 0.31872451305389404, "learning_rate": 4.7428436507712006e-07, "loss": 0.3064, "step": 7312 }, { "epoch": 2.6226392541238344, "grad_norm": 0.29299262166023254, "learning_rate": 4.7339761111732386e-07, "loss": 0.2736, "step": 7313 }, { "epoch": 2.6229978484341383, "grad_norm": 0.3050505220890045, "learning_rate": 4.725116457057477e-07, "loss": 0.3014, "step": 7314 }, { "epoch": 2.623356442744442, "grad_norm": 0.30109903216362, "learning_rate": 4.716264689967276e-07, "loss": 0.2947, "step": 7315 }, { "epoch": 2.6237150370547453, "grad_norm": 0.30089467763900757, "learning_rate": 4.707420811444663e-07, "loss": 0.2622, "step": 7316 }, { "epoch": 2.6240736313650492, "grad_norm": 0.3088708221912384, "learning_rate": 4.6985848230302387e-07, "loss": 0.2898, "step": 7317 }, { "epoch": 2.6244322256753527, "grad_norm": 0.29151639342308044, "learning_rate": 4.689756726263289e-07, "loss": 0.2633, "step": 7318 }, { "epoch": 2.624790819985656, "grad_norm": 0.3186648488044739, "learning_rate": 4.680936522681695e-07, "loss": 0.316, "step": 7319 }, { "epoch": 2.6251494142959597, "grad_norm": 0.3028002381324768, "learning_rate": 4.6721242138219435e-07, "loss": 0.3071, "step": 7320 }, { "epoch": 2.625508008606263, "grad_norm": 0.2946563959121704, "learning_rate": 4.663319801219185e-07, "loss": 0.2863, "step": 7321 }, { "epoch": 2.625866602916567, "grad_norm": 0.3114103376865387, "learning_rate": 4.654523286407164e-07, "loss": 0.3057, "step": 7322 }, { "epoch": 2.6262251972268706, "grad_norm": 0.29920950531959534, "learning_rate": 4.6457346709182617e-07, "loss": 0.2989, "step": 7323 }, { "epoch": 2.626583791537174, "grad_norm": 0.2956949472427368, "learning_rate": 4.6369539562834797e-07, "loss": 0.2584, "step": 7324 }, { "epoch": 2.626942385847478, "grad_norm": 0.309538334608078, "learning_rate": 4.6281811440324565e-07, "loss": 0.319, "step": 7325 }, { "epoch": 2.6273009801577816, "grad_norm": 0.30020415782928467, "learning_rate": 4.619416235693447e-07, "loss": 0.2876, "step": 7326 }, { "epoch": 2.627659574468085, "grad_norm": 0.3038996160030365, "learning_rate": 4.610659232793302e-07, "loss": 0.2844, "step": 7327 }, { "epoch": 2.6280181687783886, "grad_norm": 0.28407803177833557, "learning_rate": 4.60191013685754e-07, "loss": 0.277, "step": 7328 }, { "epoch": 2.628376763088692, "grad_norm": 0.3206651508808136, "learning_rate": 4.5931689494102705e-07, "loss": 0.2974, "step": 7329 }, { "epoch": 2.628735357398996, "grad_norm": 0.3305984437465668, "learning_rate": 4.5844356719742357e-07, "loss": 0.295, "step": 7330 }, { "epoch": 2.6290939517092995, "grad_norm": 0.31667810678482056, "learning_rate": 4.5757103060708187e-07, "loss": 0.2848, "step": 7331 }, { "epoch": 2.6294525460196034, "grad_norm": 0.3134051263332367, "learning_rate": 4.5669928532199757e-07, "loss": 0.276, "step": 7332 }, { "epoch": 2.629811140329907, "grad_norm": 0.28575438261032104, "learning_rate": 4.558283314940337e-07, "loss": 0.267, "step": 7333 }, { "epoch": 2.6301697346402104, "grad_norm": 0.3083690404891968, "learning_rate": 4.5495816927490996e-07, "loss": 0.3309, "step": 7334 }, { "epoch": 2.630528328950514, "grad_norm": 0.29659682512283325, "learning_rate": 4.5408879881621624e-07, "loss": 0.2625, "step": 7335 }, { "epoch": 2.6308869232608174, "grad_norm": 0.32632583379745483, "learning_rate": 4.5322022026939526e-07, "loss": 0.3203, "step": 7336 }, { "epoch": 2.6312455175711214, "grad_norm": 0.2946503758430481, "learning_rate": 4.523524337857582e-07, "loss": 0.2901, "step": 7337 }, { "epoch": 2.631604111881425, "grad_norm": 0.31670722365379333, "learning_rate": 4.514854395164764e-07, "loss": 0.3399, "step": 7338 }, { "epoch": 2.6319627061917283, "grad_norm": 0.30855074524879456, "learning_rate": 4.506192376125817e-07, "loss": 0.2738, "step": 7339 }, { "epoch": 2.6323213005020323, "grad_norm": 0.30261433124542236, "learning_rate": 4.497538282249697e-07, "loss": 0.2835, "step": 7340 }, { "epoch": 2.6326798948123358, "grad_norm": 0.31919392943382263, "learning_rate": 4.488892115043969e-07, "loss": 0.3101, "step": 7341 }, { "epoch": 2.6330384891226393, "grad_norm": 0.32097744941711426, "learning_rate": 4.480253876014834e-07, "loss": 0.2741, "step": 7342 }, { "epoch": 2.6333970834329428, "grad_norm": 0.3132316470146179, "learning_rate": 4.471623566667099e-07, "loss": 0.3009, "step": 7343 }, { "epoch": 2.6337556777432463, "grad_norm": 0.2907925248146057, "learning_rate": 4.4630011885041793e-07, "loss": 0.2982, "step": 7344 }, { "epoch": 2.63411427205355, "grad_norm": 0.3125377595424652, "learning_rate": 4.454386743028133e-07, "loss": 0.2901, "step": 7345 }, { "epoch": 2.6344728663638537, "grad_norm": 0.2980191707611084, "learning_rate": 4.445780231739599e-07, "loss": 0.2746, "step": 7346 }, { "epoch": 2.634831460674157, "grad_norm": 0.3051179349422455, "learning_rate": 4.4371816561378847e-07, "loss": 0.3036, "step": 7347 }, { "epoch": 2.635190054984461, "grad_norm": 0.30399686098098755, "learning_rate": 4.42859101772089e-07, "loss": 0.2967, "step": 7348 }, { "epoch": 2.6355486492947646, "grad_norm": 0.3110601007938385, "learning_rate": 4.4200083179851083e-07, "loss": 0.3147, "step": 7349 }, { "epoch": 2.635907243605068, "grad_norm": 0.31498241424560547, "learning_rate": 4.4114335584256986e-07, "loss": 0.2757, "step": 7350 }, { "epoch": 2.6362658379153716, "grad_norm": 0.3323615789413452, "learning_rate": 4.4028667405363824e-07, "loss": 0.309, "step": 7351 }, { "epoch": 2.636624432225675, "grad_norm": 0.2972240149974823, "learning_rate": 4.3943078658095497e-07, "loss": 0.2926, "step": 7352 }, { "epoch": 2.636983026535979, "grad_norm": 0.28406819701194763, "learning_rate": 4.3857569357361685e-07, "loss": 0.2728, "step": 7353 }, { "epoch": 2.6373416208462825, "grad_norm": 0.3080613315105438, "learning_rate": 4.377213951805842e-07, "loss": 0.3219, "step": 7354 }, { "epoch": 2.637700215156586, "grad_norm": 0.32451334595680237, "learning_rate": 4.3686789155067967e-07, "loss": 0.2962, "step": 7355 }, { "epoch": 2.63805880946689, "grad_norm": 0.305178701877594, "learning_rate": 4.3601518283258426e-07, "loss": 0.2723, "step": 7356 }, { "epoch": 2.6384174037771935, "grad_norm": 0.3104367256164551, "learning_rate": 4.351632691748431e-07, "loss": 0.2955, "step": 7357 }, { "epoch": 2.638775998087497, "grad_norm": 0.3106652498245239, "learning_rate": 4.343121507258624e-07, "loss": 0.3139, "step": 7358 }, { "epoch": 2.6391345923978005, "grad_norm": 0.30540573596954346, "learning_rate": 4.3346182763391033e-07, "loss": 0.2936, "step": 7359 }, { "epoch": 2.639493186708104, "grad_norm": 0.292915016412735, "learning_rate": 4.326123000471155e-07, "loss": 0.2919, "step": 7360 }, { "epoch": 2.639851781018408, "grad_norm": 0.3246549367904663, "learning_rate": 4.3176356811346744e-07, "loss": 0.3027, "step": 7361 }, { "epoch": 2.6402103753287114, "grad_norm": 0.3016047775745392, "learning_rate": 4.3091563198081945e-07, "loss": 0.2825, "step": 7362 }, { "epoch": 2.6405689696390153, "grad_norm": 0.3046342730522156, "learning_rate": 4.3006849179688115e-07, "loss": 0.293, "step": 7363 }, { "epoch": 2.640927563949319, "grad_norm": 0.28717610239982605, "learning_rate": 4.292221477092318e-07, "loss": 0.2666, "step": 7364 }, { "epoch": 2.6412861582596223, "grad_norm": 0.302571564912796, "learning_rate": 4.283765998653039e-07, "loss": 0.3029, "step": 7365 }, { "epoch": 2.641644752569926, "grad_norm": 0.2944874167442322, "learning_rate": 4.2753184841239525e-07, "loss": 0.2634, "step": 7366 }, { "epoch": 2.6420033468802293, "grad_norm": 0.30672553181648254, "learning_rate": 4.2668789349766484e-07, "loss": 0.2739, "step": 7367 }, { "epoch": 2.6423619411905332, "grad_norm": 0.3098323345184326, "learning_rate": 4.2584473526813165e-07, "loss": 0.2705, "step": 7368 }, { "epoch": 2.6427205355008367, "grad_norm": 0.3287743031978607, "learning_rate": 4.25002373870676e-07, "loss": 0.3083, "step": 7369 }, { "epoch": 2.6430791298111402, "grad_norm": 0.29980602860450745, "learning_rate": 4.241608094520405e-07, "loss": 0.2863, "step": 7370 }, { "epoch": 2.643437724121444, "grad_norm": 0.2907072901725769, "learning_rate": 4.2332004215882847e-07, "loss": 0.2819, "step": 7371 }, { "epoch": 2.6437963184317477, "grad_norm": 0.2764759063720703, "learning_rate": 4.2248007213750495e-07, "loss": 0.2805, "step": 7372 }, { "epoch": 2.644154912742051, "grad_norm": 0.3139581084251404, "learning_rate": 4.2164089953439344e-07, "loss": 0.3258, "step": 7373 }, { "epoch": 2.6445135070523547, "grad_norm": 0.31328868865966797, "learning_rate": 4.2080252449568093e-07, "loss": 0.2835, "step": 7374 }, { "epoch": 2.644872101362658, "grad_norm": 0.3135175406932831, "learning_rate": 4.1996494716741554e-07, "loss": 0.26, "step": 7375 }, { "epoch": 2.645230695672962, "grad_norm": 0.3078685402870178, "learning_rate": 4.191281676955061e-07, "loss": 0.2936, "step": 7376 }, { "epoch": 2.6455892899832656, "grad_norm": 0.31161820888519287, "learning_rate": 4.182921862257222e-07, "loss": 0.2939, "step": 7377 }, { "epoch": 2.645947884293569, "grad_norm": 0.27989616990089417, "learning_rate": 4.1745700290369285e-07, "loss": 0.2659, "step": 7378 }, { "epoch": 2.646306478603873, "grad_norm": 0.2993752062320709, "learning_rate": 4.166226178749122e-07, "loss": 0.3157, "step": 7379 }, { "epoch": 2.6466650729141765, "grad_norm": 0.30015867948532104, "learning_rate": 4.1578903128472904e-07, "loss": 0.2913, "step": 7380 }, { "epoch": 2.64702366722448, "grad_norm": 0.317364901304245, "learning_rate": 4.1495624327836047e-07, "loss": 0.3177, "step": 7381 }, { "epoch": 2.6473822615347835, "grad_norm": 0.3100501596927643, "learning_rate": 4.141242540008789e-07, "loss": 0.3122, "step": 7382 }, { "epoch": 2.647740855845087, "grad_norm": 0.2802010476589203, "learning_rate": 4.1329306359721933e-07, "loss": 0.2821, "step": 7383 }, { "epoch": 2.648099450155391, "grad_norm": 0.29529279470443726, "learning_rate": 4.124626722121794e-07, "loss": 0.2967, "step": 7384 }, { "epoch": 2.6484580444656944, "grad_norm": 0.31365537643432617, "learning_rate": 4.116330799904134e-07, "loss": 0.3272, "step": 7385 }, { "epoch": 2.648816638775998, "grad_norm": 0.29606229066848755, "learning_rate": 4.1080428707644005e-07, "loss": 0.2762, "step": 7386 }, { "epoch": 2.649175233086302, "grad_norm": 0.2956813871860504, "learning_rate": 4.099762936146379e-07, "loss": 0.2944, "step": 7387 }, { "epoch": 2.6495338273966054, "grad_norm": 0.30785831809043884, "learning_rate": 4.091490997492459e-07, "loss": 0.2881, "step": 7388 }, { "epoch": 2.649892421706909, "grad_norm": 0.29160794615745544, "learning_rate": 4.0832270562436436e-07, "loss": 0.2569, "step": 7389 }, { "epoch": 2.6502510160172124, "grad_norm": 0.3144904375076294, "learning_rate": 4.074971113839521e-07, "loss": 0.3053, "step": 7390 }, { "epoch": 2.6506096103275163, "grad_norm": 0.3036048114299774, "learning_rate": 4.0667231717183177e-07, "loss": 0.3136, "step": 7391 }, { "epoch": 2.65096820463782, "grad_norm": 0.30215638875961304, "learning_rate": 4.0584832313168287e-07, "loss": 0.2545, "step": 7392 }, { "epoch": 2.6513267989481233, "grad_norm": 0.30488821864128113, "learning_rate": 4.050251294070512e-07, "loss": 0.2867, "step": 7393 }, { "epoch": 2.6516853932584272, "grad_norm": 0.3048126995563507, "learning_rate": 4.04202736141337e-07, "loss": 0.2964, "step": 7394 }, { "epoch": 2.6520439875687307, "grad_norm": 0.3068011701107025, "learning_rate": 4.0338114347780453e-07, "loss": 0.2992, "step": 7395 }, { "epoch": 2.652402581879034, "grad_norm": 0.2987020015716553, "learning_rate": 4.0256035155957826e-07, "loss": 0.3025, "step": 7396 }, { "epoch": 2.6527611761893377, "grad_norm": 0.32819366455078125, "learning_rate": 4.0174036052964047e-07, "loss": 0.2943, "step": 7397 }, { "epoch": 2.653119770499641, "grad_norm": 0.3201604187488556, "learning_rate": 4.0092117053084023e-07, "loss": 0.2855, "step": 7398 }, { "epoch": 2.653478364809945, "grad_norm": 0.2981281280517578, "learning_rate": 4.001027817058789e-07, "loss": 0.2744, "step": 7399 }, { "epoch": 2.6538369591202486, "grad_norm": 0.2895088195800781, "learning_rate": 3.992851941973247e-07, "loss": 0.2864, "step": 7400 }, { "epoch": 2.654195553430552, "grad_norm": 0.30073317885398865, "learning_rate": 3.984684081476042e-07, "loss": 0.2924, "step": 7401 }, { "epoch": 2.654554147740856, "grad_norm": 0.2944469153881073, "learning_rate": 3.9765242369900205e-07, "loss": 0.2741, "step": 7402 }, { "epoch": 2.6549127420511596, "grad_norm": 0.2992575168609619, "learning_rate": 3.968372409936666e-07, "loss": 0.2999, "step": 7403 }, { "epoch": 2.655271336361463, "grad_norm": 0.3233589828014374, "learning_rate": 3.9602286017360493e-07, "loss": 0.3176, "step": 7404 }, { "epoch": 2.6556299306717666, "grad_norm": 0.2998752295970917, "learning_rate": 3.952092813806846e-07, "loss": 0.2925, "step": 7405 }, { "epoch": 2.65598852498207, "grad_norm": 0.3007432222366333, "learning_rate": 3.943965047566345e-07, "loss": 0.2888, "step": 7406 }, { "epoch": 2.656347119292374, "grad_norm": 0.28240635991096497, "learning_rate": 3.935845304430419e-07, "loss": 0.29, "step": 7407 }, { "epoch": 2.6567057136026775, "grad_norm": 0.294732928276062, "learning_rate": 3.927733585813559e-07, "loss": 0.3061, "step": 7408 }, { "epoch": 2.657064307912981, "grad_norm": 0.3370433449745178, "learning_rate": 3.919629893128829e-07, "loss": 0.3087, "step": 7409 }, { "epoch": 2.657422902223285, "grad_norm": 0.30600273609161377, "learning_rate": 3.9115342277879553e-07, "loss": 0.2905, "step": 7410 }, { "epoch": 2.6577814965335884, "grad_norm": 0.31581956148147583, "learning_rate": 3.9034465912012044e-07, "loss": 0.3037, "step": 7411 }, { "epoch": 2.658140090843892, "grad_norm": 0.3292066156864166, "learning_rate": 3.8953669847774656e-07, "loss": 0.3086, "step": 7412 }, { "epoch": 2.6584986851541954, "grad_norm": 0.30314385890960693, "learning_rate": 3.8872954099242466e-07, "loss": 0.2745, "step": 7413 }, { "epoch": 2.658857279464499, "grad_norm": 0.31331488490104675, "learning_rate": 3.8792318680476173e-07, "loss": 0.2803, "step": 7414 }, { "epoch": 2.659215873774803, "grad_norm": 0.3215433955192566, "learning_rate": 3.8711763605523035e-07, "loss": 0.3136, "step": 7415 }, { "epoch": 2.6595744680851063, "grad_norm": 0.2900163531303406, "learning_rate": 3.863128888841572e-07, "loss": 0.2891, "step": 7416 }, { "epoch": 2.65993306239541, "grad_norm": 0.32190003991127014, "learning_rate": 3.855089454317329e-07, "loss": 0.306, "step": 7417 }, { "epoch": 2.6602916567057138, "grad_norm": 0.2707263231277466, "learning_rate": 3.8470580583800764e-07, "loss": 0.2595, "step": 7418 }, { "epoch": 2.6606502510160173, "grad_norm": 0.3006476163864136, "learning_rate": 3.83903470242889e-07, "loss": 0.3116, "step": 7419 }, { "epoch": 2.6610088453263208, "grad_norm": 0.28547748923301697, "learning_rate": 3.8310193878614787e-07, "loss": 0.2852, "step": 7420 }, { "epoch": 2.6613674396366243, "grad_norm": 0.3059665560722351, "learning_rate": 3.8230121160741265e-07, "loss": 0.2985, "step": 7421 }, { "epoch": 2.661726033946928, "grad_norm": 0.34504085779190063, "learning_rate": 3.8150128884617335e-07, "loss": 0.3152, "step": 7422 }, { "epoch": 2.6620846282572317, "grad_norm": 0.3023560047149658, "learning_rate": 3.8070217064177796e-07, "loss": 0.2632, "step": 7423 }, { "epoch": 2.662443222567535, "grad_norm": 0.32164910435676575, "learning_rate": 3.7990385713343625e-07, "loss": 0.3208, "step": 7424 }, { "epoch": 2.662801816877839, "grad_norm": 0.29119500517845154, "learning_rate": 3.79106348460217e-07, "loss": 0.2582, "step": 7425 }, { "epoch": 2.6631604111881426, "grad_norm": 0.30718347430229187, "learning_rate": 3.783096447610463e-07, "loss": 0.3115, "step": 7426 }, { "epoch": 2.663519005498446, "grad_norm": 0.28792744874954224, "learning_rate": 3.7751374617471694e-07, "loss": 0.2569, "step": 7427 }, { "epoch": 2.6638775998087496, "grad_norm": 0.3032756745815277, "learning_rate": 3.7671865283987254e-07, "loss": 0.2932, "step": 7428 }, { "epoch": 2.664236194119053, "grad_norm": 0.3120632469654083, "learning_rate": 3.759243648950234e-07, "loss": 0.3011, "step": 7429 }, { "epoch": 2.664594788429357, "grad_norm": 0.2883196771144867, "learning_rate": 3.751308824785366e-07, "loss": 0.3026, "step": 7430 }, { "epoch": 2.6649533827396605, "grad_norm": 0.3085491359233856, "learning_rate": 3.7433820572863765e-07, "loss": 0.2828, "step": 7431 }, { "epoch": 2.665311977049964, "grad_norm": 0.3208056092262268, "learning_rate": 3.7354633478341563e-07, "loss": 0.3192, "step": 7432 }, { "epoch": 2.665670571360268, "grad_norm": 0.31568700075149536, "learning_rate": 3.7275526978081567e-07, "loss": 0.2875, "step": 7433 }, { "epoch": 2.6660291656705715, "grad_norm": 0.3142341673374176, "learning_rate": 3.7196501085864425e-07, "loss": 0.2856, "step": 7434 }, { "epoch": 2.666387759980875, "grad_norm": 0.29241034388542175, "learning_rate": 3.711755581545656e-07, "loss": 0.276, "step": 7435 }, { "epoch": 2.6667463542911785, "grad_norm": 0.30357202887535095, "learning_rate": 3.70386911806106e-07, "loss": 0.2929, "step": 7436 }, { "epoch": 2.667104948601482, "grad_norm": 0.34030312299728394, "learning_rate": 3.695990719506498e-07, "loss": 0.3095, "step": 7437 }, { "epoch": 2.667463542911786, "grad_norm": 0.3007189929485321, "learning_rate": 3.688120387254418e-07, "loss": 0.2737, "step": 7438 }, { "epoch": 2.6678221372220894, "grad_norm": 0.326225608587265, "learning_rate": 3.6802581226758506e-07, "loss": 0.3207, "step": 7439 }, { "epoch": 2.668180731532393, "grad_norm": 0.2806052267551422, "learning_rate": 3.672403927140428e-07, "loss": 0.2611, "step": 7440 }, { "epoch": 2.668539325842697, "grad_norm": 0.2887875437736511, "learning_rate": 3.664557802016366e-07, "loss": 0.3024, "step": 7441 }, { "epoch": 2.6688979201530003, "grad_norm": 0.30014172196388245, "learning_rate": 3.6567197486705064e-07, "loss": 0.2902, "step": 7442 }, { "epoch": 2.669256514463304, "grad_norm": 0.32915404438972473, "learning_rate": 3.6488897684682333e-07, "loss": 0.3025, "step": 7443 }, { "epoch": 2.6696151087736073, "grad_norm": 0.3039432168006897, "learning_rate": 3.641067862773584e-07, "loss": 0.2914, "step": 7444 }, { "epoch": 2.669973703083911, "grad_norm": 0.3047531247138977, "learning_rate": 3.6332540329491296e-07, "loss": 0.2944, "step": 7445 }, { "epoch": 2.6703322973942147, "grad_norm": 0.30655327439308167, "learning_rate": 3.6254482803560864e-07, "loss": 0.301, "step": 7446 }, { "epoch": 2.6706908917045182, "grad_norm": 0.3138880431652069, "learning_rate": 3.617650606354234e-07, "loss": 0.3127, "step": 7447 }, { "epoch": 2.671049486014822, "grad_norm": 0.31981128454208374, "learning_rate": 3.609861012301946e-07, "loss": 0.2988, "step": 7448 }, { "epoch": 2.6714080803251257, "grad_norm": 0.30776894092559814, "learning_rate": 3.602079499556199e-07, "loss": 0.2553, "step": 7449 }, { "epoch": 2.671766674635429, "grad_norm": 0.3283083438873291, "learning_rate": 3.5943060694725527e-07, "loss": 0.3174, "step": 7450 }, { "epoch": 2.6721252689457327, "grad_norm": 0.297855019569397, "learning_rate": 3.586540723405169e-07, "loss": 0.3108, "step": 7451 }, { "epoch": 2.672483863256036, "grad_norm": 0.30860385298728943, "learning_rate": 3.578783462706792e-07, "loss": 0.2961, "step": 7452 }, { "epoch": 2.67284245756634, "grad_norm": 0.29319247603416443, "learning_rate": 3.571034288728753e-07, "loss": 0.2837, "step": 7453 }, { "epoch": 2.6732010518766436, "grad_norm": 0.3241729140281677, "learning_rate": 3.5632932028209944e-07, "loss": 0.3165, "step": 7454 }, { "epoch": 2.673559646186947, "grad_norm": 0.2807650566101074, "learning_rate": 3.5555602063320314e-07, "loss": 0.2761, "step": 7455 }, { "epoch": 2.673918240497251, "grad_norm": 0.30393198132514954, "learning_rate": 3.547835300608987e-07, "loss": 0.2972, "step": 7456 }, { "epoch": 2.6742768348075545, "grad_norm": 0.31017133593559265, "learning_rate": 3.5401184869975405e-07, "loss": 0.278, "step": 7457 }, { "epoch": 2.674635429117858, "grad_norm": 0.2936673164367676, "learning_rate": 3.5324097668420044e-07, "loss": 0.272, "step": 7458 }, { "epoch": 2.6749940234281615, "grad_norm": 0.2977992296218872, "learning_rate": 3.524709141485261e-07, "loss": 0.3206, "step": 7459 }, { "epoch": 2.675352617738465, "grad_norm": 0.29097819328308105, "learning_rate": 3.5170166122687645e-07, "loss": 0.2662, "step": 7460 }, { "epoch": 2.675711212048769, "grad_norm": 0.3076026141643524, "learning_rate": 3.5093321805326043e-07, "loss": 0.3154, "step": 7461 }, { "epoch": 2.6760698063590724, "grad_norm": 0.2980334758758545, "learning_rate": 3.501655847615415e-07, "loss": 0.2859, "step": 7462 }, { "epoch": 2.676428400669376, "grad_norm": 0.2997245192527771, "learning_rate": 3.4939876148544437e-07, "loss": 0.2958, "step": 7463 }, { "epoch": 2.67678699497968, "grad_norm": 0.32386523485183716, "learning_rate": 3.486327483585517e-07, "loss": 0.2907, "step": 7464 }, { "epoch": 2.6771455892899834, "grad_norm": 0.29842856526374817, "learning_rate": 3.478675455143049e-07, "loss": 0.2741, "step": 7465 }, { "epoch": 2.677504183600287, "grad_norm": 0.30896177887916565, "learning_rate": 3.4710315308600527e-07, "loss": 0.3066, "step": 7466 }, { "epoch": 2.6778627779105904, "grad_norm": 0.2936650812625885, "learning_rate": 3.4633957120681294e-07, "loss": 0.2871, "step": 7467 }, { "epoch": 2.678221372220894, "grad_norm": 0.29345762729644775, "learning_rate": 3.455768000097459e-07, "loss": 0.2849, "step": 7468 }, { "epoch": 2.678579966531198, "grad_norm": 0.33612677454948425, "learning_rate": 3.448148396276807e-07, "loss": 0.3192, "step": 7469 }, { "epoch": 2.6789385608415013, "grad_norm": 0.32264119386672974, "learning_rate": 3.4405369019335333e-07, "loss": 0.2764, "step": 7470 }, { "epoch": 2.6792971551518048, "grad_norm": 0.28634482622146606, "learning_rate": 3.432933518393583e-07, "loss": 0.26, "step": 7471 }, { "epoch": 2.6796557494621087, "grad_norm": 0.30698588490486145, "learning_rate": 3.425338246981497e-07, "loss": 0.3008, "step": 7472 }, { "epoch": 2.680014343772412, "grad_norm": 0.29219290614128113, "learning_rate": 3.4177510890203934e-07, "loss": 0.2713, "step": 7473 }, { "epoch": 2.6803729380827157, "grad_norm": 0.2979409694671631, "learning_rate": 3.410172045831972e-07, "loss": 0.3057, "step": 7474 }, { "epoch": 2.680731532393019, "grad_norm": 0.3104787766933441, "learning_rate": 3.402601118736526e-07, "loss": 0.3202, "step": 7475 }, { "epoch": 2.681090126703323, "grad_norm": 0.30888450145721436, "learning_rate": 3.3950383090529506e-07, "loss": 0.3147, "step": 7476 }, { "epoch": 2.6814487210136266, "grad_norm": 0.3223680555820465, "learning_rate": 3.38748361809868e-07, "loss": 0.3173, "step": 7477 }, { "epoch": 2.68180731532393, "grad_norm": 0.30910080671310425, "learning_rate": 3.3799370471897963e-07, "loss": 0.2604, "step": 7478 }, { "epoch": 2.682165909634234, "grad_norm": 0.32813790440559387, "learning_rate": 3.3723985976409136e-07, "loss": 0.2817, "step": 7479 }, { "epoch": 2.6825245039445376, "grad_norm": 0.3131828308105469, "learning_rate": 3.3648682707652757e-07, "loss": 0.3069, "step": 7480 }, { "epoch": 2.682883098254841, "grad_norm": 0.30245599150657654, "learning_rate": 3.357346067874662e-07, "loss": 0.2891, "step": 7481 }, { "epoch": 2.6832416925651446, "grad_norm": 0.32360413670539856, "learning_rate": 3.3498319902794786e-07, "loss": 0.2993, "step": 7482 }, { "epoch": 2.683600286875448, "grad_norm": 0.3147381544113159, "learning_rate": 3.3423260392887016e-07, "loss": 0.2898, "step": 7483 }, { "epoch": 2.683958881185752, "grad_norm": 0.2788251042366028, "learning_rate": 3.3348282162098845e-07, "loss": 0.2529, "step": 7484 }, { "epoch": 2.6843174754960555, "grad_norm": 0.3049413561820984, "learning_rate": 3.3273385223491886e-07, "loss": 0.2944, "step": 7485 }, { "epoch": 2.684676069806359, "grad_norm": 0.29158487915992737, "learning_rate": 3.3198569590113193e-07, "loss": 0.2767, "step": 7486 }, { "epoch": 2.685034664116663, "grad_norm": 0.31474560499191284, "learning_rate": 3.3123835274995963e-07, "loss": 0.3229, "step": 7487 }, { "epoch": 2.6853932584269664, "grad_norm": 0.30296915769577026, "learning_rate": 3.304918229115928e-07, "loss": 0.2679, "step": 7488 }, { "epoch": 2.68575185273727, "grad_norm": 0.3031233251094818, "learning_rate": 3.297461065160762e-07, "loss": 0.2756, "step": 7489 }, { "epoch": 2.6861104470475734, "grad_norm": 0.2923312783241272, "learning_rate": 3.2900120369331935e-07, "loss": 0.2928, "step": 7490 }, { "epoch": 2.686469041357877, "grad_norm": 0.2927806079387665, "learning_rate": 3.282571145730845e-07, "loss": 0.3147, "step": 7491 }, { "epoch": 2.686827635668181, "grad_norm": 0.29251015186309814, "learning_rate": 3.2751383928499525e-07, "loss": 0.3071, "step": 7492 }, { "epoch": 2.6871862299784843, "grad_norm": 0.290179580450058, "learning_rate": 3.267713779585319e-07, "loss": 0.2757, "step": 7493 }, { "epoch": 2.687544824288788, "grad_norm": 0.29168277978897095, "learning_rate": 3.2602973072303255e-07, "loss": 0.2592, "step": 7494 }, { "epoch": 2.6879034185990918, "grad_norm": 0.310571551322937, "learning_rate": 3.2528889770769734e-07, "loss": 0.3348, "step": 7495 }, { "epoch": 2.6882620129093953, "grad_norm": 0.31225892901420593, "learning_rate": 3.245488790415796e-07, "loss": 0.2779, "step": 7496 }, { "epoch": 2.6886206072196988, "grad_norm": 0.350715309381485, "learning_rate": 3.2380967485359403e-07, "loss": 0.2972, "step": 7497 }, { "epoch": 2.6889792015300022, "grad_norm": 0.3075704276561737, "learning_rate": 3.230712852725104e-07, "loss": 0.2991, "step": 7498 }, { "epoch": 2.6893377958403057, "grad_norm": 0.30005866289138794, "learning_rate": 3.2233371042695973e-07, "loss": 0.2894, "step": 7499 }, { "epoch": 2.6896963901506097, "grad_norm": 0.30840596556663513, "learning_rate": 3.215969504454297e-07, "loss": 0.2728, "step": 7500 }, { "epoch": 2.690054984460913, "grad_norm": 0.3066266179084778, "learning_rate": 3.2086100545626665e-07, "loss": 0.3019, "step": 7501 }, { "epoch": 2.6904135787712167, "grad_norm": 0.2891068458557129, "learning_rate": 3.2012587558767407e-07, "loss": 0.2814, "step": 7502 }, { "epoch": 2.6907721730815206, "grad_norm": 0.2925783097743988, "learning_rate": 3.1939156096771396e-07, "loss": 0.2937, "step": 7503 }, { "epoch": 2.691130767391824, "grad_norm": 0.30497226119041443, "learning_rate": 3.186580617243057e-07, "loss": 0.2821, "step": 7504 }, { "epoch": 2.6914893617021276, "grad_norm": 0.32339268922805786, "learning_rate": 3.1792537798522817e-07, "loss": 0.291, "step": 7505 }, { "epoch": 2.691847956012431, "grad_norm": 0.30267056822776794, "learning_rate": 3.1719350987811537e-07, "loss": 0.3104, "step": 7506 }, { "epoch": 2.692206550322735, "grad_norm": 0.3046872615814209, "learning_rate": 3.164624575304637e-07, "loss": 0.2808, "step": 7507 }, { "epoch": 2.6925651446330385, "grad_norm": 0.31795158982276917, "learning_rate": 3.157322210696223e-07, "loss": 0.3012, "step": 7508 }, { "epoch": 2.692923738943342, "grad_norm": 0.32830193638801575, "learning_rate": 3.150028006228023e-07, "loss": 0.3024, "step": 7509 }, { "epoch": 2.693282333253646, "grad_norm": 0.32297495007514954, "learning_rate": 3.142741963170698e-07, "loss": 0.3033, "step": 7510 }, { "epoch": 2.6936409275639495, "grad_norm": 0.30095240473747253, "learning_rate": 3.1354640827935045e-07, "loss": 0.2811, "step": 7511 }, { "epoch": 2.693999521874253, "grad_norm": 0.27758389711380005, "learning_rate": 3.1281943663642676e-07, "loss": 0.2716, "step": 7512 }, { "epoch": 2.6943581161845565, "grad_norm": 0.3054787814617157, "learning_rate": 3.120932815149402e-07, "loss": 0.3015, "step": 7513 }, { "epoch": 2.69471671049486, "grad_norm": 0.3126533627510071, "learning_rate": 3.113679430413896e-07, "loss": 0.2936, "step": 7514 }, { "epoch": 2.695075304805164, "grad_norm": 0.2936883866786957, "learning_rate": 3.106434213421294e-07, "loss": 0.3045, "step": 7515 }, { "epoch": 2.6954338991154674, "grad_norm": 0.3073248565196991, "learning_rate": 3.0991971654337485e-07, "loss": 0.2802, "step": 7516 }, { "epoch": 2.695792493425771, "grad_norm": 0.30954742431640625, "learning_rate": 3.0919682877119774e-07, "loss": 0.2792, "step": 7517 }, { "epoch": 2.696151087736075, "grad_norm": 0.28863435983657837, "learning_rate": 3.0847475815152685e-07, "loss": 0.2645, "step": 7518 }, { "epoch": 2.6965096820463783, "grad_norm": 0.31004470586776733, "learning_rate": 3.077535048101493e-07, "loss": 0.2941, "step": 7519 }, { "epoch": 2.696868276356682, "grad_norm": 0.31459933519363403, "learning_rate": 3.0703306887270967e-07, "loss": 0.2986, "step": 7520 }, { "epoch": 2.6972268706669853, "grad_norm": 0.30655571818351746, "learning_rate": 3.063134504647103e-07, "loss": 0.3205, "step": 7521 }, { "epoch": 2.697585464977289, "grad_norm": 0.29291197657585144, "learning_rate": 3.055946497115103e-07, "loss": 0.2664, "step": 7522 }, { "epoch": 2.6979440592875927, "grad_norm": 0.3000508248806, "learning_rate": 3.0487666673832685e-07, "loss": 0.2839, "step": 7523 }, { "epoch": 2.6983026535978962, "grad_norm": 0.31404098868370056, "learning_rate": 3.0415950167023645e-07, "loss": 0.3195, "step": 7524 }, { "epoch": 2.6986612479081997, "grad_norm": 0.2795563340187073, "learning_rate": 3.034431546321703e-07, "loss": 0.2504, "step": 7525 }, { "epoch": 2.6990198422185037, "grad_norm": 0.31836143136024475, "learning_rate": 3.0272762574891866e-07, "loss": 0.3591, "step": 7526 }, { "epoch": 2.699378436528807, "grad_norm": 0.3154538571834564, "learning_rate": 3.0201291514512785e-07, "loss": 0.2912, "step": 7527 }, { "epoch": 2.6997370308391107, "grad_norm": 0.30959561467170715, "learning_rate": 3.012990229453039e-07, "loss": 0.3032, "step": 7528 }, { "epoch": 2.700095625149414, "grad_norm": 0.31363168358802795, "learning_rate": 3.0058594927380837e-07, "loss": 0.2788, "step": 7529 }, { "epoch": 2.7004542194597176, "grad_norm": 0.289472758769989, "learning_rate": 2.9987369425486137e-07, "loss": 0.2763, "step": 7530 }, { "epoch": 2.7008128137700216, "grad_norm": 0.3010469973087311, "learning_rate": 2.991622580125408e-07, "loss": 0.2954, "step": 7531 }, { "epoch": 2.701171408080325, "grad_norm": 0.2986663579940796, "learning_rate": 2.9845164067077883e-07, "loss": 0.3064, "step": 7532 }, { "epoch": 2.7015300023906286, "grad_norm": 0.3226589262485504, "learning_rate": 2.9774184235336846e-07, "loss": 0.317, "step": 7533 }, { "epoch": 2.7018885967009325, "grad_norm": 0.3067325949668884, "learning_rate": 2.9703286318395863e-07, "loss": 0.298, "step": 7534 }, { "epoch": 2.702247191011236, "grad_norm": 0.2982819378376007, "learning_rate": 2.963247032860561e-07, "loss": 0.2892, "step": 7535 }, { "epoch": 2.7026057853215395, "grad_norm": 0.29045918583869934, "learning_rate": 2.956173627830244e-07, "loss": 0.3024, "step": 7536 }, { "epoch": 2.702964379631843, "grad_norm": 0.3046587407588959, "learning_rate": 2.9491084179808394e-07, "loss": 0.3122, "step": 7537 }, { "epoch": 2.703322973942147, "grad_norm": 0.27893689274787903, "learning_rate": 2.9420514045431394e-07, "loss": 0.2597, "step": 7538 }, { "epoch": 2.7036815682524504, "grad_norm": 0.307996928691864, "learning_rate": 2.935002588746483e-07, "loss": 0.2809, "step": 7539 }, { "epoch": 2.704040162562754, "grad_norm": 0.3056161403656006, "learning_rate": 2.9279619718188047e-07, "loss": 0.3136, "step": 7540 }, { "epoch": 2.704398756873058, "grad_norm": 0.28787046670913696, "learning_rate": 2.9209295549865956e-07, "loss": 0.2743, "step": 7541 }, { "epoch": 2.7047573511833614, "grad_norm": 0.29550859332084656, "learning_rate": 2.9139053394749316e-07, "loss": 0.3017, "step": 7542 }, { "epoch": 2.705115945493665, "grad_norm": 0.2953530550003052, "learning_rate": 2.9068893265074615e-07, "loss": 0.2738, "step": 7543 }, { "epoch": 2.7054745398039683, "grad_norm": 0.31259486079216003, "learning_rate": 2.899881517306374e-07, "loss": 0.296, "step": 7544 }, { "epoch": 2.705833134114272, "grad_norm": 0.2944324314594269, "learning_rate": 2.8928819130924656e-07, "loss": 0.2991, "step": 7545 }, { "epoch": 2.706191728424576, "grad_norm": 0.31367430090904236, "learning_rate": 2.8858905150850825e-07, "loss": 0.2825, "step": 7546 }, { "epoch": 2.7065503227348793, "grad_norm": 0.28238004446029663, "learning_rate": 2.878907324502156e-07, "loss": 0.2637, "step": 7547 }, { "epoch": 2.7069089170451828, "grad_norm": 0.3028237521648407, "learning_rate": 2.871932342560185e-07, "loss": 0.306, "step": 7548 }, { "epoch": 2.7072675113554867, "grad_norm": 0.27884697914123535, "learning_rate": 2.8649655704742096e-07, "loss": 0.305, "step": 7549 }, { "epoch": 2.70762610566579, "grad_norm": 0.29562807083129883, "learning_rate": 2.8580070094578913e-07, "loss": 0.2996, "step": 7550 }, { "epoch": 2.7079846999760937, "grad_norm": 0.3244156539440155, "learning_rate": 2.8510566607234005e-07, "loss": 0.3183, "step": 7551 }, { "epoch": 2.708343294286397, "grad_norm": 0.3084458112716675, "learning_rate": 2.8441145254815336e-07, "loss": 0.312, "step": 7552 }, { "epoch": 2.7087018885967007, "grad_norm": 0.2962481677532196, "learning_rate": 2.8371806049416416e-07, "loss": 0.2958, "step": 7553 }, { "epoch": 2.7090604829070046, "grad_norm": 0.29102587699890137, "learning_rate": 2.8302549003116077e-07, "loss": 0.2863, "step": 7554 }, { "epoch": 2.709419077217308, "grad_norm": 0.2963537275791168, "learning_rate": 2.8233374127979275e-07, "loss": 0.2869, "step": 7555 }, { "epoch": 2.7097776715276116, "grad_norm": 0.30791041254997253, "learning_rate": 2.816428143605643e-07, "loss": 0.2831, "step": 7556 }, { "epoch": 2.7101362658379156, "grad_norm": 0.30131030082702637, "learning_rate": 2.809527093938369e-07, "loss": 0.2849, "step": 7557 }, { "epoch": 2.710494860148219, "grad_norm": 0.27914363145828247, "learning_rate": 2.802634264998294e-07, "loss": 0.2711, "step": 7558 }, { "epoch": 2.7108534544585225, "grad_norm": 0.3125843405723572, "learning_rate": 2.795749657986174e-07, "loss": 0.3002, "step": 7559 }, { "epoch": 2.711212048768826, "grad_norm": 0.3115805685520172, "learning_rate": 2.788873274101328e-07, "loss": 0.2746, "step": 7560 }, { "epoch": 2.7115706430791295, "grad_norm": 0.28584280610084534, "learning_rate": 2.7820051145416307e-07, "loss": 0.3079, "step": 7561 }, { "epoch": 2.7119292373894335, "grad_norm": 0.29530394077301025, "learning_rate": 2.7751451805035466e-07, "loss": 0.3036, "step": 7562 }, { "epoch": 2.712287831699737, "grad_norm": 0.29420068860054016, "learning_rate": 2.768293473182099e-07, "loss": 0.3097, "step": 7563 }, { "epoch": 2.712646426010041, "grad_norm": 0.28057432174682617, "learning_rate": 2.761449993770876e-07, "loss": 0.2685, "step": 7564 }, { "epoch": 2.7130050203203444, "grad_norm": 0.2802877128124237, "learning_rate": 2.7546147434620365e-07, "loss": 0.2851, "step": 7565 }, { "epoch": 2.713363614630648, "grad_norm": 0.30534809827804565, "learning_rate": 2.747787723446288e-07, "loss": 0.3054, "step": 7566 }, { "epoch": 2.7137222089409514, "grad_norm": 0.3110775947570801, "learning_rate": 2.740968934912935e-07, "loss": 0.3044, "step": 7567 }, { "epoch": 2.714080803251255, "grad_norm": 0.2690470516681671, "learning_rate": 2.734158379049817e-07, "loss": 0.2724, "step": 7568 }, { "epoch": 2.714439397561559, "grad_norm": 0.27856677770614624, "learning_rate": 2.727356057043368e-07, "loss": 0.2971, "step": 7569 }, { "epoch": 2.7147979918718623, "grad_norm": 0.29785338044166565, "learning_rate": 2.720561970078567e-07, "loss": 0.3127, "step": 7570 }, { "epoch": 2.715156586182166, "grad_norm": 0.29855695366859436, "learning_rate": 2.7137761193389623e-07, "loss": 0.2982, "step": 7571 }, { "epoch": 2.7155151804924698, "grad_norm": 0.29220885038375854, "learning_rate": 2.7069985060066806e-07, "loss": 0.2817, "step": 7572 }, { "epoch": 2.7158737748027733, "grad_norm": 0.30632880330085754, "learning_rate": 2.7002291312623884e-07, "loss": 0.285, "step": 7573 }, { "epoch": 2.7162323691130768, "grad_norm": 0.30690404772758484, "learning_rate": 2.693467996285343e-07, "loss": 0.3142, "step": 7574 }, { "epoch": 2.7165909634233802, "grad_norm": 0.293824166059494, "learning_rate": 2.6867151022533454e-07, "loss": 0.2855, "step": 7575 }, { "epoch": 2.7169495577336837, "grad_norm": 0.2983725070953369, "learning_rate": 2.6799704503427836e-07, "loss": 0.3182, "step": 7576 }, { "epoch": 2.7173081520439877, "grad_norm": 0.2887006402015686, "learning_rate": 2.673234041728595e-07, "loss": 0.2765, "step": 7577 }, { "epoch": 2.717666746354291, "grad_norm": 0.29841580986976624, "learning_rate": 2.666505877584269e-07, "loss": 0.3027, "step": 7578 }, { "epoch": 2.7180253406645947, "grad_norm": 0.30295121669769287, "learning_rate": 2.65978595908189e-07, "loss": 0.3072, "step": 7579 }, { "epoch": 2.7183839349748986, "grad_norm": 0.3037998378276825, "learning_rate": 2.653074287392071e-07, "loss": 0.3055, "step": 7580 }, { "epoch": 2.718742529285202, "grad_norm": 0.3004857003688812, "learning_rate": 2.6463708636840166e-07, "loss": 0.2762, "step": 7581 }, { "epoch": 2.7191011235955056, "grad_norm": 0.2956327497959137, "learning_rate": 2.6396756891254927e-07, "loss": 0.301, "step": 7582 }, { "epoch": 2.719459717905809, "grad_norm": 0.3203355073928833, "learning_rate": 2.6329887648828044e-07, "loss": 0.2857, "step": 7583 }, { "epoch": 2.7198183122161126, "grad_norm": 0.3089333474636078, "learning_rate": 2.6263100921208484e-07, "loss": 0.275, "step": 7584 }, { "epoch": 2.7201769065264165, "grad_norm": 0.3308011293411255, "learning_rate": 2.619639672003049e-07, "loss": 0.3178, "step": 7585 }, { "epoch": 2.72053550083672, "grad_norm": 0.3063344955444336, "learning_rate": 2.612977505691433e-07, "loss": 0.2935, "step": 7586 }, { "epoch": 2.7208940951470235, "grad_norm": 0.27595797181129456, "learning_rate": 2.606323594346566e-07, "loss": 0.2478, "step": 7587 }, { "epoch": 2.7212526894573275, "grad_norm": 0.3096642792224884, "learning_rate": 2.5996779391275763e-07, "loss": 0.2783, "step": 7588 }, { "epoch": 2.721611283767631, "grad_norm": 0.3105829954147339, "learning_rate": 2.5930405411921654e-07, "loss": 0.291, "step": 7589 }, { "epoch": 2.7219698780779344, "grad_norm": 0.30768439173698425, "learning_rate": 2.5864114016965816e-07, "loss": 0.2831, "step": 7590 }, { "epoch": 2.722328472388238, "grad_norm": 0.2987958490848541, "learning_rate": 2.579790521795639e-07, "loss": 0.2734, "step": 7591 }, { "epoch": 2.722687066698542, "grad_norm": 0.3279123604297638, "learning_rate": 2.573177902642726e-07, "loss": 0.3019, "step": 7592 }, { "epoch": 2.7230456610088454, "grad_norm": 0.30977651476860046, "learning_rate": 2.5665735453897724e-07, "loss": 0.2755, "step": 7593 }, { "epoch": 2.723404255319149, "grad_norm": 0.3014085590839386, "learning_rate": 2.5599774511872844e-07, "loss": 0.306, "step": 7594 }, { "epoch": 2.723762849629453, "grad_norm": 0.2911854386329651, "learning_rate": 2.5533896211843157e-07, "loss": 0.2548, "step": 7595 }, { "epoch": 2.7241214439397563, "grad_norm": 0.30106836557388306, "learning_rate": 2.546810056528498e-07, "loss": 0.2799, "step": 7596 }, { "epoch": 2.72448003825006, "grad_norm": 0.30828335881233215, "learning_rate": 2.540238758365987e-07, "loss": 0.3179, "step": 7597 }, { "epoch": 2.7248386325603633, "grad_norm": 0.3047334849834442, "learning_rate": 2.53367572784155e-07, "loss": 0.3125, "step": 7598 }, { "epoch": 2.725197226870667, "grad_norm": 0.2867695689201355, "learning_rate": 2.5271209660984786e-07, "loss": 0.2745, "step": 7599 }, { "epoch": 2.7255558211809707, "grad_norm": 0.2884856164455414, "learning_rate": 2.5205744742786255e-07, "loss": 0.2727, "step": 7600 }, { "epoch": 2.7259144154912742, "grad_norm": 0.28846821188926697, "learning_rate": 2.5140362535224284e-07, "loss": 0.3049, "step": 7601 }, { "epoch": 2.7262730098015777, "grad_norm": 0.2911306917667389, "learning_rate": 2.507506304968843e-07, "loss": 0.2642, "step": 7602 }, { "epoch": 2.7266316041118817, "grad_norm": 0.3084554076194763, "learning_rate": 2.5009846297554153e-07, "loss": 0.291, "step": 7603 }, { "epoch": 2.726990198422185, "grad_norm": 0.32096368074417114, "learning_rate": 2.494471229018247e-07, "loss": 0.3055, "step": 7604 }, { "epoch": 2.7273487927324886, "grad_norm": 0.31289809942245483, "learning_rate": 2.4879661038919866e-07, "loss": 0.2968, "step": 7605 }, { "epoch": 2.727707387042792, "grad_norm": 0.31167468428611755, "learning_rate": 2.4814692555098553e-07, "loss": 0.2882, "step": 7606 }, { "epoch": 2.7280659813530956, "grad_norm": 0.30917710065841675, "learning_rate": 2.4749806850036083e-07, "loss": 0.2945, "step": 7607 }, { "epoch": 2.7284245756633996, "grad_norm": 0.29286813735961914, "learning_rate": 2.4685003935035924e-07, "loss": 0.29, "step": 7608 }, { "epoch": 2.728783169973703, "grad_norm": 0.31917068362236023, "learning_rate": 2.4620283821386814e-07, "loss": 0.2883, "step": 7609 }, { "epoch": 2.7291417642840066, "grad_norm": 0.30615612864494324, "learning_rate": 2.455564652036324e-07, "loss": 0.285, "step": 7610 }, { "epoch": 2.7295003585943105, "grad_norm": 0.3120078444480896, "learning_rate": 2.449109204322536e-07, "loss": 0.3042, "step": 7611 }, { "epoch": 2.729858952904614, "grad_norm": 0.30546143651008606, "learning_rate": 2.4426620401218515e-07, "loss": 0.2534, "step": 7612 }, { "epoch": 2.7302175472149175, "grad_norm": 0.28638753294944763, "learning_rate": 2.4362231605574106e-07, "loss": 0.3003, "step": 7613 }, { "epoch": 2.730576141525221, "grad_norm": 0.30424410104751587, "learning_rate": 2.42979256675086e-07, "loss": 0.2859, "step": 7614 }, { "epoch": 2.7309347358355245, "grad_norm": 0.3079318404197693, "learning_rate": 2.423370259822455e-07, "loss": 0.3038, "step": 7615 }, { "epoch": 2.7312933301458284, "grad_norm": 0.33055299520492554, "learning_rate": 2.4169562408909607e-07, "loss": 0.2815, "step": 7616 }, { "epoch": 2.731651924456132, "grad_norm": 0.30179160833358765, "learning_rate": 2.4105505110737335e-07, "loss": 0.2953, "step": 7617 }, { "epoch": 2.7320105187664354, "grad_norm": 0.32092732191085815, "learning_rate": 2.40415307148667e-07, "loss": 0.2951, "step": 7618 }, { "epoch": 2.7323691130767394, "grad_norm": 0.28452321887016296, "learning_rate": 2.3977639232442175e-07, "loss": 0.2618, "step": 7619 }, { "epoch": 2.732727707387043, "grad_norm": 0.3024204671382904, "learning_rate": 2.391383067459385e-07, "loss": 0.2717, "step": 7620 }, { "epoch": 2.7330863016973463, "grad_norm": 0.3238036334514618, "learning_rate": 2.3850105052437456e-07, "loss": 0.2889, "step": 7621 }, { "epoch": 2.73344489600765, "grad_norm": 0.3059982359409332, "learning_rate": 2.3786462377074104e-07, "loss": 0.3328, "step": 7622 }, { "epoch": 2.7338034903179538, "grad_norm": 0.30952197313308716, "learning_rate": 2.3722902659590653e-07, "loss": 0.2709, "step": 7623 }, { "epoch": 2.7341620846282573, "grad_norm": 0.2838614284992218, "learning_rate": 2.3659425911059298e-07, "loss": 0.3134, "step": 7624 }, { "epoch": 2.7345206789385608, "grad_norm": 0.31072545051574707, "learning_rate": 2.3596032142537918e-07, "loss": 0.2872, "step": 7625 }, { "epoch": 2.7348792732488647, "grad_norm": 0.2893613874912262, "learning_rate": 2.3532721365069956e-07, "loss": 0.2978, "step": 7626 }, { "epoch": 2.735237867559168, "grad_norm": 0.30676138401031494, "learning_rate": 2.3469493589684312e-07, "loss": 0.3111, "step": 7627 }, { "epoch": 2.7355964618694717, "grad_norm": 0.31323134899139404, "learning_rate": 2.3406348827395398e-07, "loss": 0.3023, "step": 7628 }, { "epoch": 2.735955056179775, "grad_norm": 0.2884471118450165, "learning_rate": 2.3343287089203303e-07, "loss": 0.262, "step": 7629 }, { "epoch": 2.7363136504900787, "grad_norm": 0.3230219781398773, "learning_rate": 2.328030838609363e-07, "loss": 0.307, "step": 7630 }, { "epoch": 2.7366722448003826, "grad_norm": 0.3203973174095154, "learning_rate": 2.321741272903727e-07, "loss": 0.283, "step": 7631 }, { "epoch": 2.737030839110686, "grad_norm": 0.3160082697868347, "learning_rate": 2.3154600128991066e-07, "loss": 0.2998, "step": 7632 }, { "epoch": 2.7373894334209896, "grad_norm": 0.299332857131958, "learning_rate": 2.3091870596897048e-07, "loss": 0.2724, "step": 7633 }, { "epoch": 2.7377480277312936, "grad_norm": 0.31827688217163086, "learning_rate": 2.3029224143682917e-07, "loss": 0.3228, "step": 7634 }, { "epoch": 2.738106622041597, "grad_norm": 0.3020837604999542, "learning_rate": 2.2966660780261885e-07, "loss": 0.2641, "step": 7635 }, { "epoch": 2.7384652163519005, "grad_norm": 0.3119642436504364, "learning_rate": 2.2904180517532682e-07, "loss": 0.2821, "step": 7636 }, { "epoch": 2.738823810662204, "grad_norm": 0.3014690577983856, "learning_rate": 2.2841783366379545e-07, "loss": 0.2842, "step": 7637 }, { "epoch": 2.7391824049725075, "grad_norm": 0.30592766404151917, "learning_rate": 2.2779469337672278e-07, "loss": 0.3106, "step": 7638 }, { "epoch": 2.7395409992828115, "grad_norm": 0.3144061267375946, "learning_rate": 2.2717238442266143e-07, "loss": 0.307, "step": 7639 }, { "epoch": 2.739899593593115, "grad_norm": 0.2927984297275543, "learning_rate": 2.2655090691002023e-07, "loss": 0.2621, "step": 7640 }, { "epoch": 2.7402581879034185, "grad_norm": 0.31695473194122314, "learning_rate": 2.2593026094706206e-07, "loss": 0.2976, "step": 7641 }, { "epoch": 2.7406167822137224, "grad_norm": 0.31491154432296753, "learning_rate": 2.2531044664190595e-07, "loss": 0.2919, "step": 7642 }, { "epoch": 2.740975376524026, "grad_norm": 0.30441948771476746, "learning_rate": 2.246914641025233e-07, "loss": 0.2941, "step": 7643 }, { "epoch": 2.7413339708343294, "grad_norm": 0.2733285129070282, "learning_rate": 2.2407331343674567e-07, "loss": 0.2624, "step": 7644 }, { "epoch": 2.741692565144633, "grad_norm": 0.2934863269329071, "learning_rate": 2.2345599475225522e-07, "loss": 0.3022, "step": 7645 }, { "epoch": 2.7420511594549364, "grad_norm": 0.30445972084999084, "learning_rate": 2.228395081565915e-07, "loss": 0.299, "step": 7646 }, { "epoch": 2.7424097537652403, "grad_norm": 0.2878897786140442, "learning_rate": 2.2222385375714805e-07, "loss": 0.2849, "step": 7647 }, { "epoch": 2.742768348075544, "grad_norm": 0.30670374631881714, "learning_rate": 2.2160903166117297e-07, "loss": 0.2977, "step": 7648 }, { "epoch": 2.7431269423858478, "grad_norm": 0.32013779878616333, "learning_rate": 2.209950419757717e-07, "loss": 0.3034, "step": 7649 }, { "epoch": 2.7434855366961513, "grad_norm": 0.28913286328315735, "learning_rate": 2.2038188480790256e-07, "loss": 0.2439, "step": 7650 }, { "epoch": 2.7438441310064547, "grad_norm": 0.31337931752204895, "learning_rate": 2.19769560264379e-07, "loss": 0.2992, "step": 7651 }, { "epoch": 2.7442027253167582, "grad_norm": 0.2972906529903412, "learning_rate": 2.191580684518707e-07, "loss": 0.2615, "step": 7652 }, { "epoch": 2.7445613196270617, "grad_norm": 0.31002292037010193, "learning_rate": 2.1854740947690023e-07, "loss": 0.2946, "step": 7653 }, { "epoch": 2.7449199139373657, "grad_norm": 0.29796040058135986, "learning_rate": 2.1793758344584693e-07, "loss": 0.3017, "step": 7654 }, { "epoch": 2.745278508247669, "grad_norm": 0.31366151571273804, "learning_rate": 2.173285904649447e-07, "loss": 0.3026, "step": 7655 }, { "epoch": 2.7456371025579727, "grad_norm": 0.31412577629089355, "learning_rate": 2.1672043064028258e-07, "loss": 0.2955, "step": 7656 }, { "epoch": 2.7459956968682766, "grad_norm": 0.31530633568763733, "learning_rate": 2.1611310407780194e-07, "loss": 0.273, "step": 7657 }, { "epoch": 2.74635429117858, "grad_norm": 0.31398847699165344, "learning_rate": 2.155066108833026e-07, "loss": 0.2931, "step": 7658 }, { "epoch": 2.7467128854888836, "grad_norm": 0.313912957906723, "learning_rate": 2.1490095116243726e-07, "loss": 0.3089, "step": 7659 }, { "epoch": 2.747071479799187, "grad_norm": 0.30529123544692993, "learning_rate": 2.1429612502071205e-07, "loss": 0.3046, "step": 7660 }, { "epoch": 2.7474300741094906, "grad_norm": 0.3003274202346802, "learning_rate": 2.1369213256349275e-07, "loss": 0.2873, "step": 7661 }, { "epoch": 2.7477886684197945, "grad_norm": 0.3221365809440613, "learning_rate": 2.130889738959946e-07, "loss": 0.3148, "step": 7662 }, { "epoch": 2.748147262730098, "grad_norm": 0.29949113726615906, "learning_rate": 2.1248664912328966e-07, "loss": 0.2848, "step": 7663 }, { "epoch": 2.7485058570404015, "grad_norm": 0.29657450318336487, "learning_rate": 2.118851583503062e-07, "loss": 0.2755, "step": 7664 }, { "epoch": 2.7488644513507055, "grad_norm": 0.3042198121547699, "learning_rate": 2.1128450168182324e-07, "loss": 0.2932, "step": 7665 }, { "epoch": 2.749223045661009, "grad_norm": 0.2846454381942749, "learning_rate": 2.106846792224798e-07, "loss": 0.2438, "step": 7666 }, { "epoch": 2.7495816399713124, "grad_norm": 0.33104178309440613, "learning_rate": 2.1008569107676512e-07, "loss": 0.3235, "step": 7667 }, { "epoch": 2.749940234281616, "grad_norm": 0.2954975664615631, "learning_rate": 2.0948753734902571e-07, "loss": 0.2905, "step": 7668 }, { "epoch": 2.7502988285919194, "grad_norm": 0.3327820897102356, "learning_rate": 2.0889021814346156e-07, "loss": 0.2781, "step": 7669 }, { "epoch": 2.7506574229022234, "grad_norm": 0.30844560265541077, "learning_rate": 2.0829373356412718e-07, "loss": 0.3049, "step": 7670 }, { "epoch": 2.751016017212527, "grad_norm": 0.29809942841529846, "learning_rate": 2.0769808371493228e-07, "loss": 0.3043, "step": 7671 }, { "epoch": 2.7513746115228304, "grad_norm": 0.3027389645576477, "learning_rate": 2.0710326869964047e-07, "loss": 0.2814, "step": 7672 }, { "epoch": 2.7517332058331343, "grad_norm": 0.2981058955192566, "learning_rate": 2.0650928862187168e-07, "loss": 0.2894, "step": 7673 }, { "epoch": 2.752091800143438, "grad_norm": 0.2866021990776062, "learning_rate": 2.0591614358509758e-07, "loss": 0.2699, "step": 7674 }, { "epoch": 2.7524503944537413, "grad_norm": 0.2977599501609802, "learning_rate": 2.0532383369264663e-07, "loss": 0.2939, "step": 7675 }, { "epoch": 2.752808988764045, "grad_norm": 0.2875092029571533, "learning_rate": 2.0473235904770128e-07, "loss": 0.2708, "step": 7676 }, { "epoch": 2.7531675830743483, "grad_norm": 0.31245073676109314, "learning_rate": 2.0414171975329688e-07, "loss": 0.3065, "step": 7677 }, { "epoch": 2.753526177384652, "grad_norm": 0.3182673752307892, "learning_rate": 2.035519159123267e-07, "loss": 0.3314, "step": 7678 }, { "epoch": 2.7538847716949557, "grad_norm": 0.30795055627822876, "learning_rate": 2.0296294762753522e-07, "loss": 0.295, "step": 7679 }, { "epoch": 2.7542433660052597, "grad_norm": 0.29806074500083923, "learning_rate": 2.0237481500152201e-07, "loss": 0.2699, "step": 7680 }, { "epoch": 2.754601960315563, "grad_norm": 0.3071155846118927, "learning_rate": 2.0178751813674347e-07, "loss": 0.286, "step": 7681 }, { "epoch": 2.7549605546258666, "grad_norm": 0.31284284591674805, "learning_rate": 2.0120105713550663e-07, "loss": 0.3068, "step": 7682 }, { "epoch": 2.75531914893617, "grad_norm": 0.2988992929458618, "learning_rate": 2.0061543209997536e-07, "loss": 0.2829, "step": 7683 }, { "epoch": 2.7556777432464736, "grad_norm": 0.3080078959465027, "learning_rate": 2.0003064313216748e-07, "loss": 0.2825, "step": 7684 }, { "epoch": 2.7560363375567776, "grad_norm": 0.3016411364078522, "learning_rate": 1.994466903339559e-07, "loss": 0.3161, "step": 7685 }, { "epoch": 2.756394931867081, "grad_norm": 0.30388641357421875, "learning_rate": 1.98863573807066e-07, "loss": 0.273, "step": 7686 }, { "epoch": 2.7567535261773846, "grad_norm": 0.31630444526672363, "learning_rate": 1.9828129365307813e-07, "loss": 0.3164, "step": 7687 }, { "epoch": 2.7571121204876885, "grad_norm": 0.30043190717697144, "learning_rate": 1.9769984997342838e-07, "loss": 0.2688, "step": 7688 }, { "epoch": 2.757470714797992, "grad_norm": 0.291428804397583, "learning_rate": 1.9711924286940522e-07, "loss": 0.2886, "step": 7689 }, { "epoch": 2.7578293091082955, "grad_norm": 0.31969115138053894, "learning_rate": 1.965394724421532e-07, "loss": 0.3085, "step": 7690 }, { "epoch": 2.758187903418599, "grad_norm": 0.29243990778923035, "learning_rate": 1.9596053879266885e-07, "loss": 0.2769, "step": 7691 }, { "epoch": 2.7585464977289025, "grad_norm": 0.2991008162498474, "learning_rate": 1.9538244202180534e-07, "loss": 0.314, "step": 7692 }, { "epoch": 2.7589050920392064, "grad_norm": 0.2978654205799103, "learning_rate": 1.948051822302688e-07, "loss": 0.3068, "step": 7693 }, { "epoch": 2.75926368634951, "grad_norm": 0.2983558773994446, "learning_rate": 1.9422875951861764e-07, "loss": 0.2985, "step": 7694 }, { "epoch": 2.7596222806598134, "grad_norm": 0.2923683226108551, "learning_rate": 1.936531739872699e-07, "loss": 0.265, "step": 7695 }, { "epoch": 2.7599808749701173, "grad_norm": 0.32226163148880005, "learning_rate": 1.9307842573649206e-07, "loss": 0.3125, "step": 7696 }, { "epoch": 2.760339469280421, "grad_norm": 0.32518813014030457, "learning_rate": 1.9250451486640732e-07, "loss": 0.3333, "step": 7697 }, { "epoch": 2.7606980635907243, "grad_norm": 0.29621341824531555, "learning_rate": 1.9193144147699404e-07, "loss": 0.2889, "step": 7698 }, { "epoch": 2.761056657901028, "grad_norm": 0.3105599284172058, "learning_rate": 1.9135920566808132e-07, "loss": 0.3137, "step": 7699 }, { "epoch": 2.7614152522113313, "grad_norm": 0.2941896915435791, "learning_rate": 1.90787807539356e-07, "loss": 0.2975, "step": 7700 }, { "epoch": 2.7617738465216353, "grad_norm": 0.28258994221687317, "learning_rate": 1.902172471903563e-07, "loss": 0.2995, "step": 7701 }, { "epoch": 2.7621324408319388, "grad_norm": 0.31765487790107727, "learning_rate": 1.8964752472047765e-07, "loss": 0.2951, "step": 7702 }, { "epoch": 2.7624910351422423, "grad_norm": 0.2824992835521698, "learning_rate": 1.890786402289646e-07, "loss": 0.2756, "step": 7703 }, { "epoch": 2.762849629452546, "grad_norm": 0.28759753704071045, "learning_rate": 1.8851059381492065e-07, "loss": 0.284, "step": 7704 }, { "epoch": 2.7632082237628497, "grad_norm": 0.30075308680534363, "learning_rate": 1.8794338557730052e-07, "loss": 0.268, "step": 7705 }, { "epoch": 2.763566818073153, "grad_norm": 0.3044241666793823, "learning_rate": 1.873770156149135e-07, "loss": 0.3091, "step": 7706 }, { "epoch": 2.7639254123834567, "grad_norm": 0.2855354845523834, "learning_rate": 1.8681148402642457e-07, "loss": 0.3089, "step": 7707 }, { "epoch": 2.7642840066937606, "grad_norm": 0.30867189168930054, "learning_rate": 1.862467909103488e-07, "loss": 0.2985, "step": 7708 }, { "epoch": 2.764642601004064, "grad_norm": 0.30625268816947937, "learning_rate": 1.8568293636505809e-07, "loss": 0.2883, "step": 7709 }, { "epoch": 2.7650011953143676, "grad_norm": 0.2996129095554352, "learning_rate": 1.8511992048877936e-07, "loss": 0.2752, "step": 7710 }, { "epoch": 2.7653597896246715, "grad_norm": 0.3083707094192505, "learning_rate": 1.8455774337958865e-07, "loss": 0.3047, "step": 7711 }, { "epoch": 2.765718383934975, "grad_norm": 0.3028271794319153, "learning_rate": 1.8399640513542205e-07, "loss": 0.2871, "step": 7712 }, { "epoch": 2.7660769782452785, "grad_norm": 0.28574731945991516, "learning_rate": 1.8343590585406467e-07, "loss": 0.2768, "step": 7713 }, { "epoch": 2.766435572555582, "grad_norm": 0.30676430463790894, "learning_rate": 1.8287624563315842e-07, "loss": 0.316, "step": 7714 }, { "epoch": 2.7667941668658855, "grad_norm": 0.2863733172416687, "learning_rate": 1.823174245701964e-07, "loss": 0.2655, "step": 7715 }, { "epoch": 2.7671527611761895, "grad_norm": 0.2947860360145569, "learning_rate": 1.8175944276252743e-07, "loss": 0.2536, "step": 7716 }, { "epoch": 2.767511355486493, "grad_norm": 0.3354093134403229, "learning_rate": 1.812023003073543e-07, "loss": 0.2922, "step": 7717 }, { "epoch": 2.7678699497967965, "grad_norm": 0.2944773733615875, "learning_rate": 1.8064599730173215e-07, "loss": 0.2509, "step": 7718 }, { "epoch": 2.7682285441071004, "grad_norm": 0.2896343171596527, "learning_rate": 1.8009053384257236e-07, "loss": 0.2748, "step": 7719 }, { "epoch": 2.768587138417404, "grad_norm": 0.327693372964859, "learning_rate": 1.7953591002663584e-07, "loss": 0.3067, "step": 7720 }, { "epoch": 2.7689457327277074, "grad_norm": 0.3192734718322754, "learning_rate": 1.7898212595054144e-07, "loss": 0.3263, "step": 7721 }, { "epoch": 2.769304327038011, "grad_norm": 0.29851269721984863, "learning_rate": 1.7842918171076084e-07, "loss": 0.2701, "step": 7722 }, { "epoch": 2.7696629213483144, "grad_norm": 0.30660495162010193, "learning_rate": 1.7787707740361537e-07, "loss": 0.2774, "step": 7723 }, { "epoch": 2.7700215156586183, "grad_norm": 0.31863000988960266, "learning_rate": 1.7732581312528752e-07, "loss": 0.3208, "step": 7724 }, { "epoch": 2.770380109968922, "grad_norm": 0.28931307792663574, "learning_rate": 1.76775388971806e-07, "loss": 0.2738, "step": 7725 }, { "epoch": 2.7707387042792253, "grad_norm": 0.28678831458091736, "learning_rate": 1.762258050390586e-07, "loss": 0.2693, "step": 7726 }, { "epoch": 2.7710972985895292, "grad_norm": 0.3045458495616913, "learning_rate": 1.7567706142278318e-07, "loss": 0.2967, "step": 7727 }, { "epoch": 2.7714558928998327, "grad_norm": 0.30559858679771423, "learning_rate": 1.7512915821857214e-07, "loss": 0.3094, "step": 7728 }, { "epoch": 2.7718144872101362, "grad_norm": 0.29722756147384644, "learning_rate": 1.745820955218741e-07, "loss": 0.2977, "step": 7729 }, { "epoch": 2.7721730815204397, "grad_norm": 0.3041365444660187, "learning_rate": 1.7403587342798677e-07, "loss": 0.2729, "step": 7730 }, { "epoch": 2.7725316758307432, "grad_norm": 0.3105095326900482, "learning_rate": 1.734904920320657e-07, "loss": 0.2933, "step": 7731 }, { "epoch": 2.772890270141047, "grad_norm": 0.30860015749931335, "learning_rate": 1.7294595142911597e-07, "loss": 0.3409, "step": 7732 }, { "epoch": 2.7732488644513507, "grad_norm": 0.2915976941585541, "learning_rate": 1.7240225171399948e-07, "loss": 0.2904, "step": 7733 }, { "epoch": 2.773607458761654, "grad_norm": 0.3183482587337494, "learning_rate": 1.718593929814305e-07, "loss": 0.2854, "step": 7734 }, { "epoch": 2.773966053071958, "grad_norm": 0.2986627519130707, "learning_rate": 1.713173753259767e-07, "loss": 0.2618, "step": 7735 }, { "epoch": 2.7743246473822616, "grad_norm": 0.30327966809272766, "learning_rate": 1.707761988420592e-07, "loss": 0.2847, "step": 7736 }, { "epoch": 2.774683241692565, "grad_norm": 0.30465298891067505, "learning_rate": 1.7023586362395205e-07, "loss": 0.3043, "step": 7737 }, { "epoch": 2.7750418360028686, "grad_norm": 0.27392691373825073, "learning_rate": 1.696963697657833e-07, "loss": 0.2694, "step": 7738 }, { "epoch": 2.7754004303131725, "grad_norm": 0.29710787534713745, "learning_rate": 1.6915771736153552e-07, "loss": 0.2998, "step": 7739 }, { "epoch": 2.775759024623476, "grad_norm": 0.3069706857204437, "learning_rate": 1.6861990650504256e-07, "loss": 0.2891, "step": 7740 }, { "epoch": 2.7761176189337795, "grad_norm": 0.3080275356769562, "learning_rate": 1.6808293728999337e-07, "loss": 0.2851, "step": 7741 }, { "epoch": 2.7764762132440834, "grad_norm": 0.2930867373943329, "learning_rate": 1.675468098099292e-07, "loss": 0.2891, "step": 7742 }, { "epoch": 2.776834807554387, "grad_norm": 0.2912619411945343, "learning_rate": 1.6701152415824651e-07, "loss": 0.3043, "step": 7743 }, { "epoch": 2.7771934018646904, "grad_norm": 0.29926127195358276, "learning_rate": 1.664770804281912e-07, "loss": 0.3002, "step": 7744 }, { "epoch": 2.777551996174994, "grad_norm": 0.2844950258731842, "learning_rate": 1.659434787128672e-07, "loss": 0.2776, "step": 7745 }, { "epoch": 2.7779105904852974, "grad_norm": 0.304310142993927, "learning_rate": 1.6541071910522787e-07, "loss": 0.3008, "step": 7746 }, { "epoch": 2.7782691847956014, "grad_norm": 0.3140414357185364, "learning_rate": 1.6487880169808344e-07, "loss": 0.2993, "step": 7747 }, { "epoch": 2.778627779105905, "grad_norm": 0.2732681334018707, "learning_rate": 1.643477265840948e-07, "loss": 0.2519, "step": 7748 }, { "epoch": 2.7789863734162084, "grad_norm": 0.29875028133392334, "learning_rate": 1.6381749385577626e-07, "loss": 0.3118, "step": 7749 }, { "epoch": 2.7793449677265123, "grad_norm": 0.28585296869277954, "learning_rate": 1.6328810360549674e-07, "loss": 0.281, "step": 7750 }, { "epoch": 2.779703562036816, "grad_norm": 0.31932350993156433, "learning_rate": 1.6275955592547686e-07, "loss": 0.2909, "step": 7751 }, { "epoch": 2.7800621563471193, "grad_norm": 0.3108222782611847, "learning_rate": 1.6223185090779248e-07, "loss": 0.2867, "step": 7752 }, { "epoch": 2.780420750657423, "grad_norm": 0.29908040165901184, "learning_rate": 1.6170498864437112e-07, "loss": 0.2925, "step": 7753 }, { "epoch": 2.7807793449677263, "grad_norm": 0.30244046449661255, "learning_rate": 1.6117896922699272e-07, "loss": 0.312, "step": 7754 }, { "epoch": 2.78113793927803, "grad_norm": 0.296297162771225, "learning_rate": 1.6065379274729343e-07, "loss": 0.3186, "step": 7755 }, { "epoch": 2.7814965335883337, "grad_norm": 0.2981465458869934, "learning_rate": 1.6012945929675838e-07, "loss": 0.2807, "step": 7756 }, { "epoch": 2.781855127898637, "grad_norm": 0.3085557520389557, "learning_rate": 1.5960596896672897e-07, "loss": 0.279, "step": 7757 }, { "epoch": 2.782213722208941, "grad_norm": 0.3172108829021454, "learning_rate": 1.5908332184840003e-07, "loss": 0.3329, "step": 7758 }, { "epoch": 2.7825723165192446, "grad_norm": 0.26308131217956543, "learning_rate": 1.5856151803281706e-07, "loss": 0.2768, "step": 7759 }, { "epoch": 2.782930910829548, "grad_norm": 0.32646089792251587, "learning_rate": 1.5804055761088065e-07, "loss": 0.3274, "step": 7760 }, { "epoch": 2.7832895051398516, "grad_norm": 0.2909362316131592, "learning_rate": 1.575204406733427e-07, "loss": 0.28, "step": 7761 }, { "epoch": 2.783648099450155, "grad_norm": 0.3040490448474884, "learning_rate": 1.5700116731080962e-07, "loss": 0.2907, "step": 7762 }, { "epoch": 2.784006693760459, "grad_norm": 0.30987560749053955, "learning_rate": 1.564827376137412e-07, "loss": 0.298, "step": 7763 }, { "epoch": 2.7843652880707626, "grad_norm": 0.3012539744377136, "learning_rate": 1.5596515167244863e-07, "loss": 0.2996, "step": 7764 }, { "epoch": 2.7847238823810665, "grad_norm": 0.3104313015937805, "learning_rate": 1.554484095770975e-07, "loss": 0.2688, "step": 7765 }, { "epoch": 2.78508247669137, "grad_norm": 0.3033367097377777, "learning_rate": 1.5493251141770527e-07, "loss": 0.3109, "step": 7766 }, { "epoch": 2.7854410710016735, "grad_norm": 0.2830864489078522, "learning_rate": 1.5441745728414391e-07, "loss": 0.2693, "step": 7767 }, { "epoch": 2.785799665311977, "grad_norm": 0.30745798349380493, "learning_rate": 1.5390324726613616e-07, "loss": 0.288, "step": 7768 }, { "epoch": 2.7861582596222805, "grad_norm": 0.3079562783241272, "learning_rate": 1.533898814532603e-07, "loss": 0.2719, "step": 7769 }, { "epoch": 2.7865168539325844, "grad_norm": 0.29837653040885925, "learning_rate": 1.5287735993494645e-07, "loss": 0.2852, "step": 7770 }, { "epoch": 2.786875448242888, "grad_norm": 0.30134880542755127, "learning_rate": 1.5236568280047603e-07, "loss": 0.2987, "step": 7771 }, { "epoch": 2.7872340425531914, "grad_norm": 0.28591668605804443, "learning_rate": 1.5185485013898605e-07, "loss": 0.2739, "step": 7772 }, { "epoch": 2.7875926368634953, "grad_norm": 0.2928742468357086, "learning_rate": 1.513448620394642e-07, "loss": 0.2946, "step": 7773 }, { "epoch": 2.787951231173799, "grad_norm": 0.30413752794265747, "learning_rate": 1.508357185907522e-07, "loss": 0.2947, "step": 7774 }, { "epoch": 2.7883098254841023, "grad_norm": 0.2916651666164398, "learning_rate": 1.5032741988154465e-07, "loss": 0.2879, "step": 7775 }, { "epoch": 2.788668419794406, "grad_norm": 0.320382684469223, "learning_rate": 1.498199660003885e-07, "loss": 0.3129, "step": 7776 }, { "epoch": 2.7890270141047093, "grad_norm": 0.28296729922294617, "learning_rate": 1.493133570356853e-07, "loss": 0.2682, "step": 7777 }, { "epoch": 2.7893856084150133, "grad_norm": 0.26973921060562134, "learning_rate": 1.488075930756855e-07, "loss": 0.2673, "step": 7778 }, { "epoch": 2.7897442027253168, "grad_norm": 0.3186951279640198, "learning_rate": 1.4830267420849587e-07, "loss": 0.3461, "step": 7779 }, { "epoch": 2.7901027970356203, "grad_norm": 0.2767465114593506, "learning_rate": 1.477986005220755e-07, "loss": 0.2514, "step": 7780 }, { "epoch": 2.790461391345924, "grad_norm": 0.2902543246746063, "learning_rate": 1.4729537210423418e-07, "loss": 0.2618, "step": 7781 }, { "epoch": 2.7908199856562277, "grad_norm": 0.3215247094631195, "learning_rate": 1.4679298904263783e-07, "loss": 0.3358, "step": 7782 }, { "epoch": 2.791178579966531, "grad_norm": 0.28267741203308105, "learning_rate": 1.4629145142480094e-07, "loss": 0.2908, "step": 7783 }, { "epoch": 2.7915371742768347, "grad_norm": 0.2834550738334656, "learning_rate": 1.4579075933809472e-07, "loss": 0.2854, "step": 7784 }, { "epoch": 2.791895768587138, "grad_norm": 0.2745019197463989, "learning_rate": 1.4529091286973994e-07, "loss": 0.2521, "step": 7785 }, { "epoch": 2.792254362897442, "grad_norm": 0.3011954426765442, "learning_rate": 1.4479191210681198e-07, "loss": 0.3132, "step": 7786 }, { "epoch": 2.7926129572077456, "grad_norm": 0.3231201469898224, "learning_rate": 1.4429375713623906e-07, "loss": 0.319, "step": 7787 }, { "epoch": 2.792971551518049, "grad_norm": 0.28708165884017944, "learning_rate": 1.4379644804480009e-07, "loss": 0.2532, "step": 7788 }, { "epoch": 2.793330145828353, "grad_norm": 0.3003886342048645, "learning_rate": 1.4329998491912854e-07, "loss": 0.2878, "step": 7789 }, { "epoch": 2.7936887401386565, "grad_norm": 0.2921387553215027, "learning_rate": 1.4280436784570972e-07, "loss": 0.3101, "step": 7790 }, { "epoch": 2.79404733444896, "grad_norm": 0.2972431778907776, "learning_rate": 1.423095969108812e-07, "loss": 0.3026, "step": 7791 }, { "epoch": 2.7944059287592635, "grad_norm": 0.29989659786224365, "learning_rate": 1.4181567220083403e-07, "loss": 0.3101, "step": 7792 }, { "epoch": 2.794764523069567, "grad_norm": 0.2902945578098297, "learning_rate": 1.413225938016116e-07, "loss": 0.2883, "step": 7793 }, { "epoch": 2.795123117379871, "grad_norm": 0.30159837007522583, "learning_rate": 1.4083036179911024e-07, "loss": 0.3214, "step": 7794 }, { "epoch": 2.7954817116901745, "grad_norm": 0.30328965187072754, "learning_rate": 1.4033897627907688e-07, "loss": 0.2857, "step": 7795 }, { "epoch": 2.7958403060004784, "grad_norm": 0.29214298725128174, "learning_rate": 1.3984843732711305e-07, "loss": 0.2798, "step": 7796 }, { "epoch": 2.796198900310782, "grad_norm": 0.316633403301239, "learning_rate": 1.393587450286721e-07, "loss": 0.3108, "step": 7797 }, { "epoch": 2.7965574946210854, "grad_norm": 0.30354008078575134, "learning_rate": 1.3886989946906014e-07, "loss": 0.3041, "step": 7798 }, { "epoch": 2.796916088931389, "grad_norm": 0.2801968455314636, "learning_rate": 1.383819007334364e-07, "loss": 0.2863, "step": 7799 }, { "epoch": 2.7972746832416924, "grad_norm": 0.30224230885505676, "learning_rate": 1.3789474890680999e-07, "loss": 0.2734, "step": 7800 }, { "epoch": 2.7976332775519963, "grad_norm": 0.3146675229072571, "learning_rate": 1.3740844407404585e-07, "loss": 0.2868, "step": 7801 }, { "epoch": 2.7979918718623, "grad_norm": 0.33345502614974976, "learning_rate": 1.3692298631985845e-07, "loss": 0.2855, "step": 7802 }, { "epoch": 2.7983504661726033, "grad_norm": 0.31573063135147095, "learning_rate": 1.3643837572881735e-07, "loss": 0.3103, "step": 7803 }, { "epoch": 2.7987090604829072, "grad_norm": 0.30041563510894775, "learning_rate": 1.3595461238534223e-07, "loss": 0.2741, "step": 7804 }, { "epoch": 2.7990676547932107, "grad_norm": 0.30367404222488403, "learning_rate": 1.3547169637370627e-07, "loss": 0.3031, "step": 7805 }, { "epoch": 2.7994262491035142, "grad_norm": 0.29627230763435364, "learning_rate": 1.3498962777803604e-07, "loss": 0.2725, "step": 7806 }, { "epoch": 2.7997848434138177, "grad_norm": 0.28909462690353394, "learning_rate": 1.345084066823077e-07, "loss": 0.2979, "step": 7807 }, { "epoch": 2.800143437724121, "grad_norm": 0.28456681966781616, "learning_rate": 1.3402803317035251e-07, "loss": 0.2961, "step": 7808 }, { "epoch": 2.800502032034425, "grad_norm": 0.29155433177948, "learning_rate": 1.33548507325853e-07, "loss": 0.3209, "step": 7809 }, { "epoch": 2.8008606263447287, "grad_norm": 0.28561142086982727, "learning_rate": 1.3306982923234402e-07, "loss": 0.2772, "step": 7810 }, { "epoch": 2.801219220655032, "grad_norm": 0.3059192895889282, "learning_rate": 1.3259199897321274e-07, "loss": 0.3017, "step": 7811 }, { "epoch": 2.801577814965336, "grad_norm": 0.29702839255332947, "learning_rate": 1.321150166316987e-07, "loss": 0.2937, "step": 7812 }, { "epoch": 2.8019364092756396, "grad_norm": 0.3123089671134949, "learning_rate": 1.3163888229089372e-07, "loss": 0.2945, "step": 7813 }, { "epoch": 2.802295003585943, "grad_norm": 0.31504401564598083, "learning_rate": 1.311635960337404e-07, "loss": 0.279, "step": 7814 }, { "epoch": 2.8026535978962466, "grad_norm": 0.27728933095932007, "learning_rate": 1.306891579430375e-07, "loss": 0.2746, "step": 7815 }, { "epoch": 2.80301219220655, "grad_norm": 0.2862841784954071, "learning_rate": 1.302155681014333e-07, "loss": 0.3068, "step": 7816 }, { "epoch": 2.803370786516854, "grad_norm": 0.31089651584625244, "learning_rate": 1.297428265914269e-07, "loss": 0.306, "step": 7817 }, { "epoch": 2.8037293808271575, "grad_norm": 0.33245956897735596, "learning_rate": 1.292709334953729e-07, "loss": 0.3189, "step": 7818 }, { "epoch": 2.804087975137461, "grad_norm": 0.31114354729652405, "learning_rate": 1.2879988889547556e-07, "loss": 0.2665, "step": 7819 }, { "epoch": 2.804446569447765, "grad_norm": 0.2972923517227173, "learning_rate": 1.283296928737926e-07, "loss": 0.2902, "step": 7820 }, { "epoch": 2.8048051637580684, "grad_norm": 0.293457955121994, "learning_rate": 1.2786034551223403e-07, "loss": 0.2829, "step": 7821 }, { "epoch": 2.805163758068372, "grad_norm": 0.30076488852500916, "learning_rate": 1.2739184689256113e-07, "loss": 0.2855, "step": 7822 }, { "epoch": 2.8055223523786754, "grad_norm": 0.323956698179245, "learning_rate": 1.269241970963886e-07, "loss": 0.2792, "step": 7823 }, { "epoch": 2.8058809466889794, "grad_norm": 0.2924081087112427, "learning_rate": 1.2645739620518183e-07, "loss": 0.2726, "step": 7824 }, { "epoch": 2.806239540999283, "grad_norm": 0.31848353147506714, "learning_rate": 1.2599144430025857e-07, "loss": 0.2805, "step": 7825 }, { "epoch": 2.8065981353095864, "grad_norm": 0.3214656710624695, "learning_rate": 1.2552634146278998e-07, "loss": 0.2779, "step": 7826 }, { "epoch": 2.8069567296198903, "grad_norm": 0.3089284300804138, "learning_rate": 1.250620877737979e-07, "loss": 0.2774, "step": 7827 }, { "epoch": 2.807315323930194, "grad_norm": 0.2955875098705292, "learning_rate": 1.2459868331415714e-07, "loss": 0.2928, "step": 7828 }, { "epoch": 2.8076739182404973, "grad_norm": 0.3066750168800354, "learning_rate": 1.241361281645942e-07, "loss": 0.31, "step": 7829 }, { "epoch": 2.8080325125508008, "grad_norm": 0.28498128056526184, "learning_rate": 1.2367442240568738e-07, "loss": 0.2561, "step": 7830 }, { "epoch": 2.8083911068611043, "grad_norm": 0.31682807207107544, "learning_rate": 1.2321356611786627e-07, "loss": 0.305, "step": 7831 }, { "epoch": 2.808749701171408, "grad_norm": 0.2897023856639862, "learning_rate": 1.2275355938141553e-07, "loss": 0.2774, "step": 7832 }, { "epoch": 2.8091082954817117, "grad_norm": 0.3284591734409332, "learning_rate": 1.2229440227646827e-07, "loss": 0.2836, "step": 7833 }, { "epoch": 2.809466889792015, "grad_norm": 0.31173670291900635, "learning_rate": 1.2183609488301163e-07, "loss": 0.2884, "step": 7834 }, { "epoch": 2.809825484102319, "grad_norm": 0.30638447403907776, "learning_rate": 1.2137863728088451e-07, "loss": 0.3051, "step": 7835 }, { "epoch": 2.8101840784126226, "grad_norm": 0.27047938108444214, "learning_rate": 1.209220295497765e-07, "loss": 0.2662, "step": 7836 }, { "epoch": 2.810542672722926, "grad_norm": 0.30453968048095703, "learning_rate": 1.2046627176923066e-07, "loss": 0.3172, "step": 7837 }, { "epoch": 2.8109012670332296, "grad_norm": 0.29381516575813293, "learning_rate": 1.2001136401864176e-07, "loss": 0.2807, "step": 7838 }, { "epoch": 2.811259861343533, "grad_norm": 0.30607154965400696, "learning_rate": 1.195573063772554e-07, "loss": 0.2709, "step": 7839 }, { "epoch": 2.811618455653837, "grad_norm": 0.293413370847702, "learning_rate": 1.191040989241704e-07, "loss": 0.2933, "step": 7840 }, { "epoch": 2.8119770499641406, "grad_norm": 0.2795218527317047, "learning_rate": 1.1865174173833649e-07, "loss": 0.2827, "step": 7841 }, { "epoch": 2.812335644274444, "grad_norm": 0.29661673307418823, "learning_rate": 1.1820023489855615e-07, "loss": 0.3194, "step": 7842 }, { "epoch": 2.812694238584748, "grad_norm": 0.30685731768608093, "learning_rate": 1.1774957848348256e-07, "loss": 0.3148, "step": 7843 }, { "epoch": 2.8130528328950515, "grad_norm": 0.28101837635040283, "learning_rate": 1.172997725716224e-07, "loss": 0.2735, "step": 7844 }, { "epoch": 2.813411427205355, "grad_norm": 0.3054209351539612, "learning_rate": 1.1685081724133296e-07, "loss": 0.2923, "step": 7845 }, { "epoch": 2.8137700215156585, "grad_norm": 0.3287253677845001, "learning_rate": 1.1640271257082337e-07, "loss": 0.3076, "step": 7846 }, { "epoch": 2.814128615825962, "grad_norm": 0.2999616265296936, "learning_rate": 1.1595545863815505e-07, "loss": 0.2897, "step": 7847 }, { "epoch": 2.814487210136266, "grad_norm": 0.28246989846229553, "learning_rate": 1.1550905552124014e-07, "loss": 0.2846, "step": 7848 }, { "epoch": 2.8148458044465694, "grad_norm": 0.31572332978248596, "learning_rate": 1.1506350329784532e-07, "loss": 0.3064, "step": 7849 }, { "epoch": 2.815204398756873, "grad_norm": 0.31888115406036377, "learning_rate": 1.1461880204558518e-07, "loss": 0.2791, "step": 7850 }, { "epoch": 2.815562993067177, "grad_norm": 0.3103685677051544, "learning_rate": 1.1417495184192995e-07, "loss": 0.2865, "step": 7851 }, { "epoch": 2.8159215873774803, "grad_norm": 0.30334123969078064, "learning_rate": 1.1373195276419835e-07, "loss": 0.2953, "step": 7852 }, { "epoch": 2.816280181687784, "grad_norm": 0.29453831911087036, "learning_rate": 1.1328980488956253e-07, "loss": 0.2692, "step": 7853 }, { "epoch": 2.8166387759980873, "grad_norm": 0.3204566240310669, "learning_rate": 1.1284850829504645e-07, "loss": 0.2845, "step": 7854 }, { "epoch": 2.8169973703083913, "grad_norm": 0.31320399045944214, "learning_rate": 1.1240806305752416e-07, "loss": 0.2641, "step": 7855 }, { "epoch": 2.8173559646186948, "grad_norm": 0.3301124572753906, "learning_rate": 1.1196846925372429e-07, "loss": 0.3491, "step": 7856 }, { "epoch": 2.8177145589289982, "grad_norm": 0.30290067195892334, "learning_rate": 1.1152972696022447e-07, "loss": 0.2658, "step": 7857 }, { "epoch": 2.818073153239302, "grad_norm": 0.3056081533432007, "learning_rate": 1.1109183625345521e-07, "loss": 0.2977, "step": 7858 }, { "epoch": 2.8184317475496057, "grad_norm": 0.2879026532173157, "learning_rate": 1.1065479720969829e-07, "loss": 0.2857, "step": 7859 }, { "epoch": 2.818790341859909, "grad_norm": 0.2763688266277313, "learning_rate": 1.102186099050867e-07, "loss": 0.2645, "step": 7860 }, { "epoch": 2.8191489361702127, "grad_norm": 0.3123871684074402, "learning_rate": 1.0978327441560743e-07, "loss": 0.3366, "step": 7861 }, { "epoch": 2.819507530480516, "grad_norm": 0.2863824963569641, "learning_rate": 1.0934879081709482e-07, "loss": 0.3002, "step": 7862 }, { "epoch": 2.81986612479082, "grad_norm": 0.30476298928260803, "learning_rate": 1.0891515918523942e-07, "loss": 0.258, "step": 7863 }, { "epoch": 2.8202247191011236, "grad_norm": 0.31779634952545166, "learning_rate": 1.0848237959558027e-07, "loss": 0.3145, "step": 7864 }, { "epoch": 2.820583313411427, "grad_norm": 0.3104623854160309, "learning_rate": 1.0805045212350818e-07, "loss": 0.2888, "step": 7865 }, { "epoch": 2.820941907721731, "grad_norm": 0.32102862000465393, "learning_rate": 1.0761937684426793e-07, "loss": 0.3188, "step": 7866 }, { "epoch": 2.8213005020320345, "grad_norm": 0.28240424394607544, "learning_rate": 1.071891538329528e-07, "loss": 0.3029, "step": 7867 }, { "epoch": 2.821659096342338, "grad_norm": 0.289365291595459, "learning_rate": 1.0675978316450953e-07, "loss": 0.2927, "step": 7868 }, { "epoch": 2.8220176906526415, "grad_norm": 0.3284064829349518, "learning_rate": 1.0633126491373602e-07, "loss": 0.3339, "step": 7869 }, { "epoch": 2.822376284962945, "grad_norm": 0.2887446880340576, "learning_rate": 1.0590359915528092e-07, "loss": 0.2616, "step": 7870 }, { "epoch": 2.822734879273249, "grad_norm": 0.2795880138874054, "learning_rate": 1.0547678596364519e-07, "loss": 0.2759, "step": 7871 }, { "epoch": 2.8230934735835524, "grad_norm": 0.3083893954753876, "learning_rate": 1.0505082541318157e-07, "loss": 0.297, "step": 7872 }, { "epoch": 2.823452067893856, "grad_norm": 0.293910950422287, "learning_rate": 1.0462571757809237e-07, "loss": 0.2731, "step": 7873 }, { "epoch": 2.82381066220416, "grad_norm": 0.3005927801132202, "learning_rate": 1.0420146253243446e-07, "loss": 0.2854, "step": 7874 }, { "epoch": 2.8241692565144634, "grad_norm": 0.31372469663619995, "learning_rate": 1.0377806035011318e-07, "loss": 0.2939, "step": 7875 }, { "epoch": 2.824527850824767, "grad_norm": 0.2923657298088074, "learning_rate": 1.0335551110488728e-07, "loss": 0.2685, "step": 7876 }, { "epoch": 2.8248864451350704, "grad_norm": 0.29698115587234497, "learning_rate": 1.0293381487036458e-07, "loss": 0.3086, "step": 7877 }, { "epoch": 2.825245039445374, "grad_norm": 0.2974838316440582, "learning_rate": 1.0251297172000796e-07, "loss": 0.2775, "step": 7878 }, { "epoch": 2.825603633755678, "grad_norm": 0.28159865736961365, "learning_rate": 1.0209298172712767e-07, "loss": 0.2694, "step": 7879 }, { "epoch": 2.8259622280659813, "grad_norm": 0.30932968854904175, "learning_rate": 1.0167384496488908e-07, "loss": 0.2935, "step": 7880 }, { "epoch": 2.8263208223762852, "grad_norm": 0.30551013350486755, "learning_rate": 1.01255561506306e-07, "loss": 0.3002, "step": 7881 }, { "epoch": 2.8266794166865887, "grad_norm": 0.29980191588401794, "learning_rate": 1.008381314242446e-07, "loss": 0.3095, "step": 7882 }, { "epoch": 2.8270380109968922, "grad_norm": 0.3070824146270752, "learning_rate": 1.0042155479142335e-07, "loss": 0.3167, "step": 7883 }, { "epoch": 2.8273966053071957, "grad_norm": 0.3062865138053894, "learning_rate": 1.0000583168041034e-07, "loss": 0.2969, "step": 7884 }, { "epoch": 2.827755199617499, "grad_norm": 0.3184621334075928, "learning_rate": 9.959096216362596e-08, "loss": 0.3101, "step": 7885 }, { "epoch": 2.828113793927803, "grad_norm": 0.3009479343891144, "learning_rate": 9.917694631334296e-08, "loss": 0.2735, "step": 7886 }, { "epoch": 2.8284723882381066, "grad_norm": 0.3222179710865021, "learning_rate": 9.876378420168254e-08, "loss": 0.2842, "step": 7887 }, { "epoch": 2.82883098254841, "grad_norm": 0.3126574754714966, "learning_rate": 9.835147590061989e-08, "loss": 0.3041, "step": 7888 }, { "epoch": 2.829189576858714, "grad_norm": 0.28824377059936523, "learning_rate": 9.794002148197979e-08, "loss": 0.2869, "step": 7889 }, { "epoch": 2.8295481711690176, "grad_norm": 0.2981645166873932, "learning_rate": 9.752942101743934e-08, "loss": 0.3115, "step": 7890 }, { "epoch": 2.829906765479321, "grad_norm": 0.31686633825302124, "learning_rate": 9.711967457852577e-08, "loss": 0.2972, "step": 7891 }, { "epoch": 2.8302653597896246, "grad_norm": 0.3048613369464874, "learning_rate": 9.671078223661867e-08, "loss": 0.2701, "step": 7892 }, { "epoch": 2.830623954099928, "grad_norm": 0.3062710762023926, "learning_rate": 9.630274406294881e-08, "loss": 0.3057, "step": 7893 }, { "epoch": 2.830982548410232, "grad_norm": 0.2868022322654724, "learning_rate": 9.589556012859657e-08, "loss": 0.3057, "step": 7894 }, { "epoch": 2.8313411427205355, "grad_norm": 0.30464205145835876, "learning_rate": 9.548923050449576e-08, "loss": 0.3218, "step": 7895 }, { "epoch": 2.831699737030839, "grad_norm": 0.30006009340286255, "learning_rate": 9.508375526142976e-08, "loss": 0.271, "step": 7896 }, { "epoch": 2.832058331341143, "grad_norm": 0.33540084958076477, "learning_rate": 9.467913447003374e-08, "loss": 0.3353, "step": 7897 }, { "epoch": 2.8324169256514464, "grad_norm": 0.2936016321182251, "learning_rate": 9.427536820079353e-08, "loss": 0.2868, "step": 7898 }, { "epoch": 2.83277551996175, "grad_norm": 0.29986897110939026, "learning_rate": 9.387245652404675e-08, "loss": 0.2904, "step": 7899 }, { "epoch": 2.8331341142720534, "grad_norm": 0.31301766633987427, "learning_rate": 9.347039950998227e-08, "loss": 0.3173, "step": 7900 }, { "epoch": 2.833492708582357, "grad_norm": 0.2805391848087311, "learning_rate": 9.30691972286385e-08, "loss": 0.2503, "step": 7901 }, { "epoch": 2.833851302892661, "grad_norm": 0.2984428405761719, "learning_rate": 9.266884974990786e-08, "loss": 0.2804, "step": 7902 }, { "epoch": 2.8342098972029643, "grad_norm": 0.31558287143707275, "learning_rate": 9.226935714353069e-08, "loss": 0.2714, "step": 7903 }, { "epoch": 2.834568491513268, "grad_norm": 0.30520302057266235, "learning_rate": 9.187071947910021e-08, "loss": 0.3012, "step": 7904 }, { "epoch": 2.834927085823572, "grad_norm": 0.3127974271774292, "learning_rate": 9.147293682606084e-08, "loss": 0.3044, "step": 7905 }, { "epoch": 2.8352856801338753, "grad_norm": 0.3125121295452118, "learning_rate": 9.107600925370664e-08, "loss": 0.2836, "step": 7906 }, { "epoch": 2.8356442744441788, "grad_norm": 0.30061060190200806, "learning_rate": 9.067993683118504e-08, "loss": 0.3011, "step": 7907 }, { "epoch": 2.8360028687544823, "grad_norm": 0.28981664776802063, "learning_rate": 9.028471962749141e-08, "loss": 0.3007, "step": 7908 }, { "epoch": 2.836361463064786, "grad_norm": 0.31390997767448425, "learning_rate": 8.989035771147458e-08, "loss": 0.2837, "step": 7909 }, { "epoch": 2.8367200573750897, "grad_norm": 0.30519697070121765, "learning_rate": 8.949685115183404e-08, "loss": 0.2995, "step": 7910 }, { "epoch": 2.837078651685393, "grad_norm": 0.2999595105648041, "learning_rate": 8.910420001711884e-08, "loss": 0.2899, "step": 7911 }, { "epoch": 2.837437245995697, "grad_norm": 0.2897045910358429, "learning_rate": 8.871240437573147e-08, "loss": 0.2764, "step": 7912 }, { "epoch": 2.8377958403060006, "grad_norm": 0.3071534037590027, "learning_rate": 8.832146429592291e-08, "loss": 0.3093, "step": 7913 }, { "epoch": 2.838154434616304, "grad_norm": 0.3073152005672455, "learning_rate": 8.793137984579647e-08, "loss": 0.2767, "step": 7914 }, { "epoch": 2.8385130289266076, "grad_norm": 0.3146825432777405, "learning_rate": 8.75421510933061e-08, "loss": 0.2798, "step": 7915 }, { "epoch": 2.838871623236911, "grad_norm": 0.3087587058544159, "learning_rate": 8.715377810625592e-08, "loss": 0.2889, "step": 7916 }, { "epoch": 2.839230217547215, "grad_norm": 0.30499905347824097, "learning_rate": 8.676626095230289e-08, "loss": 0.2984, "step": 7917 }, { "epoch": 2.8395888118575185, "grad_norm": 0.323076993227005, "learning_rate": 8.637959969895249e-08, "loss": 0.3127, "step": 7918 }, { "epoch": 2.839947406167822, "grad_norm": 0.2969795763492584, "learning_rate": 8.599379441356359e-08, "loss": 0.2782, "step": 7919 }, { "epoch": 2.840306000478126, "grad_norm": 0.30332228541374207, "learning_rate": 8.560884516334411e-08, "loss": 0.2725, "step": 7920 }, { "epoch": 2.8406645947884295, "grad_norm": 0.31656843423843384, "learning_rate": 8.522475201535263e-08, "loss": 0.334, "step": 7921 }, { "epoch": 2.841023189098733, "grad_norm": 0.3088700473308563, "learning_rate": 8.484151503650062e-08, "loss": 0.289, "step": 7922 }, { "epoch": 2.8413817834090365, "grad_norm": 0.301155686378479, "learning_rate": 8.445913429354857e-08, "loss": 0.2756, "step": 7923 }, { "epoch": 2.84174037771934, "grad_norm": 0.3466264605522156, "learning_rate": 8.407760985310876e-08, "loss": 0.3336, "step": 7924 }, { "epoch": 2.842098972029644, "grad_norm": 0.30978304147720337, "learning_rate": 8.3696941781643e-08, "loss": 0.3141, "step": 7925 }, { "epoch": 2.8424575663399474, "grad_norm": 0.2954142391681671, "learning_rate": 8.33171301454655e-08, "loss": 0.2592, "step": 7926 }, { "epoch": 2.842816160650251, "grad_norm": 0.30105459690093994, "learning_rate": 8.293817501074109e-08, "loss": 0.2889, "step": 7927 }, { "epoch": 2.843174754960555, "grad_norm": 0.29888981580734253, "learning_rate": 8.256007644348362e-08, "loss": 0.3046, "step": 7928 }, { "epoch": 2.8435333492708583, "grad_norm": 0.3001464903354645, "learning_rate": 8.218283450956099e-08, "loss": 0.2808, "step": 7929 }, { "epoch": 2.843891943581162, "grad_norm": 0.28946200013160706, "learning_rate": 8.180644927468839e-08, "loss": 0.2707, "step": 7930 }, { "epoch": 2.8442505378914653, "grad_norm": 0.3157193064689636, "learning_rate": 8.143092080443338e-08, "loss": 0.3167, "step": 7931 }, { "epoch": 2.844609132201769, "grad_norm": 0.30071988701820374, "learning_rate": 8.105624916421473e-08, "loss": 0.3008, "step": 7932 }, { "epoch": 2.8449677265120727, "grad_norm": 0.305019348859787, "learning_rate": 8.068243441930135e-08, "loss": 0.2646, "step": 7933 }, { "epoch": 2.8453263208223762, "grad_norm": 0.3179347515106201, "learning_rate": 8.030947663481226e-08, "loss": 0.3399, "step": 7934 }, { "epoch": 2.8456849151326797, "grad_norm": 0.29083511233329773, "learning_rate": 7.993737587571825e-08, "loss": 0.2695, "step": 7935 }, { "epoch": 2.8460435094429837, "grad_norm": 0.31264516711235046, "learning_rate": 7.95661322068414e-08, "loss": 0.3022, "step": 7936 }, { "epoch": 2.846402103753287, "grad_norm": 0.30152183771133423, "learning_rate": 7.919574569285215e-08, "loss": 0.2938, "step": 7937 }, { "epoch": 2.8467606980635907, "grad_norm": 0.291312575340271, "learning_rate": 7.882621639827337e-08, "loss": 0.3303, "step": 7938 }, { "epoch": 2.847119292373894, "grad_norm": 0.3001742362976074, "learning_rate": 7.845754438747854e-08, "loss": 0.3178, "step": 7939 }, { "epoch": 2.847477886684198, "grad_norm": 0.28707200288772583, "learning_rate": 7.808972972469131e-08, "loss": 0.3074, "step": 7940 }, { "epoch": 2.8478364809945016, "grad_norm": 0.29242271184921265, "learning_rate": 7.772277247398596e-08, "loss": 0.3007, "step": 7941 }, { "epoch": 2.848195075304805, "grad_norm": 0.2815973460674286, "learning_rate": 7.735667269928803e-08, "loss": 0.2906, "step": 7942 }, { "epoch": 2.848553669615109, "grad_norm": 0.2804605960845947, "learning_rate": 7.699143046437263e-08, "loss": 0.2967, "step": 7943 }, { "epoch": 2.8489122639254125, "grad_norm": 0.31154611706733704, "learning_rate": 7.662704583286662e-08, "loss": 0.3246, "step": 7944 }, { "epoch": 2.849270858235716, "grad_norm": 0.27394595742225647, "learning_rate": 7.62635188682459e-08, "loss": 0.2521, "step": 7945 }, { "epoch": 2.8496294525460195, "grad_norm": 0.29651564359664917, "learning_rate": 7.590084963383982e-08, "loss": 0.3093, "step": 7946 }, { "epoch": 2.849988046856323, "grad_norm": 0.29047682881355286, "learning_rate": 7.553903819282504e-08, "loss": 0.2942, "step": 7947 }, { "epoch": 2.850346641166627, "grad_norm": 0.30105510354042053, "learning_rate": 7.517808460823117e-08, "loss": 0.2854, "step": 7948 }, { "epoch": 2.8507052354769304, "grad_norm": 0.3129481375217438, "learning_rate": 7.481798894293624e-08, "loss": 0.2956, "step": 7949 }, { "epoch": 2.851063829787234, "grad_norm": 0.3076300323009491, "learning_rate": 7.445875125967117e-08, "loss": 0.293, "step": 7950 }, { "epoch": 2.851422424097538, "grad_norm": 0.2898560166358948, "learning_rate": 7.410037162101591e-08, "loss": 0.2809, "step": 7951 }, { "epoch": 2.8517810184078414, "grad_norm": 0.3099871277809143, "learning_rate": 7.374285008940052e-08, "loss": 0.2996, "step": 7952 }, { "epoch": 2.852139612718145, "grad_norm": 0.3146514892578125, "learning_rate": 7.338618672710795e-08, "loss": 0.3215, "step": 7953 }, { "epoch": 2.8524982070284484, "grad_norm": 0.29635322093963623, "learning_rate": 7.303038159626907e-08, "loss": 0.283, "step": 7954 }, { "epoch": 2.852856801338752, "grad_norm": 0.28047510981559753, "learning_rate": 7.267543475886596e-08, "loss": 0.2784, "step": 7955 }, { "epoch": 2.853215395649056, "grad_norm": 0.29270443320274353, "learning_rate": 7.232134627673248e-08, "loss": 0.2876, "step": 7956 }, { "epoch": 2.8535739899593593, "grad_norm": 0.30032607913017273, "learning_rate": 7.196811621155098e-08, "loss": 0.2901, "step": 7957 }, { "epoch": 2.853932584269663, "grad_norm": 0.30037954449653625, "learning_rate": 7.16157446248561e-08, "loss": 0.2679, "step": 7958 }, { "epoch": 2.8542911785799667, "grad_norm": 0.29641610383987427, "learning_rate": 7.126423157803098e-08, "loss": 0.2938, "step": 7959 }, { "epoch": 2.85464977289027, "grad_norm": 0.29965996742248535, "learning_rate": 7.091357713231106e-08, "loss": 0.2945, "step": 7960 }, { "epoch": 2.8550083672005737, "grad_norm": 0.31338268518447876, "learning_rate": 7.056378134878139e-08, "loss": 0.2856, "step": 7961 }, { "epoch": 2.855366961510877, "grad_norm": 0.29886820912361145, "learning_rate": 7.02148442883771e-08, "loss": 0.306, "step": 7962 }, { "epoch": 2.8557255558211807, "grad_norm": 0.3137671649456024, "learning_rate": 6.986676601188458e-08, "loss": 0.3005, "step": 7963 }, { "epoch": 2.8560841501314846, "grad_norm": 0.27587249875068665, "learning_rate": 6.951954657993975e-08, "loss": 0.258, "step": 7964 }, { "epoch": 2.856442744441788, "grad_norm": 0.3001053035259247, "learning_rate": 6.917318605303036e-08, "loss": 0.2927, "step": 7965 }, { "epoch": 2.8568013387520916, "grad_norm": 0.30817151069641113, "learning_rate": 6.882768449149147e-08, "loss": 0.2793, "step": 7966 }, { "epoch": 2.8571599330623956, "grad_norm": 0.2933499217033386, "learning_rate": 6.848304195551215e-08, "loss": 0.2872, "step": 7967 }, { "epoch": 2.857518527372699, "grad_norm": 0.2875014543533325, "learning_rate": 6.813925850512992e-08, "loss": 0.2812, "step": 7968 }, { "epoch": 2.8578771216830026, "grad_norm": 0.2973639965057373, "learning_rate": 6.779633420023246e-08, "loss": 0.2832, "step": 7969 }, { "epoch": 2.858235715993306, "grad_norm": 0.2951761782169342, "learning_rate": 6.745426910055864e-08, "loss": 0.2825, "step": 7970 }, { "epoch": 2.85859431030361, "grad_norm": 0.3164810836315155, "learning_rate": 6.711306326569744e-08, "loss": 0.343, "step": 7971 }, { "epoch": 2.8589529046139135, "grad_norm": 0.2607160806655884, "learning_rate": 6.677271675508746e-08, "loss": 0.2593, "step": 7972 }, { "epoch": 2.859311498924217, "grad_norm": 0.3016079068183899, "learning_rate": 6.643322962801846e-08, "loss": 0.2957, "step": 7973 }, { "epoch": 2.859670093234521, "grad_norm": 0.31317877769470215, "learning_rate": 6.609460194362927e-08, "loss": 0.2881, "step": 7974 }, { "epoch": 2.8600286875448244, "grad_norm": 0.32371410727500916, "learning_rate": 6.575683376091213e-08, "loss": 0.3328, "step": 7975 }, { "epoch": 2.860387281855128, "grad_norm": 0.28653618693351746, "learning_rate": 6.541992513870499e-08, "loss": 0.2639, "step": 7976 }, { "epoch": 2.8607458761654314, "grad_norm": 0.3296469748020172, "learning_rate": 6.508387613569922e-08, "loss": 0.3352, "step": 7977 }, { "epoch": 2.861104470475735, "grad_norm": 0.31289181113243103, "learning_rate": 6.474868681043578e-08, "loss": 0.2891, "step": 7978 }, { "epoch": 2.861463064786039, "grad_norm": 0.28960639238357544, "learning_rate": 6.441435722130574e-08, "loss": 0.2799, "step": 7979 }, { "epoch": 2.8618216590963423, "grad_norm": 0.3027072846889496, "learning_rate": 6.408088742654972e-08, "loss": 0.2973, "step": 7980 }, { "epoch": 2.862180253406646, "grad_norm": 0.3108142912387848, "learning_rate": 6.37482774842596e-08, "loss": 0.2975, "step": 7981 }, { "epoch": 2.8625388477169498, "grad_norm": 0.29532891511917114, "learning_rate": 6.341652745237791e-08, "loss": 0.2496, "step": 7982 }, { "epoch": 2.8628974420272533, "grad_norm": 0.299836665391922, "learning_rate": 6.308563738869511e-08, "loss": 0.2884, "step": 7983 }, { "epoch": 2.8632560363375568, "grad_norm": 0.3041658103466034, "learning_rate": 6.27556073508534e-08, "loss": 0.3014, "step": 7984 }, { "epoch": 2.8636146306478603, "grad_norm": 0.30585262179374695, "learning_rate": 6.242643739634624e-08, "loss": 0.2894, "step": 7985 }, { "epoch": 2.8639732249581638, "grad_norm": 0.3349616527557373, "learning_rate": 6.209812758251499e-08, "loss": 0.2876, "step": 7986 }, { "epoch": 2.8643318192684677, "grad_norm": 0.30253931879997253, "learning_rate": 6.177067796655334e-08, "loss": 0.2658, "step": 7987 }, { "epoch": 2.864690413578771, "grad_norm": 0.31237903237342834, "learning_rate": 6.144408860550233e-08, "loss": 0.3416, "step": 7988 }, { "epoch": 2.8650490078890747, "grad_norm": 0.28113484382629395, "learning_rate": 6.111835955625645e-08, "loss": 0.2747, "step": 7989 }, { "epoch": 2.8654076021993786, "grad_norm": 0.3120099604129791, "learning_rate": 6.079349087555808e-08, "loss": 0.2936, "step": 7990 }, { "epoch": 2.865766196509682, "grad_norm": 0.339271605014801, "learning_rate": 6.046948261999974e-08, "loss": 0.3228, "step": 7991 }, { "epoch": 2.8661247908199856, "grad_norm": 0.2967865765094757, "learning_rate": 6.014633484602573e-08, "loss": 0.2751, "step": 7992 }, { "epoch": 2.866483385130289, "grad_norm": 0.29656359553337097, "learning_rate": 5.982404760992878e-08, "loss": 0.2908, "step": 7993 }, { "epoch": 2.8668419794405926, "grad_norm": 0.31302377581596375, "learning_rate": 5.9502620967852865e-08, "loss": 0.2874, "step": 7994 }, { "epoch": 2.8672005737508965, "grad_norm": 0.35041725635528564, "learning_rate": 5.918205497579099e-08, "loss": 0.3291, "step": 7995 }, { "epoch": 2.8675591680612, "grad_norm": 0.30130910873413086, "learning_rate": 5.886234968958626e-08, "loss": 0.2645, "step": 7996 }, { "epoch": 2.867917762371504, "grad_norm": 0.3009084165096283, "learning_rate": 5.854350516493357e-08, "loss": 0.2951, "step": 7997 }, { "epoch": 2.8682763566818075, "grad_norm": 0.29423558712005615, "learning_rate": 5.82255214573757e-08, "loss": 0.255, "step": 7998 }, { "epoch": 2.868634950992111, "grad_norm": 0.3087853193283081, "learning_rate": 5.790839862230724e-08, "loss": 0.2972, "step": 7999 }, { "epoch": 2.8689935453024145, "grad_norm": 0.2996331751346588, "learning_rate": 5.7592136714971214e-08, "loss": 0.2696, "step": 8000 }, { "epoch": 2.869352139612718, "grad_norm": 0.32804349064826965, "learning_rate": 5.727673579046189e-08, "loss": 0.3319, "step": 8001 }, { "epoch": 2.869710733923022, "grad_norm": 0.2849877178668976, "learning_rate": 5.6962195903722536e-08, "loss": 0.2892, "step": 8002 }, { "epoch": 2.8700693282333254, "grad_norm": 0.2975309193134308, "learning_rate": 5.664851710954711e-08, "loss": 0.3408, "step": 8003 }, { "epoch": 2.870427922543629, "grad_norm": 0.29275819659233093, "learning_rate": 5.633569946258022e-08, "loss": 0.2637, "step": 8004 }, { "epoch": 2.870786516853933, "grad_norm": 0.2979039251804352, "learning_rate": 5.602374301731495e-08, "loss": 0.2864, "step": 8005 }, { "epoch": 2.8711451111642363, "grad_norm": 0.30593934655189514, "learning_rate": 5.5712647828095045e-08, "loss": 0.3028, "step": 8006 }, { "epoch": 2.87150370547454, "grad_norm": 0.3025653064250946, "learning_rate": 5.540241394911494e-08, "loss": 0.2875, "step": 8007 }, { "epoch": 2.8718622997848433, "grad_norm": 0.31246864795684814, "learning_rate": 5.509304143441696e-08, "loss": 0.2988, "step": 8008 }, { "epoch": 2.872220894095147, "grad_norm": 0.3201029598712921, "learning_rate": 5.4784530337896325e-08, "loss": 0.3065, "step": 8009 }, { "epoch": 2.8725794884054507, "grad_norm": 0.29592832922935486, "learning_rate": 5.44768807132956e-08, "loss": 0.2629, "step": 8010 }, { "epoch": 2.8729380827157542, "grad_norm": 0.28611645102500916, "learning_rate": 5.417009261420914e-08, "loss": 0.3049, "step": 8011 }, { "epoch": 2.8732966770260577, "grad_norm": 0.2952830195426941, "learning_rate": 5.386416609407919e-08, "loss": 0.2678, "step": 8012 }, { "epoch": 2.8736552713363617, "grad_norm": 0.31102898716926575, "learning_rate": 5.3559101206200337e-08, "loss": 0.281, "step": 8013 }, { "epoch": 2.874013865646665, "grad_norm": 0.3339313566684723, "learning_rate": 5.325489800371508e-08, "loss": 0.3234, "step": 8014 }, { "epoch": 2.8743724599569687, "grad_norm": 0.3023766875267029, "learning_rate": 5.295155653961659e-08, "loss": 0.2785, "step": 8015 }, { "epoch": 2.874731054267272, "grad_norm": 0.29698270559310913, "learning_rate": 5.264907686674869e-08, "loss": 0.3188, "step": 8016 }, { "epoch": 2.8750896485775757, "grad_norm": 0.30930331349372864, "learning_rate": 5.2347459037803136e-08, "loss": 0.2771, "step": 8017 }, { "epoch": 2.8754482428878796, "grad_norm": 0.2939767837524414, "learning_rate": 5.2046703105324005e-08, "loss": 0.298, "step": 8018 }, { "epoch": 2.875806837198183, "grad_norm": 0.3087790310382843, "learning_rate": 5.174680912170216e-08, "loss": 0.3072, "step": 8019 }, { "epoch": 2.8761654315084866, "grad_norm": 0.29387950897216797, "learning_rate": 5.144777713918137e-08, "loss": 0.2579, "step": 8020 }, { "epoch": 2.8765240258187905, "grad_norm": 0.3090617060661316, "learning_rate": 5.1149607209853844e-08, "loss": 0.318, "step": 8021 }, { "epoch": 2.876882620129094, "grad_norm": 0.2896825075149536, "learning_rate": 5.0852299385661384e-08, "loss": 0.2461, "step": 8022 }, { "epoch": 2.8772412144393975, "grad_norm": 0.3055759072303772, "learning_rate": 5.0555853718396445e-08, "loss": 0.3076, "step": 8023 }, { "epoch": 2.877599808749701, "grad_norm": 0.3043343126773834, "learning_rate": 5.0260270259699926e-08, "loss": 0.3017, "step": 8024 }, { "epoch": 2.877958403060005, "grad_norm": 0.30285707116127014, "learning_rate": 4.996554906106399e-08, "loss": 0.316, "step": 8025 }, { "epoch": 2.8783169973703084, "grad_norm": 0.29832327365875244, "learning_rate": 4.967169017382978e-08, "loss": 0.2752, "step": 8026 }, { "epoch": 2.878675591680612, "grad_norm": 0.28239932656288147, "learning_rate": 4.9378693649188014e-08, "loss": 0.294, "step": 8027 }, { "epoch": 2.879034185990916, "grad_norm": 0.28474074602127075, "learning_rate": 4.90865595381812e-08, "loss": 0.2755, "step": 8028 }, { "epoch": 2.8793927803012194, "grad_norm": 0.3007030487060547, "learning_rate": 4.8795287891698077e-08, "loss": 0.3116, "step": 8029 }, { "epoch": 2.879751374611523, "grad_norm": 0.29105985164642334, "learning_rate": 4.850487876047971e-08, "loss": 0.2862, "step": 8030 }, { "epoch": 2.8801099689218264, "grad_norm": 0.29627370834350586, "learning_rate": 4.821533219511676e-08, "loss": 0.2881, "step": 8031 }, { "epoch": 2.88046856323213, "grad_norm": 0.2958909869194031, "learning_rate": 4.792664824604887e-08, "loss": 0.3052, "step": 8032 }, { "epoch": 2.880827157542434, "grad_norm": 0.3004480302333832, "learning_rate": 4.763882696356581e-08, "loss": 0.2936, "step": 8033 }, { "epoch": 2.8811857518527373, "grad_norm": 0.3031415641307831, "learning_rate": 4.735186839780637e-08, "loss": 0.2814, "step": 8034 }, { "epoch": 2.881544346163041, "grad_norm": 0.29055196046829224, "learning_rate": 4.706577259876055e-08, "loss": 0.3084, "step": 8035 }, { "epoch": 2.8819029404733447, "grad_norm": 0.3032698333263397, "learning_rate": 4.678053961626572e-08, "loss": 0.2975, "step": 8036 }, { "epoch": 2.882261534783648, "grad_norm": 0.2992185950279236, "learning_rate": 4.6496169500012124e-08, "loss": 0.2569, "step": 8037 }, { "epoch": 2.8826201290939517, "grad_norm": 0.2948700785636902, "learning_rate": 4.62126622995368e-08, "loss": 0.2879, "step": 8038 }, { "epoch": 2.882978723404255, "grad_norm": 0.3107766807079315, "learning_rate": 4.593001806422748e-08, "loss": 0.2826, "step": 8039 }, { "epoch": 2.8833373177145587, "grad_norm": 0.31519341468811035, "learning_rate": 4.5648236843322535e-08, "loss": 0.3256, "step": 8040 }, { "epoch": 2.8836959120248626, "grad_norm": 0.2950042486190796, "learning_rate": 4.536731868590827e-08, "loss": 0.2911, "step": 8041 }, { "epoch": 2.884054506335166, "grad_norm": 0.29239073395729065, "learning_rate": 4.508726364092164e-08, "loss": 0.286, "step": 8042 }, { "epoch": 2.8844131006454696, "grad_norm": 0.3034345805644989, "learning_rate": 4.4808071757149185e-08, "loss": 0.2848, "step": 8043 }, { "epoch": 2.8847716949557736, "grad_norm": 0.2981545627117157, "learning_rate": 4.452974308322755e-08, "loss": 0.314, "step": 8044 }, { "epoch": 2.885130289266077, "grad_norm": 0.3049045503139496, "learning_rate": 4.42522776676424e-08, "loss": 0.2754, "step": 8045 }, { "epoch": 2.8854888835763806, "grad_norm": 0.30980151891708374, "learning_rate": 4.3975675558727835e-08, "loss": 0.2548, "step": 8046 }, { "epoch": 2.885847477886684, "grad_norm": 0.3137832581996918, "learning_rate": 4.369993680467033e-08, "loss": 0.2543, "step": 8047 }, { "epoch": 2.8862060721969875, "grad_norm": 0.30825570225715637, "learning_rate": 4.342506145350312e-08, "loss": 0.3169, "step": 8048 }, { "epoch": 2.8865646665072915, "grad_norm": 0.2928904592990875, "learning_rate": 4.3151049553111224e-08, "loss": 0.2925, "step": 8049 }, { "epoch": 2.886923260817595, "grad_norm": 0.2902921140193939, "learning_rate": 4.287790115122814e-08, "loss": 0.3214, "step": 8050 }, { "epoch": 2.8872818551278985, "grad_norm": 0.3118566572666168, "learning_rate": 4.260561629543747e-08, "loss": 0.339, "step": 8051 }, { "epoch": 2.8876404494382024, "grad_norm": 0.2953662574291229, "learning_rate": 4.233419503317182e-08, "loss": 0.28, "step": 8052 }, { "epoch": 2.887999043748506, "grad_norm": 0.32278668880462646, "learning_rate": 4.2063637411712823e-08, "loss": 0.3311, "step": 8053 }, { "epoch": 2.8883576380588094, "grad_norm": 0.3163824677467346, "learning_rate": 4.1793943478193875e-08, "loss": 0.2908, "step": 8054 }, { "epoch": 2.888716232369113, "grad_norm": 0.3185065984725952, "learning_rate": 4.152511327959519e-08, "loss": 0.294, "step": 8055 }, { "epoch": 2.889074826679417, "grad_norm": 0.28004616498947144, "learning_rate": 4.125714686274929e-08, "loss": 0.2764, "step": 8056 }, { "epoch": 2.8894334209897203, "grad_norm": 0.31118714809417725, "learning_rate": 4.099004427433551e-08, "loss": 0.3663, "step": 8057 }, { "epoch": 2.889792015300024, "grad_norm": 0.30090099573135376, "learning_rate": 4.07238055608844e-08, "loss": 0.3222, "step": 8058 }, { "epoch": 2.8901506096103278, "grad_norm": 0.3157695531845093, "learning_rate": 4.0458430768775536e-08, "loss": 0.3046, "step": 8059 }, { "epoch": 2.8905092039206313, "grad_norm": 0.31526827812194824, "learning_rate": 4.0193919944238044e-08, "loss": 0.2798, "step": 8060 }, { "epoch": 2.8908677982309348, "grad_norm": 0.28508976101875305, "learning_rate": 3.993027313335007e-08, "loss": 0.2762, "step": 8061 }, { "epoch": 2.8912263925412383, "grad_norm": 0.29563185572624207, "learning_rate": 3.966749038204099e-08, "loss": 0.3051, "step": 8062 }, { "epoch": 2.8915849868515417, "grad_norm": 0.3170342743396759, "learning_rate": 3.94055717360875e-08, "loss": 0.3063, "step": 8063 }, { "epoch": 2.8919435811618457, "grad_norm": 0.3282296061515808, "learning_rate": 3.914451724111645e-08, "loss": 0.305, "step": 8064 }, { "epoch": 2.892302175472149, "grad_norm": 0.2955663502216339, "learning_rate": 3.888432694260425e-08, "loss": 0.2796, "step": 8065 }, { "epoch": 2.8926607697824527, "grad_norm": 0.31754207611083984, "learning_rate": 3.862500088587795e-08, "loss": 0.2903, "step": 8066 }, { "epoch": 2.8930193640927566, "grad_norm": 0.3074356019496918, "learning_rate": 3.836653911611143e-08, "loss": 0.2914, "step": 8067 }, { "epoch": 2.89337795840306, "grad_norm": 0.3018714487552643, "learning_rate": 3.81089416783309e-08, "loss": 0.2734, "step": 8068 }, { "epoch": 2.8937365527133636, "grad_norm": 0.2855609357357025, "learning_rate": 3.78522086174099e-08, "loss": 0.2992, "step": 8069 }, { "epoch": 2.894095147023667, "grad_norm": 0.3016826808452606, "learning_rate": 3.759633997807211e-08, "loss": 0.2854, "step": 8070 }, { "epoch": 2.8944537413339706, "grad_norm": 0.32418665289878845, "learning_rate": 3.734133580489074e-08, "loss": 0.3208, "step": 8071 }, { "epoch": 2.8948123356442745, "grad_norm": 0.3200346529483795, "learning_rate": 3.7087196142288614e-08, "loss": 0.2789, "step": 8072 }, { "epoch": 2.895170929954578, "grad_norm": 0.30310511589050293, "learning_rate": 3.6833921034536976e-08, "loss": 0.297, "step": 8073 }, { "epoch": 2.8955295242648815, "grad_norm": 0.2960565686225891, "learning_rate": 3.658151052575831e-08, "loss": 0.2882, "step": 8074 }, { "epoch": 2.8958881185751855, "grad_norm": 0.29645437002182007, "learning_rate": 3.6329964659922446e-08, "loss": 0.3088, "step": 8075 }, { "epoch": 2.896246712885489, "grad_norm": 0.32975611090660095, "learning_rate": 3.6079283480849324e-08, "loss": 0.3243, "step": 8076 }, { "epoch": 2.8966053071957925, "grad_norm": 0.3105672597885132, "learning_rate": 3.5829467032208464e-08, "loss": 0.2758, "step": 8077 }, { "epoch": 2.896963901506096, "grad_norm": 0.31161820888519287, "learning_rate": 3.558051535751894e-08, "loss": 0.3036, "step": 8078 }, { "epoch": 2.8973224958163994, "grad_norm": 0.30979791283607483, "learning_rate": 3.5332428500149396e-08, "loss": 0.3035, "step": 8079 }, { "epoch": 2.8976810901267034, "grad_norm": 0.2951692044734955, "learning_rate": 3.508520650331637e-08, "loss": 0.2976, "step": 8080 }, { "epoch": 2.898039684437007, "grad_norm": 0.30019503831863403, "learning_rate": 3.4838849410087084e-08, "loss": 0.2775, "step": 8081 }, { "epoch": 2.898398278747311, "grad_norm": 0.2902412414550781, "learning_rate": 3.459335726337776e-08, "loss": 0.2891, "step": 8082 }, { "epoch": 2.8987568730576143, "grad_norm": 0.30808451771736145, "learning_rate": 3.434873010595363e-08, "loss": 0.3214, "step": 8083 }, { "epoch": 2.899115467367918, "grad_norm": 0.2786746025085449, "learning_rate": 3.410496798043006e-08, "loss": 0.2679, "step": 8084 }, { "epoch": 2.8994740616782213, "grad_norm": 0.30028560757637024, "learning_rate": 3.386207092927085e-08, "loss": 0.2804, "step": 8085 }, { "epoch": 2.899832655988525, "grad_norm": 0.31772682070732117, "learning_rate": 3.3620038994789386e-08, "loss": 0.3197, "step": 8086 }, { "epoch": 2.9001912502988287, "grad_norm": 0.3193233013153076, "learning_rate": 3.3378872219148596e-08, "loss": 0.2755, "step": 8087 }, { "epoch": 2.9005498446091322, "grad_norm": 0.2986992597579956, "learning_rate": 3.313857064435988e-08, "loss": 0.2849, "step": 8088 }, { "epoch": 2.9009084389194357, "grad_norm": 0.29206663370132446, "learning_rate": 3.2899134312284754e-08, "loss": 0.2916, "step": 8089 }, { "epoch": 2.9012670332297397, "grad_norm": 0.28835538029670715, "learning_rate": 3.2660563264634295e-08, "loss": 0.2985, "step": 8090 }, { "epoch": 2.901625627540043, "grad_norm": 0.30738964676856995, "learning_rate": 3.242285754296859e-08, "loss": 0.3019, "step": 8091 }, { "epoch": 2.9019842218503467, "grad_norm": 0.3097648620605469, "learning_rate": 3.2186017188695075e-08, "loss": 0.2923, "step": 8092 }, { "epoch": 2.90234281616065, "grad_norm": 0.316671222448349, "learning_rate": 3.195004224307352e-08, "loss": 0.2981, "step": 8093 }, { "epoch": 2.9027014104709536, "grad_norm": 0.30960389971733093, "learning_rate": 3.1714932747211605e-08, "loss": 0.2816, "step": 8094 }, { "epoch": 2.9030600047812576, "grad_norm": 0.3115827143192291, "learning_rate": 3.148068874206489e-08, "loss": 0.3047, "step": 8095 }, { "epoch": 2.903418599091561, "grad_norm": 0.28854504227638245, "learning_rate": 3.1247310268440743e-08, "loss": 0.2934, "step": 8096 }, { "epoch": 2.9037771934018646, "grad_norm": 0.29298657178878784, "learning_rate": 3.101479736699331e-08, "loss": 0.2746, "step": 8097 }, { "epoch": 2.9041357877121685, "grad_norm": 0.318735808134079, "learning_rate": 3.0783150078227966e-08, "loss": 0.3123, "step": 8098 }, { "epoch": 2.904494382022472, "grad_norm": 0.28967201709747314, "learning_rate": 3.055236844249743e-08, "loss": 0.2737, "step": 8099 }, { "epoch": 2.9048529763327755, "grad_norm": 0.2899410128593445, "learning_rate": 3.032245250000565e-08, "loss": 0.2699, "step": 8100 }, { "epoch": 2.905211570643079, "grad_norm": 0.3064381182193756, "learning_rate": 3.009340229080393e-08, "loss": 0.3231, "step": 8101 }, { "epoch": 2.9055701649533825, "grad_norm": 0.300229012966156, "learning_rate": 2.986521785479424e-08, "loss": 0.2758, "step": 8102 }, { "epoch": 2.9059287592636864, "grad_norm": 0.3287486732006073, "learning_rate": 2.9637899231725885e-08, "loss": 0.2705, "step": 8103 }, { "epoch": 2.90628735357399, "grad_norm": 0.33293992280960083, "learning_rate": 2.9411446461199978e-08, "loss": 0.3021, "step": 8104 }, { "epoch": 2.9066459478842934, "grad_norm": 0.28835785388946533, "learning_rate": 2.9185859582663845e-08, "loss": 0.2628, "step": 8105 }, { "epoch": 2.9070045421945974, "grad_norm": 0.3313639760017395, "learning_rate": 2.8961138635416054e-08, "loss": 0.349, "step": 8106 }, { "epoch": 2.907363136504901, "grad_norm": 0.28468266129493713, "learning_rate": 2.873728365860362e-08, "loss": 0.2795, "step": 8107 }, { "epoch": 2.9077217308152044, "grad_norm": 0.2893339991569519, "learning_rate": 2.8514294691223686e-08, "loss": 0.2778, "step": 8108 }, { "epoch": 2.908080325125508, "grad_norm": 0.3121119439601898, "learning_rate": 2.8292171772119624e-08, "loss": 0.2998, "step": 8109 }, { "epoch": 2.9084389194358113, "grad_norm": 0.3005044162273407, "learning_rate": 2.8070914939988258e-08, "loss": 0.3049, "step": 8110 }, { "epoch": 2.9087975137461153, "grad_norm": 0.28813377022743225, "learning_rate": 2.785052423337098e-08, "loss": 0.2641, "step": 8111 }, { "epoch": 2.9091561080564188, "grad_norm": 0.28763678669929504, "learning_rate": 2.763099969066263e-08, "loss": 0.29, "step": 8112 }, { "epoch": 2.9095147023667227, "grad_norm": 0.29093894362449646, "learning_rate": 2.741234135010318e-08, "loss": 0.3001, "step": 8113 }, { "epoch": 2.909873296677026, "grad_norm": 0.31409183144569397, "learning_rate": 2.719454924978493e-08, "loss": 0.2912, "step": 8114 }, { "epoch": 2.9102318909873297, "grad_norm": 0.3109573721885681, "learning_rate": 2.6977623427647537e-08, "loss": 0.2821, "step": 8115 }, { "epoch": 2.910590485297633, "grad_norm": 0.32090720534324646, "learning_rate": 2.676156392147966e-08, "loss": 0.3026, "step": 8116 }, { "epoch": 2.9109490796079367, "grad_norm": 0.30034732818603516, "learning_rate": 2.6546370768920638e-08, "loss": 0.2696, "step": 8117 }, { "epoch": 2.9113076739182406, "grad_norm": 0.30699992179870605, "learning_rate": 2.633204400745659e-08, "loss": 0.2984, "step": 8118 }, { "epoch": 2.911666268228544, "grad_norm": 0.30457308888435364, "learning_rate": 2.6118583674424325e-08, "loss": 0.3013, "step": 8119 }, { "epoch": 2.9120248625388476, "grad_norm": 0.2799375355243683, "learning_rate": 2.5905989807009645e-08, "loss": 0.2926, "step": 8120 }, { "epoch": 2.9123834568491516, "grad_norm": 0.3094714879989624, "learning_rate": 2.569426244224682e-08, "loss": 0.3235, "step": 8121 }, { "epoch": 2.912742051159455, "grad_norm": 0.27373528480529785, "learning_rate": 2.5483401617019677e-08, "loss": 0.2722, "step": 8122 }, { "epoch": 2.9131006454697586, "grad_norm": 0.30989009141921997, "learning_rate": 2.52734073680605e-08, "loss": 0.306, "step": 8123 }, { "epoch": 2.913459239780062, "grad_norm": 0.3030009865760803, "learning_rate": 2.5064279731951135e-08, "loss": 0.3139, "step": 8124 }, { "epoch": 2.9138178340903655, "grad_norm": 0.2989613711833954, "learning_rate": 2.4856018745121335e-08, "loss": 0.3007, "step": 8125 }, { "epoch": 2.9141764284006695, "grad_norm": 0.2713419198989868, "learning_rate": 2.464862444385263e-08, "loss": 0.2605, "step": 8126 }, { "epoch": 2.914535022710973, "grad_norm": 0.29966697096824646, "learning_rate": 2.4442096864272237e-08, "loss": 0.2921, "step": 8127 }, { "epoch": 2.9148936170212765, "grad_norm": 0.3121687173843384, "learning_rate": 2.4236436042358036e-08, "loss": 0.2963, "step": 8128 }, { "epoch": 2.9152522113315804, "grad_norm": 0.31712064146995544, "learning_rate": 2.4031642013938038e-08, "loss": 0.3063, "step": 8129 }, { "epoch": 2.915610805641884, "grad_norm": 0.2966185212135315, "learning_rate": 2.3827714814686488e-08, "loss": 0.2776, "step": 8130 }, { "epoch": 2.9159693999521874, "grad_norm": 0.301354318857193, "learning_rate": 2.362465448012885e-08, "loss": 0.2743, "step": 8131 }, { "epoch": 2.916327994262491, "grad_norm": 0.3015195429325104, "learning_rate": 2.3422461045639056e-08, "loss": 0.2971, "step": 8132 }, { "epoch": 2.9166865885727944, "grad_norm": 0.322171688079834, "learning_rate": 2.3221134546438927e-08, "loss": 0.3266, "step": 8133 }, { "epoch": 2.9170451828830983, "grad_norm": 0.2885507345199585, "learning_rate": 2.3020675017601524e-08, "loss": 0.2772, "step": 8134 }, { "epoch": 2.917403777193402, "grad_norm": 0.2793942391872406, "learning_rate": 2.2821082494046686e-08, "loss": 0.2688, "step": 8135 }, { "epoch": 2.9177623715037053, "grad_norm": 0.29286742210388184, "learning_rate": 2.2622357010543827e-08, "loss": 0.3109, "step": 8136 }, { "epoch": 2.9181209658140093, "grad_norm": 0.27257490158081055, "learning_rate": 2.2424498601712475e-08, "loss": 0.2604, "step": 8137 }, { "epoch": 2.9184795601243128, "grad_norm": 0.320941299200058, "learning_rate": 2.222750730201895e-08, "loss": 0.3433, "step": 8138 }, { "epoch": 2.9188381544346162, "grad_norm": 0.2954048812389374, "learning_rate": 2.2031383145780793e-08, "loss": 0.2767, "step": 8139 }, { "epoch": 2.9191967487449197, "grad_norm": 0.33243700861930847, "learning_rate": 2.1836126167162907e-08, "loss": 0.3002, "step": 8140 }, { "epoch": 2.9195553430552237, "grad_norm": 0.29409873485565186, "learning_rate": 2.1641736400179748e-08, "loss": 0.2894, "step": 8141 }, { "epoch": 2.919913937365527, "grad_norm": 0.29637229442596436, "learning_rate": 2.144821387869478e-08, "loss": 0.2836, "step": 8142 }, { "epoch": 2.9202725316758307, "grad_norm": 0.2885655462741852, "learning_rate": 2.1255558636419925e-08, "loss": 0.2784, "step": 8143 }, { "epoch": 2.9206311259861346, "grad_norm": 0.2981787323951721, "learning_rate": 2.1063770706916676e-08, "loss": 0.324, "step": 8144 }, { "epoch": 2.920989720296438, "grad_norm": 0.2929205000400543, "learning_rate": 2.0872850123594412e-08, "loss": 0.2996, "step": 8145 }, { "epoch": 2.9213483146067416, "grad_norm": 0.29244744777679443, "learning_rate": 2.06827969197132e-08, "loss": 0.2794, "step": 8146 }, { "epoch": 2.921706908917045, "grad_norm": 0.3096117675304413, "learning_rate": 2.0493611128380443e-08, "loss": 0.2907, "step": 8147 }, { "epoch": 2.9220655032273486, "grad_norm": 0.3017307221889496, "learning_rate": 2.0305292782552e-08, "loss": 0.2673, "step": 8148 }, { "epoch": 2.9224240975376525, "grad_norm": 0.2851341962814331, "learning_rate": 2.0117841915034963e-08, "loss": 0.3014, "step": 8149 }, { "epoch": 2.922782691847956, "grad_norm": 0.2927659749984741, "learning_rate": 1.9931258558482658e-08, "loss": 0.2924, "step": 8150 }, { "epoch": 2.9231412861582595, "grad_norm": 0.2974579632282257, "learning_rate": 1.9745542745399638e-08, "loss": 0.2943, "step": 8151 }, { "epoch": 2.9234998804685635, "grad_norm": 0.3113076984882355, "learning_rate": 1.956069450813669e-08, "loss": 0.2779, "step": 8152 }, { "epoch": 2.923858474778867, "grad_norm": 0.31066691875457764, "learning_rate": 1.937671387889639e-08, "loss": 0.2862, "step": 8153 }, { "epoch": 2.9242170690891705, "grad_norm": 0.2816377580165863, "learning_rate": 1.9193600889728104e-08, "loss": 0.2569, "step": 8154 }, { "epoch": 2.924575663399474, "grad_norm": 0.2891364097595215, "learning_rate": 1.9011355572530753e-08, "loss": 0.2726, "step": 8155 }, { "epoch": 2.9249342577097774, "grad_norm": 0.2989264130592346, "learning_rate": 1.8829977959051728e-08, "loss": 0.3156, "step": 8156 }, { "epoch": 2.9252928520200814, "grad_norm": 0.3004503846168518, "learning_rate": 1.8649468080887968e-08, "loss": 0.3224, "step": 8157 }, { "epoch": 2.925651446330385, "grad_norm": 0.29426994919776917, "learning_rate": 1.8469825969484877e-08, "loss": 0.306, "step": 8158 }, { "epoch": 2.9260100406406884, "grad_norm": 0.28350552916526794, "learning_rate": 1.8291051656136316e-08, "loss": 0.2708, "step": 8159 }, { "epoch": 2.9263686349509923, "grad_norm": 0.28972840309143066, "learning_rate": 1.8113145171985148e-08, "loss": 0.2811, "step": 8160 }, { "epoch": 2.926727229261296, "grad_norm": 0.29881373047828674, "learning_rate": 1.7936106548024358e-08, "loss": 0.3143, "step": 8161 }, { "epoch": 2.9270858235715993, "grad_norm": 0.2984490692615509, "learning_rate": 1.7759935815093165e-08, "loss": 0.2765, "step": 8162 }, { "epoch": 2.927444417881903, "grad_norm": 0.30133959650993347, "learning_rate": 1.7584633003882578e-08, "loss": 0.2962, "step": 8163 }, { "epoch": 2.9278030121922063, "grad_norm": 0.29608845710754395, "learning_rate": 1.7410198144929834e-08, "loss": 0.2926, "step": 8164 }, { "epoch": 2.9281616065025102, "grad_norm": 0.294943243265152, "learning_rate": 1.723663126862174e-08, "loss": 0.2675, "step": 8165 }, { "epoch": 2.9285202008128137, "grad_norm": 0.2884156107902527, "learning_rate": 1.7063932405195215e-08, "loss": 0.2797, "step": 8166 }, { "epoch": 2.928878795123117, "grad_norm": 0.292305052280426, "learning_rate": 1.6892101584734533e-08, "loss": 0.2868, "step": 8167 }, { "epoch": 2.929237389433421, "grad_norm": 0.3023209869861603, "learning_rate": 1.6721138837172967e-08, "loss": 0.3047, "step": 8168 }, { "epoch": 2.9295959837437247, "grad_norm": 0.2981685400009155, "learning_rate": 1.655104419229281e-08, "loss": 0.2929, "step": 8169 }, { "epoch": 2.929954578054028, "grad_norm": 0.32401344180107117, "learning_rate": 1.6381817679725354e-08, "loss": 0.3015, "step": 8170 }, { "epoch": 2.9303131723643316, "grad_norm": 0.33560919761657715, "learning_rate": 1.6213459328950355e-08, "loss": 0.3304, "step": 8171 }, { "epoch": 2.9306717666746356, "grad_norm": 0.27473950386047363, "learning_rate": 1.604596916929546e-08, "loss": 0.2533, "step": 8172 }, { "epoch": 2.931030360984939, "grad_norm": 0.3065648674964905, "learning_rate": 1.5879347229939002e-08, "loss": 0.3171, "step": 8173 }, { "epoch": 2.9313889552952426, "grad_norm": 0.28314530849456787, "learning_rate": 1.5713593539907202e-08, "loss": 0.2687, "step": 8174 }, { "epoch": 2.9317475496055465, "grad_norm": 0.3236085772514343, "learning_rate": 1.5548708128074186e-08, "loss": 0.3064, "step": 8175 }, { "epoch": 2.93210614391585, "grad_norm": 0.302666574716568, "learning_rate": 1.5384691023163644e-08, "loss": 0.2848, "step": 8176 }, { "epoch": 2.9324647382261535, "grad_norm": 0.27810364961624146, "learning_rate": 1.5221542253747723e-08, "loss": 0.284, "step": 8177 }, { "epoch": 2.932823332536457, "grad_norm": 0.334418922662735, "learning_rate": 1.5059261848248132e-08, "loss": 0.3134, "step": 8178 }, { "epoch": 2.9331819268467605, "grad_norm": 0.28544890880584717, "learning_rate": 1.4897849834933365e-08, "loss": 0.282, "step": 8179 }, { "epoch": 2.9335405211570644, "grad_norm": 0.30946820974349976, "learning_rate": 1.473730624192371e-08, "loss": 0.3102, "step": 8180 }, { "epoch": 2.933899115467368, "grad_norm": 0.31159090995788574, "learning_rate": 1.4577631097184575e-08, "loss": 0.295, "step": 8181 }, { "epoch": 2.9342577097776714, "grad_norm": 0.3019108176231384, "learning_rate": 1.4418824428533152e-08, "loss": 0.319, "step": 8182 }, { "epoch": 2.9346163040879754, "grad_norm": 0.28571459650993347, "learning_rate": 1.4260886263632867e-08, "loss": 0.2613, "step": 8183 }, { "epoch": 2.934974898398279, "grad_norm": 0.3075399398803711, "learning_rate": 1.410381662999838e-08, "loss": 0.3048, "step": 8184 }, { "epoch": 2.9353334927085823, "grad_norm": 0.30712267756462097, "learning_rate": 1.3947615554990578e-08, "loss": 0.3046, "step": 8185 }, { "epoch": 2.935692087018886, "grad_norm": 0.2933464050292969, "learning_rate": 1.3792283065821032e-08, "loss": 0.3032, "step": 8186 }, { "epoch": 2.9360506813291893, "grad_norm": 0.287535160779953, "learning_rate": 1.363781918954865e-08, "loss": 0.2878, "step": 8187 }, { "epoch": 2.9364092756394933, "grad_norm": 0.3170267641544342, "learning_rate": 1.3484223953081355e-08, "loss": 0.3204, "step": 8188 }, { "epoch": 2.9367678699497968, "grad_norm": 0.29420018196105957, "learning_rate": 1.3331497383176629e-08, "loss": 0.2748, "step": 8189 }, { "epoch": 2.9371264642601003, "grad_norm": 0.3085471987724304, "learning_rate": 1.31796395064393e-08, "loss": 0.2857, "step": 8190 }, { "epoch": 2.937485058570404, "grad_norm": 0.3171232342720032, "learning_rate": 1.3028650349323213e-08, "loss": 0.3027, "step": 8191 }, { "epoch": 2.9378436528807077, "grad_norm": 0.286157488822937, "learning_rate": 1.2878529938132323e-08, "loss": 0.2681, "step": 8192 }, { "epoch": 2.938202247191011, "grad_norm": 0.30580446124076843, "learning_rate": 1.2729278299016823e-08, "loss": 0.2819, "step": 8193 }, { "epoch": 2.9385608415013147, "grad_norm": 0.3023986518383026, "learning_rate": 1.2580895457977582e-08, "loss": 0.3124, "step": 8194 }, { "epoch": 2.938919435811618, "grad_norm": 0.29155367612838745, "learning_rate": 1.2433381440862814e-08, "loss": 0.2464, "step": 8195 }, { "epoch": 2.939278030121922, "grad_norm": 0.3237321078777313, "learning_rate": 1.2286736273370291e-08, "loss": 0.3122, "step": 8196 }, { "epoch": 2.9396366244322256, "grad_norm": 0.29662060737609863, "learning_rate": 1.2140959981046251e-08, "loss": 0.2945, "step": 8197 }, { "epoch": 2.9399952187425296, "grad_norm": 0.29962027072906494, "learning_rate": 1.1996052589284824e-08, "loss": 0.2762, "step": 8198 }, { "epoch": 2.940353813052833, "grad_norm": 0.29699867963790894, "learning_rate": 1.1852014123329703e-08, "loss": 0.2934, "step": 8199 }, { "epoch": 2.9407124073631365, "grad_norm": 0.31938469409942627, "learning_rate": 1.1708844608272485e-08, "loss": 0.3088, "step": 8200 }, { "epoch": 2.94107100167344, "grad_norm": 0.31076550483703613, "learning_rate": 1.156654406905433e-08, "loss": 0.3034, "step": 8201 }, { "epoch": 2.9414295959837435, "grad_norm": 0.29290199279785156, "learning_rate": 1.1425112530463744e-08, "loss": 0.2637, "step": 8202 }, { "epoch": 2.9417881902940475, "grad_norm": 0.3014110028743744, "learning_rate": 1.1284550017139351e-08, "loss": 0.2839, "step": 8203 }, { "epoch": 2.942146784604351, "grad_norm": 0.3210718035697937, "learning_rate": 1.1144856553567118e-08, "loss": 0.3376, "step": 8204 }, { "epoch": 2.9425053789146545, "grad_norm": 0.291351318359375, "learning_rate": 1.100603216408147e-08, "loss": 0.2588, "step": 8205 }, { "epoch": 2.9428639732249584, "grad_norm": 0.29017889499664307, "learning_rate": 1.086807687286695e-08, "loss": 0.3231, "step": 8206 }, { "epoch": 2.943222567535262, "grad_norm": 0.288957417011261, "learning_rate": 1.0730990703956002e-08, "loss": 0.2869, "step": 8207 }, { "epoch": 2.9435811618455654, "grad_norm": 0.34119167923927307, "learning_rate": 1.059477368122841e-08, "loss": 0.3182, "step": 8208 }, { "epoch": 2.943939756155869, "grad_norm": 0.2786208987236023, "learning_rate": 1.0459425828414638e-08, "loss": 0.2586, "step": 8209 }, { "epoch": 2.9442983504661724, "grad_norm": 0.30458465218544006, "learning_rate": 1.0324947169091936e-08, "loss": 0.292, "step": 8210 }, { "epoch": 2.9446569447764763, "grad_norm": 0.31200098991394043, "learning_rate": 1.0191337726687123e-08, "loss": 0.2656, "step": 8211 }, { "epoch": 2.94501553908678, "grad_norm": 0.28898775577545166, "learning_rate": 1.0058597524475467e-08, "loss": 0.2715, "step": 8212 }, { "epoch": 2.9453741333970833, "grad_norm": 0.30835339426994324, "learning_rate": 9.926726585580692e-09, "loss": 0.3146, "step": 8213 }, { "epoch": 2.9457327277073873, "grad_norm": 0.30775055289268494, "learning_rate": 9.795724932975537e-09, "loss": 0.3099, "step": 8214 }, { "epoch": 2.9460913220176908, "grad_norm": 0.2886641025543213, "learning_rate": 9.665592589480077e-09, "loss": 0.2724, "step": 8215 }, { "epoch": 2.9464499163279942, "grad_norm": 0.30195653438568115, "learning_rate": 9.536329577764514e-09, "loss": 0.3102, "step": 8216 }, { "epoch": 2.9468085106382977, "grad_norm": 0.2944205701351166, "learning_rate": 9.40793592034639e-09, "loss": 0.2566, "step": 8217 }, { "epoch": 2.9471671049486012, "grad_norm": 0.3138660490512848, "learning_rate": 9.280411639592257e-09, "loss": 0.2767, "step": 8218 }, { "epoch": 2.947525699258905, "grad_norm": 0.3121120035648346, "learning_rate": 9.15375675771768e-09, "loss": 0.2943, "step": 8219 }, { "epoch": 2.9478842935692087, "grad_norm": 0.29867973923683167, "learning_rate": 9.027971296785565e-09, "loss": 0.3054, "step": 8220 }, { "epoch": 2.948242887879512, "grad_norm": 0.3064243793487549, "learning_rate": 8.903055278709494e-09, "loss": 0.3046, "step": 8221 }, { "epoch": 2.948601482189816, "grad_norm": 0.3008926808834076, "learning_rate": 8.779008725248729e-09, "loss": 0.2885, "step": 8222 }, { "epoch": 2.9489600765001196, "grad_norm": 0.27731698751449585, "learning_rate": 8.655831658013757e-09, "loss": 0.2697, "step": 8223 }, { "epoch": 2.949318670810423, "grad_norm": 0.28034549951553345, "learning_rate": 8.53352409846131e-09, "loss": 0.2925, "step": 8224 }, { "epoch": 2.9496772651207266, "grad_norm": 0.3195663392543793, "learning_rate": 8.412086067898784e-09, "loss": 0.337, "step": 8225 }, { "epoch": 2.9500358594310305, "grad_norm": 0.31845414638519287, "learning_rate": 8.291517587480369e-09, "loss": 0.2929, "step": 8226 }, { "epoch": 2.950394453741334, "grad_norm": 0.3050687611103058, "learning_rate": 8.17181867820982e-09, "loss": 0.3002, "step": 8227 }, { "epoch": 2.9507530480516375, "grad_norm": 0.2943020164966583, "learning_rate": 8.05298936093879e-09, "loss": 0.2721, "step": 8228 }, { "epoch": 2.9511116423619415, "grad_norm": 0.33280664682388306, "learning_rate": 7.935029656367388e-09, "loss": 0.3569, "step": 8229 }, { "epoch": 2.951470236672245, "grad_norm": 0.2776827812194824, "learning_rate": 7.817939585045842e-09, "loss": 0.2625, "step": 8230 }, { "epoch": 2.9518288309825484, "grad_norm": 0.2765820026397705, "learning_rate": 7.701719167370059e-09, "loss": 0.2561, "step": 8231 }, { "epoch": 2.952187425292852, "grad_norm": 0.29379725456237793, "learning_rate": 7.586368423587175e-09, "loss": 0.2791, "step": 8232 }, { "epoch": 2.9525460196031554, "grad_norm": 0.32061874866485596, "learning_rate": 7.471887373791121e-09, "loss": 0.3171, "step": 8233 }, { "epoch": 2.9529046139134594, "grad_norm": 0.3127382695674896, "learning_rate": 7.358276037924827e-09, "loss": 0.3105, "step": 8234 }, { "epoch": 2.953263208223763, "grad_norm": 0.28511127829551697, "learning_rate": 7.245534435780244e-09, "loss": 0.2631, "step": 8235 }, { "epoch": 2.9536218025340664, "grad_norm": 0.2931682765483856, "learning_rate": 7.133662586996104e-09, "loss": 0.2955, "step": 8236 }, { "epoch": 2.9539803968443703, "grad_norm": 0.31713464856147766, "learning_rate": 7.022660511062374e-09, "loss": 0.316, "step": 8237 }, { "epoch": 2.954338991154674, "grad_norm": 0.29539990425109863, "learning_rate": 6.912528227314696e-09, "loss": 0.302, "step": 8238 }, { "epoch": 2.9546975854649773, "grad_norm": 0.2862451374530792, "learning_rate": 6.8032657549393925e-09, "loss": 0.285, "step": 8239 }, { "epoch": 2.955056179775281, "grad_norm": 0.3040280342102051, "learning_rate": 6.694873112970124e-09, "loss": 0.3018, "step": 8240 }, { "epoch": 2.9554147740855843, "grad_norm": 0.29720190167427063, "learning_rate": 6.587350320288455e-09, "loss": 0.281, "step": 8241 }, { "epoch": 2.9557733683958882, "grad_norm": 0.3262995779514313, "learning_rate": 6.480697395626068e-09, "loss": 0.3245, "step": 8242 }, { "epoch": 2.9561319627061917, "grad_norm": 0.2821657359600067, "learning_rate": 6.374914357561435e-09, "loss": 0.2448, "step": 8243 }, { "epoch": 2.956490557016495, "grad_norm": 0.3021596670150757, "learning_rate": 6.270001224523148e-09, "loss": 0.3268, "step": 8244 }, { "epoch": 2.956849151326799, "grad_norm": 0.27623382210731506, "learning_rate": 6.165958014787143e-09, "loss": 0.2824, "step": 8245 }, { "epoch": 2.9572077456371026, "grad_norm": 0.31620335578918457, "learning_rate": 6.062784746477812e-09, "loss": 0.2859, "step": 8246 }, { "epoch": 2.957566339947406, "grad_norm": 0.31084874272346497, "learning_rate": 5.9604814375685546e-09, "loss": 0.2907, "step": 8247 }, { "epoch": 2.9579249342577096, "grad_norm": 0.29420244693756104, "learning_rate": 5.859048105880671e-09, "loss": 0.3001, "step": 8248 }, { "epoch": 2.958283528568013, "grad_norm": 0.2931903600692749, "learning_rate": 5.758484769083916e-09, "loss": 0.2954, "step": 8249 }, { "epoch": 2.958642122878317, "grad_norm": 0.30317172408103943, "learning_rate": 5.658791444697609e-09, "loss": 0.3027, "step": 8250 }, { "epoch": 2.9590007171886206, "grad_norm": 0.3116511404514313, "learning_rate": 5.559968150087858e-09, "loss": 0.271, "step": 8251 }, { "epoch": 2.959359311498924, "grad_norm": 0.29960018396377563, "learning_rate": 5.462014902470336e-09, "loss": 0.2625, "step": 8252 }, { "epoch": 2.959717905809228, "grad_norm": 0.3043360710144043, "learning_rate": 5.364931718908617e-09, "loss": 0.2911, "step": 8253 }, { "epoch": 2.9600765001195315, "grad_norm": 0.3001806139945984, "learning_rate": 5.268718616314728e-09, "loss": 0.2798, "step": 8254 }, { "epoch": 2.960435094429835, "grad_norm": 0.29945483803749084, "learning_rate": 5.17337561145026e-09, "loss": 0.2821, "step": 8255 }, { "epoch": 2.9607936887401385, "grad_norm": 0.3014342188835144, "learning_rate": 5.0789027209230405e-09, "loss": 0.2998, "step": 8256 }, { "epoch": 2.9611522830504424, "grad_norm": 0.3169805407524109, "learning_rate": 4.985299961191014e-09, "loss": 0.3254, "step": 8257 }, { "epoch": 2.961510877360746, "grad_norm": 0.2801032066345215, "learning_rate": 4.892567348560029e-09, "loss": 0.2521, "step": 8258 }, { "epoch": 2.9618694716710494, "grad_norm": 0.3138664662837982, "learning_rate": 4.800704899184938e-09, "loss": 0.3215, "step": 8259 }, { "epoch": 2.9622280659813534, "grad_norm": 0.3012702465057373, "learning_rate": 4.709712629067942e-09, "loss": 0.3171, "step": 8260 }, { "epoch": 2.962586660291657, "grad_norm": 0.2964463531970978, "learning_rate": 4.6195905540596944e-09, "loss": 0.2628, "step": 8261 }, { "epoch": 2.9629452546019603, "grad_norm": 0.3069271147251129, "learning_rate": 4.530338689860969e-09, "loss": 0.3068, "step": 8262 }, { "epoch": 2.963303848912264, "grad_norm": 0.2938917577266693, "learning_rate": 4.4419570520187745e-09, "loss": 0.2753, "step": 8263 }, { "epoch": 2.9636624432225673, "grad_norm": 0.31206047534942627, "learning_rate": 4.354445655929684e-09, "loss": 0.2992, "step": 8264 }, { "epoch": 2.9640210375328713, "grad_norm": 0.3073708415031433, "learning_rate": 4.26780451683817e-09, "loss": 0.2915, "step": 8265 }, { "epoch": 2.9643796318431748, "grad_norm": 0.2956296503543854, "learning_rate": 4.182033649838269e-09, "loss": 0.2907, "step": 8266 }, { "epoch": 2.9647382261534783, "grad_norm": 0.29531893134117126, "learning_rate": 4.097133069870807e-09, "loss": 0.2794, "step": 8267 }, { "epoch": 2.965096820463782, "grad_norm": 0.29696670174598694, "learning_rate": 4.01310279172562e-09, "loss": 0.2838, "step": 8268 }, { "epoch": 2.9654554147740857, "grad_norm": 0.3342685103416443, "learning_rate": 3.929942830041556e-09, "loss": 0.3146, "step": 8269 }, { "epoch": 2.965814009084389, "grad_norm": 0.3096832036972046, "learning_rate": 3.847653199304801e-09, "loss": 0.2854, "step": 8270 }, { "epoch": 2.9661726033946927, "grad_norm": 0.2915898859500885, "learning_rate": 3.766233913851114e-09, "loss": 0.3008, "step": 8271 }, { "epoch": 2.966531197704996, "grad_norm": 0.29613742232322693, "learning_rate": 3.6856849878635916e-09, "loss": 0.2853, "step": 8272 }, { "epoch": 2.9668897920153, "grad_norm": 0.2945435047149658, "learning_rate": 3.606006435374343e-09, "loss": 0.297, "step": 8273 }, { "epoch": 2.9672483863256036, "grad_norm": 0.29558244347572327, "learning_rate": 3.5271982702633767e-09, "loss": 0.2836, "step": 8274 }, { "epoch": 2.967606980635907, "grad_norm": 0.30023783445358276, "learning_rate": 3.4492605062591556e-09, "loss": 0.3204, "step": 8275 }, { "epoch": 2.967965574946211, "grad_norm": 0.2774084806442261, "learning_rate": 3.372193156939152e-09, "loss": 0.248, "step": 8276 }, { "epoch": 2.9683241692565145, "grad_norm": 0.30888307094573975, "learning_rate": 3.295996235728183e-09, "loss": 0.3151, "step": 8277 }, { "epoch": 2.968682763566818, "grad_norm": 0.29290053248405457, "learning_rate": 3.2206697559006293e-09, "loss": 0.2517, "step": 8278 }, { "epoch": 2.9690413578771215, "grad_norm": 0.3172091543674469, "learning_rate": 3.146213730578218e-09, "loss": 0.3136, "step": 8279 }, { "epoch": 2.969399952187425, "grad_norm": 0.32705339789390564, "learning_rate": 3.072628172731129e-09, "loss": 0.302, "step": 8280 }, { "epoch": 2.969758546497729, "grad_norm": 0.29371681809425354, "learning_rate": 2.999913095179108e-09, "loss": 0.2528, "step": 8281 }, { "epoch": 2.9701171408080325, "grad_norm": 0.30737751722335815, "learning_rate": 2.9280685105881333e-09, "loss": 0.3233, "step": 8282 }, { "epoch": 2.970475735118336, "grad_norm": 0.2879246771335602, "learning_rate": 2.8570944314748607e-09, "loss": 0.281, "step": 8283 }, { "epoch": 2.97083432942864, "grad_norm": 0.29934707283973694, "learning_rate": 2.7869908702027325e-09, "loss": 0.2846, "step": 8284 }, { "epoch": 2.9711929237389434, "grad_norm": 0.31302666664123535, "learning_rate": 2.717757838984203e-09, "loss": 0.3216, "step": 8285 }, { "epoch": 2.971551518049247, "grad_norm": 0.2764107882976532, "learning_rate": 2.6493953498790692e-09, "loss": 0.2886, "step": 8286 }, { "epoch": 2.9719101123595504, "grad_norm": 0.28676608204841614, "learning_rate": 2.581903414797249e-09, "loss": 0.2636, "step": 8287 }, { "epoch": 2.9722687066698543, "grad_norm": 0.3154106140136719, "learning_rate": 2.5152820454954484e-09, "loss": 0.2958, "step": 8288 }, { "epoch": 2.972627300980158, "grad_norm": 0.28218498826026917, "learning_rate": 2.44953125357994e-09, "loss": 0.3045, "step": 8289 }, { "epoch": 2.9729858952904613, "grad_norm": 0.27544882893562317, "learning_rate": 2.3846510505043386e-09, "loss": 0.2655, "step": 8290 }, { "epoch": 2.9733444896007653, "grad_norm": 0.2954639792442322, "learning_rate": 2.320641447570715e-09, "loss": 0.2882, "step": 8291 }, { "epoch": 2.9737030839110687, "grad_norm": 0.3004128634929657, "learning_rate": 2.2575024559301497e-09, "loss": 0.2974, "step": 8292 }, { "epoch": 2.9740616782213722, "grad_norm": 0.29784080386161804, "learning_rate": 2.1952340865816214e-09, "loss": 0.2959, "step": 8293 }, { "epoch": 2.9744202725316757, "grad_norm": 0.32948005199432373, "learning_rate": 2.1338363503725644e-09, "loss": 0.3375, "step": 8294 }, { "epoch": 2.9747788668419792, "grad_norm": 0.29218122363090515, "learning_rate": 2.073309257997758e-09, "loss": 0.2425, "step": 8295 }, { "epoch": 2.975137461152283, "grad_norm": 0.31666284799575806, "learning_rate": 2.013652820002654e-09, "loss": 0.3065, "step": 8296 }, { "epoch": 2.9754960554625867, "grad_norm": 0.28872743248939514, "learning_rate": 1.954867046778386e-09, "loss": 0.2727, "step": 8297 }, { "epoch": 2.97585464977289, "grad_norm": 0.3083575367927551, "learning_rate": 1.8969519485662056e-09, "loss": 0.3012, "step": 8298 }, { "epoch": 2.976213244083194, "grad_norm": 0.2984709143638611, "learning_rate": 1.8399075354552653e-09, "loss": 0.2614, "step": 8299 }, { "epoch": 2.9765718383934976, "grad_norm": 0.2965335547924042, "learning_rate": 1.7837338173820607e-09, "loss": 0.2764, "step": 8300 }, { "epoch": 2.976930432703801, "grad_norm": 0.3174035847187042, "learning_rate": 1.7284308041332075e-09, "loss": 0.305, "step": 8301 }, { "epoch": 2.9772890270141046, "grad_norm": 0.3114328980445862, "learning_rate": 1.6739985053421114e-09, "loss": 0.2973, "step": 8302 }, { "epoch": 2.977647621324408, "grad_norm": 0.2893660068511963, "learning_rate": 1.620436930491187e-09, "loss": 0.2786, "step": 8303 }, { "epoch": 2.978006215634712, "grad_norm": 0.29695841670036316, "learning_rate": 1.5677460889107488e-09, "loss": 0.2902, "step": 8304 }, { "epoch": 2.9783648099450155, "grad_norm": 0.30551502108573914, "learning_rate": 1.5159259897801204e-09, "loss": 0.3, "step": 8305 }, { "epoch": 2.978723404255319, "grad_norm": 0.29218360781669617, "learning_rate": 1.4649766421265254e-09, "loss": 0.2807, "step": 8306 }, { "epoch": 2.979081998565623, "grad_norm": 0.3064703941345215, "learning_rate": 1.4148980548250867e-09, "loss": 0.3142, "step": 8307 }, { "epoch": 2.9794405928759264, "grad_norm": 0.30232682824134827, "learning_rate": 1.3656902365999369e-09, "loss": 0.3157, "step": 8308 }, { "epoch": 2.97979918718623, "grad_norm": 0.30299511551856995, "learning_rate": 1.317353196023108e-09, "loss": 0.2879, "step": 8309 }, { "epoch": 2.9801577814965334, "grad_norm": 0.2955334484577179, "learning_rate": 1.269886941515086e-09, "loss": 0.3071, "step": 8310 }, { "epoch": 2.980516375806837, "grad_norm": 0.2829092741012573, "learning_rate": 1.2232914813448127e-09, "loss": 0.2711, "step": 8311 }, { "epoch": 2.980874970117141, "grad_norm": 0.3074456453323364, "learning_rate": 1.1775668236291282e-09, "loss": 0.2936, "step": 8312 }, { "epoch": 2.9812335644274444, "grad_norm": 0.3108493983745575, "learning_rate": 1.132712976333883e-09, "loss": 0.3337, "step": 8313 }, { "epoch": 2.9815921587377483, "grad_norm": 0.2971186339855194, "learning_rate": 1.0887299472722712e-09, "loss": 0.2645, "step": 8314 }, { "epoch": 2.981950753048052, "grad_norm": 0.3166179955005646, "learning_rate": 1.0456177441064974e-09, "loss": 0.2849, "step": 8315 }, { "epoch": 2.9823093473583553, "grad_norm": 0.30711138248443604, "learning_rate": 1.00337637434611e-09, "loss": 0.3196, "step": 8316 }, { "epoch": 2.982667941668659, "grad_norm": 0.2922019958496094, "learning_rate": 9.620058453507774e-10, "loss": 0.2724, "step": 8317 }, { "epoch": 2.9830265359789623, "grad_norm": 0.3139442801475525, "learning_rate": 9.215061643269574e-10, "loss": 0.3415, "step": 8318 }, { "epoch": 2.983385130289266, "grad_norm": 0.2889906167984009, "learning_rate": 8.81877338329562e-10, "loss": 0.2732, "step": 8319 }, { "epoch": 2.9837437245995697, "grad_norm": 0.30698537826538086, "learning_rate": 8.431193742625133e-10, "loss": 0.2706, "step": 8320 }, { "epoch": 2.984102318909873, "grad_norm": 0.30871424078941345, "learning_rate": 8.052322788776324e-10, "loss": 0.3031, "step": 8321 }, { "epoch": 2.984460913220177, "grad_norm": 0.2816862463951111, "learning_rate": 7.68216058774085e-10, "loss": 0.3042, "step": 8322 }, { "epoch": 2.9848195075304806, "grad_norm": 0.30661335587501526, "learning_rate": 7.320707204011568e-10, "loss": 0.2873, "step": 8323 }, { "epoch": 2.985178101840784, "grad_norm": 0.2908784747123718, "learning_rate": 6.967962700549225e-10, "loss": 0.2648, "step": 8324 }, { "epoch": 2.9855366961510876, "grad_norm": 0.30825263261795044, "learning_rate": 6.623927138804665e-10, "loss": 0.2837, "step": 8325 }, { "epoch": 2.985895290461391, "grad_norm": 0.32348018884658813, "learning_rate": 6.288600578713277e-10, "loss": 0.3093, "step": 8326 }, { "epoch": 2.986253884771695, "grad_norm": 0.2830203175544739, "learning_rate": 5.961983078683898e-10, "loss": 0.2998, "step": 8327 }, { "epoch": 2.9866124790819986, "grad_norm": 0.2675917148590088, "learning_rate": 5.644074695615454e-10, "loss": 0.2564, "step": 8328 }, { "epoch": 2.986971073392302, "grad_norm": 0.3046930730342865, "learning_rate": 5.334875484896973e-10, "loss": 0.3384, "step": 8329 }, { "epoch": 2.987329667702606, "grad_norm": 0.3084012269973755, "learning_rate": 5.034385500379824e-10, "loss": 0.3141, "step": 8330 }, { "epoch": 2.9876882620129095, "grad_norm": 0.31704849004745483, "learning_rate": 4.742604794416572e-10, "loss": 0.2856, "step": 8331 }, { "epoch": 2.988046856323213, "grad_norm": 0.3063993453979492, "learning_rate": 4.4595334178387794e-10, "loss": 0.2873, "step": 8332 }, { "epoch": 2.9884054506335165, "grad_norm": 0.2996687591075897, "learning_rate": 4.1851714199514505e-10, "loss": 0.2954, "step": 8333 }, { "epoch": 2.98876404494382, "grad_norm": 0.29109472036361694, "learning_rate": 3.919518848555237e-10, "loss": 0.2873, "step": 8334 }, { "epoch": 2.989122639254124, "grad_norm": 0.3076004087924957, "learning_rate": 3.6625757499297867e-10, "loss": 0.3079, "step": 8335 }, { "epoch": 2.9894812335644274, "grad_norm": 0.29876503348350525, "learning_rate": 3.4143421688226374e-10, "loss": 0.2783, "step": 8336 }, { "epoch": 2.989839827874731, "grad_norm": 0.2919938564300537, "learning_rate": 3.174818148488079e-10, "loss": 0.2809, "step": 8337 }, { "epoch": 2.990198422185035, "grad_norm": 0.3087342083454132, "learning_rate": 2.944003730653844e-10, "loss": 0.283, "step": 8338 }, { "epoch": 2.9905570164953383, "grad_norm": 0.30660226941108704, "learning_rate": 2.721898955521107e-10, "loss": 0.3231, "step": 8339 }, { "epoch": 2.990915610805642, "grad_norm": 0.3071148991584778, "learning_rate": 2.508503861781142e-10, "loss": 0.2687, "step": 8340 }, { "epoch": 2.9912742051159453, "grad_norm": 0.3118583559989929, "learning_rate": 2.303818486615317e-10, "loss": 0.3073, "step": 8341 }, { "epoch": 2.9916327994262493, "grad_norm": 0.29318365454673767, "learning_rate": 2.1078428656728933e-10, "loss": 0.2605, "step": 8342 }, { "epoch": 2.9919913937365528, "grad_norm": 0.29925140738487244, "learning_rate": 1.9205770330987804e-10, "loss": 0.296, "step": 8343 }, { "epoch": 2.9923499880468563, "grad_norm": 0.30486685037612915, "learning_rate": 1.742021021511331e-10, "loss": 0.2882, "step": 8344 }, { "epoch": 2.99270858235716, "grad_norm": 0.2980802059173584, "learning_rate": 1.5721748620134425e-10, "loss": 0.3131, "step": 8345 }, { "epoch": 2.9930671766674637, "grad_norm": 0.3183971047401428, "learning_rate": 1.411038584203661e-10, "loss": 0.3061, "step": 8346 }, { "epoch": 2.993425770977767, "grad_norm": 0.28880566358566284, "learning_rate": 1.2586122161428737e-10, "loss": 0.278, "step": 8347 }, { "epoch": 2.9937843652880707, "grad_norm": 0.3269912600517273, "learning_rate": 1.1148957843876151e-10, "loss": 0.3493, "step": 8348 }, { "epoch": 2.994142959598374, "grad_norm": 0.29689347743988037, "learning_rate": 9.798893139678634e-11, "loss": 0.2568, "step": 8349 }, { "epoch": 2.994501553908678, "grad_norm": 0.32202064990997314, "learning_rate": 8.535928284147954e-11, "loss": 0.3119, "step": 8350 }, { "epoch": 2.9948601482189816, "grad_norm": 0.2970808744430542, "learning_rate": 7.360063497163783e-11, "loss": 0.2998, "step": 8351 }, { "epoch": 2.995218742529285, "grad_norm": 0.28962409496307373, "learning_rate": 6.27129898367329e-11, "loss": 0.2567, "step": 8352 }, { "epoch": 2.995577336839589, "grad_norm": 0.3108940124511719, "learning_rate": 5.2696349332470586e-11, "loss": 0.3379, "step": 8353 }, { "epoch": 2.9959359311498925, "grad_norm": 0.29201194643974304, "learning_rate": 4.3550715204121466e-11, "loss": 0.2925, "step": 8354 }, { "epoch": 2.996294525460196, "grad_norm": 0.30441558361053467, "learning_rate": 3.527608904541069e-11, "loss": 0.2642, "step": 8355 }, { "epoch": 2.9966531197704995, "grad_norm": 0.28772804141044617, "learning_rate": 2.7872472296852638e-11, "loss": 0.2681, "step": 8356 }, { "epoch": 2.997011714080803, "grad_norm": 0.31431007385253906, "learning_rate": 2.1339866249081577e-11, "loss": 0.3172, "step": 8357 }, { "epoch": 2.997370308391107, "grad_norm": 0.30009499192237854, "learning_rate": 1.5678272040076105e-11, "loss": 0.2775, "step": 8358 }, { "epoch": 2.9977289027014105, "grad_norm": 0.2851913273334503, "learning_rate": 1.0887690655159155e-11, "loss": 0.2653, "step": 8359 }, { "epoch": 2.998087497011714, "grad_norm": 0.3204035758972168, "learning_rate": 6.968122929773558e-12, "loss": 0.3168, "step": 8360 }, { "epoch": 2.998446091322018, "grad_norm": 0.28635135293006897, "learning_rate": 3.919569546151359e-12, "loss": 0.2688, "step": 8361 }, { "epoch": 2.9988046856323214, "grad_norm": 0.3028222322463989, "learning_rate": 1.7420310360893866e-12, "loss": 0.2938, "step": 8362 }, { "epoch": 2.999163279942625, "grad_norm": 0.3161454498767853, "learning_rate": 4.355077781736938e-13, "loss": 0.2981, "step": 8363 }, { "epoch": 2.9995218742529284, "grad_norm": 0.3232811391353607, "learning_rate": 0.0, "loss": 0.3002, "step": 8364 }, { "epoch": 2.9995218742529284, "step": 8364, "total_flos": 6692984695308288.0, "train_loss": 0.33367715629876826, "train_runtime": 137071.0792, "train_samples_per_second": 5.859, "train_steps_per_second": 0.061 } ], "logging_steps": 1.0, "max_steps": 8364, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6692984695308288.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }