{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9790, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010214504596527068, "grad_norm": 24.5991329176078, "learning_rate": 6.802721088435375e-08, "loss": 2.2848, "step": 1 }, { "epoch": 0.00020429009193054137, "grad_norm": 26.689937379942236, "learning_rate": 1.360544217687075e-07, "loss": 2.2822, "step": 2 }, { "epoch": 0.00030643513789581204, "grad_norm": 29.618260135945295, "learning_rate": 2.0408163265306121e-07, "loss": 2.4747, "step": 3 }, { "epoch": 0.00040858018386108274, "grad_norm": 30.76652113148423, "learning_rate": 2.72108843537415e-07, "loss": 2.565, "step": 4 }, { "epoch": 0.0005107252298263534, "grad_norm": 34.77649002482841, "learning_rate": 3.401360544217688e-07, "loss": 2.7352, "step": 5 }, { "epoch": 0.0006128702757916241, "grad_norm": 26.199640845074015, "learning_rate": 4.0816326530612243e-07, "loss": 2.4032, "step": 6 }, { "epoch": 0.0007150153217568948, "grad_norm": 25.4498992514559, "learning_rate": 4.7619047619047623e-07, "loss": 2.2179, "step": 7 }, { "epoch": 0.0008171603677221655, "grad_norm": 26.10129625751608, "learning_rate": 5.4421768707483e-07, "loss": 2.23, "step": 8 }, { "epoch": 0.0009193054136874361, "grad_norm": 27.916045826055516, "learning_rate": 6.122448979591837e-07, "loss": 2.4002, "step": 9 }, { "epoch": 0.0010214504596527069, "grad_norm": 22.706817758677374, "learning_rate": 6.802721088435376e-07, "loss": 2.2149, "step": 10 }, { "epoch": 0.0011235955056179776, "grad_norm": 25.25466775253154, "learning_rate": 7.482993197278913e-07, "loss": 2.4988, "step": 11 }, { "epoch": 0.0012257405515832482, "grad_norm": 16.502357021610447, "learning_rate": 8.163265306122449e-07, "loss": 1.9654, "step": 12 }, { "epoch": 0.001327885597548519, "grad_norm": 21.93641937168555, "learning_rate": 8.843537414965988e-07, "loss": 2.3907, "step": 13 }, { "epoch": 0.0014300306435137897, "grad_norm": 18.19832506133001, "learning_rate": 9.523809523809525e-07, "loss": 2.06, "step": 14 }, { "epoch": 0.0015321756894790602, "grad_norm": 15.799339815975372, "learning_rate": 1.0204081632653063e-06, "loss": 1.9616, "step": 15 }, { "epoch": 0.001634320735444331, "grad_norm": 19.08729217369872, "learning_rate": 1.08843537414966e-06, "loss": 2.02, "step": 16 }, { "epoch": 0.0017364657814096017, "grad_norm": 17.409340578425272, "learning_rate": 1.1564625850340136e-06, "loss": 1.8118, "step": 17 }, { "epoch": 0.0018386108273748722, "grad_norm": 22.852017868556352, "learning_rate": 1.2244897959183673e-06, "loss": 2.1514, "step": 18 }, { "epoch": 0.001940755873340143, "grad_norm": 21.746796575398797, "learning_rate": 1.2925170068027212e-06, "loss": 2.1073, "step": 19 }, { "epoch": 0.0020429009193054137, "grad_norm": 21.970393666076028, "learning_rate": 1.3605442176870751e-06, "loss": 2.0132, "step": 20 }, { "epoch": 0.0021450459652706845, "grad_norm": 18.47075930471974, "learning_rate": 1.4285714285714286e-06, "loss": 1.9016, "step": 21 }, { "epoch": 0.0022471910112359553, "grad_norm": 19.13191802744562, "learning_rate": 1.4965986394557825e-06, "loss": 1.8869, "step": 22 }, { "epoch": 0.0023493360572012256, "grad_norm": 20.980926233635074, "learning_rate": 1.5646258503401362e-06, "loss": 1.8411, "step": 23 }, { "epoch": 0.0024514811031664963, "grad_norm": 14.495264246481026, "learning_rate": 1.6326530612244897e-06, "loss": 1.5245, "step": 24 }, { "epoch": 0.002553626149131767, "grad_norm": 13.249080816156338, "learning_rate": 1.7006802721088436e-06, "loss": 1.5568, "step": 25 }, { "epoch": 0.002655771195097038, "grad_norm": 11.986619676400249, "learning_rate": 1.7687074829931975e-06, "loss": 1.4914, "step": 26 }, { "epoch": 0.0027579162410623086, "grad_norm": 15.893350275113658, "learning_rate": 1.8367346938775512e-06, "loss": 1.3491, "step": 27 }, { "epoch": 0.0028600612870275793, "grad_norm": 12.389233496652443, "learning_rate": 1.904761904761905e-06, "loss": 1.1885, "step": 28 }, { "epoch": 0.0029622063329928497, "grad_norm": 11.068388300477746, "learning_rate": 1.9727891156462586e-06, "loss": 1.2589, "step": 29 }, { "epoch": 0.0030643513789581204, "grad_norm": 15.458274294530403, "learning_rate": 2.0408163265306125e-06, "loss": 1.098, "step": 30 }, { "epoch": 0.003166496424923391, "grad_norm": 9.893105406446107, "learning_rate": 2.1088435374149664e-06, "loss": 1.1345, "step": 31 }, { "epoch": 0.003268641470888662, "grad_norm": 10.238031953467008, "learning_rate": 2.17687074829932e-06, "loss": 0.9939, "step": 32 }, { "epoch": 0.0033707865168539327, "grad_norm": 7.177534050314678, "learning_rate": 2.244897959183674e-06, "loss": 0.9789, "step": 33 }, { "epoch": 0.0034729315628192034, "grad_norm": 3.6925445053829624, "learning_rate": 2.3129251700680273e-06, "loss": 0.8936, "step": 34 }, { "epoch": 0.003575076608784474, "grad_norm": 2.1311857004428254, "learning_rate": 2.380952380952381e-06, "loss": 0.8226, "step": 35 }, { "epoch": 0.0036772216547497445, "grad_norm": 2.114237694447485, "learning_rate": 2.4489795918367347e-06, "loss": 0.8576, "step": 36 }, { "epoch": 0.0037793667007150152, "grad_norm": 1.7798161938458779, "learning_rate": 2.5170068027210886e-06, "loss": 0.7929, "step": 37 }, { "epoch": 0.003881511746680286, "grad_norm": 2.1284386403464612, "learning_rate": 2.5850340136054425e-06, "loss": 0.8067, "step": 38 }, { "epoch": 0.003983656792645556, "grad_norm": 2.051715887744448, "learning_rate": 2.6530612244897964e-06, "loss": 0.8059, "step": 39 }, { "epoch": 0.0040858018386108275, "grad_norm": 1.9090586734108757, "learning_rate": 2.7210884353741503e-06, "loss": 0.781, "step": 40 }, { "epoch": 0.004187946884576098, "grad_norm": 1.930879616738226, "learning_rate": 2.7891156462585034e-06, "loss": 0.8795, "step": 41 }, { "epoch": 0.004290091930541369, "grad_norm": 1.7311599953935373, "learning_rate": 2.8571428571428573e-06, "loss": 0.783, "step": 42 }, { "epoch": 0.004392236976506639, "grad_norm": 1.7921573751754891, "learning_rate": 2.925170068027211e-06, "loss": 0.8301, "step": 43 }, { "epoch": 0.0044943820224719105, "grad_norm": 1.8516372555657379, "learning_rate": 2.993197278911565e-06, "loss": 0.8177, "step": 44 }, { "epoch": 0.004596527068437181, "grad_norm": 1.8180026983940232, "learning_rate": 3.0612244897959185e-06, "loss": 0.8082, "step": 45 }, { "epoch": 0.004698672114402451, "grad_norm": 1.787553864501069, "learning_rate": 3.1292517006802725e-06, "loss": 0.8534, "step": 46 }, { "epoch": 0.004800817160367722, "grad_norm": 1.7674821523573645, "learning_rate": 3.1972789115646264e-06, "loss": 0.7113, "step": 47 }, { "epoch": 0.004902962206332993, "grad_norm": 1.7673234675114342, "learning_rate": 3.2653061224489794e-06, "loss": 0.8221, "step": 48 }, { "epoch": 0.005005107252298264, "grad_norm": 1.853059425454463, "learning_rate": 3.3333333333333333e-06, "loss": 0.7571, "step": 49 }, { "epoch": 0.005107252298263534, "grad_norm": 1.8825858830560922, "learning_rate": 3.4013605442176872e-06, "loss": 0.7544, "step": 50 }, { "epoch": 0.0052093973442288045, "grad_norm": 1.5495490747161171, "learning_rate": 3.469387755102041e-06, "loss": 0.7932, "step": 51 }, { "epoch": 0.005311542390194076, "grad_norm": 1.6914705556391993, "learning_rate": 3.537414965986395e-06, "loss": 0.769, "step": 52 }, { "epoch": 0.005413687436159346, "grad_norm": 1.5809579323730074, "learning_rate": 3.6054421768707485e-06, "loss": 0.7488, "step": 53 }, { "epoch": 0.005515832482124617, "grad_norm": 1.6351397266977088, "learning_rate": 3.6734693877551024e-06, "loss": 0.7445, "step": 54 }, { "epoch": 0.0056179775280898875, "grad_norm": 1.7543673403299578, "learning_rate": 3.7414965986394563e-06, "loss": 0.8146, "step": 55 }, { "epoch": 0.005720122574055159, "grad_norm": 1.6393310890394845, "learning_rate": 3.80952380952381e-06, "loss": 0.7224, "step": 56 }, { "epoch": 0.005822267620020429, "grad_norm": 1.6193877525173186, "learning_rate": 3.877551020408164e-06, "loss": 0.8524, "step": 57 }, { "epoch": 0.005924412665985699, "grad_norm": 1.6556719265796729, "learning_rate": 3.945578231292517e-06, "loss": 0.7241, "step": 58 }, { "epoch": 0.0060265577119509705, "grad_norm": 1.7118024761672823, "learning_rate": 4.013605442176871e-06, "loss": 0.8001, "step": 59 }, { "epoch": 0.006128702757916241, "grad_norm": 1.7056909426326199, "learning_rate": 4.081632653061225e-06, "loss": 0.8776, "step": 60 }, { "epoch": 0.006230847803881512, "grad_norm": 1.7769958621411681, "learning_rate": 4.1496598639455785e-06, "loss": 0.7095, "step": 61 }, { "epoch": 0.006332992849846782, "grad_norm": 1.6667081033813227, "learning_rate": 4.217687074829933e-06, "loss": 0.825, "step": 62 }, { "epoch": 0.0064351378958120535, "grad_norm": 1.7396099308185704, "learning_rate": 4.2857142857142855e-06, "loss": 0.8232, "step": 63 }, { "epoch": 0.006537282941777324, "grad_norm": 1.5707682172154676, "learning_rate": 4.35374149659864e-06, "loss": 0.6576, "step": 64 }, { "epoch": 0.006639427987742594, "grad_norm": 1.6418845282111552, "learning_rate": 4.421768707482993e-06, "loss": 0.7043, "step": 65 }, { "epoch": 0.006741573033707865, "grad_norm": 1.7075199999313757, "learning_rate": 4.489795918367348e-06, "loss": 0.7945, "step": 66 }, { "epoch": 0.006843718079673136, "grad_norm": 1.8398294781739963, "learning_rate": 4.557823129251701e-06, "loss": 0.787, "step": 67 }, { "epoch": 0.006945863125638407, "grad_norm": 1.5084581476623506, "learning_rate": 4.6258503401360546e-06, "loss": 0.7204, "step": 68 }, { "epoch": 0.007048008171603677, "grad_norm": 1.7077426782576781, "learning_rate": 4.693877551020409e-06, "loss": 0.7126, "step": 69 }, { "epoch": 0.007150153217568948, "grad_norm": 1.7319515911963492, "learning_rate": 4.761904761904762e-06, "loss": 0.774, "step": 70 }, { "epoch": 0.007252298263534219, "grad_norm": 1.6110605030554606, "learning_rate": 4.829931972789116e-06, "loss": 0.6792, "step": 71 }, { "epoch": 0.007354443309499489, "grad_norm": 1.6872445859245582, "learning_rate": 4.897959183673469e-06, "loss": 0.6961, "step": 72 }, { "epoch": 0.00745658835546476, "grad_norm": 1.8419337432316705, "learning_rate": 4.965986394557824e-06, "loss": 0.6634, "step": 73 }, { "epoch": 0.0075587334014300305, "grad_norm": 1.6798481460683667, "learning_rate": 5.034013605442177e-06, "loss": 0.7768, "step": 74 }, { "epoch": 0.007660878447395302, "grad_norm": 1.6100273438676207, "learning_rate": 5.1020408163265315e-06, "loss": 0.6997, "step": 75 }, { "epoch": 0.007763023493360572, "grad_norm": 1.71066206035823, "learning_rate": 5.170068027210885e-06, "loss": 0.692, "step": 76 }, { "epoch": 0.007865168539325843, "grad_norm": 1.8157128315419326, "learning_rate": 5.2380952380952384e-06, "loss": 0.7918, "step": 77 }, { "epoch": 0.007967313585291113, "grad_norm": 1.7095248307867774, "learning_rate": 5.306122448979593e-06, "loss": 0.7059, "step": 78 }, { "epoch": 0.008069458631256384, "grad_norm": 1.7370650727490233, "learning_rate": 5.374149659863946e-06, "loss": 0.6581, "step": 79 }, { "epoch": 0.008171603677221655, "grad_norm": 1.5022918246899075, "learning_rate": 5.442176870748301e-06, "loss": 0.7065, "step": 80 }, { "epoch": 0.008273748723186926, "grad_norm": 1.6772017183621166, "learning_rate": 5.510204081632653e-06, "loss": 0.6651, "step": 81 }, { "epoch": 0.008375893769152196, "grad_norm": 1.61774747933001, "learning_rate": 5.578231292517007e-06, "loss": 0.7368, "step": 82 }, { "epoch": 0.008478038815117467, "grad_norm": 1.6658071780723769, "learning_rate": 5.646258503401361e-06, "loss": 0.7258, "step": 83 }, { "epoch": 0.008580183861082738, "grad_norm": 1.669071710605808, "learning_rate": 5.7142857142857145e-06, "loss": 0.7969, "step": 84 }, { "epoch": 0.008682328907048007, "grad_norm": 1.8579457330459581, "learning_rate": 5.782312925170068e-06, "loss": 0.6907, "step": 85 }, { "epoch": 0.008784473953013279, "grad_norm": 1.7176272414291363, "learning_rate": 5.850340136054422e-06, "loss": 0.6941, "step": 86 }, { "epoch": 0.00888661899897855, "grad_norm": 1.7660467367038595, "learning_rate": 5.918367346938776e-06, "loss": 0.7132, "step": 87 }, { "epoch": 0.008988764044943821, "grad_norm": 1.638935033482523, "learning_rate": 5.98639455782313e-06, "loss": 0.7117, "step": 88 }, { "epoch": 0.00909090909090909, "grad_norm": 1.7943910830705998, "learning_rate": 6.054421768707484e-06, "loss": 0.8004, "step": 89 }, { "epoch": 0.009193054136874362, "grad_norm": 1.858461886121563, "learning_rate": 6.122448979591837e-06, "loss": 0.7581, "step": 90 }, { "epoch": 0.009295199182839633, "grad_norm": 1.5113421439996582, "learning_rate": 6.1904761904761914e-06, "loss": 0.7104, "step": 91 }, { "epoch": 0.009397344228804902, "grad_norm": 1.7763599753453199, "learning_rate": 6.258503401360545e-06, "loss": 0.8159, "step": 92 }, { "epoch": 0.009499489274770173, "grad_norm": 1.5799451481356066, "learning_rate": 6.326530612244899e-06, "loss": 0.771, "step": 93 }, { "epoch": 0.009601634320735445, "grad_norm": 1.6975121060835323, "learning_rate": 6.394557823129253e-06, "loss": 0.7183, "step": 94 }, { "epoch": 0.009703779366700716, "grad_norm": 1.645376004221033, "learning_rate": 6.462585034013606e-06, "loss": 0.773, "step": 95 }, { "epoch": 0.009805924412665985, "grad_norm": 1.7624371076071546, "learning_rate": 6.530612244897959e-06, "loss": 0.7485, "step": 96 }, { "epoch": 0.009908069458631256, "grad_norm": 1.9495126468948434, "learning_rate": 6.598639455782313e-06, "loss": 0.7994, "step": 97 }, { "epoch": 0.010010214504596528, "grad_norm": 1.598074382630312, "learning_rate": 6.666666666666667e-06, "loss": 0.6719, "step": 98 }, { "epoch": 0.010112359550561797, "grad_norm": 1.630640534815859, "learning_rate": 6.734693877551021e-06, "loss": 0.7443, "step": 99 }, { "epoch": 0.010214504596527068, "grad_norm": 1.675352230442309, "learning_rate": 6.8027210884353745e-06, "loss": 0.7456, "step": 100 }, { "epoch": 0.01031664964249234, "grad_norm": 1.826682592735814, "learning_rate": 6.870748299319728e-06, "loss": 0.6965, "step": 101 }, { "epoch": 0.010418794688457609, "grad_norm": 1.8860094246191392, "learning_rate": 6.938775510204082e-06, "loss": 0.7326, "step": 102 }, { "epoch": 0.01052093973442288, "grad_norm": 1.730954945401936, "learning_rate": 7.006802721088436e-06, "loss": 0.7144, "step": 103 }, { "epoch": 0.010623084780388151, "grad_norm": 1.5375316324963417, "learning_rate": 7.07482993197279e-06, "loss": 0.7126, "step": 104 }, { "epoch": 0.010725229826353423, "grad_norm": 1.7738652321511554, "learning_rate": 7.1428571428571436e-06, "loss": 0.7301, "step": 105 }, { "epoch": 0.010827374872318692, "grad_norm": 1.8410963440617436, "learning_rate": 7.210884353741497e-06, "loss": 0.741, "step": 106 }, { "epoch": 0.010929519918283963, "grad_norm": 1.798370972457453, "learning_rate": 7.278911564625851e-06, "loss": 0.6855, "step": 107 }, { "epoch": 0.011031664964249234, "grad_norm": 1.6166116878132524, "learning_rate": 7.346938775510205e-06, "loss": 0.7772, "step": 108 }, { "epoch": 0.011133810010214504, "grad_norm": 1.9580684840168145, "learning_rate": 7.414965986394559e-06, "loss": 0.7244, "step": 109 }, { "epoch": 0.011235955056179775, "grad_norm": 1.7065434743561958, "learning_rate": 7.482993197278913e-06, "loss": 0.7254, "step": 110 }, { "epoch": 0.011338100102145046, "grad_norm": 1.7525889212294594, "learning_rate": 7.551020408163265e-06, "loss": 0.8227, "step": 111 }, { "epoch": 0.011440245148110317, "grad_norm": 1.9076416714464244, "learning_rate": 7.61904761904762e-06, "loss": 0.6655, "step": 112 }, { "epoch": 0.011542390194075587, "grad_norm": 1.631377216930173, "learning_rate": 7.687074829931972e-06, "loss": 0.734, "step": 113 }, { "epoch": 0.011644535240040858, "grad_norm": 1.7763009147340159, "learning_rate": 7.755102040816327e-06, "loss": 0.6994, "step": 114 }, { "epoch": 0.01174668028600613, "grad_norm": 1.746756564467626, "learning_rate": 7.823129251700681e-06, "loss": 0.7995, "step": 115 }, { "epoch": 0.011848825331971399, "grad_norm": 1.6623568361016479, "learning_rate": 7.891156462585034e-06, "loss": 0.6245, "step": 116 }, { "epoch": 0.01195097037793667, "grad_norm": 1.6868704683946245, "learning_rate": 7.959183673469388e-06, "loss": 0.7797, "step": 117 }, { "epoch": 0.012053115423901941, "grad_norm": 1.5387174275978197, "learning_rate": 8.027210884353741e-06, "loss": 0.623, "step": 118 }, { "epoch": 0.012155260469867212, "grad_norm": 1.6130834721838176, "learning_rate": 8.095238095238097e-06, "loss": 0.6125, "step": 119 }, { "epoch": 0.012257405515832482, "grad_norm": 1.591501857340208, "learning_rate": 8.16326530612245e-06, "loss": 0.8235, "step": 120 }, { "epoch": 0.012359550561797753, "grad_norm": 1.753758701962058, "learning_rate": 8.231292517006804e-06, "loss": 0.7735, "step": 121 }, { "epoch": 0.012461695607763024, "grad_norm": 1.4108382782919635, "learning_rate": 8.299319727891157e-06, "loss": 0.6539, "step": 122 }, { "epoch": 0.012563840653728293, "grad_norm": 1.7317979549539477, "learning_rate": 8.36734693877551e-06, "loss": 0.7092, "step": 123 }, { "epoch": 0.012665985699693565, "grad_norm": 1.7272447316706576, "learning_rate": 8.435374149659866e-06, "loss": 0.6365, "step": 124 }, { "epoch": 0.012768130745658836, "grad_norm": 1.8271664185653176, "learning_rate": 8.503401360544217e-06, "loss": 0.8789, "step": 125 }, { "epoch": 0.012870275791624107, "grad_norm": 1.7663221688313868, "learning_rate": 8.571428571428571e-06, "loss": 0.673, "step": 126 }, { "epoch": 0.012972420837589376, "grad_norm": 1.5353243105307623, "learning_rate": 8.639455782312926e-06, "loss": 0.6768, "step": 127 }, { "epoch": 0.013074565883554648, "grad_norm": 1.6616810202667323, "learning_rate": 8.70748299319728e-06, "loss": 0.8092, "step": 128 }, { "epoch": 0.013176710929519919, "grad_norm": 1.575094304505959, "learning_rate": 8.775510204081633e-06, "loss": 0.7702, "step": 129 }, { "epoch": 0.013278855975485188, "grad_norm": 1.782638617469749, "learning_rate": 8.843537414965987e-06, "loss": 0.7127, "step": 130 }, { "epoch": 0.01338100102145046, "grad_norm": 1.6846980944597323, "learning_rate": 8.91156462585034e-06, "loss": 0.7413, "step": 131 }, { "epoch": 0.01348314606741573, "grad_norm": 1.72903262346004, "learning_rate": 8.979591836734695e-06, "loss": 0.6658, "step": 132 }, { "epoch": 0.013585291113381002, "grad_norm": 1.8581330047604752, "learning_rate": 9.047619047619049e-06, "loss": 0.7698, "step": 133 }, { "epoch": 0.013687436159346271, "grad_norm": 1.6775962447900343, "learning_rate": 9.115646258503402e-06, "loss": 0.7782, "step": 134 }, { "epoch": 0.013789581205311542, "grad_norm": 1.782752664617114, "learning_rate": 9.183673469387756e-06, "loss": 0.8082, "step": 135 }, { "epoch": 0.013891726251276814, "grad_norm": 1.668617754573196, "learning_rate": 9.251700680272109e-06, "loss": 0.6672, "step": 136 }, { "epoch": 0.013993871297242083, "grad_norm": 1.6118861990864957, "learning_rate": 9.319727891156464e-06, "loss": 0.7963, "step": 137 }, { "epoch": 0.014096016343207354, "grad_norm": 1.6761421515526789, "learning_rate": 9.387755102040818e-06, "loss": 0.7422, "step": 138 }, { "epoch": 0.014198161389172625, "grad_norm": 1.726881522958956, "learning_rate": 9.455782312925171e-06, "loss": 0.6591, "step": 139 }, { "epoch": 0.014300306435137897, "grad_norm": 1.8151721433036694, "learning_rate": 9.523809523809525e-06, "loss": 0.81, "step": 140 }, { "epoch": 0.014402451481103166, "grad_norm": 1.7914170589563632, "learning_rate": 9.591836734693878e-06, "loss": 0.7645, "step": 141 }, { "epoch": 0.014504596527068437, "grad_norm": 1.6518787386343419, "learning_rate": 9.659863945578232e-06, "loss": 0.7456, "step": 142 }, { "epoch": 0.014606741573033709, "grad_norm": 1.659105613563791, "learning_rate": 9.727891156462585e-06, "loss": 0.832, "step": 143 }, { "epoch": 0.014708886618998978, "grad_norm": 1.6376917775876176, "learning_rate": 9.795918367346939e-06, "loss": 0.6964, "step": 144 }, { "epoch": 0.01481103166496425, "grad_norm": 1.7268140730240362, "learning_rate": 9.863945578231294e-06, "loss": 0.7475, "step": 145 }, { "epoch": 0.01491317671092952, "grad_norm": 1.6995295154953174, "learning_rate": 9.931972789115647e-06, "loss": 0.7166, "step": 146 }, { "epoch": 0.01501532175689479, "grad_norm": 1.5709403314483485, "learning_rate": 1e-05, "loss": 0.7314, "step": 147 }, { "epoch": 0.015117466802860061, "grad_norm": 1.720495364028145, "learning_rate": 1.0068027210884354e-05, "loss": 0.7551, "step": 148 }, { "epoch": 0.015219611848825332, "grad_norm": 1.819951190412068, "learning_rate": 1.0136054421768708e-05, "loss": 0.7591, "step": 149 }, { "epoch": 0.015321756894790603, "grad_norm": 1.6213023995255083, "learning_rate": 1.0204081632653063e-05, "loss": 0.6375, "step": 150 }, { "epoch": 0.015423901940755873, "grad_norm": 1.8198256408141487, "learning_rate": 1.0272108843537416e-05, "loss": 0.8037, "step": 151 }, { "epoch": 0.015526046986721144, "grad_norm": 1.717390105568949, "learning_rate": 1.034013605442177e-05, "loss": 0.7034, "step": 152 }, { "epoch": 0.015628192032686415, "grad_norm": 1.7525426165424476, "learning_rate": 1.0408163265306123e-05, "loss": 0.8328, "step": 153 }, { "epoch": 0.015730337078651686, "grad_norm": 2.018821327836352, "learning_rate": 1.0476190476190477e-05, "loss": 0.8377, "step": 154 }, { "epoch": 0.015832482124616958, "grad_norm": 1.9651480001068806, "learning_rate": 1.0544217687074832e-05, "loss": 0.7725, "step": 155 }, { "epoch": 0.015934627170582225, "grad_norm": 1.7303060951319966, "learning_rate": 1.0612244897959186e-05, "loss": 0.6728, "step": 156 }, { "epoch": 0.016036772216547496, "grad_norm": 1.832914627252907, "learning_rate": 1.0680272108843539e-05, "loss": 0.7327, "step": 157 }, { "epoch": 0.016138917262512768, "grad_norm": 1.9952743671628559, "learning_rate": 1.0748299319727893e-05, "loss": 0.7062, "step": 158 }, { "epoch": 0.01624106230847804, "grad_norm": 1.6901300303721887, "learning_rate": 1.0816326530612246e-05, "loss": 0.7215, "step": 159 }, { "epoch": 0.01634320735444331, "grad_norm": 1.6882719908313557, "learning_rate": 1.0884353741496601e-05, "loss": 0.6899, "step": 160 }, { "epoch": 0.01644535240040858, "grad_norm": 1.7126975472492683, "learning_rate": 1.0952380952380955e-05, "loss": 0.7668, "step": 161 }, { "epoch": 0.016547497446373852, "grad_norm": 1.860833048235229, "learning_rate": 1.1020408163265306e-05, "loss": 0.7757, "step": 162 }, { "epoch": 0.01664964249233912, "grad_norm": 1.6691646075823658, "learning_rate": 1.108843537414966e-05, "loss": 0.6983, "step": 163 }, { "epoch": 0.01675178753830439, "grad_norm": 1.6710931737332289, "learning_rate": 1.1156462585034013e-05, "loss": 0.703, "step": 164 }, { "epoch": 0.016853932584269662, "grad_norm": 1.6028278372345446, "learning_rate": 1.1224489795918367e-05, "loss": 0.7139, "step": 165 }, { "epoch": 0.016956077630234934, "grad_norm": 1.6871631323522613, "learning_rate": 1.1292517006802722e-05, "loss": 0.6787, "step": 166 }, { "epoch": 0.017058222676200205, "grad_norm": 1.5901260536870798, "learning_rate": 1.1360544217687076e-05, "loss": 0.6613, "step": 167 }, { "epoch": 0.017160367722165476, "grad_norm": 1.6000584814482617, "learning_rate": 1.1428571428571429e-05, "loss": 0.7295, "step": 168 }, { "epoch": 0.017262512768130747, "grad_norm": 1.7022611355821193, "learning_rate": 1.1496598639455783e-05, "loss": 0.7237, "step": 169 }, { "epoch": 0.017364657814096015, "grad_norm": 1.8639312676359368, "learning_rate": 1.1564625850340136e-05, "loss": 0.8106, "step": 170 }, { "epoch": 0.017466802860061286, "grad_norm": 1.9157902018907373, "learning_rate": 1.1632653061224491e-05, "loss": 0.8267, "step": 171 }, { "epoch": 0.017568947906026557, "grad_norm": 1.785882858974327, "learning_rate": 1.1700680272108845e-05, "loss": 0.8219, "step": 172 }, { "epoch": 0.01767109295199183, "grad_norm": 1.7530962357015458, "learning_rate": 1.1768707482993198e-05, "loss": 0.7005, "step": 173 }, { "epoch": 0.0177732379979571, "grad_norm": 1.8202815347760355, "learning_rate": 1.1836734693877552e-05, "loss": 0.6966, "step": 174 }, { "epoch": 0.01787538304392237, "grad_norm": 1.5678831440125445, "learning_rate": 1.1904761904761905e-05, "loss": 0.7865, "step": 175 }, { "epoch": 0.017977528089887642, "grad_norm": 1.6051735570217414, "learning_rate": 1.197278911564626e-05, "loss": 0.8575, "step": 176 }, { "epoch": 0.01807967313585291, "grad_norm": 1.6369558411506475, "learning_rate": 1.2040816326530614e-05, "loss": 0.674, "step": 177 }, { "epoch": 0.01818181818181818, "grad_norm": 1.8105242715857925, "learning_rate": 1.2108843537414967e-05, "loss": 0.6989, "step": 178 }, { "epoch": 0.018283963227783452, "grad_norm": 1.6483872674217979, "learning_rate": 1.217687074829932e-05, "loss": 0.7412, "step": 179 }, { "epoch": 0.018386108273748723, "grad_norm": 1.4853293043136457, "learning_rate": 1.2244897959183674e-05, "loss": 0.6996, "step": 180 }, { "epoch": 0.018488253319713994, "grad_norm": 1.6830465494255156, "learning_rate": 1.231292517006803e-05, "loss": 0.7431, "step": 181 }, { "epoch": 0.018590398365679266, "grad_norm": 1.6223540718110974, "learning_rate": 1.2380952380952383e-05, "loss": 0.6751, "step": 182 }, { "epoch": 0.018692543411644537, "grad_norm": 1.7860008803194065, "learning_rate": 1.2448979591836736e-05, "loss": 0.8121, "step": 183 }, { "epoch": 0.018794688457609805, "grad_norm": 1.7770504613473805, "learning_rate": 1.251700680272109e-05, "loss": 0.7278, "step": 184 }, { "epoch": 0.018896833503575076, "grad_norm": 1.8001236496603807, "learning_rate": 1.2585034013605443e-05, "loss": 0.8101, "step": 185 }, { "epoch": 0.018998978549540347, "grad_norm": 1.7187471925775293, "learning_rate": 1.2653061224489798e-05, "loss": 0.6679, "step": 186 }, { "epoch": 0.019101123595505618, "grad_norm": 1.7432579851728531, "learning_rate": 1.2721088435374152e-05, "loss": 0.7274, "step": 187 }, { "epoch": 0.01920326864147089, "grad_norm": 1.7177537817958337, "learning_rate": 1.2789115646258505e-05, "loss": 0.7787, "step": 188 }, { "epoch": 0.01930541368743616, "grad_norm": 1.6834932053378133, "learning_rate": 1.2857142857142859e-05, "loss": 0.6854, "step": 189 }, { "epoch": 0.01940755873340143, "grad_norm": 1.949245504046552, "learning_rate": 1.2925170068027212e-05, "loss": 0.8335, "step": 190 }, { "epoch": 0.0195097037793667, "grad_norm": 1.7420522713597348, "learning_rate": 1.2993197278911568e-05, "loss": 0.696, "step": 191 }, { "epoch": 0.01961184882533197, "grad_norm": 1.7722594170393748, "learning_rate": 1.3061224489795918e-05, "loss": 0.7023, "step": 192 }, { "epoch": 0.019713993871297242, "grad_norm": 1.632257598748308, "learning_rate": 1.3129251700680273e-05, "loss": 0.7893, "step": 193 }, { "epoch": 0.019816138917262513, "grad_norm": 1.7162977302301121, "learning_rate": 1.3197278911564626e-05, "loss": 0.7816, "step": 194 }, { "epoch": 0.019918283963227784, "grad_norm": 1.6428808771496362, "learning_rate": 1.326530612244898e-05, "loss": 0.6069, "step": 195 }, { "epoch": 0.020020429009193055, "grad_norm": 1.7608765496826089, "learning_rate": 1.3333333333333333e-05, "loss": 0.8061, "step": 196 }, { "epoch": 0.020122574055158327, "grad_norm": 1.6945246408232142, "learning_rate": 1.3401360544217687e-05, "loss": 0.6908, "step": 197 }, { "epoch": 0.020224719101123594, "grad_norm": 1.6245687218591212, "learning_rate": 1.3469387755102042e-05, "loss": 0.6128, "step": 198 }, { "epoch": 0.020326864147088865, "grad_norm": 1.5964224806883631, "learning_rate": 1.3537414965986395e-05, "loss": 0.6373, "step": 199 }, { "epoch": 0.020429009193054137, "grad_norm": 1.623491593147068, "learning_rate": 1.3605442176870749e-05, "loss": 0.7421, "step": 200 }, { "epoch": 0.020531154239019408, "grad_norm": 1.5199753628257748, "learning_rate": 1.3673469387755102e-05, "loss": 0.5401, "step": 201 }, { "epoch": 0.02063329928498468, "grad_norm": 1.6903192779010237, "learning_rate": 1.3741496598639456e-05, "loss": 0.7139, "step": 202 }, { "epoch": 0.02073544433094995, "grad_norm": 1.6703009895863918, "learning_rate": 1.3809523809523811e-05, "loss": 0.805, "step": 203 }, { "epoch": 0.020837589376915218, "grad_norm": 1.6561649172999005, "learning_rate": 1.3877551020408165e-05, "loss": 0.6521, "step": 204 }, { "epoch": 0.02093973442288049, "grad_norm": 1.6374563688361488, "learning_rate": 1.3945578231292518e-05, "loss": 0.7981, "step": 205 }, { "epoch": 0.02104187946884576, "grad_norm": 1.6011019389196248, "learning_rate": 1.4013605442176872e-05, "loss": 0.6574, "step": 206 }, { "epoch": 0.02114402451481103, "grad_norm": 1.7318980429273745, "learning_rate": 1.4081632653061225e-05, "loss": 0.7204, "step": 207 }, { "epoch": 0.021246169560776303, "grad_norm": 1.667116251107914, "learning_rate": 1.414965986394558e-05, "loss": 0.7088, "step": 208 }, { "epoch": 0.021348314606741574, "grad_norm": 1.7146530136245697, "learning_rate": 1.4217687074829934e-05, "loss": 0.7269, "step": 209 }, { "epoch": 0.021450459652706845, "grad_norm": 1.7339521332968388, "learning_rate": 1.4285714285714287e-05, "loss": 0.5921, "step": 210 }, { "epoch": 0.021552604698672113, "grad_norm": 1.7307420670093898, "learning_rate": 1.435374149659864e-05, "loss": 0.6899, "step": 211 }, { "epoch": 0.021654749744637384, "grad_norm": 1.6792597753169058, "learning_rate": 1.4421768707482994e-05, "loss": 0.7691, "step": 212 }, { "epoch": 0.021756894790602655, "grad_norm": 1.69983704609484, "learning_rate": 1.448979591836735e-05, "loss": 0.6724, "step": 213 }, { "epoch": 0.021859039836567926, "grad_norm": 1.6834153652082477, "learning_rate": 1.4557823129251703e-05, "loss": 0.7277, "step": 214 }, { "epoch": 0.021961184882533197, "grad_norm": 1.6887632288306307, "learning_rate": 1.4625850340136056e-05, "loss": 0.737, "step": 215 }, { "epoch": 0.02206332992849847, "grad_norm": 1.750575761137874, "learning_rate": 1.469387755102041e-05, "loss": 0.5893, "step": 216 }, { "epoch": 0.02216547497446374, "grad_norm": 1.760104995544491, "learning_rate": 1.4761904761904763e-05, "loss": 0.7191, "step": 217 }, { "epoch": 0.022267620020429008, "grad_norm": 1.6966140601261814, "learning_rate": 1.4829931972789118e-05, "loss": 0.8299, "step": 218 }, { "epoch": 0.02236976506639428, "grad_norm": 1.7096511991105092, "learning_rate": 1.4897959183673472e-05, "loss": 0.6451, "step": 219 }, { "epoch": 0.02247191011235955, "grad_norm": 1.7601676205959085, "learning_rate": 1.4965986394557825e-05, "loss": 0.7569, "step": 220 }, { "epoch": 0.02257405515832482, "grad_norm": 1.715999457369528, "learning_rate": 1.5034013605442177e-05, "loss": 0.7148, "step": 221 }, { "epoch": 0.022676200204290092, "grad_norm": 1.6585584667190563, "learning_rate": 1.510204081632653e-05, "loss": 0.7202, "step": 222 }, { "epoch": 0.022778345250255363, "grad_norm": 1.656971523509079, "learning_rate": 1.5170068027210884e-05, "loss": 0.7582, "step": 223 }, { "epoch": 0.022880490296220635, "grad_norm": 1.5368838757342893, "learning_rate": 1.523809523809524e-05, "loss": 0.7434, "step": 224 }, { "epoch": 0.022982635342185902, "grad_norm": 1.6102753910803371, "learning_rate": 1.530612244897959e-05, "loss": 0.6667, "step": 225 }, { "epoch": 0.023084780388151174, "grad_norm": 1.7496249149226775, "learning_rate": 1.5374149659863945e-05, "loss": 0.7434, "step": 226 }, { "epoch": 0.023186925434116445, "grad_norm": 1.7378752250236362, "learning_rate": 1.54421768707483e-05, "loss": 0.8048, "step": 227 }, { "epoch": 0.023289070480081716, "grad_norm": 1.6995440865015914, "learning_rate": 1.5510204081632655e-05, "loss": 0.7731, "step": 228 }, { "epoch": 0.023391215526046987, "grad_norm": 1.6948162036390637, "learning_rate": 1.557823129251701e-05, "loss": 0.6862, "step": 229 }, { "epoch": 0.02349336057201226, "grad_norm": 1.7526251091046414, "learning_rate": 1.5646258503401362e-05, "loss": 0.6658, "step": 230 }, { "epoch": 0.02359550561797753, "grad_norm": 1.7286133632001344, "learning_rate": 1.5714285714285715e-05, "loss": 0.8782, "step": 231 }, { "epoch": 0.023697650663942797, "grad_norm": 1.6708372326114151, "learning_rate": 1.578231292517007e-05, "loss": 0.7649, "step": 232 }, { "epoch": 0.02379979570990807, "grad_norm": 1.7309187424472103, "learning_rate": 1.5850340136054422e-05, "loss": 0.7077, "step": 233 }, { "epoch": 0.02390194075587334, "grad_norm": 1.7207735528501127, "learning_rate": 1.5918367346938776e-05, "loss": 0.6373, "step": 234 }, { "epoch": 0.02400408580183861, "grad_norm": 1.542492375282899, "learning_rate": 1.598639455782313e-05, "loss": 0.7017, "step": 235 }, { "epoch": 0.024106230847803882, "grad_norm": 1.7310693731168392, "learning_rate": 1.6054421768707483e-05, "loss": 0.7241, "step": 236 }, { "epoch": 0.024208375893769153, "grad_norm": 1.591240757601507, "learning_rate": 1.612244897959184e-05, "loss": 0.727, "step": 237 }, { "epoch": 0.024310520939734424, "grad_norm": 1.6240608809840893, "learning_rate": 1.6190476190476193e-05, "loss": 0.6995, "step": 238 }, { "epoch": 0.024412665985699692, "grad_norm": 1.6138243337460614, "learning_rate": 1.6258503401360547e-05, "loss": 0.7657, "step": 239 }, { "epoch": 0.024514811031664963, "grad_norm": 1.7302630769224912, "learning_rate": 1.63265306122449e-05, "loss": 0.7491, "step": 240 }, { "epoch": 0.024616956077630234, "grad_norm": 1.8695354004194862, "learning_rate": 1.6394557823129254e-05, "loss": 0.7828, "step": 241 }, { "epoch": 0.024719101123595506, "grad_norm": 1.8416740418279043, "learning_rate": 1.6462585034013607e-05, "loss": 0.8681, "step": 242 }, { "epoch": 0.024821246169560777, "grad_norm": 1.4945412066264474, "learning_rate": 1.653061224489796e-05, "loss": 0.7292, "step": 243 }, { "epoch": 0.024923391215526048, "grad_norm": 1.4726427374583286, "learning_rate": 1.6598639455782314e-05, "loss": 0.6267, "step": 244 }, { "epoch": 0.02502553626149132, "grad_norm": 1.531520904197372, "learning_rate": 1.6666666666666667e-05, "loss": 0.6931, "step": 245 }, { "epoch": 0.025127681307456587, "grad_norm": 1.6189735160692857, "learning_rate": 1.673469387755102e-05, "loss": 0.6459, "step": 246 }, { "epoch": 0.025229826353421858, "grad_norm": 1.7952277677611934, "learning_rate": 1.6802721088435378e-05, "loss": 0.7225, "step": 247 }, { "epoch": 0.02533197139938713, "grad_norm": 1.4750822617726922, "learning_rate": 1.687074829931973e-05, "loss": 0.6693, "step": 248 }, { "epoch": 0.0254341164453524, "grad_norm": 1.5025866791924054, "learning_rate": 1.6938775510204085e-05, "loss": 0.7338, "step": 249 }, { "epoch": 0.02553626149131767, "grad_norm": 1.9184730298445185, "learning_rate": 1.7006802721088435e-05, "loss": 0.822, "step": 250 }, { "epoch": 0.025638406537282943, "grad_norm": 1.7640638289695652, "learning_rate": 1.707482993197279e-05, "loss": 0.7888, "step": 251 }, { "epoch": 0.025740551583248214, "grad_norm": 1.6289474440466316, "learning_rate": 1.7142857142857142e-05, "loss": 0.645, "step": 252 }, { "epoch": 0.025842696629213482, "grad_norm": 1.6168415775559752, "learning_rate": 1.72108843537415e-05, "loss": 0.8349, "step": 253 }, { "epoch": 0.025944841675178753, "grad_norm": 1.6972890558710467, "learning_rate": 1.7278911564625852e-05, "loss": 0.8225, "step": 254 }, { "epoch": 0.026046986721144024, "grad_norm": 1.7546501844753204, "learning_rate": 1.7346938775510206e-05, "loss": 0.793, "step": 255 }, { "epoch": 0.026149131767109295, "grad_norm": 1.605222248842367, "learning_rate": 1.741496598639456e-05, "loss": 0.7618, "step": 256 }, { "epoch": 0.026251276813074566, "grad_norm": 1.7734820728515044, "learning_rate": 1.7482993197278913e-05, "loss": 0.8619, "step": 257 }, { "epoch": 0.026353421859039838, "grad_norm": 1.7856967588397747, "learning_rate": 1.7551020408163266e-05, "loss": 0.6808, "step": 258 }, { "epoch": 0.02645556690500511, "grad_norm": 1.7125105641531968, "learning_rate": 1.761904761904762e-05, "loss": 0.7379, "step": 259 }, { "epoch": 0.026557711950970377, "grad_norm": 1.747847044271819, "learning_rate": 1.7687074829931973e-05, "loss": 0.7189, "step": 260 }, { "epoch": 0.026659856996935648, "grad_norm": 1.7306235385439581, "learning_rate": 1.7755102040816327e-05, "loss": 0.8412, "step": 261 }, { "epoch": 0.02676200204290092, "grad_norm": 1.6081901194935102, "learning_rate": 1.782312925170068e-05, "loss": 0.7475, "step": 262 }, { "epoch": 0.02686414708886619, "grad_norm": 1.5510312240707456, "learning_rate": 1.7891156462585037e-05, "loss": 0.7793, "step": 263 }, { "epoch": 0.02696629213483146, "grad_norm": 1.7514148922507933, "learning_rate": 1.795918367346939e-05, "loss": 0.7567, "step": 264 }, { "epoch": 0.027068437180796732, "grad_norm": 1.6813056729305516, "learning_rate": 1.8027210884353744e-05, "loss": 0.7805, "step": 265 }, { "epoch": 0.027170582226762004, "grad_norm": 1.636398050011782, "learning_rate": 1.8095238095238097e-05, "loss": 0.6795, "step": 266 }, { "epoch": 0.02727272727272727, "grad_norm": 1.5779239857805816, "learning_rate": 1.816326530612245e-05, "loss": 0.6869, "step": 267 }, { "epoch": 0.027374872318692543, "grad_norm": 1.4412803271887495, "learning_rate": 1.8231292517006804e-05, "loss": 0.6369, "step": 268 }, { "epoch": 0.027477017364657814, "grad_norm": 1.804770986377828, "learning_rate": 1.8299319727891158e-05, "loss": 0.8157, "step": 269 }, { "epoch": 0.027579162410623085, "grad_norm": 1.668485876468596, "learning_rate": 1.836734693877551e-05, "loss": 0.6413, "step": 270 }, { "epoch": 0.027681307456588356, "grad_norm": 1.7856336273013087, "learning_rate": 1.8435374149659865e-05, "loss": 0.8693, "step": 271 }, { "epoch": 0.027783452502553627, "grad_norm": 1.697632519966551, "learning_rate": 1.8503401360544218e-05, "loss": 0.7679, "step": 272 }, { "epoch": 0.0278855975485189, "grad_norm": 1.7171739215199602, "learning_rate": 1.8571428571428575e-05, "loss": 0.778, "step": 273 }, { "epoch": 0.027987742594484166, "grad_norm": 1.5938726988377543, "learning_rate": 1.863945578231293e-05, "loss": 0.7612, "step": 274 }, { "epoch": 0.028089887640449437, "grad_norm": 1.5906655717814715, "learning_rate": 1.8707482993197282e-05, "loss": 0.5723, "step": 275 }, { "epoch": 0.02819203268641471, "grad_norm": 1.5078696721505913, "learning_rate": 1.8775510204081636e-05, "loss": 0.7363, "step": 276 }, { "epoch": 0.02829417773237998, "grad_norm": 1.5446916452430135, "learning_rate": 1.884353741496599e-05, "loss": 0.7853, "step": 277 }, { "epoch": 0.02839632277834525, "grad_norm": 1.708121037225686, "learning_rate": 1.8911564625850343e-05, "loss": 0.7252, "step": 278 }, { "epoch": 0.028498467824310522, "grad_norm": 1.5934769581812638, "learning_rate": 1.8979591836734696e-05, "loss": 0.8717, "step": 279 }, { "epoch": 0.028600612870275793, "grad_norm": 1.5107727511597122, "learning_rate": 1.904761904761905e-05, "loss": 0.7137, "step": 280 }, { "epoch": 0.02870275791624106, "grad_norm": 1.3976840822460448, "learning_rate": 1.9115646258503403e-05, "loss": 0.6616, "step": 281 }, { "epoch": 0.028804902962206332, "grad_norm": 1.7475390113391824, "learning_rate": 1.9183673469387756e-05, "loss": 0.7495, "step": 282 }, { "epoch": 0.028907048008171603, "grad_norm": 1.765610249198674, "learning_rate": 1.925170068027211e-05, "loss": 0.7275, "step": 283 }, { "epoch": 0.029009193054136875, "grad_norm": 1.7270537589170198, "learning_rate": 1.9319727891156463e-05, "loss": 0.7495, "step": 284 }, { "epoch": 0.029111338100102146, "grad_norm": 1.747335050821353, "learning_rate": 1.9387755102040817e-05, "loss": 0.709, "step": 285 }, { "epoch": 0.029213483146067417, "grad_norm": 1.5635922172080774, "learning_rate": 1.945578231292517e-05, "loss": 0.655, "step": 286 }, { "epoch": 0.029315628192032685, "grad_norm": 1.6530065973763257, "learning_rate": 1.9523809523809524e-05, "loss": 0.5877, "step": 287 }, { "epoch": 0.029417773237997956, "grad_norm": 1.6733903459496355, "learning_rate": 1.9591836734693877e-05, "loss": 0.7207, "step": 288 }, { "epoch": 0.029519918283963227, "grad_norm": 1.685661854915379, "learning_rate": 1.965986394557823e-05, "loss": 0.7612, "step": 289 }, { "epoch": 0.0296220633299285, "grad_norm": 1.4862629797845708, "learning_rate": 1.9727891156462588e-05, "loss": 0.6418, "step": 290 }, { "epoch": 0.02972420837589377, "grad_norm": 1.6535641609527183, "learning_rate": 1.979591836734694e-05, "loss": 0.7818, "step": 291 }, { "epoch": 0.02982635342185904, "grad_norm": 1.6678727911393412, "learning_rate": 1.9863945578231295e-05, "loss": 0.7316, "step": 292 }, { "epoch": 0.029928498467824312, "grad_norm": 1.857236685258155, "learning_rate": 1.9931972789115648e-05, "loss": 0.7219, "step": 293 }, { "epoch": 0.03003064351378958, "grad_norm": 1.7130211183964796, "learning_rate": 2e-05, "loss": 0.7533, "step": 294 }, { "epoch": 0.03013278855975485, "grad_norm": 1.6306750132118761, "learning_rate": 1.9999999452746773e-05, "loss": 0.8894, "step": 295 }, { "epoch": 0.030234933605720122, "grad_norm": 1.596068821772123, "learning_rate": 1.999999781098714e-05, "loss": 0.6652, "step": 296 }, { "epoch": 0.030337078651685393, "grad_norm": 1.5023620977647107, "learning_rate": 1.9999995074721287e-05, "loss": 0.653, "step": 297 }, { "epoch": 0.030439223697650664, "grad_norm": 1.7996223461899603, "learning_rate": 1.999999124394951e-05, "loss": 0.7884, "step": 298 }, { "epoch": 0.030541368743615935, "grad_norm": 1.5801199353543882, "learning_rate": 1.9999986318672236e-05, "loss": 0.764, "step": 299 }, { "epoch": 0.030643513789581207, "grad_norm": 1.4371662538136063, "learning_rate": 1.9999980298889996e-05, "loss": 0.6438, "step": 300 }, { "epoch": 0.030745658835546474, "grad_norm": 1.4869738768014928, "learning_rate": 1.9999973184603453e-05, "loss": 0.6223, "step": 301 }, { "epoch": 0.030847803881511746, "grad_norm": 1.6940793723174485, "learning_rate": 1.999996497581338e-05, "loss": 0.7169, "step": 302 }, { "epoch": 0.030949948927477017, "grad_norm": 1.685173831152958, "learning_rate": 1.9999955672520682e-05, "loss": 0.6781, "step": 303 }, { "epoch": 0.031052093973442288, "grad_norm": 1.7090834649740707, "learning_rate": 1.9999945274726376e-05, "loss": 0.7915, "step": 304 }, { "epoch": 0.03115423901940756, "grad_norm": 1.671882306345056, "learning_rate": 1.9999933782431596e-05, "loss": 0.6592, "step": 305 }, { "epoch": 0.03125638406537283, "grad_norm": 1.4801728664351115, "learning_rate": 1.9999921195637606e-05, "loss": 0.7148, "step": 306 }, { "epoch": 0.0313585291113381, "grad_norm": 1.6220174463697596, "learning_rate": 1.999990751434578e-05, "loss": 0.6391, "step": 307 }, { "epoch": 0.03146067415730337, "grad_norm": 1.7585727306421493, "learning_rate": 1.9999892738557615e-05, "loss": 0.8403, "step": 308 }, { "epoch": 0.031562819203268644, "grad_norm": 1.6104542939414912, "learning_rate": 1.9999876868274727e-05, "loss": 0.6965, "step": 309 }, { "epoch": 0.031664964249233915, "grad_norm": 1.5333296122061046, "learning_rate": 1.9999859903498856e-05, "loss": 0.6839, "step": 310 }, { "epoch": 0.031767109295199186, "grad_norm": 1.5891278151624393, "learning_rate": 1.9999841844231857e-05, "loss": 0.6996, "step": 311 }, { "epoch": 0.03186925434116445, "grad_norm": 1.574480556315509, "learning_rate": 1.9999822690475713e-05, "loss": 0.7041, "step": 312 }, { "epoch": 0.03197139938712972, "grad_norm": 1.591178739260255, "learning_rate": 1.999980244223251e-05, "loss": 0.7303, "step": 313 }, { "epoch": 0.03207354443309499, "grad_norm": 1.587964682884301, "learning_rate": 1.9999781099504466e-05, "loss": 0.6733, "step": 314 }, { "epoch": 0.032175689479060264, "grad_norm": 1.524198174484014, "learning_rate": 1.9999758662293926e-05, "loss": 0.8069, "step": 315 }, { "epoch": 0.032277834525025535, "grad_norm": 1.4841818332002192, "learning_rate": 1.9999735130603336e-05, "loss": 0.7504, "step": 316 }, { "epoch": 0.032379979570990806, "grad_norm": 1.6605234050501971, "learning_rate": 1.9999710504435278e-05, "loss": 0.7957, "step": 317 }, { "epoch": 0.03248212461695608, "grad_norm": 1.4966583829015518, "learning_rate": 1.9999684783792445e-05, "loss": 0.7275, "step": 318 }, { "epoch": 0.03258426966292135, "grad_norm": 1.643817257907392, "learning_rate": 1.999965796867765e-05, "loss": 0.7958, "step": 319 }, { "epoch": 0.03268641470888662, "grad_norm": 1.5659770135920033, "learning_rate": 1.999963005909383e-05, "loss": 0.7684, "step": 320 }, { "epoch": 0.03278855975485189, "grad_norm": 1.6574457624811796, "learning_rate": 1.9999601055044036e-05, "loss": 0.7444, "step": 321 }, { "epoch": 0.03289070480081716, "grad_norm": 1.5399348865853, "learning_rate": 1.999957095653145e-05, "loss": 0.8094, "step": 322 }, { "epoch": 0.032992849846782434, "grad_norm": 1.5944043156877568, "learning_rate": 1.9999539763559362e-05, "loss": 0.8015, "step": 323 }, { "epoch": 0.033094994892747705, "grad_norm": 1.7176720338121858, "learning_rate": 1.999950747613119e-05, "loss": 0.826, "step": 324 }, { "epoch": 0.03319713993871297, "grad_norm": 1.5113923216000331, "learning_rate": 1.9999474094250457e-05, "loss": 0.7037, "step": 325 }, { "epoch": 0.03329928498467824, "grad_norm": 1.3853075170693228, "learning_rate": 1.9999439617920825e-05, "loss": 0.7392, "step": 326 }, { "epoch": 0.03340143003064351, "grad_norm": 1.5232185394028046, "learning_rate": 1.999940404714607e-05, "loss": 0.7098, "step": 327 }, { "epoch": 0.03350357507660878, "grad_norm": 1.8802732147649686, "learning_rate": 1.999936738193008e-05, "loss": 0.8102, "step": 328 }, { "epoch": 0.033605720122574054, "grad_norm": 1.4225413446281991, "learning_rate": 1.9999329622276867e-05, "loss": 0.6202, "step": 329 }, { "epoch": 0.033707865168539325, "grad_norm": 1.6435347543779217, "learning_rate": 1.999929076819057e-05, "loss": 0.9419, "step": 330 }, { "epoch": 0.033810010214504596, "grad_norm": 1.818942270512346, "learning_rate": 1.9999250819675436e-05, "loss": 0.7188, "step": 331 }, { "epoch": 0.03391215526046987, "grad_norm": 1.629449058835817, "learning_rate": 1.9999209776735844e-05, "loss": 0.7595, "step": 332 }, { "epoch": 0.03401430030643514, "grad_norm": 1.5463631599213123, "learning_rate": 1.9999167639376277e-05, "loss": 0.6287, "step": 333 }, { "epoch": 0.03411644535240041, "grad_norm": 1.760281293616789, "learning_rate": 1.9999124407601353e-05, "loss": 0.6983, "step": 334 }, { "epoch": 0.03421859039836568, "grad_norm": 1.6512443349780666, "learning_rate": 1.9999080081415802e-05, "loss": 0.7428, "step": 335 }, { "epoch": 0.03432073544433095, "grad_norm": 1.6602032171866863, "learning_rate": 1.9999034660824476e-05, "loss": 0.8147, "step": 336 }, { "epoch": 0.03442288049029622, "grad_norm": 1.4931542508576237, "learning_rate": 1.9998988145832348e-05, "loss": 0.7465, "step": 337 }, { "epoch": 0.034525025536261494, "grad_norm": 1.3321594599952813, "learning_rate": 1.99989405364445e-05, "loss": 0.6092, "step": 338 }, { "epoch": 0.03462717058222676, "grad_norm": 1.493017008015358, "learning_rate": 1.999889183266616e-05, "loss": 0.6399, "step": 339 }, { "epoch": 0.03472931562819203, "grad_norm": 1.8223996061744088, "learning_rate": 1.9998842034502644e-05, "loss": 0.7442, "step": 340 }, { "epoch": 0.0348314606741573, "grad_norm": 1.7639551650847805, "learning_rate": 1.999879114195941e-05, "loss": 0.7286, "step": 341 }, { "epoch": 0.03493360572012257, "grad_norm": 1.531757375530844, "learning_rate": 1.999873915504202e-05, "loss": 0.6388, "step": 342 }, { "epoch": 0.03503575076608784, "grad_norm": 1.6711013011690217, "learning_rate": 1.9998686073756174e-05, "loss": 0.764, "step": 343 }, { "epoch": 0.035137895812053115, "grad_norm": 1.599774294140866, "learning_rate": 1.9998631898107675e-05, "loss": 0.7366, "step": 344 }, { "epoch": 0.035240040858018386, "grad_norm": 1.4249024224943276, "learning_rate": 1.9998576628102455e-05, "loss": 0.6737, "step": 345 }, { "epoch": 0.03534218590398366, "grad_norm": 1.6793259124814255, "learning_rate": 1.9998520263746564e-05, "loss": 0.7076, "step": 346 }, { "epoch": 0.03544433094994893, "grad_norm": 1.5590748935269132, "learning_rate": 1.999846280504617e-05, "loss": 0.6703, "step": 347 }, { "epoch": 0.0355464759959142, "grad_norm": 1.656939834153814, "learning_rate": 1.999840425200756e-05, "loss": 0.7481, "step": 348 }, { "epoch": 0.03564862104187947, "grad_norm": 1.5175613108654284, "learning_rate": 1.9998344604637148e-05, "loss": 0.6894, "step": 349 }, { "epoch": 0.03575076608784474, "grad_norm": 1.684746418460725, "learning_rate": 1.9998283862941457e-05, "loss": 0.6748, "step": 350 }, { "epoch": 0.03585291113381001, "grad_norm": 1.5357017913763469, "learning_rate": 1.9998222026927138e-05, "loss": 0.7063, "step": 351 }, { "epoch": 0.035955056179775284, "grad_norm": 1.46456112975668, "learning_rate": 1.9998159096600962e-05, "loss": 0.7087, "step": 352 }, { "epoch": 0.03605720122574055, "grad_norm": 1.6441176951800112, "learning_rate": 1.9998095071969808e-05, "loss": 0.8299, "step": 353 }, { "epoch": 0.03615934627170582, "grad_norm": 1.6363800478989037, "learning_rate": 1.9998029953040693e-05, "loss": 0.7238, "step": 354 }, { "epoch": 0.03626149131767109, "grad_norm": 1.6570628364055506, "learning_rate": 1.9997963739820737e-05, "loss": 0.7974, "step": 355 }, { "epoch": 0.03636363636363636, "grad_norm": 1.5979569956631086, "learning_rate": 1.9997896432317193e-05, "loss": 0.7773, "step": 356 }, { "epoch": 0.03646578140960163, "grad_norm": 1.6277886279965166, "learning_rate": 1.999782803053742e-05, "loss": 0.7895, "step": 357 }, { "epoch": 0.036567926455566904, "grad_norm": 1.6179991781659897, "learning_rate": 1.9997758534488915e-05, "loss": 0.7239, "step": 358 }, { "epoch": 0.036670071501532175, "grad_norm": 1.5977292493497925, "learning_rate": 1.999768794417928e-05, "loss": 0.7186, "step": 359 }, { "epoch": 0.03677221654749745, "grad_norm": 1.6260876869448038, "learning_rate": 1.9997616259616236e-05, "loss": 0.7696, "step": 360 }, { "epoch": 0.03687436159346272, "grad_norm": 1.5045152360940806, "learning_rate": 1.9997543480807635e-05, "loss": 0.7997, "step": 361 }, { "epoch": 0.03697650663942799, "grad_norm": 1.6966463079599634, "learning_rate": 1.999746960776144e-05, "loss": 0.837, "step": 362 }, { "epoch": 0.03707865168539326, "grad_norm": 1.5346645215134478, "learning_rate": 1.9997394640485738e-05, "loss": 0.7537, "step": 363 }, { "epoch": 0.03718079673135853, "grad_norm": 1.6332849422753557, "learning_rate": 1.9997318578988732e-05, "loss": 0.6529, "step": 364 }, { "epoch": 0.0372829417773238, "grad_norm": 1.6062702267766975, "learning_rate": 1.999724142327875e-05, "loss": 0.8201, "step": 365 }, { "epoch": 0.037385086823289074, "grad_norm": 1.7595573766043457, "learning_rate": 1.9997163173364233e-05, "loss": 0.8048, "step": 366 }, { "epoch": 0.03748723186925434, "grad_norm": 1.531230432584508, "learning_rate": 1.9997083829253748e-05, "loss": 0.6983, "step": 367 }, { "epoch": 0.03758937691521961, "grad_norm": 1.7098444699568267, "learning_rate": 1.999700339095598e-05, "loss": 0.8021, "step": 368 }, { "epoch": 0.03769152196118488, "grad_norm": 1.5761250003328382, "learning_rate": 1.9996921858479733e-05, "loss": 0.8436, "step": 369 }, { "epoch": 0.03779366700715015, "grad_norm": 1.7226213868594205, "learning_rate": 1.999683923183393e-05, "loss": 0.7574, "step": 370 }, { "epoch": 0.03789581205311542, "grad_norm": 1.522931463193398, "learning_rate": 1.9996755511027613e-05, "loss": 0.7175, "step": 371 }, { "epoch": 0.037997957099080694, "grad_norm": 1.738367365517668, "learning_rate": 1.9996670696069947e-05, "loss": 0.7346, "step": 372 }, { "epoch": 0.038100102145045965, "grad_norm": 1.6328014144266472, "learning_rate": 1.9996584786970214e-05, "loss": 0.7633, "step": 373 }, { "epoch": 0.038202247191011236, "grad_norm": 1.7662587538174572, "learning_rate": 1.999649778373782e-05, "loss": 0.7376, "step": 374 }, { "epoch": 0.03830439223697651, "grad_norm": 1.6698967486625416, "learning_rate": 1.9996409686382278e-05, "loss": 0.8357, "step": 375 }, { "epoch": 0.03840653728294178, "grad_norm": 1.6776205167172553, "learning_rate": 1.9996320494913245e-05, "loss": 0.7336, "step": 376 }, { "epoch": 0.03850868232890705, "grad_norm": 1.6430954813325385, "learning_rate": 1.999623020934047e-05, "loss": 0.8032, "step": 377 }, { "epoch": 0.03861082737487232, "grad_norm": 1.654899567519746, "learning_rate": 1.9996138829673844e-05, "loss": 0.724, "step": 378 }, { "epoch": 0.03871297242083759, "grad_norm": 1.6907175181600396, "learning_rate": 1.9996046355923365e-05, "loss": 0.8435, "step": 379 }, { "epoch": 0.03881511746680286, "grad_norm": 1.7540290532399327, "learning_rate": 1.999595278809915e-05, "loss": 0.6889, "step": 380 }, { "epoch": 0.03891726251276813, "grad_norm": 1.5127412714008044, "learning_rate": 1.999585812621145e-05, "loss": 0.7169, "step": 381 }, { "epoch": 0.0390194075587334, "grad_norm": 1.3896934203025388, "learning_rate": 1.9995762370270616e-05, "loss": 0.6606, "step": 382 }, { "epoch": 0.03912155260469867, "grad_norm": 1.6430319317316633, "learning_rate": 1.999566552028713e-05, "loss": 0.762, "step": 383 }, { "epoch": 0.03922369765066394, "grad_norm": 1.8095769884768498, "learning_rate": 1.99955675762716e-05, "loss": 0.7503, "step": 384 }, { "epoch": 0.03932584269662921, "grad_norm": 1.6134360376707184, "learning_rate": 1.9995468538234738e-05, "loss": 0.7931, "step": 385 }, { "epoch": 0.039427987742594484, "grad_norm": 1.7125351230052142, "learning_rate": 1.9995368406187387e-05, "loss": 0.6807, "step": 386 }, { "epoch": 0.039530132788559755, "grad_norm": 1.5854645387766508, "learning_rate": 1.9995267180140504e-05, "loss": 0.777, "step": 387 }, { "epoch": 0.039632277834525026, "grad_norm": 1.7868588964792638, "learning_rate": 1.9995164860105176e-05, "loss": 0.7762, "step": 388 }, { "epoch": 0.0397344228804903, "grad_norm": 1.71671415273508, "learning_rate": 1.9995061446092593e-05, "loss": 0.7728, "step": 389 }, { "epoch": 0.03983656792645557, "grad_norm": 1.4664603461526882, "learning_rate": 1.9994956938114075e-05, "loss": 0.6703, "step": 390 }, { "epoch": 0.03993871297242084, "grad_norm": 1.376104314039649, "learning_rate": 1.9994851336181065e-05, "loss": 0.6903, "step": 391 }, { "epoch": 0.04004085801838611, "grad_norm": 1.7120683874024334, "learning_rate": 1.9994744640305118e-05, "loss": 0.7492, "step": 392 }, { "epoch": 0.04014300306435138, "grad_norm": 1.5266195487385579, "learning_rate": 1.9994636850497914e-05, "loss": 0.8543, "step": 393 }, { "epoch": 0.04024514811031665, "grad_norm": 1.7370112684271555, "learning_rate": 1.999452796677125e-05, "loss": 0.7076, "step": 394 }, { "epoch": 0.04034729315628192, "grad_norm": 1.5306005443036559, "learning_rate": 1.9994417989137042e-05, "loss": 0.6494, "step": 395 }, { "epoch": 0.04044943820224719, "grad_norm": 1.714731082919517, "learning_rate": 1.9994306917607328e-05, "loss": 0.8529, "step": 396 }, { "epoch": 0.04055158324821246, "grad_norm": 1.653950434390749, "learning_rate": 1.9994194752194262e-05, "loss": 0.7136, "step": 397 }, { "epoch": 0.04065372829417773, "grad_norm": 1.6357864809981595, "learning_rate": 1.9994081492910126e-05, "loss": 0.8166, "step": 398 }, { "epoch": 0.040755873340143, "grad_norm": 1.5747167760334395, "learning_rate": 1.9993967139767313e-05, "loss": 0.7278, "step": 399 }, { "epoch": 0.04085801838610827, "grad_norm": 1.689504266752484, "learning_rate": 1.999385169277834e-05, "loss": 0.8446, "step": 400 }, { "epoch": 0.040960163432073544, "grad_norm": 1.7690625261396564, "learning_rate": 1.999373515195584e-05, "loss": 0.677, "step": 401 }, { "epoch": 0.041062308478038816, "grad_norm": 1.5242754554386093, "learning_rate": 1.9993617517312573e-05, "loss": 0.6768, "step": 402 }, { "epoch": 0.04116445352400409, "grad_norm": 1.4788487504520307, "learning_rate": 1.999349878886141e-05, "loss": 0.6219, "step": 403 }, { "epoch": 0.04126659856996936, "grad_norm": 1.7810048941677938, "learning_rate": 1.999337896661535e-05, "loss": 0.7806, "step": 404 }, { "epoch": 0.04136874361593463, "grad_norm": 1.5778067115298302, "learning_rate": 1.9993258050587503e-05, "loss": 0.7152, "step": 405 }, { "epoch": 0.0414708886618999, "grad_norm": 1.582008540833576, "learning_rate": 1.999313604079111e-05, "loss": 0.7615, "step": 406 }, { "epoch": 0.04157303370786517, "grad_norm": 1.5850712044121877, "learning_rate": 1.999301293723952e-05, "loss": 0.6787, "step": 407 }, { "epoch": 0.041675178753830436, "grad_norm": 1.612463597676338, "learning_rate": 1.9992888739946207e-05, "loss": 0.6229, "step": 408 }, { "epoch": 0.04177732379979571, "grad_norm": 1.5661592926069352, "learning_rate": 1.9992763448924763e-05, "loss": 0.7595, "step": 409 }, { "epoch": 0.04187946884576098, "grad_norm": 1.6317731553287673, "learning_rate": 1.9992637064188906e-05, "loss": 0.836, "step": 410 }, { "epoch": 0.04198161389172625, "grad_norm": 1.7424545360235912, "learning_rate": 1.9992509585752465e-05, "loss": 0.7934, "step": 411 }, { "epoch": 0.04208375893769152, "grad_norm": 1.4864095271972726, "learning_rate": 1.9992381013629397e-05, "loss": 0.8132, "step": 412 }, { "epoch": 0.04218590398365679, "grad_norm": 1.5790188794772937, "learning_rate": 1.9992251347833766e-05, "loss": 0.7902, "step": 413 }, { "epoch": 0.04228804902962206, "grad_norm": 1.6618138742842077, "learning_rate": 1.9992120588379774e-05, "loss": 0.7934, "step": 414 }, { "epoch": 0.042390194075587334, "grad_norm": 1.5066893644067565, "learning_rate": 1.9991988735281724e-05, "loss": 0.7341, "step": 415 }, { "epoch": 0.042492339121552605, "grad_norm": 1.7165765959051955, "learning_rate": 1.9991855788554055e-05, "loss": 0.8411, "step": 416 }, { "epoch": 0.042594484167517876, "grad_norm": 1.513931606897242, "learning_rate": 1.999172174821131e-05, "loss": 0.7091, "step": 417 }, { "epoch": 0.04269662921348315, "grad_norm": 1.4733180904385181, "learning_rate": 1.9991586614268166e-05, "loss": 0.7547, "step": 418 }, { "epoch": 0.04279877425944842, "grad_norm": 1.6487688037815096, "learning_rate": 1.999145038673941e-05, "loss": 0.8272, "step": 419 }, { "epoch": 0.04290091930541369, "grad_norm": 1.6313369499823922, "learning_rate": 1.9991313065639956e-05, "loss": 0.7994, "step": 420 }, { "epoch": 0.04300306435137896, "grad_norm": 1.7312501065629007, "learning_rate": 1.9991174650984832e-05, "loss": 0.812, "step": 421 }, { "epoch": 0.043105209397344225, "grad_norm": 1.5753455465226176, "learning_rate": 1.9991035142789187e-05, "loss": 0.8568, "step": 422 }, { "epoch": 0.0432073544433095, "grad_norm": 1.516052496234355, "learning_rate": 1.999089454106829e-05, "loss": 0.6674, "step": 423 }, { "epoch": 0.04330949948927477, "grad_norm": 1.5456793116039083, "learning_rate": 1.999075284583753e-05, "loss": 0.8071, "step": 424 }, { "epoch": 0.04341164453524004, "grad_norm": 1.6815766452711434, "learning_rate": 1.999061005711242e-05, "loss": 0.7161, "step": 425 }, { "epoch": 0.04351378958120531, "grad_norm": 1.6035608458533839, "learning_rate": 1.999046617490858e-05, "loss": 0.63, "step": 426 }, { "epoch": 0.04361593462717058, "grad_norm": 1.5743440037884069, "learning_rate": 1.9990321199241765e-05, "loss": 0.8395, "step": 427 }, { "epoch": 0.04371807967313585, "grad_norm": 1.5575052038038233, "learning_rate": 1.9990175130127837e-05, "loss": 0.735, "step": 428 }, { "epoch": 0.043820224719101124, "grad_norm": 1.5457309060173676, "learning_rate": 1.999002796758279e-05, "loss": 0.724, "step": 429 }, { "epoch": 0.043922369765066395, "grad_norm": 1.7407269912291348, "learning_rate": 1.9989879711622726e-05, "loss": 0.794, "step": 430 }, { "epoch": 0.044024514811031666, "grad_norm": 1.6411432528678405, "learning_rate": 1.9989730362263874e-05, "loss": 0.7356, "step": 431 }, { "epoch": 0.04412665985699694, "grad_norm": 1.6459306563987728, "learning_rate": 1.998957991952258e-05, "loss": 0.811, "step": 432 }, { "epoch": 0.04422880490296221, "grad_norm": 1.551529819349357, "learning_rate": 1.998942838341531e-05, "loss": 0.6689, "step": 433 }, { "epoch": 0.04433094994892748, "grad_norm": 1.669418577461631, "learning_rate": 1.9989275753958646e-05, "loss": 0.7974, "step": 434 }, { "epoch": 0.04443309499489275, "grad_norm": 1.6083260424621268, "learning_rate": 1.9989122031169303e-05, "loss": 0.603, "step": 435 }, { "epoch": 0.044535240040858015, "grad_norm": 1.730858785645276, "learning_rate": 1.9988967215064096e-05, "loss": 0.891, "step": 436 }, { "epoch": 0.044637385086823286, "grad_norm": 1.5440714801961926, "learning_rate": 1.998881130565997e-05, "loss": 0.6839, "step": 437 }, { "epoch": 0.04473953013278856, "grad_norm": 1.5262191281826751, "learning_rate": 1.9988654302974e-05, "loss": 0.682, "step": 438 }, { "epoch": 0.04484167517875383, "grad_norm": 1.5339464791930282, "learning_rate": 1.998849620702336e-05, "loss": 0.6362, "step": 439 }, { "epoch": 0.0449438202247191, "grad_norm": 1.5615261336960227, "learning_rate": 1.9988337017825355e-05, "loss": 0.8495, "step": 440 }, { "epoch": 0.04504596527068437, "grad_norm": 1.5228235326627682, "learning_rate": 1.9988176735397414e-05, "loss": 0.8616, "step": 441 }, { "epoch": 0.04514811031664964, "grad_norm": 1.565493645775326, "learning_rate": 1.9988015359757075e-05, "loss": 0.7127, "step": 442 }, { "epoch": 0.04525025536261491, "grad_norm": 1.5632633415007537, "learning_rate": 1.9987852890922e-05, "loss": 0.679, "step": 443 }, { "epoch": 0.045352400408580185, "grad_norm": 1.4951795322950667, "learning_rate": 1.9987689328909973e-05, "loss": 0.7127, "step": 444 }, { "epoch": 0.045454545454545456, "grad_norm": 1.6508795094702773, "learning_rate": 1.9987524673738897e-05, "loss": 0.8676, "step": 445 }, { "epoch": 0.04555669050051073, "grad_norm": 1.5517521677287343, "learning_rate": 1.998735892542679e-05, "loss": 0.7402, "step": 446 }, { "epoch": 0.045658835546476, "grad_norm": 1.4303236217913078, "learning_rate": 1.99871920839918e-05, "loss": 0.7569, "step": 447 }, { "epoch": 0.04576098059244127, "grad_norm": 1.6020744350189244, "learning_rate": 1.9987024149452183e-05, "loss": 0.7093, "step": 448 }, { "epoch": 0.04586312563840654, "grad_norm": 1.5160906930493678, "learning_rate": 1.998685512182632e-05, "loss": 0.7835, "step": 449 }, { "epoch": 0.045965270684371805, "grad_norm": 1.476476819397115, "learning_rate": 1.9986685001132712e-05, "loss": 0.7317, "step": 450 }, { "epoch": 0.046067415730337076, "grad_norm": 1.5560080475435927, "learning_rate": 1.9986513787389977e-05, "loss": 0.7305, "step": 451 }, { "epoch": 0.04616956077630235, "grad_norm": 1.5714651262145016, "learning_rate": 1.9986341480616856e-05, "loss": 0.7018, "step": 452 }, { "epoch": 0.04627170582226762, "grad_norm": 1.6352297711977473, "learning_rate": 1.998616808083221e-05, "loss": 0.7336, "step": 453 }, { "epoch": 0.04637385086823289, "grad_norm": 1.4413342018676707, "learning_rate": 1.998599358805501e-05, "loss": 0.6515, "step": 454 }, { "epoch": 0.04647599591419816, "grad_norm": 1.6903614834734375, "learning_rate": 1.9985818002304367e-05, "loss": 0.8132, "step": 455 }, { "epoch": 0.04657814096016343, "grad_norm": 1.8258184877594659, "learning_rate": 1.998564132359949e-05, "loss": 0.8263, "step": 456 }, { "epoch": 0.0466802860061287, "grad_norm": 1.6567282783905968, "learning_rate": 1.9985463551959715e-05, "loss": 0.7446, "step": 457 }, { "epoch": 0.046782431052093974, "grad_norm": 1.5563465442212512, "learning_rate": 1.998528468740451e-05, "loss": 0.5944, "step": 458 }, { "epoch": 0.046884576098059245, "grad_norm": 1.546733433320347, "learning_rate": 1.9985104729953438e-05, "loss": 0.7259, "step": 459 }, { "epoch": 0.04698672114402452, "grad_norm": 1.7834205927440032, "learning_rate": 1.9984923679626205e-05, "loss": 0.7441, "step": 460 }, { "epoch": 0.04708886618998979, "grad_norm": 1.5980607898721175, "learning_rate": 1.9984741536442622e-05, "loss": 0.811, "step": 461 }, { "epoch": 0.04719101123595506, "grad_norm": 1.5931779877444234, "learning_rate": 1.998455830042263e-05, "loss": 0.7936, "step": 462 }, { "epoch": 0.04729315628192033, "grad_norm": 1.6641931540896815, "learning_rate": 1.998437397158628e-05, "loss": 0.7834, "step": 463 }, { "epoch": 0.047395301327885594, "grad_norm": 1.6176479362590956, "learning_rate": 1.9984188549953747e-05, "loss": 0.8281, "step": 464 }, { "epoch": 0.047497446373850866, "grad_norm": 1.436029553368375, "learning_rate": 1.9984002035545327e-05, "loss": 0.5851, "step": 465 }, { "epoch": 0.04759959141981614, "grad_norm": 1.6815793284759746, "learning_rate": 1.9983814428381433e-05, "loss": 0.7416, "step": 466 }, { "epoch": 0.04770173646578141, "grad_norm": 1.6775855993331013, "learning_rate": 1.99836257284826e-05, "loss": 0.7009, "step": 467 }, { "epoch": 0.04780388151174668, "grad_norm": 1.6912070041998748, "learning_rate": 1.9983435935869483e-05, "loss": 0.7175, "step": 468 }, { "epoch": 0.04790602655771195, "grad_norm": 1.5353035836739497, "learning_rate": 1.998324505056285e-05, "loss": 0.6991, "step": 469 }, { "epoch": 0.04800817160367722, "grad_norm": 1.7177225253697452, "learning_rate": 1.9983053072583595e-05, "loss": 0.7749, "step": 470 }, { "epoch": 0.04811031664964249, "grad_norm": 1.5518824824200352, "learning_rate": 1.9982860001952735e-05, "loss": 0.7881, "step": 471 }, { "epoch": 0.048212461695607764, "grad_norm": 1.6189876512945427, "learning_rate": 1.9982665838691396e-05, "loss": 0.7223, "step": 472 }, { "epoch": 0.048314606741573035, "grad_norm": 1.5862052439655876, "learning_rate": 1.998247058282083e-05, "loss": 0.6951, "step": 473 }, { "epoch": 0.048416751787538306, "grad_norm": 1.664954892936885, "learning_rate": 1.9982274234362414e-05, "loss": 0.7931, "step": 474 }, { "epoch": 0.04851889683350358, "grad_norm": 1.713903726048864, "learning_rate": 1.998207679333763e-05, "loss": 0.7588, "step": 475 }, { "epoch": 0.04862104187946885, "grad_norm": 1.682176230758614, "learning_rate": 1.9981878259768094e-05, "loss": 0.8976, "step": 476 }, { "epoch": 0.04872318692543412, "grad_norm": 1.9723092918071983, "learning_rate": 1.998167863367553e-05, "loss": 0.7619, "step": 477 }, { "epoch": 0.048825331971399384, "grad_norm": 1.599296990545799, "learning_rate": 1.9981477915081794e-05, "loss": 0.7556, "step": 478 }, { "epoch": 0.048927477017364655, "grad_norm": 1.3960674296691826, "learning_rate": 1.9981276104008848e-05, "loss": 0.7784, "step": 479 }, { "epoch": 0.049029622063329927, "grad_norm": 1.6407098154989128, "learning_rate": 1.9981073200478787e-05, "loss": 0.7919, "step": 480 }, { "epoch": 0.0491317671092952, "grad_norm": 1.595878767512802, "learning_rate": 1.9980869204513814e-05, "loss": 0.781, "step": 481 }, { "epoch": 0.04923391215526047, "grad_norm": 1.573015042069417, "learning_rate": 1.9980664116136255e-05, "loss": 0.8291, "step": 482 }, { "epoch": 0.04933605720122574, "grad_norm": 1.6073490789153841, "learning_rate": 1.9980457935368565e-05, "loss": 0.6668, "step": 483 }, { "epoch": 0.04943820224719101, "grad_norm": 1.5462474517851519, "learning_rate": 1.99802506622333e-05, "loss": 0.6628, "step": 484 }, { "epoch": 0.04954034729315628, "grad_norm": 1.6142670346195724, "learning_rate": 1.9980042296753158e-05, "loss": 0.7874, "step": 485 }, { "epoch": 0.049642492339121554, "grad_norm": 1.6400766316865425, "learning_rate": 1.9979832838950937e-05, "loss": 0.747, "step": 486 }, { "epoch": 0.049744637385086825, "grad_norm": 1.416262241420444, "learning_rate": 1.9979622288849563e-05, "loss": 0.6473, "step": 487 }, { "epoch": 0.049846782431052096, "grad_norm": 1.6284847907731343, "learning_rate": 1.9979410646472084e-05, "loss": 0.7313, "step": 488 }, { "epoch": 0.04994892747701737, "grad_norm": 1.5246099600341796, "learning_rate": 1.997919791184166e-05, "loss": 0.6147, "step": 489 }, { "epoch": 0.05005107252298264, "grad_norm": 1.5482113841987348, "learning_rate": 1.9978984084981578e-05, "loss": 0.7131, "step": 490 }, { "epoch": 0.0501532175689479, "grad_norm": 1.5261039055113574, "learning_rate": 1.997876916591524e-05, "loss": 0.8526, "step": 491 }, { "epoch": 0.050255362614913174, "grad_norm": 1.5850472171550933, "learning_rate": 1.997855315466617e-05, "loss": 0.8454, "step": 492 }, { "epoch": 0.050357507660878445, "grad_norm": 1.635046304306875, "learning_rate": 1.9978336051258012e-05, "loss": 0.7252, "step": 493 }, { "epoch": 0.050459652706843716, "grad_norm": 1.8475031041628454, "learning_rate": 1.9978117855714524e-05, "loss": 0.8357, "step": 494 }, { "epoch": 0.05056179775280899, "grad_norm": 1.5477499977293454, "learning_rate": 1.9977898568059592e-05, "loss": 0.7261, "step": 495 }, { "epoch": 0.05066394279877426, "grad_norm": 1.5736238463668162, "learning_rate": 1.9977678188317213e-05, "loss": 0.7558, "step": 496 }, { "epoch": 0.05076608784473953, "grad_norm": 1.750954972848195, "learning_rate": 1.997745671651151e-05, "loss": 0.8026, "step": 497 }, { "epoch": 0.0508682328907048, "grad_norm": 1.7538040961994257, "learning_rate": 1.9977234152666723e-05, "loss": 0.768, "step": 498 }, { "epoch": 0.05097037793667007, "grad_norm": 1.49839006573758, "learning_rate": 1.997701049680721e-05, "loss": 0.7114, "step": 499 }, { "epoch": 0.05107252298263534, "grad_norm": 1.487862582013371, "learning_rate": 1.997678574895746e-05, "loss": 0.7208, "step": 500 }, { "epoch": 0.051174668028600614, "grad_norm": 1.4400234174710935, "learning_rate": 1.9976559909142057e-05, "loss": 0.6979, "step": 501 }, { "epoch": 0.051276813074565886, "grad_norm": 1.646071259640487, "learning_rate": 1.997633297738573e-05, "loss": 0.7387, "step": 502 }, { "epoch": 0.05137895812053116, "grad_norm": 1.5022436848759284, "learning_rate": 1.997610495371331e-05, "loss": 0.6309, "step": 503 }, { "epoch": 0.05148110316649643, "grad_norm": 1.6163719101583476, "learning_rate": 1.9975875838149758e-05, "loss": 0.8308, "step": 504 }, { "epoch": 0.05158324821246169, "grad_norm": 1.5272768152282914, "learning_rate": 1.9975645630720152e-05, "loss": 0.7628, "step": 505 }, { "epoch": 0.051685393258426963, "grad_norm": 1.4855036847637648, "learning_rate": 1.9975414331449684e-05, "loss": 0.7701, "step": 506 }, { "epoch": 0.051787538304392235, "grad_norm": 1.5871173934837504, "learning_rate": 1.9975181940363675e-05, "loss": 0.7707, "step": 507 }, { "epoch": 0.051889683350357506, "grad_norm": 1.6252297582884947, "learning_rate": 1.997494845748756e-05, "loss": 0.7864, "step": 508 }, { "epoch": 0.05199182839632278, "grad_norm": 1.6982028690022921, "learning_rate": 1.9974713882846885e-05, "loss": 0.7825, "step": 509 }, { "epoch": 0.05209397344228805, "grad_norm": 1.4933623426526994, "learning_rate": 1.9974478216467333e-05, "loss": 0.8042, "step": 510 }, { "epoch": 0.05219611848825332, "grad_norm": 1.5149481162013883, "learning_rate": 1.99742414583747e-05, "loss": 0.7284, "step": 511 }, { "epoch": 0.05229826353421859, "grad_norm": 1.631588226488018, "learning_rate": 1.997400360859489e-05, "loss": 0.7637, "step": 512 }, { "epoch": 0.05240040858018386, "grad_norm": 1.6478162754339205, "learning_rate": 1.9973764667153944e-05, "loss": 0.6624, "step": 513 }, { "epoch": 0.05250255362614913, "grad_norm": 1.557462708731741, "learning_rate": 1.9973524634078012e-05, "loss": 0.7373, "step": 514 }, { "epoch": 0.052604698672114404, "grad_norm": 1.636878955660981, "learning_rate": 1.9973283509393364e-05, "loss": 0.7685, "step": 515 }, { "epoch": 0.052706843718079675, "grad_norm": 1.523844984732845, "learning_rate": 1.9973041293126392e-05, "loss": 0.7082, "step": 516 }, { "epoch": 0.052808988764044947, "grad_norm": 1.6214690089363601, "learning_rate": 1.997279798530361e-05, "loss": 0.8186, "step": 517 }, { "epoch": 0.05291113381001022, "grad_norm": 1.5019751681067, "learning_rate": 1.997255358595164e-05, "loss": 0.7605, "step": 518 }, { "epoch": 0.05301327885597548, "grad_norm": 1.8309363486052423, "learning_rate": 1.997230809509724e-05, "loss": 0.7709, "step": 519 }, { "epoch": 0.05311542390194075, "grad_norm": 1.572658511634003, "learning_rate": 1.9972061512767276e-05, "loss": 0.8028, "step": 520 }, { "epoch": 0.053217568947906024, "grad_norm": 1.6805782612053086, "learning_rate": 1.9971813838988736e-05, "loss": 0.8838, "step": 521 }, { "epoch": 0.053319713993871296, "grad_norm": 1.4313241773408503, "learning_rate": 1.9971565073788728e-05, "loss": 0.6039, "step": 522 }, { "epoch": 0.05342185903983657, "grad_norm": 1.5878763956757955, "learning_rate": 1.997131521719448e-05, "loss": 0.843, "step": 523 }, { "epoch": 0.05352400408580184, "grad_norm": 1.5120214772681129, "learning_rate": 1.9971064269233343e-05, "loss": 0.6371, "step": 524 }, { "epoch": 0.05362614913176711, "grad_norm": 1.4288915245762615, "learning_rate": 1.9970812229932777e-05, "loss": 0.7581, "step": 525 }, { "epoch": 0.05372829417773238, "grad_norm": 1.5143270955064858, "learning_rate": 1.997055909932037e-05, "loss": 0.7561, "step": 526 }, { "epoch": 0.05383043922369765, "grad_norm": 1.6536682572378336, "learning_rate": 1.9970304877423827e-05, "loss": 0.7383, "step": 527 }, { "epoch": 0.05393258426966292, "grad_norm": 1.4776304363949475, "learning_rate": 1.9970049564270975e-05, "loss": 0.7054, "step": 528 }, { "epoch": 0.054034729315628194, "grad_norm": 1.523494785377469, "learning_rate": 1.9969793159889758e-05, "loss": 0.698, "step": 529 }, { "epoch": 0.054136874361593465, "grad_norm": 1.620583783092881, "learning_rate": 1.9969535664308237e-05, "loss": 0.8122, "step": 530 }, { "epoch": 0.054239019407558736, "grad_norm": 1.5368145123426755, "learning_rate": 1.9969277077554597e-05, "loss": 0.7826, "step": 531 }, { "epoch": 0.05434116445352401, "grad_norm": 1.5291641763068498, "learning_rate": 1.996901739965714e-05, "loss": 0.8057, "step": 532 }, { "epoch": 0.05444330949948927, "grad_norm": 1.4092524903000994, "learning_rate": 1.9968756630644287e-05, "loss": 0.7227, "step": 533 }, { "epoch": 0.05454545454545454, "grad_norm": 1.5490642289609866, "learning_rate": 1.996849477054458e-05, "loss": 0.607, "step": 534 }, { "epoch": 0.054647599591419814, "grad_norm": 1.5506759266480605, "learning_rate": 1.9968231819386677e-05, "loss": 0.7096, "step": 535 }, { "epoch": 0.054749744637385085, "grad_norm": 1.558974258222842, "learning_rate": 1.9967967777199366e-05, "loss": 0.7737, "step": 536 }, { "epoch": 0.054851889683350356, "grad_norm": 1.6078716807902478, "learning_rate": 1.9967702644011538e-05, "loss": 0.8519, "step": 537 }, { "epoch": 0.05495403472931563, "grad_norm": 1.532917311097514, "learning_rate": 1.996743641985222e-05, "loss": 0.7292, "step": 538 }, { "epoch": 0.0550561797752809, "grad_norm": 1.5539661819233375, "learning_rate": 1.996716910475054e-05, "loss": 0.8819, "step": 539 }, { "epoch": 0.05515832482124617, "grad_norm": 1.609369477387334, "learning_rate": 1.9966900698735764e-05, "loss": 0.779, "step": 540 }, { "epoch": 0.05526046986721144, "grad_norm": 2.094831186753633, "learning_rate": 1.996663120183727e-05, "loss": 0.7686, "step": 541 }, { "epoch": 0.05536261491317671, "grad_norm": 1.5550813485343937, "learning_rate": 1.996636061408455e-05, "loss": 0.8563, "step": 542 }, { "epoch": 0.055464759959141983, "grad_norm": 1.769620850762701, "learning_rate": 1.996608893550722e-05, "loss": 0.837, "step": 543 }, { "epoch": 0.055566905005107255, "grad_norm": 1.4468661425150449, "learning_rate": 1.996581616613502e-05, "loss": 0.6905, "step": 544 }, { "epoch": 0.055669050051072526, "grad_norm": 1.550372147968441, "learning_rate": 1.99655423059978e-05, "loss": 0.7644, "step": 545 }, { "epoch": 0.0557711950970378, "grad_norm": 1.5846849090314357, "learning_rate": 1.996526735512554e-05, "loss": 0.8444, "step": 546 }, { "epoch": 0.05587334014300306, "grad_norm": 1.5786291793989105, "learning_rate": 1.9964991313548326e-05, "loss": 0.805, "step": 547 }, { "epoch": 0.05597548518896833, "grad_norm": 1.511048158131181, "learning_rate": 1.9964714181296374e-05, "loss": 0.7388, "step": 548 }, { "epoch": 0.056077630234933604, "grad_norm": 1.6033130407298832, "learning_rate": 1.9964435958400016e-05, "loss": 0.7402, "step": 549 }, { "epoch": 0.056179775280898875, "grad_norm": 1.644533878086931, "learning_rate": 1.9964156644889707e-05, "loss": 0.826, "step": 550 }, { "epoch": 0.056281920326864146, "grad_norm": 1.5918361700773669, "learning_rate": 1.9963876240796015e-05, "loss": 0.802, "step": 551 }, { "epoch": 0.05638406537282942, "grad_norm": 1.5934854457855434, "learning_rate": 1.996359474614963e-05, "loss": 0.7374, "step": 552 }, { "epoch": 0.05648621041879469, "grad_norm": 1.545601552720927, "learning_rate": 1.9963312160981365e-05, "loss": 0.7748, "step": 553 }, { "epoch": 0.05658835546475996, "grad_norm": 1.531004944122486, "learning_rate": 1.9963028485322145e-05, "loss": 0.6628, "step": 554 }, { "epoch": 0.05669050051072523, "grad_norm": 1.5441027613915532, "learning_rate": 1.996274371920302e-05, "loss": 0.7104, "step": 555 }, { "epoch": 0.0567926455566905, "grad_norm": 1.5711363595726566, "learning_rate": 1.996245786265516e-05, "loss": 0.8046, "step": 556 }, { "epoch": 0.05689479060265577, "grad_norm": 1.6185700584690819, "learning_rate": 1.9962170915709848e-05, "loss": 0.7038, "step": 557 }, { "epoch": 0.056996935648621044, "grad_norm": 1.6658338054592956, "learning_rate": 1.9961882878398493e-05, "loss": 0.7685, "step": 558 }, { "epoch": 0.057099080694586316, "grad_norm": 1.6462724659163284, "learning_rate": 1.9961593750752623e-05, "loss": 0.7543, "step": 559 }, { "epoch": 0.05720122574055159, "grad_norm": 1.3781033539757612, "learning_rate": 1.9961303532803876e-05, "loss": 0.8265, "step": 560 }, { "epoch": 0.05730337078651685, "grad_norm": 1.7254477092011642, "learning_rate": 1.996101222458403e-05, "loss": 0.8623, "step": 561 }, { "epoch": 0.05740551583248212, "grad_norm": 1.7514153248574444, "learning_rate": 1.996071982612495e-05, "loss": 0.7886, "step": 562 }, { "epoch": 0.05750766087844739, "grad_norm": 1.710241000449227, "learning_rate": 1.996042633745866e-05, "loss": 0.7457, "step": 563 }, { "epoch": 0.057609805924412665, "grad_norm": 1.5905894639715203, "learning_rate": 1.9960131758617267e-05, "loss": 0.8155, "step": 564 }, { "epoch": 0.057711950970377936, "grad_norm": 1.5460268595231133, "learning_rate": 1.9959836089633016e-05, "loss": 0.7128, "step": 565 }, { "epoch": 0.05781409601634321, "grad_norm": 1.5691851861406998, "learning_rate": 1.9959539330538274e-05, "loss": 0.8104, "step": 566 }, { "epoch": 0.05791624106230848, "grad_norm": 1.3747345740424768, "learning_rate": 1.9959241481365516e-05, "loss": 0.7899, "step": 567 }, { "epoch": 0.05801838610827375, "grad_norm": 1.5894366067354702, "learning_rate": 1.9958942542147342e-05, "loss": 0.7379, "step": 568 }, { "epoch": 0.05812053115423902, "grad_norm": 1.5656637984176518, "learning_rate": 1.9958642512916475e-05, "loss": 0.7532, "step": 569 }, { "epoch": 0.05822267620020429, "grad_norm": 1.6208811836271306, "learning_rate": 1.9958341393705753e-05, "loss": 0.7773, "step": 570 }, { "epoch": 0.05832482124616956, "grad_norm": 1.4510835220131038, "learning_rate": 1.9958039184548124e-05, "loss": 0.7486, "step": 571 }, { "epoch": 0.058426966292134834, "grad_norm": 1.3265444317940052, "learning_rate": 1.995773588547668e-05, "loss": 0.6078, "step": 572 }, { "epoch": 0.058529111338100105, "grad_norm": 1.7889459854017975, "learning_rate": 1.9957431496524603e-05, "loss": 0.7607, "step": 573 }, { "epoch": 0.05863125638406537, "grad_norm": 1.6092639296922533, "learning_rate": 1.995712601772522e-05, "loss": 0.8071, "step": 574 }, { "epoch": 0.05873340143003064, "grad_norm": 1.5569820386518356, "learning_rate": 1.995681944911196e-05, "loss": 0.7053, "step": 575 }, { "epoch": 0.05883554647599591, "grad_norm": 1.4914129111365901, "learning_rate": 1.995651179071838e-05, "loss": 0.7801, "step": 576 }, { "epoch": 0.05893769152196118, "grad_norm": 1.5695007891508992, "learning_rate": 1.995620304257815e-05, "loss": 0.6953, "step": 577 }, { "epoch": 0.059039836567926454, "grad_norm": 1.4422134060583685, "learning_rate": 1.9955893204725062e-05, "loss": 0.7145, "step": 578 }, { "epoch": 0.059141981613891725, "grad_norm": 1.6665487085279531, "learning_rate": 1.9955582277193036e-05, "loss": 0.7044, "step": 579 }, { "epoch": 0.059244126659857, "grad_norm": 1.7181099967750908, "learning_rate": 1.9955270260016096e-05, "loss": 0.776, "step": 580 }, { "epoch": 0.05934627170582227, "grad_norm": 1.590228139316221, "learning_rate": 1.995495715322839e-05, "loss": 0.6424, "step": 581 }, { "epoch": 0.05944841675178754, "grad_norm": 1.589196741933278, "learning_rate": 1.9954642956864198e-05, "loss": 0.766, "step": 582 }, { "epoch": 0.05955056179775281, "grad_norm": 1.6068330611078379, "learning_rate": 1.9954327670957898e-05, "loss": 0.8129, "step": 583 }, { "epoch": 0.05965270684371808, "grad_norm": 1.5243254401184279, "learning_rate": 1.9954011295544004e-05, "loss": 0.8694, "step": 584 }, { "epoch": 0.05975485188968335, "grad_norm": 1.5752865210891498, "learning_rate": 1.9953693830657143e-05, "loss": 0.748, "step": 585 }, { "epoch": 0.059856996935648624, "grad_norm": 1.4165480654836802, "learning_rate": 1.9953375276332064e-05, "loss": 0.6782, "step": 586 }, { "epoch": 0.059959141981613895, "grad_norm": 1.6993650859970464, "learning_rate": 1.9953055632603627e-05, "loss": 0.7063, "step": 587 }, { "epoch": 0.06006128702757916, "grad_norm": 1.777596350073618, "learning_rate": 1.995273489950682e-05, "loss": 0.7054, "step": 588 }, { "epoch": 0.06016343207354443, "grad_norm": 1.6533109003456685, "learning_rate": 1.995241307707675e-05, "loss": 0.755, "step": 589 }, { "epoch": 0.0602655771195097, "grad_norm": 1.5631367505558615, "learning_rate": 1.995209016534864e-05, "loss": 0.7397, "step": 590 }, { "epoch": 0.06036772216547497, "grad_norm": 1.64619328578527, "learning_rate": 1.9951766164357827e-05, "loss": 0.7568, "step": 591 }, { "epoch": 0.060469867211440244, "grad_norm": 1.4740392958486426, "learning_rate": 1.995144107413978e-05, "loss": 0.7219, "step": 592 }, { "epoch": 0.060572012257405515, "grad_norm": 1.6425538694037616, "learning_rate": 1.995111489473008e-05, "loss": 0.8175, "step": 593 }, { "epoch": 0.060674157303370786, "grad_norm": 1.6721734378179682, "learning_rate": 1.9950787626164424e-05, "loss": 0.6805, "step": 594 }, { "epoch": 0.06077630234933606, "grad_norm": 1.6541518838558171, "learning_rate": 1.9950459268478632e-05, "loss": 0.8255, "step": 595 }, { "epoch": 0.06087844739530133, "grad_norm": 1.5076635760283956, "learning_rate": 1.9950129821708644e-05, "loss": 0.7373, "step": 596 }, { "epoch": 0.0609805924412666, "grad_norm": 1.4708847455718552, "learning_rate": 1.994979928589052e-05, "loss": 0.7017, "step": 597 }, { "epoch": 0.06108273748723187, "grad_norm": 1.4735246989363984, "learning_rate": 1.9949467661060435e-05, "loss": 0.6551, "step": 598 }, { "epoch": 0.06118488253319714, "grad_norm": 1.620560433529078, "learning_rate": 1.9949134947254687e-05, "loss": 0.8583, "step": 599 }, { "epoch": 0.06128702757916241, "grad_norm": 1.5202001725670053, "learning_rate": 1.994880114450969e-05, "loss": 0.7184, "step": 600 }, { "epoch": 0.061389172625127685, "grad_norm": 1.6874192179963887, "learning_rate": 1.9948466252861982e-05, "loss": 0.6951, "step": 601 }, { "epoch": 0.06149131767109295, "grad_norm": 1.4496333113262838, "learning_rate": 1.9948130272348213e-05, "loss": 0.6575, "step": 602 }, { "epoch": 0.06159346271705822, "grad_norm": 1.5120439459007018, "learning_rate": 1.9947793203005157e-05, "loss": 0.768, "step": 603 }, { "epoch": 0.06169560776302349, "grad_norm": 1.5308792722786435, "learning_rate": 1.9947455044869716e-05, "loss": 0.7208, "step": 604 }, { "epoch": 0.06179775280898876, "grad_norm": 1.6111333480085013, "learning_rate": 1.9947115797978886e-05, "loss": 0.732, "step": 605 }, { "epoch": 0.061899897854954034, "grad_norm": 1.4558751531569698, "learning_rate": 1.9946775462369806e-05, "loss": 0.6121, "step": 606 }, { "epoch": 0.062002042900919305, "grad_norm": 1.50971799023137, "learning_rate": 1.9946434038079724e-05, "loss": 0.6609, "step": 607 }, { "epoch": 0.062104187946884576, "grad_norm": 1.4495462026953028, "learning_rate": 1.9946091525146015e-05, "loss": 0.6942, "step": 608 }, { "epoch": 0.06220633299284985, "grad_norm": 1.9258324900622656, "learning_rate": 1.994574792360616e-05, "loss": 0.7457, "step": 609 }, { "epoch": 0.06230847803881512, "grad_norm": 1.5265900948180682, "learning_rate": 1.9945403233497766e-05, "loss": 0.6708, "step": 610 }, { "epoch": 0.06241062308478039, "grad_norm": 1.6015515748634863, "learning_rate": 1.994505745485857e-05, "loss": 0.7489, "step": 611 }, { "epoch": 0.06251276813074566, "grad_norm": 1.5696067837819851, "learning_rate": 1.99447105877264e-05, "loss": 0.7903, "step": 612 }, { "epoch": 0.06261491317671093, "grad_norm": 1.5196455189921743, "learning_rate": 1.994436263213924e-05, "loss": 0.7553, "step": 613 }, { "epoch": 0.0627170582226762, "grad_norm": 1.5666728170951127, "learning_rate": 1.994401358813516e-05, "loss": 0.8312, "step": 614 }, { "epoch": 0.06281920326864147, "grad_norm": 1.3917764579346945, "learning_rate": 1.994366345575237e-05, "loss": 0.7198, "step": 615 }, { "epoch": 0.06292134831460675, "grad_norm": 1.5003092381742327, "learning_rate": 1.9943312235029192e-05, "loss": 0.8565, "step": 616 }, { "epoch": 0.06302349336057202, "grad_norm": 1.6050289324744085, "learning_rate": 1.9942959926004065e-05, "loss": 0.7621, "step": 617 }, { "epoch": 0.06312563840653729, "grad_norm": 1.4683720072335726, "learning_rate": 1.9942606528715547e-05, "loss": 0.6446, "step": 618 }, { "epoch": 0.06322778345250256, "grad_norm": 1.634182783379614, "learning_rate": 1.9942252043202325e-05, "loss": 0.7343, "step": 619 }, { "epoch": 0.06332992849846783, "grad_norm": 1.5269348810724157, "learning_rate": 1.994189646950319e-05, "loss": 0.7183, "step": 620 }, { "epoch": 0.0634320735444331, "grad_norm": 1.6680220462987225, "learning_rate": 1.9941539807657064e-05, "loss": 0.7474, "step": 621 }, { "epoch": 0.06353421859039837, "grad_norm": 1.6384141210354473, "learning_rate": 1.994118205770298e-05, "loss": 0.7789, "step": 622 }, { "epoch": 0.06363636363636363, "grad_norm": 1.6344947011807236, "learning_rate": 1.9940823219680102e-05, "loss": 0.7822, "step": 623 }, { "epoch": 0.0637385086823289, "grad_norm": 1.5476356635630524, "learning_rate": 1.99404632936277e-05, "loss": 0.693, "step": 624 }, { "epoch": 0.06384065372829417, "grad_norm": 1.5978943230502578, "learning_rate": 1.9940102279585164e-05, "loss": 0.746, "step": 625 }, { "epoch": 0.06394279877425944, "grad_norm": 1.5417716602283837, "learning_rate": 1.9939740177592015e-05, "loss": 0.7295, "step": 626 }, { "epoch": 0.06404494382022471, "grad_norm": 1.637605439818522, "learning_rate": 1.9939376987687883e-05, "loss": 0.7761, "step": 627 }, { "epoch": 0.06414708886618999, "grad_norm": 1.6837369991094335, "learning_rate": 1.9939012709912512e-05, "loss": 0.7225, "step": 628 }, { "epoch": 0.06424923391215526, "grad_norm": 1.5839454603856955, "learning_rate": 1.9938647344305782e-05, "loss": 0.7269, "step": 629 }, { "epoch": 0.06435137895812053, "grad_norm": 1.6013596310684362, "learning_rate": 1.993828089090768e-05, "loss": 0.7591, "step": 630 }, { "epoch": 0.0644535240040858, "grad_norm": 1.6984502816932658, "learning_rate": 1.993791334975831e-05, "loss": 0.8338, "step": 631 }, { "epoch": 0.06455566905005107, "grad_norm": 1.5903603697224313, "learning_rate": 1.9937544720897907e-05, "loss": 0.8551, "step": 632 }, { "epoch": 0.06465781409601634, "grad_norm": 1.7649648243138163, "learning_rate": 1.9937175004366812e-05, "loss": 0.8415, "step": 633 }, { "epoch": 0.06475995914198161, "grad_norm": 1.5726902291857323, "learning_rate": 1.9936804200205496e-05, "loss": 0.7818, "step": 634 }, { "epoch": 0.06486210418794688, "grad_norm": 1.674714523420893, "learning_rate": 1.9936432308454537e-05, "loss": 0.8311, "step": 635 }, { "epoch": 0.06496424923391216, "grad_norm": 1.6467588792381955, "learning_rate": 1.993605932915464e-05, "loss": 0.6923, "step": 636 }, { "epoch": 0.06506639427987743, "grad_norm": 1.509599656060883, "learning_rate": 1.9935685262346634e-05, "loss": 0.7742, "step": 637 }, { "epoch": 0.0651685393258427, "grad_norm": 1.4578257392850222, "learning_rate": 1.9935310108071453e-05, "loss": 0.6792, "step": 638 }, { "epoch": 0.06527068437180797, "grad_norm": 1.7251091820920132, "learning_rate": 1.9934933866370162e-05, "loss": 0.9141, "step": 639 }, { "epoch": 0.06537282941777324, "grad_norm": 1.7072827191149216, "learning_rate": 1.9934556537283946e-05, "loss": 0.7508, "step": 640 }, { "epoch": 0.06547497446373851, "grad_norm": 1.8231793662869957, "learning_rate": 1.9934178120854095e-05, "loss": 0.8053, "step": 641 }, { "epoch": 0.06557711950970378, "grad_norm": 1.301354381951905, "learning_rate": 1.9933798617122025e-05, "loss": 0.7137, "step": 642 }, { "epoch": 0.06567926455566905, "grad_norm": 1.600505738452158, "learning_rate": 1.9933418026129286e-05, "loss": 0.7759, "step": 643 }, { "epoch": 0.06578140960163432, "grad_norm": 1.5899448311873305, "learning_rate": 1.993303634791752e-05, "loss": 0.8273, "step": 644 }, { "epoch": 0.0658835546475996, "grad_norm": 1.6798411521996006, "learning_rate": 1.9932653582528517e-05, "loss": 0.7645, "step": 645 }, { "epoch": 0.06598569969356487, "grad_norm": 1.7277547870483982, "learning_rate": 1.9932269730004155e-05, "loss": 0.7776, "step": 646 }, { "epoch": 0.06608784473953014, "grad_norm": 1.5878907315721582, "learning_rate": 1.9931884790386454e-05, "loss": 0.8447, "step": 647 }, { "epoch": 0.06618998978549541, "grad_norm": 1.6000326237782907, "learning_rate": 1.9931498763717548e-05, "loss": 0.7636, "step": 648 }, { "epoch": 0.06629213483146068, "grad_norm": 1.6543339803988673, "learning_rate": 1.9931111650039687e-05, "loss": 0.6744, "step": 649 }, { "epoch": 0.06639427987742594, "grad_norm": 1.472374993391623, "learning_rate": 1.9930723449395236e-05, "loss": 0.7685, "step": 650 }, { "epoch": 0.06649642492339121, "grad_norm": 1.6415638653209483, "learning_rate": 1.993033416182669e-05, "loss": 0.7568, "step": 651 }, { "epoch": 0.06659856996935648, "grad_norm": 1.6010789108313617, "learning_rate": 1.9929943787376652e-05, "loss": 0.7398, "step": 652 }, { "epoch": 0.06670071501532175, "grad_norm": 1.5422819405802102, "learning_rate": 1.9929552326087856e-05, "loss": 0.8178, "step": 653 }, { "epoch": 0.06680286006128702, "grad_norm": 1.5318593065550377, "learning_rate": 1.9929159778003137e-05, "loss": 0.7564, "step": 654 }, { "epoch": 0.0669050051072523, "grad_norm": 1.7590596625204797, "learning_rate": 1.9928766143165466e-05, "loss": 0.7798, "step": 655 }, { "epoch": 0.06700715015321757, "grad_norm": 1.551189663550595, "learning_rate": 1.992837142161793e-05, "loss": 0.8, "step": 656 }, { "epoch": 0.06710929519918284, "grad_norm": 1.606162706518352, "learning_rate": 1.992797561340372e-05, "loss": 0.8859, "step": 657 }, { "epoch": 0.06721144024514811, "grad_norm": 1.558381297421778, "learning_rate": 1.9927578718566173e-05, "loss": 0.6391, "step": 658 }, { "epoch": 0.06731358529111338, "grad_norm": 1.5091285818872757, "learning_rate": 1.9927180737148718e-05, "loss": 0.7152, "step": 659 }, { "epoch": 0.06741573033707865, "grad_norm": 1.4881232449263553, "learning_rate": 1.9926781669194916e-05, "loss": 0.822, "step": 660 }, { "epoch": 0.06751787538304392, "grad_norm": 1.4684678201962496, "learning_rate": 1.992638151474845e-05, "loss": 0.705, "step": 661 }, { "epoch": 0.06762002042900919, "grad_norm": 1.6144178141316767, "learning_rate": 1.9925980273853108e-05, "loss": 0.8541, "step": 662 }, { "epoch": 0.06772216547497446, "grad_norm": 1.4072855254516001, "learning_rate": 1.9925577946552815e-05, "loss": 0.7339, "step": 663 }, { "epoch": 0.06782431052093973, "grad_norm": 1.4531206148226108, "learning_rate": 1.9925174532891604e-05, "loss": 0.6722, "step": 664 }, { "epoch": 0.067926455566905, "grad_norm": 1.6322310197028325, "learning_rate": 1.992477003291363e-05, "loss": 0.8147, "step": 665 }, { "epoch": 0.06802860061287028, "grad_norm": 1.7031047741715521, "learning_rate": 1.992436444666316e-05, "loss": 0.8484, "step": 666 }, { "epoch": 0.06813074565883555, "grad_norm": 1.464101499114788, "learning_rate": 1.992395777418459e-05, "loss": 0.7673, "step": 667 }, { "epoch": 0.06823289070480082, "grad_norm": 1.5529368939218136, "learning_rate": 1.992355001552243e-05, "loss": 0.8471, "step": 668 }, { "epoch": 0.06833503575076609, "grad_norm": 1.7265945033132117, "learning_rate": 1.992314117072131e-05, "loss": 0.7832, "step": 669 }, { "epoch": 0.06843718079673136, "grad_norm": 1.6098369488798387, "learning_rate": 1.9922731239825978e-05, "loss": 0.8438, "step": 670 }, { "epoch": 0.06853932584269663, "grad_norm": 1.557280759518139, "learning_rate": 1.9922320222881303e-05, "loss": 0.8054, "step": 671 }, { "epoch": 0.0686414708886619, "grad_norm": 1.6145992300166256, "learning_rate": 1.9921908119932264e-05, "loss": 0.79, "step": 672 }, { "epoch": 0.06874361593462718, "grad_norm": 1.5615568820912313, "learning_rate": 1.992149493102397e-05, "loss": 0.7661, "step": 673 }, { "epoch": 0.06884576098059245, "grad_norm": 1.4485106388941436, "learning_rate": 1.9921080656201656e-05, "loss": 0.7599, "step": 674 }, { "epoch": 0.06894790602655772, "grad_norm": 1.5911176880846032, "learning_rate": 1.9920665295510644e-05, "loss": 0.7543, "step": 675 }, { "epoch": 0.06905005107252299, "grad_norm": 1.4857460673627971, "learning_rate": 1.992024884899641e-05, "loss": 0.8055, "step": 676 }, { "epoch": 0.06915219611848826, "grad_norm": 1.7098513265507742, "learning_rate": 1.9919831316704528e-05, "loss": 0.7858, "step": 677 }, { "epoch": 0.06925434116445352, "grad_norm": 1.5768377750627027, "learning_rate": 1.9919412698680704e-05, "loss": 0.8942, "step": 678 }, { "epoch": 0.06935648621041879, "grad_norm": 1.5544200792278677, "learning_rate": 1.9918992994970746e-05, "loss": 0.7406, "step": 679 }, { "epoch": 0.06945863125638406, "grad_norm": 1.683868501646567, "learning_rate": 1.9918572205620598e-05, "loss": 0.7079, "step": 680 }, { "epoch": 0.06956077630234933, "grad_norm": 1.6008296559091235, "learning_rate": 1.9918150330676314e-05, "loss": 0.7637, "step": 681 }, { "epoch": 0.0696629213483146, "grad_norm": 1.6282029718071185, "learning_rate": 1.991772737018407e-05, "loss": 0.7462, "step": 682 }, { "epoch": 0.06976506639427987, "grad_norm": 1.5364638687936156, "learning_rate": 1.9917303324190156e-05, "loss": 0.7966, "step": 683 }, { "epoch": 0.06986721144024514, "grad_norm": 1.5769647562019777, "learning_rate": 1.9916878192740987e-05, "loss": 0.7351, "step": 684 }, { "epoch": 0.06996935648621042, "grad_norm": 1.572562450978231, "learning_rate": 1.9916451975883092e-05, "loss": 0.7753, "step": 685 }, { "epoch": 0.07007150153217569, "grad_norm": 1.4796469239160819, "learning_rate": 1.991602467366312e-05, "loss": 0.8036, "step": 686 }, { "epoch": 0.07017364657814096, "grad_norm": 1.7182920829827204, "learning_rate": 1.9915596286127843e-05, "loss": 0.8053, "step": 687 }, { "epoch": 0.07027579162410623, "grad_norm": 1.6979670130254412, "learning_rate": 1.9915166813324145e-05, "loss": 0.831, "step": 688 }, { "epoch": 0.0703779366700715, "grad_norm": 1.5195726888095025, "learning_rate": 1.9914736255299033e-05, "loss": 0.6631, "step": 689 }, { "epoch": 0.07048008171603677, "grad_norm": 1.5128278782252023, "learning_rate": 1.9914304612099633e-05, "loss": 0.6983, "step": 690 }, { "epoch": 0.07058222676200204, "grad_norm": 1.4687556704285503, "learning_rate": 1.9913871883773188e-05, "loss": 0.6949, "step": 691 }, { "epoch": 0.07068437180796731, "grad_norm": 1.3865099430284489, "learning_rate": 1.9913438070367058e-05, "loss": 0.7254, "step": 692 }, { "epoch": 0.07078651685393259, "grad_norm": 1.596453530090198, "learning_rate": 1.9913003171928727e-05, "loss": 0.7507, "step": 693 }, { "epoch": 0.07088866189989786, "grad_norm": 1.53493286244014, "learning_rate": 1.9912567188505796e-05, "loss": 0.7905, "step": 694 }, { "epoch": 0.07099080694586313, "grad_norm": 1.4433145177523503, "learning_rate": 1.991213012014598e-05, "loss": 0.7007, "step": 695 }, { "epoch": 0.0710929519918284, "grad_norm": 1.5678657067346933, "learning_rate": 1.9911691966897118e-05, "loss": 0.8142, "step": 696 }, { "epoch": 0.07119509703779367, "grad_norm": 1.4595333474514265, "learning_rate": 1.9911252728807167e-05, "loss": 0.8165, "step": 697 }, { "epoch": 0.07129724208375894, "grad_norm": 1.5359168885617551, "learning_rate": 1.99108124059242e-05, "loss": 0.769, "step": 698 }, { "epoch": 0.07139938712972421, "grad_norm": 1.6874209864064396, "learning_rate": 1.991037099829642e-05, "loss": 0.7905, "step": 699 }, { "epoch": 0.07150153217568948, "grad_norm": 1.6732078623193523, "learning_rate": 1.9909928505972123e-05, "loss": 0.7971, "step": 700 }, { "epoch": 0.07160367722165475, "grad_norm": 1.8307454778427759, "learning_rate": 1.9909484928999752e-05, "loss": 0.7915, "step": 701 }, { "epoch": 0.07170582226762003, "grad_norm": 1.3265328922254678, "learning_rate": 1.990904026742785e-05, "loss": 0.6835, "step": 702 }, { "epoch": 0.0718079673135853, "grad_norm": 1.5616331071149123, "learning_rate": 1.990859452130509e-05, "loss": 0.6752, "step": 703 }, { "epoch": 0.07191011235955057, "grad_norm": 1.781903509047666, "learning_rate": 1.990814769068026e-05, "loss": 0.7371, "step": 704 }, { "epoch": 0.07201225740551584, "grad_norm": 1.4997407588919809, "learning_rate": 1.9907699775602262e-05, "loss": 0.7359, "step": 705 }, { "epoch": 0.0721144024514811, "grad_norm": 1.6027053774428606, "learning_rate": 1.9907250776120123e-05, "loss": 0.7568, "step": 706 }, { "epoch": 0.07221654749744637, "grad_norm": 1.6407751251477933, "learning_rate": 1.9906800692282983e-05, "loss": 0.9391, "step": 707 }, { "epoch": 0.07231869254341164, "grad_norm": 1.4735190523413029, "learning_rate": 1.990634952414011e-05, "loss": 0.7301, "step": 708 }, { "epoch": 0.07242083758937691, "grad_norm": 1.4884076613314179, "learning_rate": 1.990589727174088e-05, "loss": 0.7576, "step": 709 }, { "epoch": 0.07252298263534218, "grad_norm": 1.3524800634259417, "learning_rate": 1.990544393513479e-05, "loss": 0.6862, "step": 710 }, { "epoch": 0.07262512768130745, "grad_norm": 1.6463373831241659, "learning_rate": 1.9904989514371467e-05, "loss": 0.7797, "step": 711 }, { "epoch": 0.07272727272727272, "grad_norm": 1.5462069913992529, "learning_rate": 1.990453400950064e-05, "loss": 0.8187, "step": 712 }, { "epoch": 0.072829417773238, "grad_norm": 1.699006101490331, "learning_rate": 1.990407742057217e-05, "loss": 0.743, "step": 713 }, { "epoch": 0.07293156281920327, "grad_norm": 1.6061828755109313, "learning_rate": 1.9903619747636022e-05, "loss": 0.7521, "step": 714 }, { "epoch": 0.07303370786516854, "grad_norm": 1.6243691110560676, "learning_rate": 1.99031609907423e-05, "loss": 0.8141, "step": 715 }, { "epoch": 0.07313585291113381, "grad_norm": 1.65902958838249, "learning_rate": 1.9902701149941204e-05, "loss": 0.7947, "step": 716 }, { "epoch": 0.07323799795709908, "grad_norm": 1.7082412412467654, "learning_rate": 1.990224022528307e-05, "loss": 0.8251, "step": 717 }, { "epoch": 0.07334014300306435, "grad_norm": 1.6513869806788732, "learning_rate": 1.9901778216818347e-05, "loss": 0.7945, "step": 718 }, { "epoch": 0.07344228804902962, "grad_norm": 1.4609448055067984, "learning_rate": 1.99013151245976e-05, "loss": 0.8098, "step": 719 }, { "epoch": 0.0735444330949949, "grad_norm": 1.57467649884625, "learning_rate": 1.9900850948671515e-05, "loss": 0.7695, "step": 720 }, { "epoch": 0.07364657814096016, "grad_norm": 1.5724447961707912, "learning_rate": 1.99003856890909e-05, "loss": 0.7731, "step": 721 }, { "epoch": 0.07374872318692544, "grad_norm": 1.4186462889716356, "learning_rate": 1.989991934590667e-05, "loss": 0.7015, "step": 722 }, { "epoch": 0.0738508682328907, "grad_norm": 1.621292148811876, "learning_rate": 1.9899451919169875e-05, "loss": 0.9366, "step": 723 }, { "epoch": 0.07395301327885598, "grad_norm": 1.5438176975374884, "learning_rate": 1.9898983408931668e-05, "loss": 0.7126, "step": 724 }, { "epoch": 0.07405515832482125, "grad_norm": 1.4776689784371302, "learning_rate": 1.9898513815243338e-05, "loss": 0.7993, "step": 725 }, { "epoch": 0.07415730337078652, "grad_norm": 1.6348592038527079, "learning_rate": 1.989804313815627e-05, "loss": 0.8359, "step": 726 }, { "epoch": 0.07425944841675179, "grad_norm": 1.5377131113836813, "learning_rate": 1.9897571377721988e-05, "loss": 0.7586, "step": 727 }, { "epoch": 0.07436159346271706, "grad_norm": 1.586864813250946, "learning_rate": 1.9897098533992122e-05, "loss": 0.7267, "step": 728 }, { "epoch": 0.07446373850868233, "grad_norm": 1.370270570433998, "learning_rate": 1.9896624607018427e-05, "loss": 0.7329, "step": 729 }, { "epoch": 0.0745658835546476, "grad_norm": 1.5477175276987212, "learning_rate": 1.989614959685278e-05, "loss": 0.7014, "step": 730 }, { "epoch": 0.07466802860061288, "grad_norm": 1.6205493800980881, "learning_rate": 1.989567350354716e-05, "loss": 0.7644, "step": 731 }, { "epoch": 0.07477017364657815, "grad_norm": 1.4524553614458884, "learning_rate": 1.9895196327153684e-05, "loss": 0.7321, "step": 732 }, { "epoch": 0.0748723186925434, "grad_norm": 1.5151719588795212, "learning_rate": 1.9894718067724577e-05, "loss": 0.7411, "step": 733 }, { "epoch": 0.07497446373850868, "grad_norm": 1.4766899841660894, "learning_rate": 1.9894238725312186e-05, "loss": 0.8123, "step": 734 }, { "epoch": 0.07507660878447395, "grad_norm": 1.547990257754269, "learning_rate": 1.989375829996897e-05, "loss": 0.7884, "step": 735 }, { "epoch": 0.07517875383043922, "grad_norm": 1.5315344486305633, "learning_rate": 1.989327679174752e-05, "loss": 0.7777, "step": 736 }, { "epoch": 0.07528089887640449, "grad_norm": 1.7029751826087458, "learning_rate": 1.989279420070053e-05, "loss": 0.7959, "step": 737 }, { "epoch": 0.07538304392236976, "grad_norm": 1.4664468437029052, "learning_rate": 1.9892310526880822e-05, "loss": 0.7233, "step": 738 }, { "epoch": 0.07548518896833503, "grad_norm": 1.6436581056046273, "learning_rate": 1.989182577034134e-05, "loss": 0.791, "step": 739 }, { "epoch": 0.0755873340143003, "grad_norm": 1.7126920305247544, "learning_rate": 1.9891339931135137e-05, "loss": 0.8633, "step": 740 }, { "epoch": 0.07568947906026557, "grad_norm": 1.4813937524265346, "learning_rate": 1.989085300931538e-05, "loss": 0.7213, "step": 741 }, { "epoch": 0.07579162410623085, "grad_norm": 1.5995606942956262, "learning_rate": 1.989036500493538e-05, "loss": 0.7798, "step": 742 }, { "epoch": 0.07589376915219612, "grad_norm": 1.5256275244075512, "learning_rate": 1.9889875918048534e-05, "loss": 0.7354, "step": 743 }, { "epoch": 0.07599591419816139, "grad_norm": 1.518060767587871, "learning_rate": 1.9889385748708382e-05, "loss": 0.7424, "step": 744 }, { "epoch": 0.07609805924412666, "grad_norm": 1.6176905243628699, "learning_rate": 1.988889449696857e-05, "loss": 0.7268, "step": 745 }, { "epoch": 0.07620020429009193, "grad_norm": 1.2258361858494313, "learning_rate": 1.9888402162882867e-05, "loss": 0.6337, "step": 746 }, { "epoch": 0.0763023493360572, "grad_norm": 1.4919956686934734, "learning_rate": 1.988790874650516e-05, "loss": 0.6678, "step": 747 }, { "epoch": 0.07640449438202247, "grad_norm": 1.502786516150113, "learning_rate": 1.988741424788945e-05, "loss": 0.732, "step": 748 }, { "epoch": 0.07650663942798774, "grad_norm": 1.602405416370382, "learning_rate": 1.9886918667089864e-05, "loss": 0.7905, "step": 749 }, { "epoch": 0.07660878447395301, "grad_norm": 1.6610767815020735, "learning_rate": 1.988642200416064e-05, "loss": 0.7595, "step": 750 }, { "epoch": 0.07671092951991829, "grad_norm": 1.4912374533280652, "learning_rate": 1.988592425915614e-05, "loss": 0.7594, "step": 751 }, { "epoch": 0.07681307456588356, "grad_norm": 1.4204070325857612, "learning_rate": 1.9885425432130842e-05, "loss": 0.6754, "step": 752 }, { "epoch": 0.07691521961184883, "grad_norm": 1.5317845821517249, "learning_rate": 1.9884925523139347e-05, "loss": 0.6774, "step": 753 }, { "epoch": 0.0770173646578141, "grad_norm": 1.5919564661606977, "learning_rate": 1.9884424532236366e-05, "loss": 0.7577, "step": 754 }, { "epoch": 0.07711950970377937, "grad_norm": 1.6650090508036093, "learning_rate": 1.9883922459476734e-05, "loss": 0.7953, "step": 755 }, { "epoch": 0.07722165474974464, "grad_norm": 1.4379715547000977, "learning_rate": 1.98834193049154e-05, "loss": 0.7142, "step": 756 }, { "epoch": 0.07732379979570991, "grad_norm": 1.4366099066907718, "learning_rate": 1.988291506860744e-05, "loss": 0.7266, "step": 757 }, { "epoch": 0.07742594484167518, "grad_norm": 1.6099513116698367, "learning_rate": 1.988240975060804e-05, "loss": 0.7915, "step": 758 }, { "epoch": 0.07752808988764046, "grad_norm": 1.5486251855676378, "learning_rate": 1.9881903350972508e-05, "loss": 0.8171, "step": 759 }, { "epoch": 0.07763023493360573, "grad_norm": 1.4550210812471185, "learning_rate": 1.9881395869756272e-05, "loss": 0.6751, "step": 760 }, { "epoch": 0.07773237997957098, "grad_norm": 1.4715384750036027, "learning_rate": 1.988088730701487e-05, "loss": 0.5825, "step": 761 }, { "epoch": 0.07783452502553626, "grad_norm": 1.7486606767552886, "learning_rate": 1.988037766280397e-05, "loss": 0.7462, "step": 762 }, { "epoch": 0.07793667007150153, "grad_norm": 1.6997136563404789, "learning_rate": 1.987986693717935e-05, "loss": 0.7463, "step": 763 }, { "epoch": 0.0780388151174668, "grad_norm": 1.6543343326848066, "learning_rate": 1.9879355130196914e-05, "loss": 0.8159, "step": 764 }, { "epoch": 0.07814096016343207, "grad_norm": 1.6581740189588405, "learning_rate": 1.9878842241912672e-05, "loss": 0.75, "step": 765 }, { "epoch": 0.07824310520939734, "grad_norm": 1.480553301449181, "learning_rate": 1.987832827238277e-05, "loss": 0.7625, "step": 766 }, { "epoch": 0.07834525025536261, "grad_norm": 1.6234046309946566, "learning_rate": 1.987781322166345e-05, "loss": 0.7769, "step": 767 }, { "epoch": 0.07844739530132788, "grad_norm": 1.54715934175039, "learning_rate": 1.9877297089811095e-05, "loss": 0.7055, "step": 768 }, { "epoch": 0.07854954034729315, "grad_norm": 1.5143382835940802, "learning_rate": 1.987677987688219e-05, "loss": 0.908, "step": 769 }, { "epoch": 0.07865168539325842, "grad_norm": 1.528983724094074, "learning_rate": 1.9876261582933348e-05, "loss": 0.7632, "step": 770 }, { "epoch": 0.0787538304392237, "grad_norm": 1.4173996927090302, "learning_rate": 1.9875742208021292e-05, "loss": 0.6574, "step": 771 }, { "epoch": 0.07885597548518897, "grad_norm": 1.464518733576873, "learning_rate": 1.9875221752202872e-05, "loss": 0.6784, "step": 772 }, { "epoch": 0.07895812053115424, "grad_norm": 1.3667126138228713, "learning_rate": 1.9874700215535053e-05, "loss": 0.7041, "step": 773 }, { "epoch": 0.07906026557711951, "grad_norm": 1.583922414261538, "learning_rate": 1.9874177598074915e-05, "loss": 0.892, "step": 774 }, { "epoch": 0.07916241062308478, "grad_norm": 1.654368210394596, "learning_rate": 1.9873653899879655e-05, "loss": 0.8314, "step": 775 }, { "epoch": 0.07926455566905005, "grad_norm": 1.455394945038289, "learning_rate": 1.9873129121006602e-05, "loss": 0.674, "step": 776 }, { "epoch": 0.07936670071501532, "grad_norm": 1.428920027582383, "learning_rate": 1.9872603261513184e-05, "loss": 0.6809, "step": 777 }, { "epoch": 0.0794688457609806, "grad_norm": 1.3997409338536504, "learning_rate": 1.9872076321456962e-05, "loss": 0.7237, "step": 778 }, { "epoch": 0.07957099080694587, "grad_norm": 1.4866396602756338, "learning_rate": 1.987154830089561e-05, "loss": 0.6898, "step": 779 }, { "epoch": 0.07967313585291114, "grad_norm": 1.6736638969346522, "learning_rate": 1.9871019199886916e-05, "loss": 0.876, "step": 780 }, { "epoch": 0.07977528089887641, "grad_norm": 1.5023448351180575, "learning_rate": 1.9870489018488793e-05, "loss": 0.7453, "step": 781 }, { "epoch": 0.07987742594484168, "grad_norm": 1.4255061349577813, "learning_rate": 1.9869957756759273e-05, "loss": 0.7273, "step": 782 }, { "epoch": 0.07997957099080695, "grad_norm": 1.5790262378656115, "learning_rate": 1.9869425414756498e-05, "loss": 0.5604, "step": 783 }, { "epoch": 0.08008171603677222, "grad_norm": 1.5083152187830278, "learning_rate": 1.986889199253873e-05, "loss": 0.7644, "step": 784 }, { "epoch": 0.08018386108273749, "grad_norm": 1.5375834404833606, "learning_rate": 1.9868357490164367e-05, "loss": 0.7871, "step": 785 }, { "epoch": 0.08028600612870276, "grad_norm": 1.6478695551781142, "learning_rate": 1.9867821907691894e-05, "loss": 0.771, "step": 786 }, { "epoch": 0.08038815117466803, "grad_norm": 1.7135939688805435, "learning_rate": 1.986728524517994e-05, "loss": 0.8638, "step": 787 }, { "epoch": 0.0804902962206333, "grad_norm": 1.5666635072247177, "learning_rate": 1.986674750268724e-05, "loss": 0.7413, "step": 788 }, { "epoch": 0.08059244126659856, "grad_norm": 1.5624782421230234, "learning_rate": 1.9866208680272653e-05, "loss": 0.7927, "step": 789 }, { "epoch": 0.08069458631256383, "grad_norm": 1.417811274465328, "learning_rate": 1.986566877799515e-05, "loss": 0.763, "step": 790 }, { "epoch": 0.0807967313585291, "grad_norm": 1.5093397415037746, "learning_rate": 1.9865127795913826e-05, "loss": 0.7269, "step": 791 }, { "epoch": 0.08089887640449438, "grad_norm": 1.5983634921869176, "learning_rate": 1.986458573408789e-05, "loss": 0.8614, "step": 792 }, { "epoch": 0.08100102145045965, "grad_norm": 1.4699588960762733, "learning_rate": 1.9864042592576674e-05, "loss": 0.6837, "step": 793 }, { "epoch": 0.08110316649642492, "grad_norm": 1.6686341256597426, "learning_rate": 1.986349837143962e-05, "loss": 0.8628, "step": 794 }, { "epoch": 0.08120531154239019, "grad_norm": 1.7327450098838917, "learning_rate": 1.9862953070736298e-05, "loss": 0.7782, "step": 795 }, { "epoch": 0.08130745658835546, "grad_norm": 1.4915645258091934, "learning_rate": 1.986240669052639e-05, "loss": 0.7639, "step": 796 }, { "epoch": 0.08140960163432073, "grad_norm": 1.682551088860698, "learning_rate": 1.98618592308697e-05, "loss": 0.858, "step": 797 }, { "epoch": 0.081511746680286, "grad_norm": 1.2839623368381212, "learning_rate": 1.9861310691826143e-05, "loss": 0.6939, "step": 798 }, { "epoch": 0.08161389172625128, "grad_norm": 1.4278261582709826, "learning_rate": 1.986076107345576e-05, "loss": 0.7933, "step": 799 }, { "epoch": 0.08171603677221655, "grad_norm": 1.4489440344317464, "learning_rate": 1.9860210375818707e-05, "loss": 0.6521, "step": 800 }, { "epoch": 0.08181818181818182, "grad_norm": 1.4721306914730188, "learning_rate": 1.9859658598975257e-05, "loss": 0.6779, "step": 801 }, { "epoch": 0.08192032686414709, "grad_norm": 1.692738841349463, "learning_rate": 1.9859105742985803e-05, "loss": 0.8404, "step": 802 }, { "epoch": 0.08202247191011236, "grad_norm": 1.5552306293363731, "learning_rate": 1.9858551807910856e-05, "loss": 0.7664, "step": 803 }, { "epoch": 0.08212461695607763, "grad_norm": 1.7757978074082381, "learning_rate": 1.9857996793811046e-05, "loss": 0.8659, "step": 804 }, { "epoch": 0.0822267620020429, "grad_norm": 1.360922155263416, "learning_rate": 1.9857440700747118e-05, "loss": 0.6373, "step": 805 }, { "epoch": 0.08232890704800817, "grad_norm": 1.7338778585528292, "learning_rate": 1.9856883528779934e-05, "loss": 0.8424, "step": 806 }, { "epoch": 0.08243105209397344, "grad_norm": 1.4599206311539763, "learning_rate": 1.9856325277970484e-05, "loss": 0.63, "step": 807 }, { "epoch": 0.08253319713993872, "grad_norm": 1.5647437015477845, "learning_rate": 1.9855765948379858e-05, "loss": 0.735, "step": 808 }, { "epoch": 0.08263534218590399, "grad_norm": 1.4430131020274404, "learning_rate": 1.9855205540069288e-05, "loss": 0.7342, "step": 809 }, { "epoch": 0.08273748723186926, "grad_norm": 1.6340244378319413, "learning_rate": 1.9854644053100097e-05, "loss": 0.8719, "step": 810 }, { "epoch": 0.08283963227783453, "grad_norm": 1.4633354893425299, "learning_rate": 1.985408148753375e-05, "loss": 0.6136, "step": 811 }, { "epoch": 0.0829417773237998, "grad_norm": 1.3820516118314197, "learning_rate": 1.9853517843431823e-05, "loss": 0.8004, "step": 812 }, { "epoch": 0.08304392236976507, "grad_norm": 1.3954143673228985, "learning_rate": 1.9852953120855995e-05, "loss": 0.754, "step": 813 }, { "epoch": 0.08314606741573034, "grad_norm": 1.461763770126284, "learning_rate": 1.9852387319868085e-05, "loss": 0.7728, "step": 814 }, { "epoch": 0.08324821246169561, "grad_norm": 1.5594550676812877, "learning_rate": 1.9851820440530018e-05, "loss": 0.7631, "step": 815 }, { "epoch": 0.08335035750766087, "grad_norm": 1.4121140136898949, "learning_rate": 1.9851252482903837e-05, "loss": 0.706, "step": 816 }, { "epoch": 0.08345250255362614, "grad_norm": 1.6768742025591088, "learning_rate": 1.9850683447051707e-05, "loss": 0.7596, "step": 817 }, { "epoch": 0.08355464759959141, "grad_norm": 1.4840062183728497, "learning_rate": 1.9850113333035913e-05, "loss": 0.7564, "step": 818 }, { "epoch": 0.08365679264555669, "grad_norm": 1.5074222259461636, "learning_rate": 1.9849542140918847e-05, "loss": 0.7703, "step": 819 }, { "epoch": 0.08375893769152196, "grad_norm": 1.5065764301217976, "learning_rate": 1.984896987076303e-05, "loss": 0.7261, "step": 820 }, { "epoch": 0.08386108273748723, "grad_norm": 1.4425233733404113, "learning_rate": 1.98483965226311e-05, "loss": 0.7128, "step": 821 }, { "epoch": 0.0839632277834525, "grad_norm": 1.4782159146197773, "learning_rate": 1.9847822096585805e-05, "loss": 0.7477, "step": 822 }, { "epoch": 0.08406537282941777, "grad_norm": 1.5409926640930578, "learning_rate": 1.9847246592690022e-05, "loss": 0.7657, "step": 823 }, { "epoch": 0.08416751787538304, "grad_norm": 1.4147519294055058, "learning_rate": 1.9846670011006735e-05, "loss": 0.6879, "step": 824 }, { "epoch": 0.08426966292134831, "grad_norm": 1.4840139264747112, "learning_rate": 1.9846092351599054e-05, "loss": 0.6602, "step": 825 }, { "epoch": 0.08437180796731358, "grad_norm": 1.7122099507533417, "learning_rate": 1.9845513614530203e-05, "loss": 0.8124, "step": 826 }, { "epoch": 0.08447395301327885, "grad_norm": 1.7229563796040182, "learning_rate": 1.9844933799863526e-05, "loss": 0.7928, "step": 827 }, { "epoch": 0.08457609805924413, "grad_norm": 1.4135953103967285, "learning_rate": 1.9844352907662486e-05, "loss": 0.7566, "step": 828 }, { "epoch": 0.0846782431052094, "grad_norm": 1.6710072642746758, "learning_rate": 1.9843770937990658e-05, "loss": 0.768, "step": 829 }, { "epoch": 0.08478038815117467, "grad_norm": 1.525259579103551, "learning_rate": 1.984318789091174e-05, "loss": 0.7441, "step": 830 }, { "epoch": 0.08488253319713994, "grad_norm": 1.4840255779132323, "learning_rate": 1.984260376648955e-05, "loss": 0.7261, "step": 831 }, { "epoch": 0.08498467824310521, "grad_norm": 1.806837967751418, "learning_rate": 1.984201856478802e-05, "loss": 0.7181, "step": 832 }, { "epoch": 0.08508682328907048, "grad_norm": 1.631149606829919, "learning_rate": 1.9841432285871198e-05, "loss": 0.7397, "step": 833 }, { "epoch": 0.08518896833503575, "grad_norm": 1.4496522135636345, "learning_rate": 1.984084492980325e-05, "loss": 0.7489, "step": 834 }, { "epoch": 0.08529111338100102, "grad_norm": 1.5294238044507162, "learning_rate": 1.9840256496648468e-05, "loss": 0.7066, "step": 835 }, { "epoch": 0.0853932584269663, "grad_norm": 1.7448167131128816, "learning_rate": 1.9839666986471256e-05, "loss": 0.8274, "step": 836 }, { "epoch": 0.08549540347293157, "grad_norm": 1.590341784894217, "learning_rate": 1.9839076399336138e-05, "loss": 0.8077, "step": 837 }, { "epoch": 0.08559754851889684, "grad_norm": 1.5733700494905307, "learning_rate": 1.983848473530775e-05, "loss": 0.7376, "step": 838 }, { "epoch": 0.08569969356486211, "grad_norm": 1.5794162519850896, "learning_rate": 1.983789199445085e-05, "loss": 0.798, "step": 839 }, { "epoch": 0.08580183861082738, "grad_norm": 1.6813426649202494, "learning_rate": 1.9837298176830317e-05, "loss": 0.7654, "step": 840 }, { "epoch": 0.08590398365679265, "grad_norm": 1.7162930259840956, "learning_rate": 1.9836703282511137e-05, "loss": 0.8303, "step": 841 }, { "epoch": 0.08600612870275792, "grad_norm": 1.4623338096532696, "learning_rate": 1.9836107311558434e-05, "loss": 0.7273, "step": 842 }, { "epoch": 0.0861082737487232, "grad_norm": 1.5255223717546944, "learning_rate": 1.9835510264037426e-05, "loss": 0.7602, "step": 843 }, { "epoch": 0.08621041879468845, "grad_norm": 1.6161301368382748, "learning_rate": 1.983491214001347e-05, "loss": 0.6849, "step": 844 }, { "epoch": 0.08631256384065372, "grad_norm": 1.7714352477479653, "learning_rate": 1.9834312939552022e-05, "loss": 0.7908, "step": 845 }, { "epoch": 0.086414708886619, "grad_norm": 1.5888815693134404, "learning_rate": 1.983371266271867e-05, "loss": 0.749, "step": 846 }, { "epoch": 0.08651685393258426, "grad_norm": 1.356413066008791, "learning_rate": 1.9833111309579112e-05, "loss": 0.6391, "step": 847 }, { "epoch": 0.08661899897854954, "grad_norm": 1.6019716048238979, "learning_rate": 1.983250888019917e-05, "loss": 0.7782, "step": 848 }, { "epoch": 0.0867211440245148, "grad_norm": 1.5384344743887846, "learning_rate": 1.983190537464478e-05, "loss": 0.7433, "step": 849 }, { "epoch": 0.08682328907048008, "grad_norm": 1.4962183146123815, "learning_rate": 1.9831300792981994e-05, "loss": 0.7768, "step": 850 }, { "epoch": 0.08692543411644535, "grad_norm": 1.368787082962646, "learning_rate": 1.9830695135276982e-05, "loss": 0.7428, "step": 851 }, { "epoch": 0.08702757916241062, "grad_norm": 1.634690284174617, "learning_rate": 1.9830088401596036e-05, "loss": 0.6678, "step": 852 }, { "epoch": 0.08712972420837589, "grad_norm": 1.6140357483361458, "learning_rate": 1.9829480592005566e-05, "loss": 0.6981, "step": 853 }, { "epoch": 0.08723186925434116, "grad_norm": 1.4094843826197572, "learning_rate": 1.9828871706572096e-05, "loss": 0.6323, "step": 854 }, { "epoch": 0.08733401430030643, "grad_norm": 1.686124900906538, "learning_rate": 1.9828261745362262e-05, "loss": 0.7347, "step": 855 }, { "epoch": 0.0874361593462717, "grad_norm": 1.4063934258439823, "learning_rate": 1.9827650708442836e-05, "loss": 0.7882, "step": 856 }, { "epoch": 0.08753830439223698, "grad_norm": 1.5190213871997271, "learning_rate": 1.9827038595880688e-05, "loss": 0.8105, "step": 857 }, { "epoch": 0.08764044943820225, "grad_norm": 1.4380671354280348, "learning_rate": 1.982642540774281e-05, "loss": 0.6453, "step": 858 }, { "epoch": 0.08774259448416752, "grad_norm": 1.5025846569855732, "learning_rate": 1.9825811144096333e-05, "loss": 0.8155, "step": 859 }, { "epoch": 0.08784473953013279, "grad_norm": 1.4676118109568428, "learning_rate": 1.9825195805008476e-05, "loss": 0.7929, "step": 860 }, { "epoch": 0.08794688457609806, "grad_norm": 1.3751748279834934, "learning_rate": 1.9824579390546586e-05, "loss": 0.6421, "step": 861 }, { "epoch": 0.08804902962206333, "grad_norm": 1.4990570842691318, "learning_rate": 1.982396190077814e-05, "loss": 0.7473, "step": 862 }, { "epoch": 0.0881511746680286, "grad_norm": 1.3410887995637522, "learning_rate": 1.982334333577071e-05, "loss": 0.7659, "step": 863 }, { "epoch": 0.08825331971399387, "grad_norm": 1.4572545901732041, "learning_rate": 1.9822723695592007e-05, "loss": 0.6999, "step": 864 }, { "epoch": 0.08835546475995915, "grad_norm": 1.552223699430407, "learning_rate": 1.982210298030985e-05, "loss": 0.7945, "step": 865 }, { "epoch": 0.08845760980592442, "grad_norm": 1.6220141237598558, "learning_rate": 1.9821481189992175e-05, "loss": 0.7589, "step": 866 }, { "epoch": 0.08855975485188969, "grad_norm": 1.6223640833234694, "learning_rate": 1.982085832470704e-05, "loss": 0.7834, "step": 867 }, { "epoch": 0.08866189989785496, "grad_norm": 1.5556122987309737, "learning_rate": 1.9820234384522617e-05, "loss": 0.7302, "step": 868 }, { "epoch": 0.08876404494382023, "grad_norm": 1.5130214699337237, "learning_rate": 1.9819609369507194e-05, "loss": 0.6756, "step": 869 }, { "epoch": 0.0888661899897855, "grad_norm": 1.6280051708419594, "learning_rate": 1.981898327972918e-05, "loss": 0.7624, "step": 870 }, { "epoch": 0.08896833503575077, "grad_norm": 1.6227536567478529, "learning_rate": 1.9818356115257104e-05, "loss": 0.8002, "step": 871 }, { "epoch": 0.08907048008171603, "grad_norm": 1.5261325070193603, "learning_rate": 1.981772787615961e-05, "loss": 0.8195, "step": 872 }, { "epoch": 0.0891726251276813, "grad_norm": 1.7044919112596812, "learning_rate": 1.9817098562505454e-05, "loss": 0.7506, "step": 873 }, { "epoch": 0.08927477017364657, "grad_norm": 1.6259638475250309, "learning_rate": 1.981646817436352e-05, "loss": 0.7411, "step": 874 }, { "epoch": 0.08937691521961184, "grad_norm": 1.405259031614385, "learning_rate": 1.98158367118028e-05, "loss": 0.7416, "step": 875 }, { "epoch": 0.08947906026557712, "grad_norm": 1.6656868407488183, "learning_rate": 1.981520417489241e-05, "loss": 0.7209, "step": 876 }, { "epoch": 0.08958120531154239, "grad_norm": 1.5492602806531655, "learning_rate": 1.981457056370158e-05, "loss": 0.7849, "step": 877 }, { "epoch": 0.08968335035750766, "grad_norm": 1.4769806549032172, "learning_rate": 1.9813935878299663e-05, "loss": 0.7935, "step": 878 }, { "epoch": 0.08978549540347293, "grad_norm": 1.4192083509317202, "learning_rate": 1.9813300118756125e-05, "loss": 0.6606, "step": 879 }, { "epoch": 0.0898876404494382, "grad_norm": 1.4206634803657674, "learning_rate": 1.9812663285140546e-05, "loss": 0.875, "step": 880 }, { "epoch": 0.08998978549540347, "grad_norm": 1.6049033800597063, "learning_rate": 1.9812025377522633e-05, "loss": 0.8148, "step": 881 }, { "epoch": 0.09009193054136874, "grad_norm": 1.7258080579116855, "learning_rate": 1.9811386395972202e-05, "loss": 0.8029, "step": 882 }, { "epoch": 0.09019407558733401, "grad_norm": 1.421843603111564, "learning_rate": 1.981074634055919e-05, "loss": 0.6283, "step": 883 }, { "epoch": 0.09029622063329928, "grad_norm": 1.4575145172629398, "learning_rate": 1.9810105211353656e-05, "loss": 0.7996, "step": 884 }, { "epoch": 0.09039836567926456, "grad_norm": 1.4893253618472304, "learning_rate": 1.9809463008425765e-05, "loss": 0.7306, "step": 885 }, { "epoch": 0.09050051072522983, "grad_norm": 1.3534431765352035, "learning_rate": 1.980881973184581e-05, "loss": 0.6758, "step": 886 }, { "epoch": 0.0906026557711951, "grad_norm": 1.4524154318699805, "learning_rate": 1.98081753816842e-05, "loss": 0.6753, "step": 887 }, { "epoch": 0.09070480081716037, "grad_norm": 1.5538733477037465, "learning_rate": 1.9807529958011457e-05, "loss": 0.8651, "step": 888 }, { "epoch": 0.09080694586312564, "grad_norm": 1.5585416799150469, "learning_rate": 1.980688346089822e-05, "loss": 0.7714, "step": 889 }, { "epoch": 0.09090909090909091, "grad_norm": 1.5828426151284556, "learning_rate": 1.9806235890415257e-05, "loss": 0.7933, "step": 890 }, { "epoch": 0.09101123595505618, "grad_norm": 1.6448461278910054, "learning_rate": 1.9805587246633436e-05, "loss": 0.7855, "step": 891 }, { "epoch": 0.09111338100102145, "grad_norm": 1.530293547554862, "learning_rate": 1.980493752962376e-05, "loss": 0.7784, "step": 892 }, { "epoch": 0.09121552604698673, "grad_norm": 1.495940270298718, "learning_rate": 1.9804286739457335e-05, "loss": 0.7604, "step": 893 }, { "epoch": 0.091317671092952, "grad_norm": 1.4913273932204647, "learning_rate": 1.980363487620539e-05, "loss": 0.6655, "step": 894 }, { "epoch": 0.09141981613891727, "grad_norm": 1.5773076757232762, "learning_rate": 1.9802981939939276e-05, "loss": 0.683, "step": 895 }, { "epoch": 0.09152196118488254, "grad_norm": 1.5032513749087435, "learning_rate": 1.9802327930730457e-05, "loss": 0.715, "step": 896 }, { "epoch": 0.09162410623084781, "grad_norm": 1.6378338568172568, "learning_rate": 1.980167284865051e-05, "loss": 0.7641, "step": 897 }, { "epoch": 0.09172625127681308, "grad_norm": 1.6424016881391832, "learning_rate": 1.9801016693771137e-05, "loss": 0.7579, "step": 898 }, { "epoch": 0.09182839632277834, "grad_norm": 1.5688594892105001, "learning_rate": 1.9800359466164154e-05, "loss": 0.8109, "step": 899 }, { "epoch": 0.09193054136874361, "grad_norm": 1.553520275259485, "learning_rate": 1.9799701165901498e-05, "loss": 0.7681, "step": 900 }, { "epoch": 0.09203268641470888, "grad_norm": 1.597249136006614, "learning_rate": 1.9799041793055217e-05, "loss": 0.7311, "step": 901 }, { "epoch": 0.09213483146067415, "grad_norm": 1.712686887918941, "learning_rate": 1.979838134769748e-05, "loss": 0.6871, "step": 902 }, { "epoch": 0.09223697650663942, "grad_norm": 1.4619654615917665, "learning_rate": 1.9797719829900575e-05, "loss": 0.7894, "step": 903 }, { "epoch": 0.0923391215526047, "grad_norm": 1.539392201239084, "learning_rate": 1.97970572397369e-05, "loss": 0.647, "step": 904 }, { "epoch": 0.09244126659856997, "grad_norm": 1.634055876247212, "learning_rate": 1.9796393577278988e-05, "loss": 0.83, "step": 905 }, { "epoch": 0.09254341164453524, "grad_norm": 1.414305344368932, "learning_rate": 1.9795728842599466e-05, "loss": 0.6678, "step": 906 }, { "epoch": 0.09264555669050051, "grad_norm": 1.7054663304351356, "learning_rate": 1.979506303577109e-05, "loss": 0.8213, "step": 907 }, { "epoch": 0.09274770173646578, "grad_norm": 1.6482320243261614, "learning_rate": 1.9794396156866738e-05, "loss": 0.7433, "step": 908 }, { "epoch": 0.09284984678243105, "grad_norm": 1.5641111015995306, "learning_rate": 1.9793728205959405e-05, "loss": 0.587, "step": 909 }, { "epoch": 0.09295199182839632, "grad_norm": 1.4727159809697772, "learning_rate": 1.9793059183122184e-05, "loss": 0.8073, "step": 910 }, { "epoch": 0.09305413687436159, "grad_norm": 1.4852320000582167, "learning_rate": 1.9792389088428316e-05, "loss": 0.8124, "step": 911 }, { "epoch": 0.09315628192032686, "grad_norm": 1.3676632308089758, "learning_rate": 1.9791717921951126e-05, "loss": 0.7521, "step": 912 }, { "epoch": 0.09325842696629214, "grad_norm": 1.333238580325899, "learning_rate": 1.979104568376409e-05, "loss": 0.6769, "step": 913 }, { "epoch": 0.0933605720122574, "grad_norm": 1.4487597637598837, "learning_rate": 1.979037237394078e-05, "loss": 0.6905, "step": 914 }, { "epoch": 0.09346271705822268, "grad_norm": 1.4693204822563655, "learning_rate": 1.9789697992554884e-05, "loss": 0.8051, "step": 915 }, { "epoch": 0.09356486210418795, "grad_norm": 1.5911308747422055, "learning_rate": 1.9789022539680215e-05, "loss": 0.7371, "step": 916 }, { "epoch": 0.09366700715015322, "grad_norm": 1.481090729042003, "learning_rate": 1.978834601539071e-05, "loss": 0.7916, "step": 917 }, { "epoch": 0.09376915219611849, "grad_norm": 1.3255619406382855, "learning_rate": 1.9787668419760407e-05, "loss": 0.7514, "step": 918 }, { "epoch": 0.09387129724208376, "grad_norm": 1.7346530489362686, "learning_rate": 1.9786989752863475e-05, "loss": 0.8005, "step": 919 }, { "epoch": 0.09397344228804903, "grad_norm": 1.6633517908047575, "learning_rate": 1.9786310014774187e-05, "loss": 0.7166, "step": 920 }, { "epoch": 0.0940755873340143, "grad_norm": 1.581306581079274, "learning_rate": 1.9785629205566946e-05, "loss": 0.7771, "step": 921 }, { "epoch": 0.09417773237997958, "grad_norm": 1.3220695340545767, "learning_rate": 1.9784947325316267e-05, "loss": 0.8083, "step": 922 }, { "epoch": 0.09427987742594485, "grad_norm": 1.758164691278845, "learning_rate": 1.978426437409678e-05, "loss": 0.8166, "step": 923 }, { "epoch": 0.09438202247191012, "grad_norm": 1.563338732698904, "learning_rate": 1.978358035198324e-05, "loss": 0.8369, "step": 924 }, { "epoch": 0.09448416751787539, "grad_norm": 1.4793236281382585, "learning_rate": 1.9782895259050505e-05, "loss": 0.7605, "step": 925 }, { "epoch": 0.09458631256384066, "grad_norm": 1.5496608138805956, "learning_rate": 1.9782209095373565e-05, "loss": 0.8162, "step": 926 }, { "epoch": 0.09468845760980592, "grad_norm": 1.448925366912586, "learning_rate": 1.978152186102752e-05, "loss": 0.7521, "step": 927 }, { "epoch": 0.09479060265577119, "grad_norm": 1.4797535004677953, "learning_rate": 1.9780833556087588e-05, "loss": 0.6501, "step": 928 }, { "epoch": 0.09489274770173646, "grad_norm": 1.4758269869906364, "learning_rate": 1.9780144180629103e-05, "loss": 0.7062, "step": 929 }, { "epoch": 0.09499489274770173, "grad_norm": 1.3421072019631117, "learning_rate": 1.977945373472752e-05, "loss": 0.6394, "step": 930 }, { "epoch": 0.095097037793667, "grad_norm": 1.4751750547065008, "learning_rate": 1.9778762218458403e-05, "loss": 0.7302, "step": 931 }, { "epoch": 0.09519918283963227, "grad_norm": 1.457181462647776, "learning_rate": 1.9778069631897446e-05, "loss": 0.7136, "step": 932 }, { "epoch": 0.09530132788559754, "grad_norm": 1.6367787019134947, "learning_rate": 1.9777375975120453e-05, "loss": 0.788, "step": 933 }, { "epoch": 0.09540347293156282, "grad_norm": 1.5440112199616998, "learning_rate": 1.9776681248203342e-05, "loss": 0.7905, "step": 934 }, { "epoch": 0.09550561797752809, "grad_norm": 1.482758153559448, "learning_rate": 1.977598545122215e-05, "loss": 0.8693, "step": 935 }, { "epoch": 0.09560776302349336, "grad_norm": 1.5019975142719908, "learning_rate": 1.9775288584253034e-05, "loss": 0.7404, "step": 936 }, { "epoch": 0.09570990806945863, "grad_norm": 1.4422841648228601, "learning_rate": 1.9774590647372267e-05, "loss": 0.7715, "step": 937 }, { "epoch": 0.0958120531154239, "grad_norm": 1.6374522012972488, "learning_rate": 1.977389164065624e-05, "loss": 0.8055, "step": 938 }, { "epoch": 0.09591419816138917, "grad_norm": 1.6181114850846268, "learning_rate": 1.9773191564181454e-05, "loss": 0.884, "step": 939 }, { "epoch": 0.09601634320735444, "grad_norm": 1.6861344823706046, "learning_rate": 1.9772490418024543e-05, "loss": 0.7129, "step": 940 }, { "epoch": 0.09611848825331971, "grad_norm": 1.6726536961960072, "learning_rate": 1.977178820226224e-05, "loss": 0.7871, "step": 941 }, { "epoch": 0.09622063329928499, "grad_norm": 1.4630568723771824, "learning_rate": 1.97710849169714e-05, "loss": 0.7664, "step": 942 }, { "epoch": 0.09632277834525026, "grad_norm": 1.4361129616472423, "learning_rate": 1.9770380562229006e-05, "loss": 0.635, "step": 943 }, { "epoch": 0.09642492339121553, "grad_norm": 1.5865003913190385, "learning_rate": 1.976967513811215e-05, "loss": 0.8809, "step": 944 }, { "epoch": 0.0965270684371808, "grad_norm": 1.4803357787221834, "learning_rate": 1.9768968644698036e-05, "loss": 0.6568, "step": 945 }, { "epoch": 0.09662921348314607, "grad_norm": 1.477908993220323, "learning_rate": 1.976826108206399e-05, "loss": 0.7369, "step": 946 }, { "epoch": 0.09673135852911134, "grad_norm": 1.4587094253692428, "learning_rate": 1.976755245028746e-05, "loss": 0.6607, "step": 947 }, { "epoch": 0.09683350357507661, "grad_norm": 1.4658564717015132, "learning_rate": 1.9766842749446004e-05, "loss": 0.6987, "step": 948 }, { "epoch": 0.09693564862104188, "grad_norm": 1.558194164125307, "learning_rate": 1.97661319796173e-05, "loss": 0.7635, "step": 949 }, { "epoch": 0.09703779366700716, "grad_norm": 1.4704086617906666, "learning_rate": 1.9765420140879137e-05, "loss": 0.7255, "step": 950 }, { "epoch": 0.09713993871297243, "grad_norm": 1.4730396591420127, "learning_rate": 1.976470723330943e-05, "loss": 0.6741, "step": 951 }, { "epoch": 0.0972420837589377, "grad_norm": 1.3039910815210418, "learning_rate": 1.9763993256986215e-05, "loss": 0.6434, "step": 952 }, { "epoch": 0.09734422880490297, "grad_norm": 1.4775103489065533, "learning_rate": 1.976327821198762e-05, "loss": 0.6766, "step": 953 }, { "epoch": 0.09744637385086824, "grad_norm": 1.5776372083024406, "learning_rate": 1.9762562098391922e-05, "loss": 0.7452, "step": 954 }, { "epoch": 0.0975485188968335, "grad_norm": 1.5583070802795673, "learning_rate": 1.9761844916277496e-05, "loss": 0.8046, "step": 955 }, { "epoch": 0.09765066394279877, "grad_norm": 1.7470901466903863, "learning_rate": 1.9761126665722835e-05, "loss": 0.7099, "step": 956 }, { "epoch": 0.09775280898876404, "grad_norm": 1.482867770568344, "learning_rate": 1.9760407346806555e-05, "loss": 0.7122, "step": 957 }, { "epoch": 0.09785495403472931, "grad_norm": 1.4378085173546427, "learning_rate": 1.9759686959607385e-05, "loss": 0.7045, "step": 958 }, { "epoch": 0.09795709908069458, "grad_norm": 1.5122287550337, "learning_rate": 1.975896550420417e-05, "loss": 0.7209, "step": 959 }, { "epoch": 0.09805924412665985, "grad_norm": 1.7803133458264089, "learning_rate": 1.9758242980675876e-05, "loss": 0.7201, "step": 960 }, { "epoch": 0.09816138917262512, "grad_norm": 1.4864698119183952, "learning_rate": 1.9757519389101583e-05, "loss": 0.7521, "step": 961 }, { "epoch": 0.0982635342185904, "grad_norm": 1.5932527645758576, "learning_rate": 1.975679472956049e-05, "loss": 0.8326, "step": 962 }, { "epoch": 0.09836567926455567, "grad_norm": 1.6543202167305613, "learning_rate": 1.9756069002131908e-05, "loss": 0.825, "step": 963 }, { "epoch": 0.09846782431052094, "grad_norm": 1.4463693997998088, "learning_rate": 1.9755342206895274e-05, "loss": 0.7518, "step": 964 }, { "epoch": 0.09856996935648621, "grad_norm": 1.6063518742478968, "learning_rate": 1.975461434393013e-05, "loss": 0.7253, "step": 965 }, { "epoch": 0.09867211440245148, "grad_norm": 1.4877549812942281, "learning_rate": 1.9753885413316143e-05, "loss": 0.7249, "step": 966 }, { "epoch": 0.09877425944841675, "grad_norm": 1.3326528855874147, "learning_rate": 1.9753155415133097e-05, "loss": 0.7369, "step": 967 }, { "epoch": 0.09887640449438202, "grad_norm": 1.4714953511265507, "learning_rate": 1.9752424349460888e-05, "loss": 0.7083, "step": 968 }, { "epoch": 0.0989785495403473, "grad_norm": 1.4045230827554696, "learning_rate": 1.9751692216379538e-05, "loss": 0.7008, "step": 969 }, { "epoch": 0.09908069458631256, "grad_norm": 1.41148734085975, "learning_rate": 1.975095901596917e-05, "loss": 0.7569, "step": 970 }, { "epoch": 0.09918283963227784, "grad_norm": 1.6547030130396438, "learning_rate": 1.9750224748310036e-05, "loss": 0.8257, "step": 971 }, { "epoch": 0.09928498467824311, "grad_norm": 1.5953106596724391, "learning_rate": 1.974948941348251e-05, "loss": 0.7068, "step": 972 }, { "epoch": 0.09938712972420838, "grad_norm": 1.6975504063540594, "learning_rate": 1.9748753011567063e-05, "loss": 0.7403, "step": 973 }, { "epoch": 0.09948927477017365, "grad_norm": 1.6567869110170834, "learning_rate": 1.9748015542644304e-05, "loss": 0.7955, "step": 974 }, { "epoch": 0.09959141981613892, "grad_norm": 1.4566763449068436, "learning_rate": 1.9747277006794943e-05, "loss": 0.675, "step": 975 }, { "epoch": 0.09969356486210419, "grad_norm": 1.3840306808623692, "learning_rate": 1.9746537404099818e-05, "loss": 0.7338, "step": 976 }, { "epoch": 0.09979570990806946, "grad_norm": 1.4520267334727888, "learning_rate": 1.9745796734639877e-05, "loss": 0.7519, "step": 977 }, { "epoch": 0.09989785495403473, "grad_norm": 1.5906652066312208, "learning_rate": 1.9745054998496186e-05, "loss": 0.7921, "step": 978 }, { "epoch": 0.1, "grad_norm": 1.8458016554749022, "learning_rate": 1.974431219574993e-05, "loss": 0.7262, "step": 979 }, { "epoch": 0.10010214504596528, "grad_norm": 1.6046986126533127, "learning_rate": 1.974356832648241e-05, "loss": 0.7491, "step": 980 }, { "epoch": 0.10020429009193055, "grad_norm": 1.4656463383202505, "learning_rate": 1.9742823390775038e-05, "loss": 0.7235, "step": 981 }, { "epoch": 0.1003064351378958, "grad_norm": 1.6927113481283556, "learning_rate": 1.9742077388709354e-05, "loss": 0.8676, "step": 982 }, { "epoch": 0.10040858018386108, "grad_norm": 1.5775193434317438, "learning_rate": 1.9741330320367003e-05, "loss": 0.8681, "step": 983 }, { "epoch": 0.10051072522982635, "grad_norm": 1.4187466816216623, "learning_rate": 1.9740582185829758e-05, "loss": 0.7256, "step": 984 }, { "epoch": 0.10061287027579162, "grad_norm": 1.7057356358705964, "learning_rate": 1.9739832985179496e-05, "loss": 0.8539, "step": 985 }, { "epoch": 0.10071501532175689, "grad_norm": 1.436147638666035, "learning_rate": 1.9739082718498225e-05, "loss": 0.7029, "step": 986 }, { "epoch": 0.10081716036772216, "grad_norm": 1.466806296811124, "learning_rate": 1.9738331385868057e-05, "loss": 0.7136, "step": 987 }, { "epoch": 0.10091930541368743, "grad_norm": 1.5674119607445685, "learning_rate": 1.9737578987371228e-05, "loss": 0.7591, "step": 988 }, { "epoch": 0.1010214504596527, "grad_norm": 1.586697300144906, "learning_rate": 1.973682552309008e-05, "loss": 0.8178, "step": 989 }, { "epoch": 0.10112359550561797, "grad_norm": 1.5910089426981955, "learning_rate": 1.9736070993107096e-05, "loss": 0.7809, "step": 990 }, { "epoch": 0.10122574055158325, "grad_norm": 1.405608333447748, "learning_rate": 1.973531539750485e-05, "loss": 0.7101, "step": 991 }, { "epoch": 0.10132788559754852, "grad_norm": 1.3404674305678805, "learning_rate": 1.973455873636604e-05, "loss": 0.7214, "step": 992 }, { "epoch": 0.10143003064351379, "grad_norm": 1.3524429057620553, "learning_rate": 1.9733801009773493e-05, "loss": 0.6585, "step": 993 }, { "epoch": 0.10153217568947906, "grad_norm": 1.4920286250840071, "learning_rate": 1.9733042217810132e-05, "loss": 0.7548, "step": 994 }, { "epoch": 0.10163432073544433, "grad_norm": 1.4909766479476947, "learning_rate": 1.9732282360559013e-05, "loss": 0.6958, "step": 995 }, { "epoch": 0.1017364657814096, "grad_norm": 1.4771995706066727, "learning_rate": 1.9731521438103302e-05, "loss": 0.6878, "step": 996 }, { "epoch": 0.10183861082737487, "grad_norm": 1.4062500744157633, "learning_rate": 1.9730759450526283e-05, "loss": 0.6565, "step": 997 }, { "epoch": 0.10194075587334014, "grad_norm": 1.5604525269143246, "learning_rate": 1.9729996397911354e-05, "loss": 0.8305, "step": 998 }, { "epoch": 0.10204290091930542, "grad_norm": 1.5364580628298345, "learning_rate": 1.9729232280342035e-05, "loss": 0.7152, "step": 999 }, { "epoch": 0.10214504596527069, "grad_norm": 1.4468280092950851, "learning_rate": 1.9728467097901954e-05, "loss": 0.7943, "step": 1000 }, { "epoch": 0.10224719101123596, "grad_norm": 1.3990501658212111, "learning_rate": 1.9727700850674868e-05, "loss": 0.6525, "step": 1001 }, { "epoch": 0.10234933605720123, "grad_norm": 1.7582794258530305, "learning_rate": 1.9726933538744636e-05, "loss": 0.863, "step": 1002 }, { "epoch": 0.1024514811031665, "grad_norm": 1.4979593135081044, "learning_rate": 1.9726165162195244e-05, "loss": 0.7641, "step": 1003 }, { "epoch": 0.10255362614913177, "grad_norm": 1.633991705835641, "learning_rate": 1.972539572111079e-05, "loss": 0.6708, "step": 1004 }, { "epoch": 0.10265577119509704, "grad_norm": 1.6315050251407412, "learning_rate": 1.972462521557549e-05, "loss": 0.7879, "step": 1005 }, { "epoch": 0.10275791624106231, "grad_norm": 1.5218473411352251, "learning_rate": 1.972385364567368e-05, "loss": 0.693, "step": 1006 }, { "epoch": 0.10286006128702758, "grad_norm": 1.537691951507889, "learning_rate": 1.9723081011489808e-05, "loss": 0.7028, "step": 1007 }, { "epoch": 0.10296220633299286, "grad_norm": 1.5743260630199554, "learning_rate": 1.9722307313108434e-05, "loss": 0.7258, "step": 1008 }, { "epoch": 0.10306435137895813, "grad_norm": 1.4768790424166727, "learning_rate": 1.9721532550614243e-05, "loss": 0.789, "step": 1009 }, { "epoch": 0.10316649642492338, "grad_norm": 1.699258153445073, "learning_rate": 1.9720756724092033e-05, "loss": 0.6559, "step": 1010 }, { "epoch": 0.10326864147088866, "grad_norm": 1.3808968819690532, "learning_rate": 1.9719979833626723e-05, "loss": 0.7556, "step": 1011 }, { "epoch": 0.10337078651685393, "grad_norm": 1.4855624790134385, "learning_rate": 1.971920187930334e-05, "loss": 0.7814, "step": 1012 }, { "epoch": 0.1034729315628192, "grad_norm": 1.5518038113996941, "learning_rate": 1.9718422861207028e-05, "loss": 0.725, "step": 1013 }, { "epoch": 0.10357507660878447, "grad_norm": 1.6336153863697138, "learning_rate": 1.9717642779423056e-05, "loss": 0.9471, "step": 1014 }, { "epoch": 0.10367722165474974, "grad_norm": 1.64745504326712, "learning_rate": 1.9716861634036807e-05, "loss": 0.7723, "step": 1015 }, { "epoch": 0.10377936670071501, "grad_norm": 1.3576936215628461, "learning_rate": 1.971607942513377e-05, "loss": 0.6835, "step": 1016 }, { "epoch": 0.10388151174668028, "grad_norm": 1.1858525253337149, "learning_rate": 1.9715296152799563e-05, "loss": 0.6177, "step": 1017 }, { "epoch": 0.10398365679264555, "grad_norm": 1.6705054575036173, "learning_rate": 1.9714511817119913e-05, "loss": 0.7585, "step": 1018 }, { "epoch": 0.10408580183861083, "grad_norm": 1.7704489777671324, "learning_rate": 1.9713726418180673e-05, "loss": 0.6826, "step": 1019 }, { "epoch": 0.1041879468845761, "grad_norm": 1.3940675481157598, "learning_rate": 1.97129399560678e-05, "loss": 0.8696, "step": 1020 }, { "epoch": 0.10429009193054137, "grad_norm": 1.5279804213259633, "learning_rate": 1.971215243086737e-05, "loss": 0.7345, "step": 1021 }, { "epoch": 0.10439223697650664, "grad_norm": 1.6089632661400797, "learning_rate": 1.971136384266559e-05, "loss": 0.799, "step": 1022 }, { "epoch": 0.10449438202247191, "grad_norm": 1.4371380693389366, "learning_rate": 1.9710574191548755e-05, "loss": 0.7716, "step": 1023 }, { "epoch": 0.10459652706843718, "grad_norm": 1.490234195045288, "learning_rate": 1.97097834776033e-05, "loss": 0.8002, "step": 1024 }, { "epoch": 0.10469867211440245, "grad_norm": 1.5363878532634723, "learning_rate": 1.9708991700915775e-05, "loss": 0.6424, "step": 1025 }, { "epoch": 0.10480081716036772, "grad_norm": 1.72045974933191, "learning_rate": 1.9708198861572834e-05, "loss": 0.6349, "step": 1026 }, { "epoch": 0.104902962206333, "grad_norm": 1.5713777215251707, "learning_rate": 1.9707404959661253e-05, "loss": 0.6532, "step": 1027 }, { "epoch": 0.10500510725229827, "grad_norm": 1.862917125630126, "learning_rate": 1.970660999526793e-05, "loss": 0.8655, "step": 1028 }, { "epoch": 0.10510725229826354, "grad_norm": 1.6590294079206702, "learning_rate": 1.9705813968479873e-05, "loss": 0.711, "step": 1029 }, { "epoch": 0.10520939734422881, "grad_norm": 1.538498523148317, "learning_rate": 1.97050168793842e-05, "loss": 0.7911, "step": 1030 }, { "epoch": 0.10531154239019408, "grad_norm": 1.6016890528590662, "learning_rate": 1.9704218728068165e-05, "loss": 0.8423, "step": 1031 }, { "epoch": 0.10541368743615935, "grad_norm": 1.7442154812077557, "learning_rate": 1.970341951461912e-05, "loss": 0.873, "step": 1032 }, { "epoch": 0.10551583248212462, "grad_norm": 1.5684267190963708, "learning_rate": 1.970261923912454e-05, "loss": 0.7215, "step": 1033 }, { "epoch": 0.10561797752808989, "grad_norm": 1.3995705990407452, "learning_rate": 1.9701817901672012e-05, "loss": 0.6237, "step": 1034 }, { "epoch": 0.10572012257405516, "grad_norm": 1.447184745783299, "learning_rate": 1.970101550234925e-05, "loss": 0.7456, "step": 1035 }, { "epoch": 0.10582226762002044, "grad_norm": 1.584305978918886, "learning_rate": 1.9700212041244075e-05, "loss": 0.8151, "step": 1036 }, { "epoch": 0.1059244126659857, "grad_norm": 1.5659777252377847, "learning_rate": 1.969940751844442e-05, "loss": 0.7402, "step": 1037 }, { "epoch": 0.10602655771195096, "grad_norm": 1.5132296037226554, "learning_rate": 1.969860193403835e-05, "loss": 0.7419, "step": 1038 }, { "epoch": 0.10612870275791624, "grad_norm": 1.6292286607140969, "learning_rate": 1.969779528811403e-05, "loss": 0.8968, "step": 1039 }, { "epoch": 0.1062308478038815, "grad_norm": 1.5212551035984276, "learning_rate": 1.969698758075975e-05, "loss": 0.6529, "step": 1040 }, { "epoch": 0.10633299284984678, "grad_norm": 1.5870000281839585, "learning_rate": 1.9696178812063916e-05, "loss": 0.7447, "step": 1041 }, { "epoch": 0.10643513789581205, "grad_norm": 1.4670247471720168, "learning_rate": 1.9695368982115045e-05, "loss": 0.7211, "step": 1042 }, { "epoch": 0.10653728294177732, "grad_norm": 1.5900334373794982, "learning_rate": 1.9694558091001775e-05, "loss": 0.7828, "step": 1043 }, { "epoch": 0.10663942798774259, "grad_norm": 1.5754049427546888, "learning_rate": 1.9693746138812857e-05, "loss": 0.7867, "step": 1044 }, { "epoch": 0.10674157303370786, "grad_norm": 1.662951472327358, "learning_rate": 1.9692933125637164e-05, "loss": 0.7096, "step": 1045 }, { "epoch": 0.10684371807967313, "grad_norm": 1.4677802436261256, "learning_rate": 1.9692119051563676e-05, "loss": 0.7837, "step": 1046 }, { "epoch": 0.1069458631256384, "grad_norm": 1.5147859973405229, "learning_rate": 1.9691303916681496e-05, "loss": 0.7031, "step": 1047 }, { "epoch": 0.10704800817160368, "grad_norm": 1.6365709294718904, "learning_rate": 1.9690487721079844e-05, "loss": 0.7939, "step": 1048 }, { "epoch": 0.10715015321756895, "grad_norm": 1.4352163050125037, "learning_rate": 1.9689670464848046e-05, "loss": 0.7386, "step": 1049 }, { "epoch": 0.10725229826353422, "grad_norm": 1.8710853126370701, "learning_rate": 1.9688852148075558e-05, "loss": 0.7721, "step": 1050 }, { "epoch": 0.10735444330949949, "grad_norm": 1.5375805206814441, "learning_rate": 1.968803277085194e-05, "loss": 0.7564, "step": 1051 }, { "epoch": 0.10745658835546476, "grad_norm": 1.493112625445584, "learning_rate": 1.9687212333266878e-05, "loss": 0.6944, "step": 1052 }, { "epoch": 0.10755873340143003, "grad_norm": 1.4681266876362211, "learning_rate": 1.9686390835410166e-05, "loss": 0.7386, "step": 1053 }, { "epoch": 0.1076608784473953, "grad_norm": 1.504073201526041, "learning_rate": 1.9685568277371722e-05, "loss": 0.821, "step": 1054 }, { "epoch": 0.10776302349336057, "grad_norm": 1.608016314217692, "learning_rate": 1.9684744659241567e-05, "loss": 0.8452, "step": 1055 }, { "epoch": 0.10786516853932585, "grad_norm": 1.554589638810335, "learning_rate": 1.9683919981109855e-05, "loss": 0.7232, "step": 1056 }, { "epoch": 0.10796731358529112, "grad_norm": 1.5565380453279256, "learning_rate": 1.9683094243066846e-05, "loss": 0.9074, "step": 1057 }, { "epoch": 0.10806945863125639, "grad_norm": 1.4666115680684282, "learning_rate": 1.9682267445202915e-05, "loss": 0.7396, "step": 1058 }, { "epoch": 0.10817160367722166, "grad_norm": 1.50996832618128, "learning_rate": 1.9681439587608556e-05, "loss": 0.8248, "step": 1059 }, { "epoch": 0.10827374872318693, "grad_norm": 1.4959611080051747, "learning_rate": 1.9680610670374377e-05, "loss": 0.7332, "step": 1060 }, { "epoch": 0.1083758937691522, "grad_norm": 1.5105977612121393, "learning_rate": 1.967978069359111e-05, "loss": 0.7496, "step": 1061 }, { "epoch": 0.10847803881511747, "grad_norm": 1.4872475345984455, "learning_rate": 1.9678949657349588e-05, "loss": 0.7721, "step": 1062 }, { "epoch": 0.10858018386108274, "grad_norm": 1.6243417248933554, "learning_rate": 1.9678117561740775e-05, "loss": 0.6835, "step": 1063 }, { "epoch": 0.10868232890704801, "grad_norm": 1.536704023708454, "learning_rate": 1.967728440685574e-05, "loss": 0.7915, "step": 1064 }, { "epoch": 0.10878447395301327, "grad_norm": 1.5305739582406057, "learning_rate": 1.9676450192785678e-05, "loss": 0.8078, "step": 1065 }, { "epoch": 0.10888661899897854, "grad_norm": 1.4659650067504544, "learning_rate": 1.967561491962189e-05, "loss": 0.7155, "step": 1066 }, { "epoch": 0.10898876404494381, "grad_norm": 1.446269152380502, "learning_rate": 1.9674778587455794e-05, "loss": 0.6517, "step": 1067 }, { "epoch": 0.10909090909090909, "grad_norm": 1.5716526221715839, "learning_rate": 1.9673941196378934e-05, "loss": 0.7789, "step": 1068 }, { "epoch": 0.10919305413687436, "grad_norm": 1.5825011813906076, "learning_rate": 1.967310274648296e-05, "loss": 0.7292, "step": 1069 }, { "epoch": 0.10929519918283963, "grad_norm": 1.5485946834973099, "learning_rate": 1.967226323785964e-05, "loss": 0.7593, "step": 1070 }, { "epoch": 0.1093973442288049, "grad_norm": 1.5215823121876997, "learning_rate": 1.9671422670600856e-05, "loss": 0.792, "step": 1071 }, { "epoch": 0.10949948927477017, "grad_norm": 1.453396643211816, "learning_rate": 1.967058104479862e-05, "loss": 0.747, "step": 1072 }, { "epoch": 0.10960163432073544, "grad_norm": 1.4168331086486683, "learning_rate": 1.9669738360545035e-05, "loss": 0.7049, "step": 1073 }, { "epoch": 0.10970377936670071, "grad_norm": 1.3263318882519275, "learning_rate": 1.966889461793234e-05, "loss": 0.7235, "step": 1074 }, { "epoch": 0.10980592441266598, "grad_norm": 1.6194805510631765, "learning_rate": 1.966804981705288e-05, "loss": 0.7342, "step": 1075 }, { "epoch": 0.10990806945863126, "grad_norm": 1.491035625213636, "learning_rate": 1.9667203957999127e-05, "loss": 0.716, "step": 1076 }, { "epoch": 0.11001021450459653, "grad_norm": 1.5293161670236433, "learning_rate": 1.9666357040863652e-05, "loss": 0.6948, "step": 1077 }, { "epoch": 0.1101123595505618, "grad_norm": 1.4427104989338841, "learning_rate": 1.966550906573915e-05, "loss": 0.7067, "step": 1078 }, { "epoch": 0.11021450459652707, "grad_norm": 1.5419103208271614, "learning_rate": 1.966466003271844e-05, "loss": 0.7692, "step": 1079 }, { "epoch": 0.11031664964249234, "grad_norm": 1.287527417083186, "learning_rate": 1.966380994189444e-05, "loss": 0.7177, "step": 1080 }, { "epoch": 0.11041879468845761, "grad_norm": 1.5432135302878576, "learning_rate": 1.9662958793360206e-05, "loss": 0.7233, "step": 1081 }, { "epoch": 0.11052093973442288, "grad_norm": 1.5879548534579644, "learning_rate": 1.9662106587208886e-05, "loss": 0.8102, "step": 1082 }, { "epoch": 0.11062308478038815, "grad_norm": 1.5714630736277018, "learning_rate": 1.9661253323533757e-05, "loss": 0.8048, "step": 1083 }, { "epoch": 0.11072522982635342, "grad_norm": 1.4428855909461007, "learning_rate": 1.966039900242821e-05, "loss": 0.7014, "step": 1084 }, { "epoch": 0.1108273748723187, "grad_norm": 1.6870074077482777, "learning_rate": 1.965954362398575e-05, "loss": 0.8031, "step": 1085 }, { "epoch": 0.11092951991828397, "grad_norm": 1.5615804599702627, "learning_rate": 1.96586871883e-05, "loss": 0.7303, "step": 1086 }, { "epoch": 0.11103166496424924, "grad_norm": 1.508248958415601, "learning_rate": 1.9657829695464698e-05, "loss": 0.832, "step": 1087 }, { "epoch": 0.11113381001021451, "grad_norm": 1.5302067947637197, "learning_rate": 1.9656971145573697e-05, "loss": 0.7552, "step": 1088 }, { "epoch": 0.11123595505617978, "grad_norm": 1.520783082734965, "learning_rate": 1.965611153872096e-05, "loss": 0.7495, "step": 1089 }, { "epoch": 0.11133810010214505, "grad_norm": 1.3272489177167366, "learning_rate": 1.965525087500058e-05, "loss": 0.6495, "step": 1090 }, { "epoch": 0.11144024514811032, "grad_norm": 1.730070806759725, "learning_rate": 1.9654389154506754e-05, "loss": 0.759, "step": 1091 }, { "epoch": 0.1115423901940756, "grad_norm": 1.4720479151362003, "learning_rate": 1.9653526377333796e-05, "loss": 0.6225, "step": 1092 }, { "epoch": 0.11164453524004085, "grad_norm": 1.6282340642885975, "learning_rate": 1.965266254357614e-05, "loss": 0.8137, "step": 1093 }, { "epoch": 0.11174668028600612, "grad_norm": 1.5395414105341485, "learning_rate": 1.9651797653328332e-05, "loss": 0.6893, "step": 1094 }, { "epoch": 0.1118488253319714, "grad_norm": 1.4591337079925815, "learning_rate": 1.9650931706685036e-05, "loss": 0.7059, "step": 1095 }, { "epoch": 0.11195097037793666, "grad_norm": 1.5163459799789831, "learning_rate": 1.965006470374103e-05, "loss": 0.7266, "step": 1096 }, { "epoch": 0.11205311542390194, "grad_norm": 1.56214228902307, "learning_rate": 1.9649196644591203e-05, "loss": 0.7501, "step": 1097 }, { "epoch": 0.11215526046986721, "grad_norm": 1.6932561748304926, "learning_rate": 1.9648327529330574e-05, "loss": 0.7603, "step": 1098 }, { "epoch": 0.11225740551583248, "grad_norm": 1.5758000546280584, "learning_rate": 1.9647457358054258e-05, "loss": 0.7621, "step": 1099 }, { "epoch": 0.11235955056179775, "grad_norm": 1.6192369089675644, "learning_rate": 1.9646586130857504e-05, "loss": 0.7703, "step": 1100 }, { "epoch": 0.11246169560776302, "grad_norm": 1.6423027363222782, "learning_rate": 1.9645713847835666e-05, "loss": 0.7898, "step": 1101 }, { "epoch": 0.11256384065372829, "grad_norm": 1.5697964937421576, "learning_rate": 1.9644840509084218e-05, "loss": 0.8043, "step": 1102 }, { "epoch": 0.11266598569969356, "grad_norm": 1.3761870021062004, "learning_rate": 1.964396611469874e-05, "loss": 0.6692, "step": 1103 }, { "epoch": 0.11276813074565883, "grad_norm": 1.501636530068321, "learning_rate": 1.964309066477494e-05, "loss": 0.7291, "step": 1104 }, { "epoch": 0.1128702757916241, "grad_norm": 1.3787217226253703, "learning_rate": 1.9642214159408637e-05, "loss": 0.7984, "step": 1105 }, { "epoch": 0.11297242083758938, "grad_norm": 1.5683555170323638, "learning_rate": 1.964133659869577e-05, "loss": 0.8193, "step": 1106 }, { "epoch": 0.11307456588355465, "grad_norm": 1.4264983752144587, "learning_rate": 1.964045798273238e-05, "loss": 0.7473, "step": 1107 }, { "epoch": 0.11317671092951992, "grad_norm": 1.529251767852379, "learning_rate": 1.9639578311614633e-05, "loss": 0.8392, "step": 1108 }, { "epoch": 0.11327885597548519, "grad_norm": 1.5241632486928207, "learning_rate": 1.9638697585438816e-05, "loss": 0.701, "step": 1109 }, { "epoch": 0.11338100102145046, "grad_norm": 1.57816366971916, "learning_rate": 1.9637815804301315e-05, "loss": 0.7713, "step": 1110 }, { "epoch": 0.11348314606741573, "grad_norm": 1.6561260930902602, "learning_rate": 1.9636932968298652e-05, "loss": 0.7932, "step": 1111 }, { "epoch": 0.113585291113381, "grad_norm": 1.5135538410194025, "learning_rate": 1.963604907752745e-05, "loss": 0.6946, "step": 1112 }, { "epoch": 0.11368743615934628, "grad_norm": 1.4696545723569394, "learning_rate": 1.9635164132084447e-05, "loss": 0.7785, "step": 1113 }, { "epoch": 0.11378958120531155, "grad_norm": 1.711207260627573, "learning_rate": 1.9634278132066503e-05, "loss": 0.831, "step": 1114 }, { "epoch": 0.11389172625127682, "grad_norm": 1.4100080671510697, "learning_rate": 1.96333910775706e-05, "loss": 0.7305, "step": 1115 }, { "epoch": 0.11399387129724209, "grad_norm": 1.3506679078293322, "learning_rate": 1.9632502968693815e-05, "loss": 0.6626, "step": 1116 }, { "epoch": 0.11409601634320736, "grad_norm": 1.5736982046628725, "learning_rate": 1.9631613805533357e-05, "loss": 0.8463, "step": 1117 }, { "epoch": 0.11419816138917263, "grad_norm": 1.6030400426484313, "learning_rate": 1.9630723588186544e-05, "loss": 0.7356, "step": 1118 }, { "epoch": 0.1143003064351379, "grad_norm": 1.591432351263304, "learning_rate": 1.9629832316750814e-05, "loss": 0.8572, "step": 1119 }, { "epoch": 0.11440245148110317, "grad_norm": 1.4525598995329345, "learning_rate": 1.9628939991323717e-05, "loss": 0.7119, "step": 1120 }, { "epoch": 0.11450459652706843, "grad_norm": 1.4730526995554696, "learning_rate": 1.9628046612002912e-05, "loss": 0.7002, "step": 1121 }, { "epoch": 0.1146067415730337, "grad_norm": 1.6655097959241574, "learning_rate": 1.9627152178886192e-05, "loss": 0.7306, "step": 1122 }, { "epoch": 0.11470888661899897, "grad_norm": 1.4487324452504642, "learning_rate": 1.9626256692071443e-05, "loss": 0.7038, "step": 1123 }, { "epoch": 0.11481103166496424, "grad_norm": 1.459069387552358, "learning_rate": 1.9625360151656676e-05, "loss": 0.7114, "step": 1124 }, { "epoch": 0.11491317671092952, "grad_norm": 1.4514532613334061, "learning_rate": 1.9624462557740026e-05, "loss": 0.7659, "step": 1125 }, { "epoch": 0.11501532175689479, "grad_norm": 1.6161659026524873, "learning_rate": 1.9623563910419725e-05, "loss": 0.8431, "step": 1126 }, { "epoch": 0.11511746680286006, "grad_norm": 1.6585267557248564, "learning_rate": 1.9622664209794143e-05, "loss": 0.7978, "step": 1127 }, { "epoch": 0.11521961184882533, "grad_norm": 1.5705270438937884, "learning_rate": 1.9621763455961743e-05, "loss": 0.8928, "step": 1128 }, { "epoch": 0.1153217568947906, "grad_norm": 1.490333260246094, "learning_rate": 1.9620861649021114e-05, "loss": 0.7757, "step": 1129 }, { "epoch": 0.11542390194075587, "grad_norm": 1.393001386740254, "learning_rate": 1.9619958789070964e-05, "loss": 0.7052, "step": 1130 }, { "epoch": 0.11552604698672114, "grad_norm": 1.4685504649997314, "learning_rate": 1.961905487621011e-05, "loss": 0.7579, "step": 1131 }, { "epoch": 0.11562819203268641, "grad_norm": 1.6085513164929615, "learning_rate": 1.9618149910537486e-05, "loss": 0.7252, "step": 1132 }, { "epoch": 0.11573033707865168, "grad_norm": 1.6250195972146972, "learning_rate": 1.961724389215214e-05, "loss": 0.8278, "step": 1133 }, { "epoch": 0.11583248212461696, "grad_norm": 1.5661964553561925, "learning_rate": 1.9616336821153234e-05, "loss": 0.6943, "step": 1134 }, { "epoch": 0.11593462717058223, "grad_norm": 1.4715500789745353, "learning_rate": 1.961542869764005e-05, "loss": 0.7396, "step": 1135 }, { "epoch": 0.1160367722165475, "grad_norm": 1.579461063679608, "learning_rate": 1.961451952171199e-05, "loss": 0.7588, "step": 1136 }, { "epoch": 0.11613891726251277, "grad_norm": 1.6259954884744157, "learning_rate": 1.9613609293468547e-05, "loss": 0.7895, "step": 1137 }, { "epoch": 0.11624106230847804, "grad_norm": 1.6283075636519224, "learning_rate": 1.961269801300936e-05, "loss": 0.7949, "step": 1138 }, { "epoch": 0.11634320735444331, "grad_norm": 1.4242442476519803, "learning_rate": 1.9611785680434163e-05, "loss": 0.6252, "step": 1139 }, { "epoch": 0.11644535240040858, "grad_norm": 1.6164926118724838, "learning_rate": 1.9610872295842817e-05, "loss": 0.6829, "step": 1140 }, { "epoch": 0.11654749744637385, "grad_norm": 1.4699206989738522, "learning_rate": 1.9609957859335284e-05, "loss": 0.7251, "step": 1141 }, { "epoch": 0.11664964249233913, "grad_norm": 1.6834752269614504, "learning_rate": 1.960904237101166e-05, "loss": 0.799, "step": 1142 }, { "epoch": 0.1167517875383044, "grad_norm": 1.6881694148931172, "learning_rate": 1.9608125830972137e-05, "loss": 0.8069, "step": 1143 }, { "epoch": 0.11685393258426967, "grad_norm": 1.5036997167358905, "learning_rate": 1.9607208239317033e-05, "loss": 0.8254, "step": 1144 }, { "epoch": 0.11695607763023494, "grad_norm": 1.4370888244086095, "learning_rate": 1.9606289596146778e-05, "loss": 0.6801, "step": 1145 }, { "epoch": 0.11705822267620021, "grad_norm": 1.5769554592067168, "learning_rate": 1.9605369901561925e-05, "loss": 0.8396, "step": 1146 }, { "epoch": 0.11716036772216548, "grad_norm": 1.472737712399418, "learning_rate": 1.960444915566313e-05, "loss": 0.7266, "step": 1147 }, { "epoch": 0.11726251276813074, "grad_norm": 1.4592928041257078, "learning_rate": 1.9603527358551168e-05, "loss": 0.7671, "step": 1148 }, { "epoch": 0.11736465781409601, "grad_norm": 1.6234213775530273, "learning_rate": 1.960260451032693e-05, "loss": 0.8097, "step": 1149 }, { "epoch": 0.11746680286006128, "grad_norm": 1.411561192994768, "learning_rate": 1.960168061109143e-05, "loss": 0.7693, "step": 1150 }, { "epoch": 0.11756894790602655, "grad_norm": 1.5513596604042557, "learning_rate": 1.960075566094578e-05, "loss": 0.7398, "step": 1151 }, { "epoch": 0.11767109295199182, "grad_norm": 1.6576902488653413, "learning_rate": 1.9599829659991218e-05, "loss": 0.8122, "step": 1152 }, { "epoch": 0.1177732379979571, "grad_norm": 1.2937566604443824, "learning_rate": 1.9598902608329103e-05, "loss": 0.6592, "step": 1153 }, { "epoch": 0.11787538304392237, "grad_norm": 1.5144225465854024, "learning_rate": 1.959797450606089e-05, "loss": 0.6989, "step": 1154 }, { "epoch": 0.11797752808988764, "grad_norm": 1.6574589217740479, "learning_rate": 1.9597045353288168e-05, "loss": 0.8498, "step": 1155 }, { "epoch": 0.11807967313585291, "grad_norm": 1.4082506086397073, "learning_rate": 1.9596115150112634e-05, "loss": 0.6985, "step": 1156 }, { "epoch": 0.11818181818181818, "grad_norm": 1.508702350477482, "learning_rate": 1.9595183896636096e-05, "loss": 0.7123, "step": 1157 }, { "epoch": 0.11828396322778345, "grad_norm": 1.5366612361137204, "learning_rate": 1.959425159296048e-05, "loss": 0.8071, "step": 1158 }, { "epoch": 0.11838610827374872, "grad_norm": 1.420998684150009, "learning_rate": 1.9593318239187827e-05, "loss": 0.7303, "step": 1159 }, { "epoch": 0.118488253319714, "grad_norm": 1.5498408206922487, "learning_rate": 1.9592383835420297e-05, "loss": 0.7587, "step": 1160 }, { "epoch": 0.11859039836567926, "grad_norm": 1.469113125338137, "learning_rate": 1.959144838176016e-05, "loss": 0.6966, "step": 1161 }, { "epoch": 0.11869254341164454, "grad_norm": 1.48914328035654, "learning_rate": 1.95905118783098e-05, "loss": 0.7145, "step": 1162 }, { "epoch": 0.1187946884576098, "grad_norm": 1.5182866382588345, "learning_rate": 1.958957432517172e-05, "loss": 0.8128, "step": 1163 }, { "epoch": 0.11889683350357508, "grad_norm": 1.5061765053438796, "learning_rate": 1.958863572244853e-05, "loss": 0.6881, "step": 1164 }, { "epoch": 0.11899897854954035, "grad_norm": 1.5422965648610834, "learning_rate": 1.958769607024297e-05, "loss": 0.7557, "step": 1165 }, { "epoch": 0.11910112359550562, "grad_norm": 1.5167992794347664, "learning_rate": 1.9586755368657877e-05, "loss": 0.8036, "step": 1166 }, { "epoch": 0.11920326864147089, "grad_norm": 1.677729482694591, "learning_rate": 1.9585813617796216e-05, "loss": 0.7432, "step": 1167 }, { "epoch": 0.11930541368743616, "grad_norm": 1.6046651107365564, "learning_rate": 1.958487081776106e-05, "loss": 0.773, "step": 1168 }, { "epoch": 0.11940755873340143, "grad_norm": 1.5176283952203122, "learning_rate": 1.9583926968655605e-05, "loss": 0.7842, "step": 1169 }, { "epoch": 0.1195097037793667, "grad_norm": 1.6493618120879856, "learning_rate": 1.9582982070583147e-05, "loss": 0.7523, "step": 1170 }, { "epoch": 0.11961184882533198, "grad_norm": 1.5673699702571005, "learning_rate": 1.9582036123647113e-05, "loss": 0.7886, "step": 1171 }, { "epoch": 0.11971399387129725, "grad_norm": 1.587491673900439, "learning_rate": 1.958108912795103e-05, "loss": 0.7634, "step": 1172 }, { "epoch": 0.11981613891726252, "grad_norm": 1.515341532472603, "learning_rate": 1.9580141083598555e-05, "loss": 0.7304, "step": 1173 }, { "epoch": 0.11991828396322779, "grad_norm": 1.5622614848581975, "learning_rate": 1.9579191990693448e-05, "loss": 0.7874, "step": 1174 }, { "epoch": 0.12002042900919306, "grad_norm": 1.3199339007577178, "learning_rate": 1.957824184933959e-05, "loss": 0.6765, "step": 1175 }, { "epoch": 0.12012257405515832, "grad_norm": 1.6096869492673378, "learning_rate": 1.957729065964097e-05, "loss": 0.7237, "step": 1176 }, { "epoch": 0.12022471910112359, "grad_norm": 1.61113041065203, "learning_rate": 1.9576338421701704e-05, "loss": 0.8896, "step": 1177 }, { "epoch": 0.12032686414708886, "grad_norm": 1.5691180985889015, "learning_rate": 1.957538513562601e-05, "loss": 0.7583, "step": 1178 }, { "epoch": 0.12042900919305413, "grad_norm": 1.5851408398053064, "learning_rate": 1.9574430801518224e-05, "loss": 0.7986, "step": 1179 }, { "epoch": 0.1205311542390194, "grad_norm": 1.5157203742961871, "learning_rate": 1.95734754194828e-05, "loss": 0.7079, "step": 1180 }, { "epoch": 0.12063329928498467, "grad_norm": 1.5218548154218454, "learning_rate": 1.957251898962431e-05, "loss": 0.6954, "step": 1181 }, { "epoch": 0.12073544433094995, "grad_norm": 1.6277891174330776, "learning_rate": 1.9571561512047426e-05, "loss": 0.7921, "step": 1182 }, { "epoch": 0.12083758937691522, "grad_norm": 1.3707842287380825, "learning_rate": 1.9570602986856956e-05, "loss": 0.7534, "step": 1183 }, { "epoch": 0.12093973442288049, "grad_norm": 1.6508759615286497, "learning_rate": 1.9569643414157804e-05, "loss": 0.7422, "step": 1184 }, { "epoch": 0.12104187946884576, "grad_norm": 1.4064717885342997, "learning_rate": 1.9568682794055e-05, "loss": 0.8545, "step": 1185 }, { "epoch": 0.12114402451481103, "grad_norm": 1.5297819422412304, "learning_rate": 1.956772112665368e-05, "loss": 0.7621, "step": 1186 }, { "epoch": 0.1212461695607763, "grad_norm": 1.6584398639779026, "learning_rate": 1.9566758412059098e-05, "loss": 0.9568, "step": 1187 }, { "epoch": 0.12134831460674157, "grad_norm": 1.5671169981433042, "learning_rate": 1.956579465037663e-05, "loss": 0.8319, "step": 1188 }, { "epoch": 0.12145045965270684, "grad_norm": 1.5780710279331873, "learning_rate": 1.9564829841711756e-05, "loss": 0.6693, "step": 1189 }, { "epoch": 0.12155260469867211, "grad_norm": 1.543220695185644, "learning_rate": 1.956386398617008e-05, "loss": 0.7546, "step": 1190 }, { "epoch": 0.12165474974463739, "grad_norm": 1.513080354208372, "learning_rate": 1.9562897083857306e-05, "loss": 0.8116, "step": 1191 }, { "epoch": 0.12175689479060266, "grad_norm": 1.4152589378323934, "learning_rate": 1.956192913487927e-05, "loss": 0.7406, "step": 1192 }, { "epoch": 0.12185903983656793, "grad_norm": 1.502052507526006, "learning_rate": 1.9560960139341912e-05, "loss": 0.7904, "step": 1193 }, { "epoch": 0.1219611848825332, "grad_norm": 1.4209718536726594, "learning_rate": 1.955999009735129e-05, "loss": 0.6884, "step": 1194 }, { "epoch": 0.12206332992849847, "grad_norm": 1.487880776322938, "learning_rate": 1.9559019009013575e-05, "loss": 0.8, "step": 1195 }, { "epoch": 0.12216547497446374, "grad_norm": 1.5596167530552554, "learning_rate": 1.9558046874435053e-05, "loss": 0.7751, "step": 1196 }, { "epoch": 0.12226762002042901, "grad_norm": 1.595673203573733, "learning_rate": 1.9557073693722127e-05, "loss": 0.6974, "step": 1197 }, { "epoch": 0.12236976506639428, "grad_norm": 1.5430194348370545, "learning_rate": 1.955609946698131e-05, "loss": 0.8015, "step": 1198 }, { "epoch": 0.12247191011235956, "grad_norm": 1.5942440693844087, "learning_rate": 1.9555124194319235e-05, "loss": 0.6994, "step": 1199 }, { "epoch": 0.12257405515832483, "grad_norm": 1.5154002049573354, "learning_rate": 1.955414787584264e-05, "loss": 0.82, "step": 1200 }, { "epoch": 0.1226762002042901, "grad_norm": 1.4479051219079702, "learning_rate": 1.955317051165839e-05, "loss": 0.7366, "step": 1201 }, { "epoch": 0.12277834525025537, "grad_norm": 1.5359739446267027, "learning_rate": 1.955219210187345e-05, "loss": 0.697, "step": 1202 }, { "epoch": 0.12288049029622064, "grad_norm": 1.429308799480471, "learning_rate": 1.955121264659492e-05, "loss": 0.6499, "step": 1203 }, { "epoch": 0.1229826353421859, "grad_norm": 1.4372434832685075, "learning_rate": 1.955023214592999e-05, "loss": 0.6346, "step": 1204 }, { "epoch": 0.12308478038815117, "grad_norm": 1.5091235348899132, "learning_rate": 1.9549250599985982e-05, "loss": 0.8377, "step": 1205 }, { "epoch": 0.12318692543411644, "grad_norm": 1.6379882550955176, "learning_rate": 1.954826800887033e-05, "loss": 0.7423, "step": 1206 }, { "epoch": 0.12328907048008171, "grad_norm": 1.4341127596488723, "learning_rate": 1.9547284372690568e-05, "loss": 0.8556, "step": 1207 }, { "epoch": 0.12339121552604698, "grad_norm": 1.480754107242904, "learning_rate": 1.9546299691554368e-05, "loss": 0.7573, "step": 1208 }, { "epoch": 0.12349336057201225, "grad_norm": 1.6924205463669784, "learning_rate": 1.95453139655695e-05, "loss": 0.851, "step": 1209 }, { "epoch": 0.12359550561797752, "grad_norm": 1.4876519480318136, "learning_rate": 1.954432719484385e-05, "loss": 0.8429, "step": 1210 }, { "epoch": 0.1236976506639428, "grad_norm": 1.6435897107326578, "learning_rate": 1.954333937948542e-05, "loss": 0.8184, "step": 1211 }, { "epoch": 0.12379979570990807, "grad_norm": 1.4616993314083842, "learning_rate": 1.9542350519602334e-05, "loss": 0.7895, "step": 1212 }, { "epoch": 0.12390194075587334, "grad_norm": 1.6124068707634893, "learning_rate": 1.9541360615302815e-05, "loss": 0.8018, "step": 1213 }, { "epoch": 0.12400408580183861, "grad_norm": 1.5408240938846258, "learning_rate": 1.9540369666695213e-05, "loss": 0.8217, "step": 1214 }, { "epoch": 0.12410623084780388, "grad_norm": 1.4938413820397354, "learning_rate": 1.9539377673887986e-05, "loss": 0.7922, "step": 1215 }, { "epoch": 0.12420837589376915, "grad_norm": 1.477164919560015, "learning_rate": 1.953838463698971e-05, "loss": 0.7786, "step": 1216 }, { "epoch": 0.12431052093973442, "grad_norm": 1.5690190584247838, "learning_rate": 1.9537390556109073e-05, "loss": 0.7951, "step": 1217 }, { "epoch": 0.1244126659856997, "grad_norm": 1.451598369396406, "learning_rate": 1.9536395431354877e-05, "loss": 0.8184, "step": 1218 }, { "epoch": 0.12451481103166497, "grad_norm": 1.5424861400142693, "learning_rate": 1.9535399262836045e-05, "loss": 0.7921, "step": 1219 }, { "epoch": 0.12461695607763024, "grad_norm": 1.348330119934175, "learning_rate": 1.9534402050661596e-05, "loss": 0.6841, "step": 1220 }, { "epoch": 0.12471910112359551, "grad_norm": 1.4802448089171993, "learning_rate": 1.9533403794940685e-05, "loss": 0.7919, "step": 1221 }, { "epoch": 0.12482124616956078, "grad_norm": 1.7236807896478832, "learning_rate": 1.953240449578257e-05, "loss": 0.7787, "step": 1222 }, { "epoch": 0.12492339121552605, "grad_norm": 1.6457683616895697, "learning_rate": 1.9531404153296624e-05, "loss": 0.8624, "step": 1223 }, { "epoch": 0.12502553626149132, "grad_norm": 1.5388545431253615, "learning_rate": 1.9530402767592337e-05, "loss": 0.7026, "step": 1224 }, { "epoch": 0.12512768130745658, "grad_norm": 1.6011602411064116, "learning_rate": 1.9529400338779304e-05, "loss": 0.8759, "step": 1225 }, { "epoch": 0.12522982635342186, "grad_norm": 1.5443837398441254, "learning_rate": 1.952839686696725e-05, "loss": 0.7175, "step": 1226 }, { "epoch": 0.12533197139938712, "grad_norm": 1.5533678977592649, "learning_rate": 1.9527392352266007e-05, "loss": 0.8163, "step": 1227 }, { "epoch": 0.1254341164453524, "grad_norm": 1.5194944724104429, "learning_rate": 1.9526386794785514e-05, "loss": 0.7864, "step": 1228 }, { "epoch": 0.12553626149131766, "grad_norm": 1.5596736490843033, "learning_rate": 1.9525380194635825e-05, "loss": 0.855, "step": 1229 }, { "epoch": 0.12563840653728295, "grad_norm": 1.501947991926002, "learning_rate": 1.9524372551927128e-05, "loss": 0.7163, "step": 1230 }, { "epoch": 0.1257405515832482, "grad_norm": 1.590038770143226, "learning_rate": 1.9523363866769695e-05, "loss": 0.9486, "step": 1231 }, { "epoch": 0.1258426966292135, "grad_norm": 1.3737947858009558, "learning_rate": 1.9522354139273937e-05, "loss": 0.6417, "step": 1232 }, { "epoch": 0.12594484167517875, "grad_norm": 1.4189486373843732, "learning_rate": 1.9521343369550365e-05, "loss": 0.7811, "step": 1233 }, { "epoch": 0.12604698672114403, "grad_norm": 1.6212084434284937, "learning_rate": 1.9520331557709615e-05, "loss": 0.7716, "step": 1234 }, { "epoch": 0.1261491317671093, "grad_norm": 1.4103584984512114, "learning_rate": 1.9519318703862418e-05, "loss": 0.7178, "step": 1235 }, { "epoch": 0.12625127681307458, "grad_norm": 1.6483854093234263, "learning_rate": 1.951830480811964e-05, "loss": 0.7433, "step": 1236 }, { "epoch": 0.12635342185903983, "grad_norm": 1.4243029379299237, "learning_rate": 1.9517289870592254e-05, "loss": 0.8385, "step": 1237 }, { "epoch": 0.12645556690500512, "grad_norm": 1.5651855621549706, "learning_rate": 1.9516273891391342e-05, "loss": 0.7024, "step": 1238 }, { "epoch": 0.12655771195097038, "grad_norm": 1.5572859040246025, "learning_rate": 1.95152568706281e-05, "loss": 0.7596, "step": 1239 }, { "epoch": 0.12665985699693566, "grad_norm": 1.4300605871870788, "learning_rate": 1.951423880841385e-05, "loss": 0.7464, "step": 1240 }, { "epoch": 0.12676200204290092, "grad_norm": 1.5176634948195888, "learning_rate": 1.9513219704860016e-05, "loss": 0.7086, "step": 1241 }, { "epoch": 0.1268641470888662, "grad_norm": 1.3279057278393684, "learning_rate": 1.9512199560078137e-05, "loss": 0.7435, "step": 1242 }, { "epoch": 0.12696629213483146, "grad_norm": 1.6322991409058123, "learning_rate": 1.951117837417987e-05, "loss": 0.8146, "step": 1243 }, { "epoch": 0.12706843718079674, "grad_norm": 1.4382790233270137, "learning_rate": 1.9510156147276988e-05, "loss": 0.8311, "step": 1244 }, { "epoch": 0.127170582226762, "grad_norm": 1.4436498383514786, "learning_rate": 1.950913287948137e-05, "loss": 0.7165, "step": 1245 }, { "epoch": 0.12727272727272726, "grad_norm": 1.527741830480384, "learning_rate": 1.9508108570905013e-05, "loss": 0.7783, "step": 1246 }, { "epoch": 0.12737487231869254, "grad_norm": 1.6083031572909405, "learning_rate": 1.950708322166003e-05, "loss": 0.7499, "step": 1247 }, { "epoch": 0.1274770173646578, "grad_norm": 1.505195219056494, "learning_rate": 1.950605683185865e-05, "loss": 0.8373, "step": 1248 }, { "epoch": 0.1275791624106231, "grad_norm": 1.7304183447760781, "learning_rate": 1.95050294016132e-05, "loss": 0.814, "step": 1249 }, { "epoch": 0.12768130745658834, "grad_norm": 1.3974639477310231, "learning_rate": 1.950400093103615e-05, "loss": 0.741, "step": 1250 }, { "epoch": 0.12778345250255363, "grad_norm": 1.3839809198826516, "learning_rate": 1.9502971420240052e-05, "loss": 0.6405, "step": 1251 }, { "epoch": 0.1278855975485189, "grad_norm": 1.5778035995302413, "learning_rate": 1.9501940869337595e-05, "loss": 0.9277, "step": 1252 }, { "epoch": 0.12798774259448417, "grad_norm": 1.7643454749799408, "learning_rate": 1.9500909278441573e-05, "loss": 0.7097, "step": 1253 }, { "epoch": 0.12808988764044943, "grad_norm": 1.426867190810617, "learning_rate": 1.9499876647664885e-05, "loss": 0.809, "step": 1254 }, { "epoch": 0.12819203268641471, "grad_norm": 1.5393127596372518, "learning_rate": 1.9498842977120564e-05, "loss": 0.7591, "step": 1255 }, { "epoch": 0.12829417773237997, "grad_norm": 1.576772161834551, "learning_rate": 1.9497808266921746e-05, "loss": 0.7773, "step": 1256 }, { "epoch": 0.12839632277834526, "grad_norm": 1.534560439883318, "learning_rate": 1.949677251718167e-05, "loss": 0.7538, "step": 1257 }, { "epoch": 0.1284984678243105, "grad_norm": 1.4807566812345876, "learning_rate": 1.9495735728013708e-05, "loss": 0.726, "step": 1258 }, { "epoch": 0.1286006128702758, "grad_norm": 1.4769360453666605, "learning_rate": 1.9494697899531338e-05, "loss": 0.798, "step": 1259 }, { "epoch": 0.12870275791624106, "grad_norm": 1.5131516329856072, "learning_rate": 1.9493659031848148e-05, "loss": 0.8166, "step": 1260 }, { "epoch": 0.12880490296220634, "grad_norm": 1.523882899949307, "learning_rate": 1.9492619125077844e-05, "loss": 0.7228, "step": 1261 }, { "epoch": 0.1289070480081716, "grad_norm": 1.4289521472821463, "learning_rate": 1.9491578179334244e-05, "loss": 0.7096, "step": 1262 }, { "epoch": 0.12900919305413688, "grad_norm": 1.4967584239560239, "learning_rate": 1.9490536194731276e-05, "loss": 0.7837, "step": 1263 }, { "epoch": 0.12911133810010214, "grad_norm": 1.661852848620628, "learning_rate": 1.9489493171382993e-05, "loss": 0.7938, "step": 1264 }, { "epoch": 0.12921348314606743, "grad_norm": 1.3594635746150734, "learning_rate": 1.948844910940355e-05, "loss": 0.6353, "step": 1265 }, { "epoch": 0.12931562819203268, "grad_norm": 1.5197666808732042, "learning_rate": 1.9487404008907222e-05, "loss": 0.7502, "step": 1266 }, { "epoch": 0.12941777323799797, "grad_norm": 1.554233137007806, "learning_rate": 1.9486357870008397e-05, "loss": 0.7949, "step": 1267 }, { "epoch": 0.12951991828396323, "grad_norm": 1.4390956841570666, "learning_rate": 1.9485310692821572e-05, "loss": 0.6916, "step": 1268 }, { "epoch": 0.1296220633299285, "grad_norm": 1.4287033312432231, "learning_rate": 1.9484262477461365e-05, "loss": 0.6125, "step": 1269 }, { "epoch": 0.12972420837589377, "grad_norm": 1.500760928122116, "learning_rate": 1.94832132240425e-05, "loss": 0.7405, "step": 1270 }, { "epoch": 0.12982635342185905, "grad_norm": 1.4439762419213706, "learning_rate": 1.9482162932679824e-05, "loss": 0.8158, "step": 1271 }, { "epoch": 0.1299284984678243, "grad_norm": 1.6065463329205603, "learning_rate": 1.9481111603488284e-05, "loss": 0.7566, "step": 1272 }, { "epoch": 0.13003064351378957, "grad_norm": 1.3978531502606641, "learning_rate": 1.948005923658296e-05, "loss": 0.7509, "step": 1273 }, { "epoch": 0.13013278855975485, "grad_norm": 1.4805404323480404, "learning_rate": 1.9479005832079022e-05, "loss": 0.7183, "step": 1274 }, { "epoch": 0.1302349336057201, "grad_norm": 1.3731977149725076, "learning_rate": 1.9477951390091772e-05, "loss": 0.7126, "step": 1275 }, { "epoch": 0.1303370786516854, "grad_norm": 1.3545611955295465, "learning_rate": 1.9476895910736624e-05, "loss": 0.7196, "step": 1276 }, { "epoch": 0.13043922369765065, "grad_norm": 1.4642178006924533, "learning_rate": 1.9475839394129093e-05, "loss": 0.8878, "step": 1277 }, { "epoch": 0.13054136874361594, "grad_norm": 1.5491755067832083, "learning_rate": 1.9474781840384816e-05, "loss": 0.6414, "step": 1278 }, { "epoch": 0.1306435137895812, "grad_norm": 1.5179398342410995, "learning_rate": 1.9473723249619545e-05, "loss": 0.7288, "step": 1279 }, { "epoch": 0.13074565883554648, "grad_norm": 1.4280745019034362, "learning_rate": 1.9472663621949147e-05, "loss": 0.677, "step": 1280 }, { "epoch": 0.13084780388151174, "grad_norm": 1.4342691281373265, "learning_rate": 1.947160295748959e-05, "loss": 0.7647, "step": 1281 }, { "epoch": 0.13094994892747702, "grad_norm": 1.459768333429644, "learning_rate": 1.9470541256356976e-05, "loss": 0.741, "step": 1282 }, { "epoch": 0.13105209397344228, "grad_norm": 1.402060939716417, "learning_rate": 1.94694785186675e-05, "loss": 0.862, "step": 1283 }, { "epoch": 0.13115423901940756, "grad_norm": 1.5040641667412629, "learning_rate": 1.946841474453748e-05, "loss": 0.7683, "step": 1284 }, { "epoch": 0.13125638406537282, "grad_norm": 1.5512216794858509, "learning_rate": 1.9467349934083353e-05, "loss": 0.7777, "step": 1285 }, { "epoch": 0.1313585291113381, "grad_norm": 1.4563363227319008, "learning_rate": 1.9466284087421657e-05, "loss": 0.7635, "step": 1286 }, { "epoch": 0.13146067415730336, "grad_norm": 1.5866943978222612, "learning_rate": 1.946521720466905e-05, "loss": 0.7069, "step": 1287 }, { "epoch": 0.13156281920326865, "grad_norm": 1.6021632179471066, "learning_rate": 1.946414928594231e-05, "loss": 0.7996, "step": 1288 }, { "epoch": 0.1316649642492339, "grad_norm": 1.7024833072792165, "learning_rate": 1.946308033135831e-05, "loss": 0.8261, "step": 1289 }, { "epoch": 0.1317671092951992, "grad_norm": 1.4611987403682454, "learning_rate": 1.9462010341034054e-05, "loss": 0.8057, "step": 1290 }, { "epoch": 0.13186925434116445, "grad_norm": 1.5828858730965563, "learning_rate": 1.9460939315086656e-05, "loss": 0.6879, "step": 1291 }, { "epoch": 0.13197139938712973, "grad_norm": 1.5268272763769084, "learning_rate": 1.9459867253633336e-05, "loss": 0.7829, "step": 1292 }, { "epoch": 0.132073544433095, "grad_norm": 1.3799394442782487, "learning_rate": 1.9458794156791434e-05, "loss": 0.7399, "step": 1293 }, { "epoch": 0.13217568947906028, "grad_norm": 1.8226588432193558, "learning_rate": 1.9457720024678403e-05, "loss": 0.8049, "step": 1294 }, { "epoch": 0.13227783452502553, "grad_norm": 1.5747986639029585, "learning_rate": 1.94566448574118e-05, "loss": 0.7791, "step": 1295 }, { "epoch": 0.13237997957099082, "grad_norm": 1.5707765502019684, "learning_rate": 1.9455568655109308e-05, "loss": 0.6383, "step": 1296 }, { "epoch": 0.13248212461695608, "grad_norm": 1.5390867489841076, "learning_rate": 1.945449141788872e-05, "loss": 0.7131, "step": 1297 }, { "epoch": 0.13258426966292136, "grad_norm": 1.5738936594218587, "learning_rate": 1.9453413145867935e-05, "loss": 0.7667, "step": 1298 }, { "epoch": 0.13268641470888662, "grad_norm": 1.3888136625789946, "learning_rate": 1.9452333839164977e-05, "loss": 0.6533, "step": 1299 }, { "epoch": 0.13278855975485188, "grad_norm": 1.4486621457095246, "learning_rate": 1.945125349789797e-05, "loss": 0.7136, "step": 1300 }, { "epoch": 0.13289070480081716, "grad_norm": 1.5733295922815216, "learning_rate": 1.9450172122185166e-05, "loss": 0.6742, "step": 1301 }, { "epoch": 0.13299284984678242, "grad_norm": 1.5505307195551454, "learning_rate": 1.9449089712144912e-05, "loss": 0.7434, "step": 1302 }, { "epoch": 0.1330949948927477, "grad_norm": 1.4918367742236909, "learning_rate": 1.9448006267895688e-05, "loss": 0.7375, "step": 1303 }, { "epoch": 0.13319713993871296, "grad_norm": 1.6285234979713898, "learning_rate": 1.9446921789556072e-05, "loss": 0.8028, "step": 1304 }, { "epoch": 0.13329928498467825, "grad_norm": 1.50801812580762, "learning_rate": 1.9445836277244764e-05, "loss": 0.7068, "step": 1305 }, { "epoch": 0.1334014300306435, "grad_norm": 1.4443984402819352, "learning_rate": 1.944474973108057e-05, "loss": 0.7831, "step": 1306 }, { "epoch": 0.1335035750766088, "grad_norm": 1.5812596004458372, "learning_rate": 1.944366215118242e-05, "loss": 0.8283, "step": 1307 }, { "epoch": 0.13360572012257405, "grad_norm": 1.481286361820175, "learning_rate": 1.9442573537669344e-05, "loss": 0.7908, "step": 1308 }, { "epoch": 0.13370786516853933, "grad_norm": 1.5321188104668377, "learning_rate": 1.9441483890660494e-05, "loss": 0.7134, "step": 1309 }, { "epoch": 0.1338100102145046, "grad_norm": 1.5282223488236462, "learning_rate": 1.944039321027513e-05, "loss": 0.7157, "step": 1310 }, { "epoch": 0.13391215526046987, "grad_norm": 1.7117893928552739, "learning_rate": 1.9439301496632634e-05, "loss": 0.668, "step": 1311 }, { "epoch": 0.13401430030643513, "grad_norm": 1.5003983486835706, "learning_rate": 1.9438208749852486e-05, "loss": 0.8136, "step": 1312 }, { "epoch": 0.13411644535240042, "grad_norm": 1.6334235620493318, "learning_rate": 1.9437114970054294e-05, "loss": 0.8367, "step": 1313 }, { "epoch": 0.13421859039836567, "grad_norm": 1.3918477316569242, "learning_rate": 1.9436020157357772e-05, "loss": 0.7825, "step": 1314 }, { "epoch": 0.13432073544433096, "grad_norm": 1.4074311673112576, "learning_rate": 1.9434924311882747e-05, "loss": 0.7197, "step": 1315 }, { "epoch": 0.13442288049029621, "grad_norm": 1.4626436779780858, "learning_rate": 1.943382743374916e-05, "loss": 0.7666, "step": 1316 }, { "epoch": 0.1345250255362615, "grad_norm": 1.393085008920469, "learning_rate": 1.943272952307707e-05, "loss": 0.6859, "step": 1317 }, { "epoch": 0.13462717058222676, "grad_norm": 1.4130247019420528, "learning_rate": 1.9431630579986635e-05, "loss": 0.7929, "step": 1318 }, { "epoch": 0.13472931562819204, "grad_norm": 1.483480212650707, "learning_rate": 1.9430530604598137e-05, "loss": 0.727, "step": 1319 }, { "epoch": 0.1348314606741573, "grad_norm": 1.4358273100426875, "learning_rate": 1.9429429597031976e-05, "loss": 0.8062, "step": 1320 }, { "epoch": 0.13493360572012258, "grad_norm": 1.3728179486111791, "learning_rate": 1.942832755740865e-05, "loss": 0.7409, "step": 1321 }, { "epoch": 0.13503575076608784, "grad_norm": 1.544396196573522, "learning_rate": 1.9427224485848783e-05, "loss": 0.7213, "step": 1322 }, { "epoch": 0.13513789581205313, "grad_norm": 1.7213121530133915, "learning_rate": 1.9426120382473108e-05, "loss": 0.8171, "step": 1323 }, { "epoch": 0.13524004085801838, "grad_norm": 1.7749093718289375, "learning_rate": 1.942501524740246e-05, "loss": 0.7975, "step": 1324 }, { "epoch": 0.13534218590398367, "grad_norm": 1.5688829761916672, "learning_rate": 1.942390908075781e-05, "loss": 0.8525, "step": 1325 }, { "epoch": 0.13544433094994893, "grad_norm": 1.4841413656191014, "learning_rate": 1.9422801882660223e-05, "loss": 0.8521, "step": 1326 }, { "epoch": 0.1355464759959142, "grad_norm": 1.5283490215530708, "learning_rate": 1.942169365323088e-05, "loss": 0.6904, "step": 1327 }, { "epoch": 0.13564862104187947, "grad_norm": 1.655275932915154, "learning_rate": 1.942058439259108e-05, "loss": 0.8149, "step": 1328 }, { "epoch": 0.13575076608784473, "grad_norm": 1.5603683493673701, "learning_rate": 1.941947410086223e-05, "loss": 0.6689, "step": 1329 }, { "epoch": 0.13585291113381, "grad_norm": 1.6616119726147993, "learning_rate": 1.9418362778165855e-05, "loss": 0.7794, "step": 1330 }, { "epoch": 0.13595505617977527, "grad_norm": 1.543469652291921, "learning_rate": 1.9417250424623588e-05, "loss": 0.6894, "step": 1331 }, { "epoch": 0.13605720122574055, "grad_norm": 1.3779853751687607, "learning_rate": 1.9416137040357176e-05, "loss": 0.7143, "step": 1332 }, { "epoch": 0.1361593462717058, "grad_norm": 1.4305466348146565, "learning_rate": 1.9415022625488485e-05, "loss": 0.6136, "step": 1333 }, { "epoch": 0.1362614913176711, "grad_norm": 1.413453657167387, "learning_rate": 1.9413907180139483e-05, "loss": 0.7399, "step": 1334 }, { "epoch": 0.13636363636363635, "grad_norm": 1.4824260310503163, "learning_rate": 1.9412790704432258e-05, "loss": 0.8489, "step": 1335 }, { "epoch": 0.13646578140960164, "grad_norm": 1.6269049337234491, "learning_rate": 1.941167319848901e-05, "loss": 0.7291, "step": 1336 }, { "epoch": 0.1365679264555669, "grad_norm": 1.4560968142741078, "learning_rate": 1.941055466243205e-05, "loss": 0.6166, "step": 1337 }, { "epoch": 0.13667007150153218, "grad_norm": 1.376626204111802, "learning_rate": 1.94094350963838e-05, "loss": 0.6606, "step": 1338 }, { "epoch": 0.13677221654749744, "grad_norm": 1.561054857458974, "learning_rate": 1.94083145004668e-05, "loss": 0.7449, "step": 1339 }, { "epoch": 0.13687436159346272, "grad_norm": 1.5790169536603222, "learning_rate": 1.9407192874803703e-05, "loss": 0.9003, "step": 1340 }, { "epoch": 0.13697650663942798, "grad_norm": 1.4951587843090144, "learning_rate": 1.9406070219517264e-05, "loss": 0.6879, "step": 1341 }, { "epoch": 0.13707865168539327, "grad_norm": 1.4384364850723763, "learning_rate": 1.9404946534730365e-05, "loss": 0.7172, "step": 1342 }, { "epoch": 0.13718079673135852, "grad_norm": 1.3699886233438812, "learning_rate": 1.940382182056599e-05, "loss": 0.7686, "step": 1343 }, { "epoch": 0.1372829417773238, "grad_norm": 1.6379405576048405, "learning_rate": 1.9402696077147238e-05, "loss": 0.812, "step": 1344 }, { "epoch": 0.13738508682328907, "grad_norm": 1.4284180064912348, "learning_rate": 1.940156930459733e-05, "loss": 0.7538, "step": 1345 }, { "epoch": 0.13748723186925435, "grad_norm": 1.533963594673483, "learning_rate": 1.9400441503039586e-05, "loss": 0.7255, "step": 1346 }, { "epoch": 0.1375893769152196, "grad_norm": 1.4263382666907924, "learning_rate": 1.9399312672597447e-05, "loss": 0.8372, "step": 1347 }, { "epoch": 0.1376915219611849, "grad_norm": 1.5152440970366143, "learning_rate": 1.939818281339446e-05, "loss": 0.8038, "step": 1348 }, { "epoch": 0.13779366700715015, "grad_norm": 1.508343586759886, "learning_rate": 1.9397051925554294e-05, "loss": 0.7741, "step": 1349 }, { "epoch": 0.13789581205311544, "grad_norm": 1.3650663556253753, "learning_rate": 1.9395920009200722e-05, "loss": 0.876, "step": 1350 }, { "epoch": 0.1379979570990807, "grad_norm": 1.5103456966424964, "learning_rate": 1.939478706445764e-05, "loss": 0.7709, "step": 1351 }, { "epoch": 0.13810010214504598, "grad_norm": 1.5280813303792649, "learning_rate": 1.939365309144904e-05, "loss": 0.8724, "step": 1352 }, { "epoch": 0.13820224719101123, "grad_norm": 1.4698126131473697, "learning_rate": 1.939251809029904e-05, "loss": 0.8719, "step": 1353 }, { "epoch": 0.13830439223697652, "grad_norm": 1.5404653841737956, "learning_rate": 1.9391382061131865e-05, "loss": 0.7133, "step": 1354 }, { "epoch": 0.13840653728294178, "grad_norm": 1.4550072490606445, "learning_rate": 1.939024500407186e-05, "loss": 0.6767, "step": 1355 }, { "epoch": 0.13850868232890703, "grad_norm": 1.6688748917700813, "learning_rate": 1.938910691924347e-05, "loss": 0.7601, "step": 1356 }, { "epoch": 0.13861082737487232, "grad_norm": 1.5667379945866036, "learning_rate": 1.9387967806771263e-05, "loss": 0.7577, "step": 1357 }, { "epoch": 0.13871297242083758, "grad_norm": 1.576333592323303, "learning_rate": 1.938682766677991e-05, "loss": 0.7406, "step": 1358 }, { "epoch": 0.13881511746680286, "grad_norm": 1.5932437174249479, "learning_rate": 1.9385686499394208e-05, "loss": 0.8517, "step": 1359 }, { "epoch": 0.13891726251276812, "grad_norm": 1.5736017529706714, "learning_rate": 1.9384544304739053e-05, "loss": 0.8212, "step": 1360 }, { "epoch": 0.1390194075587334, "grad_norm": 1.41905562640827, "learning_rate": 1.938340108293946e-05, "loss": 0.773, "step": 1361 }, { "epoch": 0.13912155260469866, "grad_norm": 1.3786796593876844, "learning_rate": 1.9382256834120562e-05, "loss": 0.7801, "step": 1362 }, { "epoch": 0.13922369765066395, "grad_norm": 1.4530673814590769, "learning_rate": 1.9381111558407585e-05, "loss": 0.7972, "step": 1363 }, { "epoch": 0.1393258426966292, "grad_norm": 1.513303311858783, "learning_rate": 1.9379965255925887e-05, "loss": 0.6332, "step": 1364 }, { "epoch": 0.1394279877425945, "grad_norm": 1.8621510199139821, "learning_rate": 1.9378817926800938e-05, "loss": 0.8913, "step": 1365 }, { "epoch": 0.13953013278855975, "grad_norm": 1.6131804902974567, "learning_rate": 1.9377669571158302e-05, "loss": 0.8056, "step": 1366 }, { "epoch": 0.13963227783452503, "grad_norm": 1.4439086302212054, "learning_rate": 1.9376520189123675e-05, "loss": 0.7196, "step": 1367 }, { "epoch": 0.1397344228804903, "grad_norm": 1.4079054076740687, "learning_rate": 1.937536978082285e-05, "loss": 0.7259, "step": 1368 }, { "epoch": 0.13983656792645557, "grad_norm": 1.504774731438275, "learning_rate": 1.937421834638175e-05, "loss": 0.7839, "step": 1369 }, { "epoch": 0.13993871297242083, "grad_norm": 1.378707243885884, "learning_rate": 1.9373065885926396e-05, "loss": 0.7863, "step": 1370 }, { "epoch": 0.14004085801838612, "grad_norm": 1.5639070409621605, "learning_rate": 1.9371912399582924e-05, "loss": 0.9166, "step": 1371 }, { "epoch": 0.14014300306435137, "grad_norm": 1.4912691865465266, "learning_rate": 1.9370757887477585e-05, "loss": 0.6059, "step": 1372 }, { "epoch": 0.14024514811031666, "grad_norm": 1.7796257513381397, "learning_rate": 1.936960234973674e-05, "loss": 0.8543, "step": 1373 }, { "epoch": 0.14034729315628192, "grad_norm": 1.4614805988248598, "learning_rate": 1.936844578648686e-05, "loss": 0.7885, "step": 1374 }, { "epoch": 0.1404494382022472, "grad_norm": 1.5688929373248865, "learning_rate": 1.9367288197854544e-05, "loss": 0.6908, "step": 1375 }, { "epoch": 0.14055158324821246, "grad_norm": 1.50045233455611, "learning_rate": 1.936612958396648e-05, "loss": 0.8329, "step": 1376 }, { "epoch": 0.14065372829417774, "grad_norm": 1.5752476079955477, "learning_rate": 1.9364969944949482e-05, "loss": 0.843, "step": 1377 }, { "epoch": 0.140755873340143, "grad_norm": 1.5990392987981594, "learning_rate": 1.936380928093047e-05, "loss": 0.6926, "step": 1378 }, { "epoch": 0.14085801838610829, "grad_norm": 1.4790781526314436, "learning_rate": 1.9362647592036486e-05, "loss": 0.7661, "step": 1379 }, { "epoch": 0.14096016343207354, "grad_norm": 1.4716674800810408, "learning_rate": 1.936148487839467e-05, "loss": 0.7081, "step": 1380 }, { "epoch": 0.14106230847803883, "grad_norm": 1.5709982042963817, "learning_rate": 1.9360321140132293e-05, "loss": 0.7921, "step": 1381 }, { "epoch": 0.14116445352400409, "grad_norm": 1.599873337308863, "learning_rate": 1.9359156377376714e-05, "loss": 0.8262, "step": 1382 }, { "epoch": 0.14126659856996934, "grad_norm": 1.788449074412186, "learning_rate": 1.9357990590255424e-05, "loss": 0.8253, "step": 1383 }, { "epoch": 0.14136874361593463, "grad_norm": 1.5913189352470587, "learning_rate": 1.9356823778896015e-05, "loss": 0.6756, "step": 1384 }, { "epoch": 0.14147088866189989, "grad_norm": 1.5406190115304836, "learning_rate": 1.93556559434262e-05, "loss": 0.827, "step": 1385 }, { "epoch": 0.14157303370786517, "grad_norm": 1.3037905478178327, "learning_rate": 1.93544870839738e-05, "loss": 0.6651, "step": 1386 }, { "epoch": 0.14167517875383043, "grad_norm": 1.4623088059288514, "learning_rate": 1.935331720066674e-05, "loss": 0.7472, "step": 1387 }, { "epoch": 0.1417773237997957, "grad_norm": 1.3607830899730813, "learning_rate": 1.9352146293633075e-05, "loss": 0.6945, "step": 1388 }, { "epoch": 0.14187946884576097, "grad_norm": 1.5794147762084267, "learning_rate": 1.9350974363000954e-05, "loss": 0.7526, "step": 1389 }, { "epoch": 0.14198161389172625, "grad_norm": 1.3437609366040257, "learning_rate": 1.934980140889865e-05, "loss": 0.6844, "step": 1390 }, { "epoch": 0.1420837589376915, "grad_norm": 1.6194175430599684, "learning_rate": 1.9348627431454535e-05, "loss": 0.8139, "step": 1391 }, { "epoch": 0.1421859039836568, "grad_norm": 1.4677720205491822, "learning_rate": 1.9347452430797107e-05, "loss": 0.7707, "step": 1392 }, { "epoch": 0.14228804902962205, "grad_norm": 1.5420864374045482, "learning_rate": 1.9346276407054977e-05, "loss": 0.8047, "step": 1393 }, { "epoch": 0.14239019407558734, "grad_norm": 1.521937007190085, "learning_rate": 1.9345099360356855e-05, "loss": 0.768, "step": 1394 }, { "epoch": 0.1424923391215526, "grad_norm": 1.4872828146564219, "learning_rate": 1.9343921290831568e-05, "loss": 0.8, "step": 1395 }, { "epoch": 0.14259448416751788, "grad_norm": 1.6513987228829032, "learning_rate": 1.934274219860806e-05, "loss": 0.7522, "step": 1396 }, { "epoch": 0.14269662921348314, "grad_norm": 1.4432876187192811, "learning_rate": 1.934156208381538e-05, "loss": 0.6924, "step": 1397 }, { "epoch": 0.14279877425944842, "grad_norm": 1.506645953117852, "learning_rate": 1.9340380946582694e-05, "loss": 0.7886, "step": 1398 }, { "epoch": 0.14290091930541368, "grad_norm": 1.4171081837989667, "learning_rate": 1.9339198787039285e-05, "loss": 0.7126, "step": 1399 }, { "epoch": 0.14300306435137897, "grad_norm": 1.4739622925960878, "learning_rate": 1.933801560531453e-05, "loss": 0.7008, "step": 1400 }, { "epoch": 0.14310520939734422, "grad_norm": 1.4706000588407875, "learning_rate": 1.9336831401537933e-05, "loss": 0.778, "step": 1401 }, { "epoch": 0.1432073544433095, "grad_norm": 1.4622166757971402, "learning_rate": 1.9335646175839108e-05, "loss": 0.7631, "step": 1402 }, { "epoch": 0.14330949948927477, "grad_norm": 1.6228633576062323, "learning_rate": 1.933445992834778e-05, "loss": 0.7279, "step": 1403 }, { "epoch": 0.14341164453524005, "grad_norm": 1.4940925937386755, "learning_rate": 1.933327265919378e-05, "loss": 0.8152, "step": 1404 }, { "epoch": 0.1435137895812053, "grad_norm": 1.5523548988134994, "learning_rate": 1.9332084368507054e-05, "loss": 0.704, "step": 1405 }, { "epoch": 0.1436159346271706, "grad_norm": 1.5344956944348773, "learning_rate": 1.933089505641767e-05, "loss": 0.7647, "step": 1406 }, { "epoch": 0.14371807967313585, "grad_norm": 1.782563891796052, "learning_rate": 1.9329704723055794e-05, "loss": 0.78, "step": 1407 }, { "epoch": 0.14382022471910114, "grad_norm": 1.6328428395049432, "learning_rate": 1.9328513368551705e-05, "loss": 0.6539, "step": 1408 }, { "epoch": 0.1439223697650664, "grad_norm": 1.613057439061014, "learning_rate": 1.9327320993035798e-05, "loss": 0.7319, "step": 1409 }, { "epoch": 0.14402451481103168, "grad_norm": 1.471923621647136, "learning_rate": 1.932612759663859e-05, "loss": 0.6916, "step": 1410 }, { "epoch": 0.14412665985699694, "grad_norm": 1.4607931320206955, "learning_rate": 1.9324933179490685e-05, "loss": 0.7397, "step": 1411 }, { "epoch": 0.1442288049029622, "grad_norm": 1.5368415983405097, "learning_rate": 1.9323737741722822e-05, "loss": 0.7576, "step": 1412 }, { "epoch": 0.14433094994892748, "grad_norm": 1.5486350482380276, "learning_rate": 1.9322541283465836e-05, "loss": 0.7142, "step": 1413 }, { "epoch": 0.14443309499489274, "grad_norm": 1.593433715364878, "learning_rate": 1.9321343804850685e-05, "loss": 0.7307, "step": 1414 }, { "epoch": 0.14453524004085802, "grad_norm": 1.4320179414715932, "learning_rate": 1.932014530600843e-05, "loss": 0.7237, "step": 1415 }, { "epoch": 0.14463738508682328, "grad_norm": 1.5985084758234467, "learning_rate": 1.9318945787070254e-05, "loss": 0.7798, "step": 1416 }, { "epoch": 0.14473953013278856, "grad_norm": 1.534389328107597, "learning_rate": 1.931774524816744e-05, "loss": 0.8358, "step": 1417 }, { "epoch": 0.14484167517875382, "grad_norm": 1.4777602409689825, "learning_rate": 1.9316543689431386e-05, "loss": 0.7575, "step": 1418 }, { "epoch": 0.1449438202247191, "grad_norm": 1.3380835073849822, "learning_rate": 1.9315341110993605e-05, "loss": 0.7664, "step": 1419 }, { "epoch": 0.14504596527068436, "grad_norm": 1.5120988912766862, "learning_rate": 1.9314137512985724e-05, "loss": 0.7337, "step": 1420 }, { "epoch": 0.14514811031664965, "grad_norm": 1.6401398475298732, "learning_rate": 1.9312932895539475e-05, "loss": 0.7502, "step": 1421 }, { "epoch": 0.1452502553626149, "grad_norm": 1.6279320168785258, "learning_rate": 1.9311727258786703e-05, "loss": 0.835, "step": 1422 }, { "epoch": 0.1453524004085802, "grad_norm": 1.5352814262650878, "learning_rate": 1.9310520602859365e-05, "loss": 0.7525, "step": 1423 }, { "epoch": 0.14545454545454545, "grad_norm": 1.5431191122346422, "learning_rate": 1.9309312927889534e-05, "loss": 0.7357, "step": 1424 }, { "epoch": 0.14555669050051073, "grad_norm": 1.4817092929156874, "learning_rate": 1.9308104234009386e-05, "loss": 0.6962, "step": 1425 }, { "epoch": 0.145658835546476, "grad_norm": 1.421136600226062, "learning_rate": 1.9306894521351215e-05, "loss": 0.6703, "step": 1426 }, { "epoch": 0.14576098059244127, "grad_norm": 1.3933778803200363, "learning_rate": 1.930568379004743e-05, "loss": 0.7926, "step": 1427 }, { "epoch": 0.14586312563840653, "grad_norm": 1.5154338291269438, "learning_rate": 1.9304472040230536e-05, "loss": 0.7942, "step": 1428 }, { "epoch": 0.14596527068437182, "grad_norm": 1.549006847205402, "learning_rate": 1.9303259272033172e-05, "loss": 0.7881, "step": 1429 }, { "epoch": 0.14606741573033707, "grad_norm": 1.6967940317252428, "learning_rate": 1.9302045485588067e-05, "loss": 0.7721, "step": 1430 }, { "epoch": 0.14616956077630236, "grad_norm": 1.5144421939877968, "learning_rate": 1.9300830681028075e-05, "loss": 0.7273, "step": 1431 }, { "epoch": 0.14627170582226762, "grad_norm": 1.6361562885904688, "learning_rate": 1.9299614858486153e-05, "loss": 0.7706, "step": 1432 }, { "epoch": 0.1463738508682329, "grad_norm": 1.5441568350056314, "learning_rate": 1.9298398018095378e-05, "loss": 0.7106, "step": 1433 }, { "epoch": 0.14647599591419816, "grad_norm": 1.5046355983891921, "learning_rate": 1.9297180159988932e-05, "loss": 0.8757, "step": 1434 }, { "epoch": 0.14657814096016344, "grad_norm": 1.5692773136133311, "learning_rate": 1.929596128430011e-05, "loss": 0.6381, "step": 1435 }, { "epoch": 0.1466802860061287, "grad_norm": 1.346942079068653, "learning_rate": 1.929474139116232e-05, "loss": 0.6742, "step": 1436 }, { "epoch": 0.146782431052094, "grad_norm": 1.5823780363492548, "learning_rate": 1.929352048070908e-05, "loss": 0.8039, "step": 1437 }, { "epoch": 0.14688457609805924, "grad_norm": 1.4353076685727753, "learning_rate": 1.929229855307402e-05, "loss": 0.7874, "step": 1438 }, { "epoch": 0.1469867211440245, "grad_norm": 1.620129456542622, "learning_rate": 1.9291075608390878e-05, "loss": 0.8344, "step": 1439 }, { "epoch": 0.1470888661899898, "grad_norm": 1.654984107401775, "learning_rate": 1.928985164679351e-05, "loss": 0.7368, "step": 1440 }, { "epoch": 0.14719101123595504, "grad_norm": 1.707254850164429, "learning_rate": 1.9288626668415875e-05, "loss": 0.8333, "step": 1441 }, { "epoch": 0.14729315628192033, "grad_norm": 1.521310645851988, "learning_rate": 1.9287400673392055e-05, "loss": 0.7475, "step": 1442 }, { "epoch": 0.1473953013278856, "grad_norm": 1.6554247954801538, "learning_rate": 1.9286173661856225e-05, "loss": 0.759, "step": 1443 }, { "epoch": 0.14749744637385087, "grad_norm": 1.361857290535015, "learning_rate": 1.928494563394269e-05, "loss": 0.755, "step": 1444 }, { "epoch": 0.14759959141981613, "grad_norm": 1.5282863387931205, "learning_rate": 1.9283716589785853e-05, "loss": 0.6872, "step": 1445 }, { "epoch": 0.1477017364657814, "grad_norm": 1.5782870301353333, "learning_rate": 1.9282486529520244e-05, "loss": 0.7992, "step": 1446 }, { "epoch": 0.14780388151174667, "grad_norm": 1.383890072208761, "learning_rate": 1.9281255453280484e-05, "loss": 0.7153, "step": 1447 }, { "epoch": 0.14790602655771196, "grad_norm": 1.3671009102299712, "learning_rate": 1.9280023361201318e-05, "loss": 0.7143, "step": 1448 }, { "epoch": 0.1480081716036772, "grad_norm": 1.644965230665166, "learning_rate": 1.92787902534176e-05, "loss": 0.7854, "step": 1449 }, { "epoch": 0.1481103166496425, "grad_norm": 1.5032252828546246, "learning_rate": 1.9277556130064294e-05, "loss": 0.7645, "step": 1450 }, { "epoch": 0.14821246169560776, "grad_norm": 1.5630553567925038, "learning_rate": 1.927632099127647e-05, "loss": 0.7335, "step": 1451 }, { "epoch": 0.14831460674157304, "grad_norm": 1.44658191805543, "learning_rate": 1.9275084837189327e-05, "loss": 0.7225, "step": 1452 }, { "epoch": 0.1484167517875383, "grad_norm": 1.4567876122771806, "learning_rate": 1.927384766793815e-05, "loss": 0.767, "step": 1453 }, { "epoch": 0.14851889683350358, "grad_norm": 1.594979566644765, "learning_rate": 1.927260948365836e-05, "loss": 0.8176, "step": 1454 }, { "epoch": 0.14862104187946884, "grad_norm": 1.3493345185355654, "learning_rate": 1.9271370284485473e-05, "loss": 0.7765, "step": 1455 }, { "epoch": 0.14872318692543413, "grad_norm": 1.5042715552702999, "learning_rate": 1.9270130070555113e-05, "loss": 0.7083, "step": 1456 }, { "epoch": 0.14882533197139938, "grad_norm": 1.3788728031226123, "learning_rate": 1.926888884200303e-05, "loss": 0.6611, "step": 1457 }, { "epoch": 0.14892747701736467, "grad_norm": 1.5957093881571296, "learning_rate": 1.9267646598965072e-05, "loss": 0.7957, "step": 1458 }, { "epoch": 0.14902962206332993, "grad_norm": 1.5570548361848133, "learning_rate": 1.9266403341577207e-05, "loss": 0.8656, "step": 1459 }, { "epoch": 0.1491317671092952, "grad_norm": 1.542929474556619, "learning_rate": 1.926515906997551e-05, "loss": 0.8636, "step": 1460 }, { "epoch": 0.14923391215526047, "grad_norm": 1.5859799930217757, "learning_rate": 1.9263913784296167e-05, "loss": 0.8741, "step": 1461 }, { "epoch": 0.14933605720122575, "grad_norm": 1.545057123754058, "learning_rate": 1.9262667484675475e-05, "loss": 0.7846, "step": 1462 }, { "epoch": 0.149438202247191, "grad_norm": 1.517605416164882, "learning_rate": 1.9261420171249845e-05, "loss": 0.7935, "step": 1463 }, { "epoch": 0.1495403472931563, "grad_norm": 1.5915475863525619, "learning_rate": 1.926017184415579e-05, "loss": 0.7503, "step": 1464 }, { "epoch": 0.14964249233912155, "grad_norm": 1.6578246845499418, "learning_rate": 1.9258922503529947e-05, "loss": 0.7272, "step": 1465 }, { "epoch": 0.1497446373850868, "grad_norm": 1.4727733271924286, "learning_rate": 1.925767214950905e-05, "loss": 0.7806, "step": 1466 }, { "epoch": 0.1498467824310521, "grad_norm": 1.5813826776329285, "learning_rate": 1.9256420782229955e-05, "loss": 0.7835, "step": 1467 }, { "epoch": 0.14994892747701735, "grad_norm": 1.4336086867236078, "learning_rate": 1.925516840182963e-05, "loss": 0.7781, "step": 1468 }, { "epoch": 0.15005107252298264, "grad_norm": 1.5041070794548579, "learning_rate": 1.925391500844514e-05, "loss": 0.7518, "step": 1469 }, { "epoch": 0.1501532175689479, "grad_norm": 1.5636170630895998, "learning_rate": 1.9252660602213673e-05, "loss": 0.9089, "step": 1470 }, { "epoch": 0.15025536261491318, "grad_norm": 1.5084333374279997, "learning_rate": 1.9251405183272526e-05, "loss": 0.8935, "step": 1471 }, { "epoch": 0.15035750766087844, "grad_norm": 1.6000781055724267, "learning_rate": 1.9250148751759107e-05, "loss": 0.8639, "step": 1472 }, { "epoch": 0.15045965270684372, "grad_norm": 1.5188856248433633, "learning_rate": 1.9248891307810926e-05, "loss": 0.763, "step": 1473 }, { "epoch": 0.15056179775280898, "grad_norm": 1.5348044454410106, "learning_rate": 1.924763285156562e-05, "loss": 0.68, "step": 1474 }, { "epoch": 0.15066394279877426, "grad_norm": 1.5829844872951264, "learning_rate": 1.9246373383160922e-05, "loss": 0.7297, "step": 1475 }, { "epoch": 0.15076608784473952, "grad_norm": 1.3605914489917799, "learning_rate": 1.9245112902734684e-05, "loss": 0.7421, "step": 1476 }, { "epoch": 0.1508682328907048, "grad_norm": 1.4888298769889006, "learning_rate": 1.9243851410424864e-05, "loss": 0.7598, "step": 1477 }, { "epoch": 0.15097037793667006, "grad_norm": 1.5265355255894464, "learning_rate": 1.9242588906369538e-05, "loss": 0.7434, "step": 1478 }, { "epoch": 0.15107252298263535, "grad_norm": 1.5138511762317632, "learning_rate": 1.924132539070688e-05, "loss": 0.8592, "step": 1479 }, { "epoch": 0.1511746680286006, "grad_norm": 1.340045628142175, "learning_rate": 1.924006086357519e-05, "loss": 0.6999, "step": 1480 }, { "epoch": 0.1512768130745659, "grad_norm": 1.4160012428397106, "learning_rate": 1.9238795325112867e-05, "loss": 0.6999, "step": 1481 }, { "epoch": 0.15137895812053115, "grad_norm": 1.475502651293873, "learning_rate": 1.9237528775458433e-05, "loss": 0.8055, "step": 1482 }, { "epoch": 0.15148110316649643, "grad_norm": 1.4894674637463519, "learning_rate": 1.9236261214750497e-05, "loss": 0.7311, "step": 1483 }, { "epoch": 0.1515832482124617, "grad_norm": 1.5462416288328762, "learning_rate": 1.923499264312781e-05, "loss": 0.7023, "step": 1484 }, { "epoch": 0.15168539325842698, "grad_norm": 1.5578133567163723, "learning_rate": 1.923372306072921e-05, "loss": 0.6695, "step": 1485 }, { "epoch": 0.15178753830439223, "grad_norm": 1.4336058336887387, "learning_rate": 1.9232452467693658e-05, "loss": 0.6998, "step": 1486 }, { "epoch": 0.15188968335035752, "grad_norm": 1.602402124303217, "learning_rate": 1.9231180864160213e-05, "loss": 0.8109, "step": 1487 }, { "epoch": 0.15199182839632278, "grad_norm": 1.3743314215042068, "learning_rate": 1.922990825026806e-05, "loss": 0.6227, "step": 1488 }, { "epoch": 0.15209397344228806, "grad_norm": 1.5344504938757169, "learning_rate": 1.9228634626156486e-05, "loss": 0.8438, "step": 1489 }, { "epoch": 0.15219611848825332, "grad_norm": 1.482415502182226, "learning_rate": 1.9227359991964892e-05, "loss": 0.7681, "step": 1490 }, { "epoch": 0.1522982635342186, "grad_norm": 1.5620482590062323, "learning_rate": 1.9226084347832784e-05, "loss": 0.7147, "step": 1491 }, { "epoch": 0.15240040858018386, "grad_norm": 1.3812474160284345, "learning_rate": 1.9224807693899784e-05, "loss": 0.85, "step": 1492 }, { "epoch": 0.15250255362614915, "grad_norm": 1.5578333842420518, "learning_rate": 1.922353003030562e-05, "loss": 0.7358, "step": 1493 }, { "epoch": 0.1526046986721144, "grad_norm": 1.4892093037434606, "learning_rate": 1.9222251357190135e-05, "loss": 0.6208, "step": 1494 }, { "epoch": 0.15270684371807966, "grad_norm": 1.4776823028883899, "learning_rate": 1.922097167469328e-05, "loss": 0.7417, "step": 1495 }, { "epoch": 0.15280898876404495, "grad_norm": 1.5491537611185378, "learning_rate": 1.921969098295512e-05, "loss": 0.7588, "step": 1496 }, { "epoch": 0.1529111338100102, "grad_norm": 1.4044560034990186, "learning_rate": 1.9218409282115823e-05, "loss": 0.7251, "step": 1497 }, { "epoch": 0.1530132788559755, "grad_norm": 1.7215135328411288, "learning_rate": 1.9217126572315677e-05, "loss": 0.8966, "step": 1498 }, { "epoch": 0.15311542390194074, "grad_norm": 1.5991875664048536, "learning_rate": 1.921584285369507e-05, "loss": 0.7446, "step": 1499 }, { "epoch": 0.15321756894790603, "grad_norm": 1.380693499610882, "learning_rate": 1.921455812639451e-05, "loss": 0.7235, "step": 1500 }, { "epoch": 0.1533197139938713, "grad_norm": 1.646195549493454, "learning_rate": 1.9213272390554608e-05, "loss": 0.7211, "step": 1501 }, { "epoch": 0.15342185903983657, "grad_norm": 1.5680388039401867, "learning_rate": 1.9211985646316094e-05, "loss": 0.7268, "step": 1502 }, { "epoch": 0.15352400408580183, "grad_norm": 1.5685827032315967, "learning_rate": 1.9210697893819795e-05, "loss": 0.7977, "step": 1503 }, { "epoch": 0.15362614913176711, "grad_norm": 1.5395566516193993, "learning_rate": 1.9209409133206662e-05, "loss": 0.7629, "step": 1504 }, { "epoch": 0.15372829417773237, "grad_norm": 1.615791092400343, "learning_rate": 1.920811936461775e-05, "loss": 0.7218, "step": 1505 }, { "epoch": 0.15383043922369766, "grad_norm": 1.6597999014845815, "learning_rate": 1.9206828588194228e-05, "loss": 0.7826, "step": 1506 }, { "epoch": 0.15393258426966291, "grad_norm": 1.6256963903646897, "learning_rate": 1.920553680407736e-05, "loss": 0.7556, "step": 1507 }, { "epoch": 0.1540347293156282, "grad_norm": 1.3284838058199782, "learning_rate": 1.920424401240855e-05, "loss": 0.6448, "step": 1508 }, { "epoch": 0.15413687436159346, "grad_norm": 1.6595027537997935, "learning_rate": 1.9202950213329282e-05, "loss": 0.7422, "step": 1509 }, { "epoch": 0.15423901940755874, "grad_norm": 1.5061231992012265, "learning_rate": 1.9201655406981167e-05, "loss": 0.6989, "step": 1510 }, { "epoch": 0.154341164453524, "grad_norm": 1.5240042211537637, "learning_rate": 1.9200359593505925e-05, "loss": 0.8295, "step": 1511 }, { "epoch": 0.15444330949948928, "grad_norm": 1.6374454769112932, "learning_rate": 1.9199062773045378e-05, "loss": 0.7429, "step": 1512 }, { "epoch": 0.15454545454545454, "grad_norm": 1.4122754474247312, "learning_rate": 1.9197764945741467e-05, "loss": 0.7814, "step": 1513 }, { "epoch": 0.15464759959141983, "grad_norm": 1.4822959702434024, "learning_rate": 1.9196466111736245e-05, "loss": 0.6865, "step": 1514 }, { "epoch": 0.15474974463738508, "grad_norm": 1.5595195495231793, "learning_rate": 1.919516627117186e-05, "loss": 0.7121, "step": 1515 }, { "epoch": 0.15485188968335037, "grad_norm": 1.3592413632360343, "learning_rate": 1.919386542419059e-05, "loss": 0.7111, "step": 1516 }, { "epoch": 0.15495403472931563, "grad_norm": 1.5174767150464523, "learning_rate": 1.9192563570934805e-05, "loss": 0.7298, "step": 1517 }, { "epoch": 0.1550561797752809, "grad_norm": 1.4459753236903439, "learning_rate": 1.9191260711547003e-05, "loss": 0.8273, "step": 1518 }, { "epoch": 0.15515832482124617, "grad_norm": 1.4302288315410678, "learning_rate": 1.9189956846169774e-05, "loss": 0.6923, "step": 1519 }, { "epoch": 0.15526046986721145, "grad_norm": 1.5742852400191951, "learning_rate": 1.918865197494583e-05, "loss": 0.6542, "step": 1520 }, { "epoch": 0.1553626149131767, "grad_norm": 1.4717277874606733, "learning_rate": 1.9187346098017993e-05, "loss": 0.8227, "step": 1521 }, { "epoch": 0.15546475995914197, "grad_norm": 1.6055055790888269, "learning_rate": 1.918603921552919e-05, "loss": 0.8173, "step": 1522 }, { "epoch": 0.15556690500510725, "grad_norm": 1.5319015388508856, "learning_rate": 1.918473132762246e-05, "loss": 0.6821, "step": 1523 }, { "epoch": 0.1556690500510725, "grad_norm": 1.6163640272912092, "learning_rate": 1.9183422434440953e-05, "loss": 0.7904, "step": 1524 }, { "epoch": 0.1557711950970378, "grad_norm": 1.6624114628896023, "learning_rate": 1.9182112536127925e-05, "loss": 0.8284, "step": 1525 }, { "epoch": 0.15587334014300305, "grad_norm": 1.3443132516635576, "learning_rate": 1.918080163282675e-05, "loss": 0.6762, "step": 1526 }, { "epoch": 0.15597548518896834, "grad_norm": 1.5798935339418345, "learning_rate": 1.91794897246809e-05, "loss": 0.7859, "step": 1527 }, { "epoch": 0.1560776302349336, "grad_norm": 1.5076642313916433, "learning_rate": 1.917817681183397e-05, "loss": 0.6371, "step": 1528 }, { "epoch": 0.15617977528089888, "grad_norm": 1.5151495360068639, "learning_rate": 1.917686289442966e-05, "loss": 0.7391, "step": 1529 }, { "epoch": 0.15628192032686414, "grad_norm": 1.4692456111997299, "learning_rate": 1.917554797261178e-05, "loss": 0.7334, "step": 1530 }, { "epoch": 0.15638406537282942, "grad_norm": 1.5147098174425102, "learning_rate": 1.9174232046524245e-05, "loss": 0.7338, "step": 1531 }, { "epoch": 0.15648621041879468, "grad_norm": 1.5380149503605998, "learning_rate": 1.9172915116311083e-05, "loss": 0.7689, "step": 1532 }, { "epoch": 0.15658835546475997, "grad_norm": 1.63547965910899, "learning_rate": 1.9171597182116434e-05, "loss": 0.7391, "step": 1533 }, { "epoch": 0.15669050051072522, "grad_norm": 1.7614095923960307, "learning_rate": 1.917027824408455e-05, "loss": 0.7361, "step": 1534 }, { "epoch": 0.1567926455566905, "grad_norm": 1.6000708403381863, "learning_rate": 1.9168958302359785e-05, "loss": 0.7581, "step": 1535 }, { "epoch": 0.15689479060265576, "grad_norm": 1.4446067445378494, "learning_rate": 1.9167637357086614e-05, "loss": 0.723, "step": 1536 }, { "epoch": 0.15699693564862105, "grad_norm": 1.4763492302373629, "learning_rate": 1.9166315408409608e-05, "loss": 0.7449, "step": 1537 }, { "epoch": 0.1570990806945863, "grad_norm": 1.5607560889595977, "learning_rate": 1.916499245647346e-05, "loss": 0.6792, "step": 1538 }, { "epoch": 0.1572012257405516, "grad_norm": 1.417972093858151, "learning_rate": 1.9163668501422966e-05, "loss": 0.6908, "step": 1539 }, { "epoch": 0.15730337078651685, "grad_norm": 1.5048630223013457, "learning_rate": 1.9162343543403032e-05, "loss": 0.6314, "step": 1540 }, { "epoch": 0.15740551583248213, "grad_norm": 1.4791391080528098, "learning_rate": 1.9161017582558678e-05, "loss": 0.7655, "step": 1541 }, { "epoch": 0.1575076608784474, "grad_norm": 1.3976669928793843, "learning_rate": 1.9159690619035034e-05, "loss": 0.7092, "step": 1542 }, { "epoch": 0.15760980592441268, "grad_norm": 1.5121925015100246, "learning_rate": 1.9158362652977332e-05, "loss": 0.7172, "step": 1543 }, { "epoch": 0.15771195097037793, "grad_norm": 1.5281812662543834, "learning_rate": 1.915703368453092e-05, "loss": 0.8073, "step": 1544 }, { "epoch": 0.15781409601634322, "grad_norm": 1.3753627719149022, "learning_rate": 1.9155703713841257e-05, "loss": 0.6972, "step": 1545 }, { "epoch": 0.15791624106230848, "grad_norm": 1.4781102825009205, "learning_rate": 1.9154372741053904e-05, "loss": 0.7872, "step": 1546 }, { "epoch": 0.15801838610827376, "grad_norm": 1.5051689086469504, "learning_rate": 1.915304076631454e-05, "loss": 0.6509, "step": 1547 }, { "epoch": 0.15812053115423902, "grad_norm": 1.614441452147944, "learning_rate": 1.9151707789768954e-05, "loss": 0.8859, "step": 1548 }, { "epoch": 0.15822267620020428, "grad_norm": 1.5911981379130813, "learning_rate": 1.9150373811563038e-05, "loss": 0.8913, "step": 1549 }, { "epoch": 0.15832482124616956, "grad_norm": 1.5358565625034422, "learning_rate": 1.9149038831842793e-05, "loss": 0.7916, "step": 1550 }, { "epoch": 0.15842696629213482, "grad_norm": 1.3837364232136753, "learning_rate": 1.9147702850754338e-05, "loss": 0.6278, "step": 1551 }, { "epoch": 0.1585291113381001, "grad_norm": 1.480022898686007, "learning_rate": 1.9146365868443895e-05, "loss": 0.7792, "step": 1552 }, { "epoch": 0.15863125638406536, "grad_norm": 1.6377686966321894, "learning_rate": 1.9145027885057802e-05, "loss": 0.8093, "step": 1553 }, { "epoch": 0.15873340143003065, "grad_norm": 1.3155003021820721, "learning_rate": 1.914368890074249e-05, "loss": 0.6804, "step": 1554 }, { "epoch": 0.1588355464759959, "grad_norm": 1.4193701981551832, "learning_rate": 1.914234891564453e-05, "loss": 0.8151, "step": 1555 }, { "epoch": 0.1589376915219612, "grad_norm": 1.4887780671077047, "learning_rate": 1.914100792991057e-05, "loss": 0.789, "step": 1556 }, { "epoch": 0.15903983656792645, "grad_norm": 1.5612249613266986, "learning_rate": 1.9139665943687386e-05, "loss": 0.7155, "step": 1557 }, { "epoch": 0.15914198161389173, "grad_norm": 1.4921441466224368, "learning_rate": 1.9138322957121863e-05, "loss": 0.7558, "step": 1558 }, { "epoch": 0.159244126659857, "grad_norm": 1.4604607455546217, "learning_rate": 1.9136978970360985e-05, "loss": 0.7232, "step": 1559 }, { "epoch": 0.15934627170582227, "grad_norm": 1.47070213255059, "learning_rate": 1.9135633983551853e-05, "loss": 0.7714, "step": 1560 }, { "epoch": 0.15944841675178753, "grad_norm": 1.592211691002247, "learning_rate": 1.9134287996841683e-05, "loss": 0.81, "step": 1561 }, { "epoch": 0.15955056179775282, "grad_norm": 1.4377448699102515, "learning_rate": 1.913294101037779e-05, "loss": 0.6944, "step": 1562 }, { "epoch": 0.15965270684371807, "grad_norm": 1.455325051217147, "learning_rate": 1.9131593024307602e-05, "loss": 0.7738, "step": 1563 }, { "epoch": 0.15975485188968336, "grad_norm": 1.481435674067964, "learning_rate": 1.9130244038778658e-05, "loss": 0.7416, "step": 1564 }, { "epoch": 0.15985699693564862, "grad_norm": 1.546720915440481, "learning_rate": 1.9128894053938603e-05, "loss": 0.766, "step": 1565 }, { "epoch": 0.1599591419816139, "grad_norm": 1.4836755370296506, "learning_rate": 1.9127543069935198e-05, "loss": 0.794, "step": 1566 }, { "epoch": 0.16006128702757916, "grad_norm": 1.5141990875593103, "learning_rate": 1.912619108691631e-05, "loss": 0.7479, "step": 1567 }, { "epoch": 0.16016343207354444, "grad_norm": 1.6818059126281242, "learning_rate": 1.9124838105029904e-05, "loss": 0.8669, "step": 1568 }, { "epoch": 0.1602655771195097, "grad_norm": 1.4416524069653935, "learning_rate": 1.9123484124424075e-05, "loss": 0.7633, "step": 1569 }, { "epoch": 0.16036772216547499, "grad_norm": 1.4671853293715984, "learning_rate": 1.9122129145247018e-05, "loss": 0.6866, "step": 1570 }, { "epoch": 0.16046986721144024, "grad_norm": 1.454264529064163, "learning_rate": 1.9120773167647025e-05, "loss": 0.7947, "step": 1571 }, { "epoch": 0.16057201225740553, "grad_norm": 1.4938966085776046, "learning_rate": 1.9119416191772524e-05, "loss": 0.7928, "step": 1572 }, { "epoch": 0.16067415730337078, "grad_norm": 1.5570766453747198, "learning_rate": 1.9118058217772023e-05, "loss": 0.8438, "step": 1573 }, { "epoch": 0.16077630234933607, "grad_norm": 1.4767403309856386, "learning_rate": 1.9116699245794162e-05, "loss": 0.8599, "step": 1574 }, { "epoch": 0.16087844739530133, "grad_norm": 1.3723243203124202, "learning_rate": 1.9115339275987678e-05, "loss": 0.6862, "step": 1575 }, { "epoch": 0.1609805924412666, "grad_norm": 1.3514468478506851, "learning_rate": 1.911397830850142e-05, "loss": 0.6456, "step": 1576 }, { "epoch": 0.16108273748723187, "grad_norm": 1.5004921041519557, "learning_rate": 1.911261634348435e-05, "loss": 0.7983, "step": 1577 }, { "epoch": 0.16118488253319713, "grad_norm": 1.4344489601454746, "learning_rate": 1.911125338108553e-05, "loss": 0.7563, "step": 1578 }, { "epoch": 0.1612870275791624, "grad_norm": 1.5290196833232637, "learning_rate": 1.9109889421454143e-05, "loss": 0.7134, "step": 1579 }, { "epoch": 0.16138917262512767, "grad_norm": 1.4947955057209659, "learning_rate": 1.9108524464739474e-05, "loss": 0.8382, "step": 1580 }, { "epoch": 0.16149131767109295, "grad_norm": 1.4963982353622198, "learning_rate": 1.9107158511090916e-05, "loss": 0.7365, "step": 1581 }, { "epoch": 0.1615934627170582, "grad_norm": 1.528699384509226, "learning_rate": 1.9105791560657977e-05, "loss": 0.7352, "step": 1582 }, { "epoch": 0.1616956077630235, "grad_norm": 1.4260632097602914, "learning_rate": 1.9104423613590266e-05, "loss": 0.6898, "step": 1583 }, { "epoch": 0.16179775280898875, "grad_norm": 1.6464849610678862, "learning_rate": 1.910305467003751e-05, "loss": 0.7349, "step": 1584 }, { "epoch": 0.16189989785495404, "grad_norm": 1.4892312291190488, "learning_rate": 1.9101684730149536e-05, "loss": 0.6974, "step": 1585 }, { "epoch": 0.1620020429009193, "grad_norm": 1.5715725767644013, "learning_rate": 1.910031379407629e-05, "loss": 0.648, "step": 1586 }, { "epoch": 0.16210418794688458, "grad_norm": 1.5415212504769706, "learning_rate": 1.9098941861967822e-05, "loss": 0.7375, "step": 1587 }, { "epoch": 0.16220633299284984, "grad_norm": 1.5394846204242916, "learning_rate": 1.9097568933974283e-05, "loss": 0.754, "step": 1588 }, { "epoch": 0.16230847803881512, "grad_norm": 1.6122890734647868, "learning_rate": 1.909619501024595e-05, "loss": 0.7641, "step": 1589 }, { "epoch": 0.16241062308478038, "grad_norm": 1.5247764241788768, "learning_rate": 1.90948200909332e-05, "loss": 0.8328, "step": 1590 }, { "epoch": 0.16251276813074567, "grad_norm": 1.5472102284933233, "learning_rate": 1.909344417618651e-05, "loss": 0.7388, "step": 1591 }, { "epoch": 0.16261491317671092, "grad_norm": 1.615325720855714, "learning_rate": 1.909206726615648e-05, "loss": 0.7925, "step": 1592 }, { "epoch": 0.1627170582226762, "grad_norm": 1.6082703249241432, "learning_rate": 1.9090689360993814e-05, "loss": 0.8157, "step": 1593 }, { "epoch": 0.16281920326864147, "grad_norm": 1.6368737751832967, "learning_rate": 1.9089310460849323e-05, "loss": 0.8331, "step": 1594 }, { "epoch": 0.16292134831460675, "grad_norm": 1.436017136385229, "learning_rate": 1.9087930565873933e-05, "loss": 0.7167, "step": 1595 }, { "epoch": 0.163023493360572, "grad_norm": 1.5541491630484596, "learning_rate": 1.908654967621867e-05, "loss": 0.7357, "step": 1596 }, { "epoch": 0.1631256384065373, "grad_norm": 1.587327283240723, "learning_rate": 1.9085167792034672e-05, "loss": 0.7445, "step": 1597 }, { "epoch": 0.16322778345250255, "grad_norm": 1.3306423415552662, "learning_rate": 1.908378491347319e-05, "loss": 0.7349, "step": 1598 }, { "epoch": 0.16332992849846784, "grad_norm": 1.5799631880135903, "learning_rate": 1.9082401040685583e-05, "loss": 0.8166, "step": 1599 }, { "epoch": 0.1634320735444331, "grad_norm": 1.4433442551386697, "learning_rate": 1.908101617382331e-05, "loss": 0.7399, "step": 1600 }, { "epoch": 0.16353421859039838, "grad_norm": 1.6791272776834318, "learning_rate": 1.9079630313037954e-05, "loss": 0.7634, "step": 1601 }, { "epoch": 0.16363636363636364, "grad_norm": 1.4695100875587683, "learning_rate": 1.9078243458481188e-05, "loss": 0.6476, "step": 1602 }, { "epoch": 0.16373850868232892, "grad_norm": 1.5343137150417558, "learning_rate": 1.9076855610304817e-05, "loss": 0.7994, "step": 1603 }, { "epoch": 0.16384065372829418, "grad_norm": 1.3965652394025752, "learning_rate": 1.907546676866073e-05, "loss": 0.6857, "step": 1604 }, { "epoch": 0.16394279877425944, "grad_norm": 1.6047653580553858, "learning_rate": 1.9074076933700944e-05, "loss": 0.7206, "step": 1605 }, { "epoch": 0.16404494382022472, "grad_norm": 1.48974091554732, "learning_rate": 1.9072686105577574e-05, "loss": 0.7064, "step": 1606 }, { "epoch": 0.16414708886618998, "grad_norm": 1.5917365928271061, "learning_rate": 1.907129428444285e-05, "loss": 0.862, "step": 1607 }, { "epoch": 0.16424923391215526, "grad_norm": 1.4433989530460103, "learning_rate": 1.9069901470449107e-05, "loss": 0.7456, "step": 1608 }, { "epoch": 0.16435137895812052, "grad_norm": 1.4111019303125132, "learning_rate": 1.9068507663748785e-05, "loss": 0.749, "step": 1609 }, { "epoch": 0.1644535240040858, "grad_norm": 1.4841416599529411, "learning_rate": 1.906711286449444e-05, "loss": 0.7686, "step": 1610 }, { "epoch": 0.16455566905005106, "grad_norm": 1.3265517359956176, "learning_rate": 1.9065717072838734e-05, "loss": 0.6114, "step": 1611 }, { "epoch": 0.16465781409601635, "grad_norm": 1.4405609255588618, "learning_rate": 1.906432028893444e-05, "loss": 0.625, "step": 1612 }, { "epoch": 0.1647599591419816, "grad_norm": 1.3942795525285383, "learning_rate": 1.9062922512934432e-05, "loss": 0.7814, "step": 1613 }, { "epoch": 0.1648621041879469, "grad_norm": 1.6826560793806584, "learning_rate": 1.9061523744991698e-05, "loss": 0.7932, "step": 1614 }, { "epoch": 0.16496424923391215, "grad_norm": 1.3958695304113196, "learning_rate": 1.906012398525934e-05, "loss": 0.6472, "step": 1615 }, { "epoch": 0.16506639427987743, "grad_norm": 1.4853656218634286, "learning_rate": 1.905872323389055e-05, "loss": 0.7572, "step": 1616 }, { "epoch": 0.1651685393258427, "grad_norm": 1.5614366654222802, "learning_rate": 1.905732149103866e-05, "loss": 0.7935, "step": 1617 }, { "epoch": 0.16527068437180797, "grad_norm": 1.5425771245399245, "learning_rate": 1.9055918756857075e-05, "loss": 0.7606, "step": 1618 }, { "epoch": 0.16537282941777323, "grad_norm": 1.473843224768117, "learning_rate": 1.9054515031499332e-05, "loss": 0.7424, "step": 1619 }, { "epoch": 0.16547497446373852, "grad_norm": 1.6776054540734124, "learning_rate": 1.9053110315119068e-05, "loss": 0.6987, "step": 1620 }, { "epoch": 0.16557711950970377, "grad_norm": 1.4237983479845966, "learning_rate": 1.905170460787003e-05, "loss": 0.7126, "step": 1621 }, { "epoch": 0.16567926455566906, "grad_norm": 1.5458059101262946, "learning_rate": 1.9050297909906077e-05, "loss": 0.7562, "step": 1622 }, { "epoch": 0.16578140960163432, "grad_norm": 1.3695219129668585, "learning_rate": 1.904889022138117e-05, "loss": 0.7346, "step": 1623 }, { "epoch": 0.1658835546475996, "grad_norm": 1.4675259058785992, "learning_rate": 1.9047481542449384e-05, "loss": 0.7778, "step": 1624 }, { "epoch": 0.16598569969356486, "grad_norm": 1.3920339858865147, "learning_rate": 1.9046071873264895e-05, "loss": 0.6954, "step": 1625 }, { "epoch": 0.16608784473953014, "grad_norm": 1.5396115403901631, "learning_rate": 1.9044661213981994e-05, "loss": 0.8501, "step": 1626 }, { "epoch": 0.1661899897854954, "grad_norm": 1.3931308818360806, "learning_rate": 1.9043249564755082e-05, "loss": 0.6029, "step": 1627 }, { "epoch": 0.1662921348314607, "grad_norm": 1.539706105291121, "learning_rate": 1.9041836925738662e-05, "loss": 0.8168, "step": 1628 }, { "epoch": 0.16639427987742594, "grad_norm": 1.4326754397199128, "learning_rate": 1.9040423297087348e-05, "loss": 0.8848, "step": 1629 }, { "epoch": 0.16649642492339123, "grad_norm": 1.5637292629946007, "learning_rate": 1.9039008678955864e-05, "loss": 0.8295, "step": 1630 }, { "epoch": 0.16659856996935649, "grad_norm": 1.4435106433332063, "learning_rate": 1.903759307149904e-05, "loss": 0.7546, "step": 1631 }, { "epoch": 0.16670071501532174, "grad_norm": 1.6650833725835272, "learning_rate": 1.9036176474871814e-05, "loss": 0.859, "step": 1632 }, { "epoch": 0.16680286006128703, "grad_norm": 1.4482629629279744, "learning_rate": 1.9034758889229236e-05, "loss": 0.6713, "step": 1633 }, { "epoch": 0.16690500510725229, "grad_norm": 1.4366977213699657, "learning_rate": 1.903334031472646e-05, "loss": 0.6919, "step": 1634 }, { "epoch": 0.16700715015321757, "grad_norm": 1.5007942142242667, "learning_rate": 1.903192075151875e-05, "loss": 0.8195, "step": 1635 }, { "epoch": 0.16710929519918283, "grad_norm": 1.542868374325661, "learning_rate": 1.903050019976148e-05, "loss": 0.7311, "step": 1636 }, { "epoch": 0.1672114402451481, "grad_norm": 1.5816541235893211, "learning_rate": 1.9029078659610127e-05, "loss": 0.72, "step": 1637 }, { "epoch": 0.16731358529111337, "grad_norm": 1.4812070898416783, "learning_rate": 1.902765613122028e-05, "loss": 0.7582, "step": 1638 }, { "epoch": 0.16741573033707866, "grad_norm": 1.4969046936565689, "learning_rate": 1.9026232614747638e-05, "loss": 0.8215, "step": 1639 }, { "epoch": 0.1675178753830439, "grad_norm": 1.5196408117797398, "learning_rate": 1.9024808110348006e-05, "loss": 0.7909, "step": 1640 }, { "epoch": 0.1676200204290092, "grad_norm": 1.5169849370018311, "learning_rate": 1.9023382618177292e-05, "loss": 0.7584, "step": 1641 }, { "epoch": 0.16772216547497446, "grad_norm": 1.410582016029108, "learning_rate": 1.9021956138391524e-05, "loss": 0.7924, "step": 1642 }, { "epoch": 0.16782431052093974, "grad_norm": 1.5233424308517216, "learning_rate": 1.902052867114683e-05, "loss": 0.785, "step": 1643 }, { "epoch": 0.167926455566905, "grad_norm": 1.4748544634787273, "learning_rate": 1.901910021659944e-05, "loss": 0.7925, "step": 1644 }, { "epoch": 0.16802860061287028, "grad_norm": 1.516866789335725, "learning_rate": 1.9017670774905707e-05, "loss": 0.7976, "step": 1645 }, { "epoch": 0.16813074565883554, "grad_norm": 1.544873845940206, "learning_rate": 1.901624034622208e-05, "loss": 0.7765, "step": 1646 }, { "epoch": 0.16823289070480082, "grad_norm": 1.3202588061972562, "learning_rate": 1.9014808930705123e-05, "loss": 0.7309, "step": 1647 }, { "epoch": 0.16833503575076608, "grad_norm": 1.3568307837141995, "learning_rate": 1.9013376528511504e-05, "loss": 0.7539, "step": 1648 }, { "epoch": 0.16843718079673137, "grad_norm": 1.3370193342875056, "learning_rate": 1.9011943139797998e-05, "loss": 0.7534, "step": 1649 }, { "epoch": 0.16853932584269662, "grad_norm": 1.615637409148707, "learning_rate": 1.9010508764721496e-05, "loss": 0.7305, "step": 1650 }, { "epoch": 0.1686414708886619, "grad_norm": 1.4473167641327425, "learning_rate": 1.9009073403438988e-05, "loss": 0.6791, "step": 1651 }, { "epoch": 0.16874361593462717, "grad_norm": 1.4738993515134662, "learning_rate": 1.9007637056107576e-05, "loss": 0.7266, "step": 1652 }, { "epoch": 0.16884576098059245, "grad_norm": 1.4243258978178772, "learning_rate": 1.9006199722884465e-05, "loss": 0.7606, "step": 1653 }, { "epoch": 0.1689479060265577, "grad_norm": 1.6226558215822604, "learning_rate": 1.9004761403926978e-05, "loss": 0.734, "step": 1654 }, { "epoch": 0.169050051072523, "grad_norm": 1.437669269080698, "learning_rate": 1.9003322099392535e-05, "loss": 0.7954, "step": 1655 }, { "epoch": 0.16915219611848825, "grad_norm": 1.5897118148864038, "learning_rate": 1.9001881809438677e-05, "loss": 0.8397, "step": 1656 }, { "epoch": 0.16925434116445354, "grad_norm": 1.4546214424000696, "learning_rate": 1.9000440534223034e-05, "loss": 0.7838, "step": 1657 }, { "epoch": 0.1693564862104188, "grad_norm": 1.433737339585538, "learning_rate": 1.899899827390336e-05, "loss": 0.6751, "step": 1658 }, { "epoch": 0.16945863125638408, "grad_norm": 1.4775422645084217, "learning_rate": 1.8997555028637513e-05, "loss": 0.7986, "step": 1659 }, { "epoch": 0.16956077630234934, "grad_norm": 1.547474440855746, "learning_rate": 1.8996110798583452e-05, "loss": 0.7999, "step": 1660 }, { "epoch": 0.1696629213483146, "grad_norm": 1.5037817648206524, "learning_rate": 1.8994665583899256e-05, "loss": 0.8058, "step": 1661 }, { "epoch": 0.16976506639427988, "grad_norm": 1.555321904064513, "learning_rate": 1.89932193847431e-05, "loss": 0.7712, "step": 1662 }, { "epoch": 0.16986721144024514, "grad_norm": 1.484447776692159, "learning_rate": 1.8991772201273267e-05, "loss": 0.8166, "step": 1663 }, { "epoch": 0.16996935648621042, "grad_norm": 1.3467747148005869, "learning_rate": 1.899032403364816e-05, "loss": 0.747, "step": 1664 }, { "epoch": 0.17007150153217568, "grad_norm": 1.9864911667709322, "learning_rate": 1.898887488202628e-05, "loss": 0.7765, "step": 1665 }, { "epoch": 0.17017364657814096, "grad_norm": 1.7092137999296921, "learning_rate": 1.8987424746566237e-05, "loss": 0.8286, "step": 1666 }, { "epoch": 0.17027579162410622, "grad_norm": 1.5092524002994716, "learning_rate": 1.8985973627426747e-05, "loss": 0.8011, "step": 1667 }, { "epoch": 0.1703779366700715, "grad_norm": 1.5219706444123955, "learning_rate": 1.898452152476664e-05, "loss": 0.6489, "step": 1668 }, { "epoch": 0.17048008171603676, "grad_norm": 1.5244824046008114, "learning_rate": 1.8983068438744846e-05, "loss": 0.8236, "step": 1669 }, { "epoch": 0.17058222676200205, "grad_norm": 1.4897687453708552, "learning_rate": 1.8981614369520406e-05, "loss": 0.7701, "step": 1670 }, { "epoch": 0.1706843718079673, "grad_norm": 1.38929744303368, "learning_rate": 1.8980159317252473e-05, "loss": 0.7358, "step": 1671 }, { "epoch": 0.1707865168539326, "grad_norm": 1.3616941405183018, "learning_rate": 1.8978703282100298e-05, "loss": 0.6559, "step": 1672 }, { "epoch": 0.17088866189989785, "grad_norm": 1.4490928666444185, "learning_rate": 1.8977246264223252e-05, "loss": 0.7408, "step": 1673 }, { "epoch": 0.17099080694586313, "grad_norm": 1.5254046801576682, "learning_rate": 1.8975788263780797e-05, "loss": 0.7193, "step": 1674 }, { "epoch": 0.1710929519918284, "grad_norm": 1.401988719623699, "learning_rate": 1.8974329280932522e-05, "loss": 0.7341, "step": 1675 }, { "epoch": 0.17119509703779368, "grad_norm": 1.370941036548307, "learning_rate": 1.897286931583811e-05, "loss": 0.7095, "step": 1676 }, { "epoch": 0.17129724208375893, "grad_norm": 1.5717144347486354, "learning_rate": 1.897140836865735e-05, "loss": 0.743, "step": 1677 }, { "epoch": 0.17139938712972422, "grad_norm": 1.5696759400880376, "learning_rate": 1.896994643955015e-05, "loss": 0.7576, "step": 1678 }, { "epoch": 0.17150153217568948, "grad_norm": 1.595211228526451, "learning_rate": 1.8968483528676515e-05, "loss": 0.6652, "step": 1679 }, { "epoch": 0.17160367722165476, "grad_norm": 1.5410575434653095, "learning_rate": 1.8967019636196565e-05, "loss": 0.7147, "step": 1680 }, { "epoch": 0.17170582226762002, "grad_norm": 1.5802419666414576, "learning_rate": 1.896555476227052e-05, "loss": 0.7235, "step": 1681 }, { "epoch": 0.1718079673135853, "grad_norm": 1.6342262656995694, "learning_rate": 1.8964088907058717e-05, "loss": 0.8215, "step": 1682 }, { "epoch": 0.17191011235955056, "grad_norm": 1.461574624337787, "learning_rate": 1.896262207072159e-05, "loss": 0.7181, "step": 1683 }, { "epoch": 0.17201225740551584, "grad_norm": 1.4962878716478465, "learning_rate": 1.896115425341969e-05, "loss": 0.7495, "step": 1684 }, { "epoch": 0.1721144024514811, "grad_norm": 1.6056608198479452, "learning_rate": 1.8959685455313663e-05, "loss": 0.8148, "step": 1685 }, { "epoch": 0.1722165474974464, "grad_norm": 1.4743101807622847, "learning_rate": 1.8958215676564275e-05, "loss": 0.8504, "step": 1686 }, { "epoch": 0.17231869254341164, "grad_norm": 1.6917360973097622, "learning_rate": 1.8956744917332394e-05, "loss": 0.798, "step": 1687 }, { "epoch": 0.1724208375893769, "grad_norm": 1.5956456734270041, "learning_rate": 1.8955273177778996e-05, "loss": 0.8106, "step": 1688 }, { "epoch": 0.1725229826353422, "grad_norm": 1.5494555511141463, "learning_rate": 1.895380045806516e-05, "loss": 0.8154, "step": 1689 }, { "epoch": 0.17262512768130744, "grad_norm": 1.4902731482896234, "learning_rate": 1.8952326758352083e-05, "loss": 0.7682, "step": 1690 }, { "epoch": 0.17272727272727273, "grad_norm": 1.6816579178473003, "learning_rate": 1.8950852078801058e-05, "loss": 0.7391, "step": 1691 }, { "epoch": 0.172829417773238, "grad_norm": 1.4523307965134538, "learning_rate": 1.8949376419573484e-05, "loss": 0.7163, "step": 1692 }, { "epoch": 0.17293156281920327, "grad_norm": 1.8230617415071149, "learning_rate": 1.8947899780830884e-05, "loss": 0.6767, "step": 1693 }, { "epoch": 0.17303370786516853, "grad_norm": 1.4893545332384173, "learning_rate": 1.8946422162734872e-05, "loss": 0.6906, "step": 1694 }, { "epoch": 0.17313585291113381, "grad_norm": 1.6362313700989841, "learning_rate": 1.8944943565447174e-05, "loss": 0.7963, "step": 1695 }, { "epoch": 0.17323799795709907, "grad_norm": 1.4728666981718914, "learning_rate": 1.894346398912962e-05, "loss": 0.7412, "step": 1696 }, { "epoch": 0.17334014300306436, "grad_norm": 1.326485111681694, "learning_rate": 1.894198343394416e-05, "loss": 0.7192, "step": 1697 }, { "epoch": 0.1734422880490296, "grad_norm": 1.3980486604699218, "learning_rate": 1.894050190005283e-05, "loss": 0.7413, "step": 1698 }, { "epoch": 0.1735444330949949, "grad_norm": 1.5371689175028298, "learning_rate": 1.8939019387617796e-05, "loss": 0.7602, "step": 1699 }, { "epoch": 0.17364657814096016, "grad_norm": 1.5481695900138497, "learning_rate": 1.8937535896801312e-05, "loss": 0.9276, "step": 1700 }, { "epoch": 0.17374872318692544, "grad_norm": 1.5351098299443398, "learning_rate": 1.893605142776575e-05, "loss": 0.7257, "step": 1701 }, { "epoch": 0.1738508682328907, "grad_norm": 1.6029943099877884, "learning_rate": 1.8934565980673585e-05, "loss": 0.7507, "step": 1702 }, { "epoch": 0.17395301327885598, "grad_norm": 1.5307854744504417, "learning_rate": 1.8933079555687402e-05, "loss": 0.7397, "step": 1703 }, { "epoch": 0.17405515832482124, "grad_norm": 1.5044294398950129, "learning_rate": 1.893159215296989e-05, "loss": 0.7666, "step": 1704 }, { "epoch": 0.17415730337078653, "grad_norm": 1.3884329514997193, "learning_rate": 1.8930103772683846e-05, "loss": 0.7925, "step": 1705 }, { "epoch": 0.17425944841675178, "grad_norm": 1.4064140369115274, "learning_rate": 1.8928614414992173e-05, "loss": 0.628, "step": 1706 }, { "epoch": 0.17436159346271707, "grad_norm": 1.526064389183152, "learning_rate": 1.8927124080057884e-05, "loss": 0.7506, "step": 1707 }, { "epoch": 0.17446373850868233, "grad_norm": 1.6120619424463882, "learning_rate": 1.89256327680441e-05, "loss": 0.7424, "step": 1708 }, { "epoch": 0.1745658835546476, "grad_norm": 1.4316899499049005, "learning_rate": 1.8924140479114043e-05, "loss": 0.7658, "step": 1709 }, { "epoch": 0.17466802860061287, "grad_norm": 1.49541935024094, "learning_rate": 1.892264721343104e-05, "loss": 0.7514, "step": 1710 }, { "epoch": 0.17477017364657815, "grad_norm": 1.5955335117602674, "learning_rate": 1.8921152971158537e-05, "loss": 0.7183, "step": 1711 }, { "epoch": 0.1748723186925434, "grad_norm": 1.6931673273060281, "learning_rate": 1.891965775246008e-05, "loss": 0.7434, "step": 1712 }, { "epoch": 0.1749744637385087, "grad_norm": 1.6245549274268523, "learning_rate": 1.8918161557499316e-05, "loss": 0.7578, "step": 1713 }, { "epoch": 0.17507660878447395, "grad_norm": 1.466628674040457, "learning_rate": 1.8916664386440008e-05, "loss": 0.8323, "step": 1714 }, { "epoch": 0.1751787538304392, "grad_norm": 1.4580569827536138, "learning_rate": 1.8915166239446024e-05, "loss": 0.7374, "step": 1715 }, { "epoch": 0.1752808988764045, "grad_norm": 1.5609216741974261, "learning_rate": 1.8913667116681334e-05, "loss": 0.8141, "step": 1716 }, { "epoch": 0.17538304392236975, "grad_norm": 1.360626923508468, "learning_rate": 1.8912167018310018e-05, "loss": 0.7411, "step": 1717 }, { "epoch": 0.17548518896833504, "grad_norm": 1.4238521757524139, "learning_rate": 1.8910665944496264e-05, "loss": 0.678, "step": 1718 }, { "epoch": 0.1755873340143003, "grad_norm": 1.4592700507641492, "learning_rate": 1.8909163895404367e-05, "loss": 0.6802, "step": 1719 }, { "epoch": 0.17568947906026558, "grad_norm": 1.6002946149476283, "learning_rate": 1.8907660871198725e-05, "loss": 0.8288, "step": 1720 }, { "epoch": 0.17579162410623084, "grad_norm": 1.447649547321314, "learning_rate": 1.8906156872043846e-05, "loss": 0.6893, "step": 1721 }, { "epoch": 0.17589376915219612, "grad_norm": 1.3471277256394956, "learning_rate": 1.8904651898104346e-05, "loss": 0.6609, "step": 1722 }, { "epoch": 0.17599591419816138, "grad_norm": 1.632667771473945, "learning_rate": 1.8903145949544935e-05, "loss": 0.7966, "step": 1723 }, { "epoch": 0.17609805924412666, "grad_norm": 1.3770684692710315, "learning_rate": 1.8901639026530453e-05, "loss": 0.8224, "step": 1724 }, { "epoch": 0.17620020429009192, "grad_norm": 1.4596555305124719, "learning_rate": 1.8900131129225827e-05, "loss": 0.786, "step": 1725 }, { "epoch": 0.1763023493360572, "grad_norm": 1.344385851415088, "learning_rate": 1.88986222577961e-05, "loss": 0.6983, "step": 1726 }, { "epoch": 0.17640449438202246, "grad_norm": 1.645206585394134, "learning_rate": 1.8897112412406415e-05, "loss": 0.7984, "step": 1727 }, { "epoch": 0.17650663942798775, "grad_norm": 1.4308462162760553, "learning_rate": 1.889560159322203e-05, "loss": 0.7816, "step": 1728 }, { "epoch": 0.176608784473953, "grad_norm": 1.498765778288011, "learning_rate": 1.8894089800408302e-05, "loss": 0.7535, "step": 1729 }, { "epoch": 0.1767109295199183, "grad_norm": 1.5602934755552544, "learning_rate": 1.8892577034130704e-05, "loss": 0.7742, "step": 1730 }, { "epoch": 0.17681307456588355, "grad_norm": 1.3375155651194042, "learning_rate": 1.8891063294554798e-05, "loss": 0.7397, "step": 1731 }, { "epoch": 0.17691521961184883, "grad_norm": 1.500351151931169, "learning_rate": 1.888954858184627e-05, "loss": 0.6575, "step": 1732 }, { "epoch": 0.1770173646578141, "grad_norm": 1.4751098173716477, "learning_rate": 1.888803289617091e-05, "loss": 0.838, "step": 1733 }, { "epoch": 0.17711950970377938, "grad_norm": 1.3106026967193014, "learning_rate": 1.888651623769461e-05, "loss": 0.687, "step": 1734 }, { "epoch": 0.17722165474974463, "grad_norm": 1.3567574603849475, "learning_rate": 1.888499860658336e-05, "loss": 0.6326, "step": 1735 }, { "epoch": 0.17732379979570992, "grad_norm": 1.5292129666665577, "learning_rate": 1.8883480003003272e-05, "loss": 0.8152, "step": 1736 }, { "epoch": 0.17742594484167518, "grad_norm": 1.403810663265192, "learning_rate": 1.8881960427120562e-05, "loss": 0.6388, "step": 1737 }, { "epoch": 0.17752808988764046, "grad_norm": 1.330945086995526, "learning_rate": 1.8880439879101543e-05, "loss": 0.6775, "step": 1738 }, { "epoch": 0.17763023493360572, "grad_norm": 1.5331745981873093, "learning_rate": 1.8878918359112644e-05, "loss": 0.8849, "step": 1739 }, { "epoch": 0.177732379979571, "grad_norm": 1.556113685371182, "learning_rate": 1.8877395867320392e-05, "loss": 0.697, "step": 1740 }, { "epoch": 0.17783452502553626, "grad_norm": 1.4319459537640808, "learning_rate": 1.8875872403891425e-05, "loss": 0.6804, "step": 1741 }, { "epoch": 0.17793667007150155, "grad_norm": 1.555378852452123, "learning_rate": 1.8874347968992493e-05, "loss": 0.7376, "step": 1742 }, { "epoch": 0.1780388151174668, "grad_norm": 1.3664630482299116, "learning_rate": 1.887282256279044e-05, "loss": 0.7259, "step": 1743 }, { "epoch": 0.17814096016343206, "grad_norm": 1.460144219999322, "learning_rate": 1.8871296185452225e-05, "loss": 0.7497, "step": 1744 }, { "epoch": 0.17824310520939735, "grad_norm": 1.5321043045469942, "learning_rate": 1.8869768837144908e-05, "loss": 0.6947, "step": 1745 }, { "epoch": 0.1783452502553626, "grad_norm": 1.5054494438453947, "learning_rate": 1.8868240518035667e-05, "loss": 0.7661, "step": 1746 }, { "epoch": 0.1784473953013279, "grad_norm": 1.621487456620026, "learning_rate": 1.8866711228291768e-05, "loss": 0.7292, "step": 1747 }, { "epoch": 0.17854954034729315, "grad_norm": 1.3935445277579703, "learning_rate": 1.8865180968080595e-05, "loss": 0.7232, "step": 1748 }, { "epoch": 0.17865168539325843, "grad_norm": 1.6799115300525853, "learning_rate": 1.886364973756964e-05, "loss": 0.8039, "step": 1749 }, { "epoch": 0.1787538304392237, "grad_norm": 1.5013111853513779, "learning_rate": 1.8862117536926498e-05, "loss": 0.7039, "step": 1750 }, { "epoch": 0.17885597548518897, "grad_norm": 1.527134971761119, "learning_rate": 1.886058436631886e-05, "loss": 0.8871, "step": 1751 }, { "epoch": 0.17895812053115423, "grad_norm": 1.5475602842355707, "learning_rate": 1.885905022591454e-05, "loss": 0.7291, "step": 1752 }, { "epoch": 0.17906026557711952, "grad_norm": 1.5281695675969766, "learning_rate": 1.8857515115881447e-05, "loss": 0.6369, "step": 1753 }, { "epoch": 0.17916241062308477, "grad_norm": 1.3615608515268662, "learning_rate": 1.8855979036387607e-05, "loss": 0.7154, "step": 1754 }, { "epoch": 0.17926455566905006, "grad_norm": 1.4776607990469777, "learning_rate": 1.8854441987601137e-05, "loss": 0.7451, "step": 1755 }, { "epoch": 0.17936670071501531, "grad_norm": 1.464285683725378, "learning_rate": 1.885290396969027e-05, "loss": 0.7573, "step": 1756 }, { "epoch": 0.1794688457609806, "grad_norm": 1.6051134360793224, "learning_rate": 1.8851364982823342e-05, "loss": 0.7426, "step": 1757 }, { "epoch": 0.17957099080694586, "grad_norm": 1.5387703770992684, "learning_rate": 1.8849825027168804e-05, "loss": 0.8334, "step": 1758 }, { "epoch": 0.17967313585291114, "grad_norm": 1.549289613041583, "learning_rate": 1.8848284102895194e-05, "loss": 0.7647, "step": 1759 }, { "epoch": 0.1797752808988764, "grad_norm": 1.3342260548054745, "learning_rate": 1.8846742210171177e-05, "loss": 0.6078, "step": 1760 }, { "epoch": 0.17987742594484168, "grad_norm": 1.5539474255802859, "learning_rate": 1.8845199349165505e-05, "loss": 0.7049, "step": 1761 }, { "epoch": 0.17997957099080694, "grad_norm": 1.5160313403182686, "learning_rate": 1.884365552004705e-05, "loss": 0.7299, "step": 1762 }, { "epoch": 0.18008171603677223, "grad_norm": 1.4075691925509088, "learning_rate": 1.8842110722984787e-05, "loss": 0.7894, "step": 1763 }, { "epoch": 0.18018386108273748, "grad_norm": 1.6545597835502586, "learning_rate": 1.884056495814779e-05, "loss": 0.768, "step": 1764 }, { "epoch": 0.18028600612870277, "grad_norm": 1.3552097667371947, "learning_rate": 1.8839018225705247e-05, "loss": 0.7508, "step": 1765 }, { "epoch": 0.18038815117466803, "grad_norm": 1.6976829293541074, "learning_rate": 1.883747052582645e-05, "loss": 0.7671, "step": 1766 }, { "epoch": 0.1804902962206333, "grad_norm": 1.4390663098752343, "learning_rate": 1.8835921858680793e-05, "loss": 0.9087, "step": 1767 }, { "epoch": 0.18059244126659857, "grad_norm": 1.5926655514771286, "learning_rate": 1.8834372224437782e-05, "loss": 0.9643, "step": 1768 }, { "epoch": 0.18069458631256385, "grad_norm": 1.5124824287204868, "learning_rate": 1.883282162326702e-05, "loss": 0.7677, "step": 1769 }, { "epoch": 0.1807967313585291, "grad_norm": 1.7516725496348795, "learning_rate": 1.8831270055338223e-05, "loss": 0.8012, "step": 1770 }, { "epoch": 0.18089887640449437, "grad_norm": 1.5253110669517245, "learning_rate": 1.8829717520821217e-05, "loss": 0.8358, "step": 1771 }, { "epoch": 0.18100102145045965, "grad_norm": 1.5417836281123047, "learning_rate": 1.8828164019885923e-05, "loss": 0.7064, "step": 1772 }, { "epoch": 0.1811031664964249, "grad_norm": 1.497711602314893, "learning_rate": 1.8826609552702373e-05, "loss": 0.8343, "step": 1773 }, { "epoch": 0.1812053115423902, "grad_norm": 1.3855986453864657, "learning_rate": 1.8825054119440707e-05, "loss": 0.7801, "step": 1774 }, { "epoch": 0.18130745658835545, "grad_norm": 1.5148514645104005, "learning_rate": 1.8823497720271162e-05, "loss": 0.7157, "step": 1775 }, { "epoch": 0.18140960163432074, "grad_norm": 1.6466224709714379, "learning_rate": 1.8821940355364094e-05, "loss": 0.7871, "step": 1776 }, { "epoch": 0.181511746680286, "grad_norm": 1.459122073675351, "learning_rate": 1.882038202488995e-05, "loss": 0.8131, "step": 1777 }, { "epoch": 0.18161389172625128, "grad_norm": 1.478351886327625, "learning_rate": 1.8818822729019296e-05, "loss": 0.8378, "step": 1778 }, { "epoch": 0.18171603677221654, "grad_norm": 1.4776996874510193, "learning_rate": 1.88172624679228e-05, "loss": 0.6862, "step": 1779 }, { "epoch": 0.18181818181818182, "grad_norm": 1.4358298192796701, "learning_rate": 1.8815701241771226e-05, "loss": 0.7026, "step": 1780 }, { "epoch": 0.18192032686414708, "grad_norm": 1.424238215107601, "learning_rate": 1.8814139050735458e-05, "loss": 0.7794, "step": 1781 }, { "epoch": 0.18202247191011237, "grad_norm": 1.5530215080498537, "learning_rate": 1.8812575894986476e-05, "loss": 0.7813, "step": 1782 }, { "epoch": 0.18212461695607762, "grad_norm": 1.3936397898948503, "learning_rate": 1.8811011774695368e-05, "loss": 0.7901, "step": 1783 }, { "epoch": 0.1822267620020429, "grad_norm": 1.5462288078001551, "learning_rate": 1.880944669003333e-05, "loss": 0.8313, "step": 1784 }, { "epoch": 0.18232890704800817, "grad_norm": 1.3908833098633122, "learning_rate": 1.8807880641171658e-05, "loss": 0.6667, "step": 1785 }, { "epoch": 0.18243105209397345, "grad_norm": 1.5046107400429856, "learning_rate": 1.880631362828176e-05, "loss": 0.7726, "step": 1786 }, { "epoch": 0.1825331971399387, "grad_norm": 1.481318713359148, "learning_rate": 1.8804745651535147e-05, "loss": 0.7834, "step": 1787 }, { "epoch": 0.182635342185904, "grad_norm": 1.6737020184541211, "learning_rate": 1.8803176711103432e-05, "loss": 0.8335, "step": 1788 }, { "epoch": 0.18273748723186925, "grad_norm": 1.4675559585919482, "learning_rate": 1.8801606807158342e-05, "loss": 0.7547, "step": 1789 }, { "epoch": 0.18283963227783454, "grad_norm": 1.5216604232340878, "learning_rate": 1.8800035939871697e-05, "loss": 0.7888, "step": 1790 }, { "epoch": 0.1829417773237998, "grad_norm": 1.4887658648595692, "learning_rate": 1.879846410941543e-05, "loss": 0.6972, "step": 1791 }, { "epoch": 0.18304392236976508, "grad_norm": 1.617558762147985, "learning_rate": 1.879689131596159e-05, "loss": 0.7916, "step": 1792 }, { "epoch": 0.18314606741573033, "grad_norm": 1.3399225389184626, "learning_rate": 1.8795317559682305e-05, "loss": 0.7203, "step": 1793 }, { "epoch": 0.18324821246169562, "grad_norm": 1.6207718185361897, "learning_rate": 1.879374284074983e-05, "loss": 1.0082, "step": 1794 }, { "epoch": 0.18335035750766088, "grad_norm": 1.6288233394742437, "learning_rate": 1.8792167159336526e-05, "loss": 0.7659, "step": 1795 }, { "epoch": 0.18345250255362616, "grad_norm": 1.4509509474637101, "learning_rate": 1.8790590515614842e-05, "loss": 0.707, "step": 1796 }, { "epoch": 0.18355464759959142, "grad_norm": 1.383497665963032, "learning_rate": 1.878901290975735e-05, "loss": 0.7846, "step": 1797 }, { "epoch": 0.18365679264555668, "grad_norm": 1.4421538897054473, "learning_rate": 1.878743434193671e-05, "loss": 0.6717, "step": 1798 }, { "epoch": 0.18375893769152196, "grad_norm": 1.383079349080227, "learning_rate": 1.8785854812325704e-05, "loss": 0.6825, "step": 1799 }, { "epoch": 0.18386108273748722, "grad_norm": 1.5161614744753042, "learning_rate": 1.878427432109722e-05, "loss": 0.7663, "step": 1800 }, { "epoch": 0.1839632277834525, "grad_norm": 1.6012388046505472, "learning_rate": 1.878269286842423e-05, "loss": 0.8353, "step": 1801 }, { "epoch": 0.18406537282941776, "grad_norm": 1.5088291014967619, "learning_rate": 1.8781110454479834e-05, "loss": 0.7301, "step": 1802 }, { "epoch": 0.18416751787538305, "grad_norm": 1.5049724087621288, "learning_rate": 1.8779527079437226e-05, "loss": 0.7403, "step": 1803 }, { "epoch": 0.1842696629213483, "grad_norm": 1.6220299596196548, "learning_rate": 1.8777942743469705e-05, "loss": 0.7866, "step": 1804 }, { "epoch": 0.1843718079673136, "grad_norm": 1.500484583070211, "learning_rate": 1.877635744675068e-05, "loss": 0.7101, "step": 1805 }, { "epoch": 0.18447395301327885, "grad_norm": 1.5410474319740113, "learning_rate": 1.877477118945366e-05, "loss": 0.732, "step": 1806 }, { "epoch": 0.18457609805924413, "grad_norm": 1.5232889682497255, "learning_rate": 1.8773183971752266e-05, "loss": 0.7835, "step": 1807 }, { "epoch": 0.1846782431052094, "grad_norm": 1.4978308793429884, "learning_rate": 1.8771595793820218e-05, "loss": 0.8195, "step": 1808 }, { "epoch": 0.18478038815117467, "grad_norm": 1.5009145033963818, "learning_rate": 1.8770006655831344e-05, "loss": 0.8184, "step": 1809 }, { "epoch": 0.18488253319713993, "grad_norm": 1.345350146097316, "learning_rate": 1.8768416557959578e-05, "loss": 0.666, "step": 1810 }, { "epoch": 0.18498467824310522, "grad_norm": 1.4638958631785517, "learning_rate": 1.876682550037895e-05, "loss": 0.6191, "step": 1811 }, { "epoch": 0.18508682328907047, "grad_norm": 1.441706471078032, "learning_rate": 1.876523348326361e-05, "loss": 0.8151, "step": 1812 }, { "epoch": 0.18518896833503576, "grad_norm": 1.483149396013851, "learning_rate": 1.8763640506787795e-05, "loss": 0.7702, "step": 1813 }, { "epoch": 0.18529111338100102, "grad_norm": 1.4107501271156073, "learning_rate": 1.8762046571125873e-05, "loss": 0.7867, "step": 1814 }, { "epoch": 0.1853932584269663, "grad_norm": 1.4906393281742727, "learning_rate": 1.876045167645229e-05, "loss": 0.6602, "step": 1815 }, { "epoch": 0.18549540347293156, "grad_norm": 1.3885181269134927, "learning_rate": 1.8758855822941614e-05, "loss": 0.84, "step": 1816 }, { "epoch": 0.18559754851889684, "grad_norm": 1.6826654665163017, "learning_rate": 1.87572590107685e-05, "loss": 0.7574, "step": 1817 }, { "epoch": 0.1856996935648621, "grad_norm": 1.6679059396292548, "learning_rate": 1.875566124010774e-05, "loss": 0.7335, "step": 1818 }, { "epoch": 0.18580183861082739, "grad_norm": 1.5083446964707001, "learning_rate": 1.8754062511134195e-05, "loss": 0.7384, "step": 1819 }, { "epoch": 0.18590398365679264, "grad_norm": 1.4766639320775647, "learning_rate": 1.8752462824022853e-05, "loss": 0.8127, "step": 1820 }, { "epoch": 0.18600612870275793, "grad_norm": 1.3793029796210652, "learning_rate": 1.87508621789488e-05, "loss": 0.7941, "step": 1821 }, { "epoch": 0.18610827374872319, "grad_norm": 1.5755246060005315, "learning_rate": 1.8749260576087227e-05, "loss": 0.8592, "step": 1822 }, { "epoch": 0.18621041879468847, "grad_norm": 1.3192273290851888, "learning_rate": 1.874765801561343e-05, "loss": 0.6727, "step": 1823 }, { "epoch": 0.18631256384065373, "grad_norm": 1.3903652546849665, "learning_rate": 1.874605449770282e-05, "loss": 0.7759, "step": 1824 }, { "epoch": 0.186414708886619, "grad_norm": 1.6034452935072083, "learning_rate": 1.8744450022530883e-05, "loss": 0.6549, "step": 1825 }, { "epoch": 0.18651685393258427, "grad_norm": 1.4175914052444982, "learning_rate": 1.874284459027325e-05, "loss": 0.8694, "step": 1826 }, { "epoch": 0.18661899897854953, "grad_norm": 1.5122645635509433, "learning_rate": 1.8741238201105622e-05, "loss": 0.732, "step": 1827 }, { "epoch": 0.1867211440245148, "grad_norm": 1.6021854069114603, "learning_rate": 1.8739630855203828e-05, "loss": 0.8332, "step": 1828 }, { "epoch": 0.18682328907048007, "grad_norm": 1.5840423856404344, "learning_rate": 1.873802255274379e-05, "loss": 0.7127, "step": 1829 }, { "epoch": 0.18692543411644535, "grad_norm": 1.7424203973594155, "learning_rate": 1.8736413293901542e-05, "loss": 0.6892, "step": 1830 }, { "epoch": 0.1870275791624106, "grad_norm": 1.4116257096844307, "learning_rate": 1.8734803078853208e-05, "loss": 0.6099, "step": 1831 }, { "epoch": 0.1871297242083759, "grad_norm": 1.3619963609424839, "learning_rate": 1.8733191907775036e-05, "loss": 0.6665, "step": 1832 }, { "epoch": 0.18723186925434115, "grad_norm": 1.486470907959484, "learning_rate": 1.8731579780843368e-05, "loss": 0.7367, "step": 1833 }, { "epoch": 0.18733401430030644, "grad_norm": 1.6356446898867938, "learning_rate": 1.8729966698234655e-05, "loss": 0.7376, "step": 1834 }, { "epoch": 0.1874361593462717, "grad_norm": 1.5358069162501742, "learning_rate": 1.8728352660125438e-05, "loss": 0.7308, "step": 1835 }, { "epoch": 0.18753830439223698, "grad_norm": 1.5514393338682109, "learning_rate": 1.872673766669239e-05, "loss": 0.8184, "step": 1836 }, { "epoch": 0.18764044943820224, "grad_norm": 1.5460397059521618, "learning_rate": 1.8725121718112268e-05, "loss": 0.8031, "step": 1837 }, { "epoch": 0.18774259448416752, "grad_norm": 1.5090377119032092, "learning_rate": 1.872350481456193e-05, "loss": 0.8639, "step": 1838 }, { "epoch": 0.18784473953013278, "grad_norm": 1.4686764996879726, "learning_rate": 1.8721886956218357e-05, "loss": 0.7653, "step": 1839 }, { "epoch": 0.18794688457609807, "grad_norm": 1.5418366060267479, "learning_rate": 1.8720268143258625e-05, "loss": 0.7325, "step": 1840 }, { "epoch": 0.18804902962206332, "grad_norm": 1.5963803873062974, "learning_rate": 1.8718648375859905e-05, "loss": 0.9121, "step": 1841 }, { "epoch": 0.1881511746680286, "grad_norm": 1.3639099727070416, "learning_rate": 1.8717027654199488e-05, "loss": 0.7013, "step": 1842 }, { "epoch": 0.18825331971399387, "grad_norm": 1.402047728140098, "learning_rate": 1.871540597845477e-05, "loss": 0.8189, "step": 1843 }, { "epoch": 0.18835546475995915, "grad_norm": 1.6018790761428272, "learning_rate": 1.8713783348803228e-05, "loss": 0.8649, "step": 1844 }, { "epoch": 0.1884576098059244, "grad_norm": 1.3914856267087141, "learning_rate": 1.871215976542247e-05, "loss": 0.6692, "step": 1845 }, { "epoch": 0.1885597548518897, "grad_norm": 1.7299364332596034, "learning_rate": 1.87105352284902e-05, "loss": 0.7515, "step": 1846 }, { "epoch": 0.18866189989785495, "grad_norm": 1.6872798683832921, "learning_rate": 1.8708909738184217e-05, "loss": 0.7718, "step": 1847 }, { "epoch": 0.18876404494382024, "grad_norm": 1.6181316927710099, "learning_rate": 1.870728329468244e-05, "loss": 0.7306, "step": 1848 }, { "epoch": 0.1888661899897855, "grad_norm": 1.6245973290106248, "learning_rate": 1.870565589816288e-05, "loss": 0.7498, "step": 1849 }, { "epoch": 0.18896833503575078, "grad_norm": 1.3817131575806303, "learning_rate": 1.870402754880366e-05, "loss": 0.8759, "step": 1850 }, { "epoch": 0.18907048008171604, "grad_norm": 1.535542191029484, "learning_rate": 1.8702398246783e-05, "loss": 0.7067, "step": 1851 }, { "epoch": 0.18917262512768132, "grad_norm": 1.4381358358818255, "learning_rate": 1.870076799227923e-05, "loss": 0.7366, "step": 1852 }, { "epoch": 0.18927477017364658, "grad_norm": 1.4800359993420105, "learning_rate": 1.8699136785470776e-05, "loss": 0.6912, "step": 1853 }, { "epoch": 0.18937691521961184, "grad_norm": 1.3828234057439701, "learning_rate": 1.8697504626536186e-05, "loss": 0.8071, "step": 1854 }, { "epoch": 0.18947906026557712, "grad_norm": 1.4314531742642738, "learning_rate": 1.8695871515654094e-05, "loss": 0.7988, "step": 1855 }, { "epoch": 0.18958120531154238, "grad_norm": 1.5781007461499097, "learning_rate": 1.8694237453003245e-05, "loss": 0.8044, "step": 1856 }, { "epoch": 0.18968335035750766, "grad_norm": 1.528131515904029, "learning_rate": 1.8692602438762494e-05, "loss": 0.801, "step": 1857 }, { "epoch": 0.18978549540347292, "grad_norm": 1.493350536943422, "learning_rate": 1.8690966473110786e-05, "loss": 0.819, "step": 1858 }, { "epoch": 0.1898876404494382, "grad_norm": 1.3690976206972525, "learning_rate": 1.868932955622718e-05, "loss": 0.6622, "step": 1859 }, { "epoch": 0.18998978549540346, "grad_norm": 1.4450488388196274, "learning_rate": 1.8687691688290844e-05, "loss": 0.6883, "step": 1860 }, { "epoch": 0.19009193054136875, "grad_norm": 1.455465197830054, "learning_rate": 1.868605286948104e-05, "loss": 0.6401, "step": 1861 }, { "epoch": 0.190194075587334, "grad_norm": 1.3850561552178788, "learning_rate": 1.8684413099977138e-05, "loss": 0.7819, "step": 1862 }, { "epoch": 0.1902962206332993, "grad_norm": 1.4758628906664164, "learning_rate": 1.8682772379958607e-05, "loss": 0.8318, "step": 1863 }, { "epoch": 0.19039836567926455, "grad_norm": 1.3538602537036406, "learning_rate": 1.868113070960503e-05, "loss": 0.7179, "step": 1864 }, { "epoch": 0.19050051072522983, "grad_norm": 1.5387815915942704, "learning_rate": 1.8679488089096092e-05, "loss": 0.8461, "step": 1865 }, { "epoch": 0.1906026557711951, "grad_norm": 1.3976824294560608, "learning_rate": 1.8677844518611575e-05, "loss": 0.7678, "step": 1866 }, { "epoch": 0.19070480081716037, "grad_norm": 1.5810223584642418, "learning_rate": 1.8676199998331368e-05, "loss": 0.6961, "step": 1867 }, { "epoch": 0.19080694586312563, "grad_norm": 1.626365738288862, "learning_rate": 1.8674554528435463e-05, "loss": 0.7584, "step": 1868 }, { "epoch": 0.19090909090909092, "grad_norm": 1.6077810178001672, "learning_rate": 1.8672908109103963e-05, "loss": 0.8119, "step": 1869 }, { "epoch": 0.19101123595505617, "grad_norm": 1.6741140356003115, "learning_rate": 1.8671260740517066e-05, "loss": 0.7464, "step": 1870 }, { "epoch": 0.19111338100102146, "grad_norm": 1.2776327613783214, "learning_rate": 1.8669612422855078e-05, "loss": 0.6256, "step": 1871 }, { "epoch": 0.19121552604698672, "grad_norm": 1.4387947498518552, "learning_rate": 1.866796315629841e-05, "loss": 0.6996, "step": 1872 }, { "epoch": 0.191317671092952, "grad_norm": 1.5078622680932743, "learning_rate": 1.8666312941027574e-05, "loss": 0.732, "step": 1873 }, { "epoch": 0.19141981613891726, "grad_norm": 1.4129728956947285, "learning_rate": 1.8664661777223186e-05, "loss": 0.8086, "step": 1874 }, { "epoch": 0.19152196118488254, "grad_norm": 1.4015431412158756, "learning_rate": 1.866300966506597e-05, "loss": 0.7117, "step": 1875 }, { "epoch": 0.1916241062308478, "grad_norm": 1.7719634414037608, "learning_rate": 1.866135660473675e-05, "loss": 0.7304, "step": 1876 }, { "epoch": 0.1917262512768131, "grad_norm": 1.6776031928414037, "learning_rate": 1.8659702596416453e-05, "loss": 0.7946, "step": 1877 }, { "epoch": 0.19182839632277834, "grad_norm": 1.6375638804262858, "learning_rate": 1.865804764028611e-05, "loss": 0.7835, "step": 1878 }, { "epoch": 0.19193054136874363, "grad_norm": 1.6179990019852686, "learning_rate": 1.8656391736526866e-05, "loss": 0.7849, "step": 1879 }, { "epoch": 0.1920326864147089, "grad_norm": 1.460960367095404, "learning_rate": 1.8654734885319944e-05, "loss": 0.798, "step": 1880 }, { "epoch": 0.19213483146067414, "grad_norm": 1.5828678069230913, "learning_rate": 1.86530770868467e-05, "loss": 0.81, "step": 1881 }, { "epoch": 0.19223697650663943, "grad_norm": 1.5249023921072191, "learning_rate": 1.865141834128858e-05, "loss": 0.8151, "step": 1882 }, { "epoch": 0.19233912155260469, "grad_norm": 1.4508863810355825, "learning_rate": 1.8649758648827133e-05, "loss": 0.7209, "step": 1883 }, { "epoch": 0.19244126659856997, "grad_norm": 1.4243962873432259, "learning_rate": 1.8648098009644012e-05, "loss": 0.7746, "step": 1884 }, { "epoch": 0.19254341164453523, "grad_norm": 1.4154742707954997, "learning_rate": 1.864643642392098e-05, "loss": 0.7587, "step": 1885 }, { "epoch": 0.1926455566905005, "grad_norm": 1.482793213357357, "learning_rate": 1.864477389183989e-05, "loss": 0.7325, "step": 1886 }, { "epoch": 0.19274770173646577, "grad_norm": 1.4006109574226546, "learning_rate": 1.864311041358272e-05, "loss": 0.7799, "step": 1887 }, { "epoch": 0.19284984678243106, "grad_norm": 1.5423755699242454, "learning_rate": 1.8641445989331525e-05, "loss": 0.7521, "step": 1888 }, { "epoch": 0.1929519918283963, "grad_norm": 1.5107425400423258, "learning_rate": 1.8639780619268484e-05, "loss": 0.6824, "step": 1889 }, { "epoch": 0.1930541368743616, "grad_norm": 1.4022406563421497, "learning_rate": 1.863811430357587e-05, "loss": 0.7835, "step": 1890 }, { "epoch": 0.19315628192032686, "grad_norm": 1.5402200953891934, "learning_rate": 1.863644704243607e-05, "loss": 0.6614, "step": 1891 }, { "epoch": 0.19325842696629214, "grad_norm": 1.4577731384804729, "learning_rate": 1.8634778836031558e-05, "loss": 0.7888, "step": 1892 }, { "epoch": 0.1933605720122574, "grad_norm": 1.588695888542344, "learning_rate": 1.8633109684544924e-05, "loss": 0.7575, "step": 1893 }, { "epoch": 0.19346271705822268, "grad_norm": 1.3729918210137502, "learning_rate": 1.8631439588158858e-05, "loss": 0.6792, "step": 1894 }, { "epoch": 0.19356486210418794, "grad_norm": 1.411363842431786, "learning_rate": 1.862976854705615e-05, "loss": 0.6889, "step": 1895 }, { "epoch": 0.19366700715015323, "grad_norm": 1.6070516218488293, "learning_rate": 1.8628096561419703e-05, "loss": 0.8046, "step": 1896 }, { "epoch": 0.19376915219611848, "grad_norm": 1.4750123144322365, "learning_rate": 1.862642363143251e-05, "loss": 0.7965, "step": 1897 }, { "epoch": 0.19387129724208377, "grad_norm": 1.5009741606429343, "learning_rate": 1.862474975727768e-05, "loss": 0.7776, "step": 1898 }, { "epoch": 0.19397344228804902, "grad_norm": 1.5994998080441714, "learning_rate": 1.8623074939138412e-05, "loss": 0.831, "step": 1899 }, { "epoch": 0.1940755873340143, "grad_norm": 1.424427887050029, "learning_rate": 1.8621399177198024e-05, "loss": 0.6619, "step": 1900 }, { "epoch": 0.19417773237997957, "grad_norm": 1.3860322049818534, "learning_rate": 1.8619722471639926e-05, "loss": 0.7187, "step": 1901 }, { "epoch": 0.19427987742594485, "grad_norm": 1.4118451742368041, "learning_rate": 1.8618044822647632e-05, "loss": 0.8235, "step": 1902 }, { "epoch": 0.1943820224719101, "grad_norm": 1.51959420797935, "learning_rate": 1.8616366230404766e-05, "loss": 0.8163, "step": 1903 }, { "epoch": 0.1944841675178754, "grad_norm": 1.582372673945881, "learning_rate": 1.861468669509505e-05, "loss": 0.8239, "step": 1904 }, { "epoch": 0.19458631256384065, "grad_norm": 1.593714968221231, "learning_rate": 1.8613006216902308e-05, "loss": 0.8247, "step": 1905 }, { "epoch": 0.19468845760980594, "grad_norm": 1.473919090855708, "learning_rate": 1.861132479601047e-05, "loss": 0.7108, "step": 1906 }, { "epoch": 0.1947906026557712, "grad_norm": 1.4756489829032506, "learning_rate": 1.8609642432603572e-05, "loss": 0.6867, "step": 1907 }, { "epoch": 0.19489274770173648, "grad_norm": 1.4242614829739424, "learning_rate": 1.8607959126865745e-05, "loss": 0.797, "step": 1908 }, { "epoch": 0.19499489274770174, "grad_norm": 1.540765670424351, "learning_rate": 1.860627487898123e-05, "loss": 0.7588, "step": 1909 }, { "epoch": 0.195097037793667, "grad_norm": 1.3859286822537837, "learning_rate": 1.8604589689134372e-05, "loss": 0.6985, "step": 1910 }, { "epoch": 0.19519918283963228, "grad_norm": 1.4748488944200038, "learning_rate": 1.8602903557509614e-05, "loss": 0.7315, "step": 1911 }, { "epoch": 0.19530132788559754, "grad_norm": 1.572858652310003, "learning_rate": 1.86012164842915e-05, "loss": 0.8717, "step": 1912 }, { "epoch": 0.19540347293156282, "grad_norm": 1.4037941666807017, "learning_rate": 1.8599528469664686e-05, "loss": 0.753, "step": 1913 }, { "epoch": 0.19550561797752808, "grad_norm": 1.3429018085967726, "learning_rate": 1.8597839513813923e-05, "loss": 0.7411, "step": 1914 }, { "epoch": 0.19560776302349336, "grad_norm": 1.4246578833523384, "learning_rate": 1.8596149616924074e-05, "loss": 0.6829, "step": 1915 }, { "epoch": 0.19570990806945862, "grad_norm": 1.4512162991090363, "learning_rate": 1.859445877918009e-05, "loss": 0.8141, "step": 1916 }, { "epoch": 0.1958120531154239, "grad_norm": 1.3049286636486919, "learning_rate": 1.8592767000767045e-05, "loss": 0.7225, "step": 1917 }, { "epoch": 0.19591419816138916, "grad_norm": 1.5067016238727486, "learning_rate": 1.85910742818701e-05, "loss": 0.7098, "step": 1918 }, { "epoch": 0.19601634320735445, "grad_norm": 1.3232204956764402, "learning_rate": 1.858938062267452e-05, "loss": 0.7405, "step": 1919 }, { "epoch": 0.1961184882533197, "grad_norm": 1.6134026411615998, "learning_rate": 1.8587686023365685e-05, "loss": 0.8508, "step": 1920 }, { "epoch": 0.196220633299285, "grad_norm": 1.4218705507206422, "learning_rate": 1.8585990484129068e-05, "loss": 0.6873, "step": 1921 }, { "epoch": 0.19632277834525025, "grad_norm": 1.3306354822203843, "learning_rate": 1.858429400515024e-05, "loss": 0.7596, "step": 1922 }, { "epoch": 0.19642492339121553, "grad_norm": 1.5564872715920985, "learning_rate": 1.858259658661489e-05, "loss": 0.7861, "step": 1923 }, { "epoch": 0.1965270684371808, "grad_norm": 1.3899564337708528, "learning_rate": 1.85808982287088e-05, "loss": 0.6688, "step": 1924 }, { "epoch": 0.19662921348314608, "grad_norm": 1.4797554434541693, "learning_rate": 1.857919893161785e-05, "loss": 0.7476, "step": 1925 }, { "epoch": 0.19673135852911133, "grad_norm": 1.4798779955283625, "learning_rate": 1.857749869552804e-05, "loss": 0.6764, "step": 1926 }, { "epoch": 0.19683350357507662, "grad_norm": 1.4943403560117658, "learning_rate": 1.8575797520625455e-05, "loss": 0.8671, "step": 1927 }, { "epoch": 0.19693564862104188, "grad_norm": 1.5565047182305098, "learning_rate": 1.8574095407096288e-05, "loss": 0.8176, "step": 1928 }, { "epoch": 0.19703779366700716, "grad_norm": 1.2468424023893614, "learning_rate": 1.857239235512684e-05, "loss": 0.6553, "step": 1929 }, { "epoch": 0.19713993871297242, "grad_norm": 1.496461421038721, "learning_rate": 1.857068836490351e-05, "loss": 0.85, "step": 1930 }, { "epoch": 0.1972420837589377, "grad_norm": 1.471542687876823, "learning_rate": 1.85689834366128e-05, "loss": 0.7993, "step": 1931 }, { "epoch": 0.19734422880490296, "grad_norm": 1.5698689898477731, "learning_rate": 1.856727757044132e-05, "loss": 0.8024, "step": 1932 }, { "epoch": 0.19744637385086825, "grad_norm": 1.3410885483553978, "learning_rate": 1.8565570766575773e-05, "loss": 0.7714, "step": 1933 }, { "epoch": 0.1975485188968335, "grad_norm": 1.4115390443411366, "learning_rate": 1.856386302520297e-05, "loss": 0.8079, "step": 1934 }, { "epoch": 0.1976506639427988, "grad_norm": 1.4288246439813557, "learning_rate": 1.8562154346509826e-05, "loss": 0.8133, "step": 1935 }, { "epoch": 0.19775280898876405, "grad_norm": 1.4887123448310347, "learning_rate": 1.856044473068336e-05, "loss": 0.6497, "step": 1936 }, { "epoch": 0.1978549540347293, "grad_norm": 1.5025613321783005, "learning_rate": 1.8558734177910684e-05, "loss": 0.7289, "step": 1937 }, { "epoch": 0.1979570990806946, "grad_norm": 1.5937089680265561, "learning_rate": 1.8557022688379027e-05, "loss": 0.7769, "step": 1938 }, { "epoch": 0.19805924412665984, "grad_norm": 1.5331409208630988, "learning_rate": 1.8555310262275704e-05, "loss": 0.8643, "step": 1939 }, { "epoch": 0.19816138917262513, "grad_norm": 1.4937707428868934, "learning_rate": 1.8553596899788145e-05, "loss": 0.7264, "step": 1940 }, { "epoch": 0.1982635342185904, "grad_norm": 1.501415343683645, "learning_rate": 1.8551882601103882e-05, "loss": 0.6718, "step": 1941 }, { "epoch": 0.19836567926455567, "grad_norm": 1.3535263529173234, "learning_rate": 1.8550167366410543e-05, "loss": 0.7615, "step": 1942 }, { "epoch": 0.19846782431052093, "grad_norm": 1.5345621231029483, "learning_rate": 1.854845119589586e-05, "loss": 0.6778, "step": 1943 }, { "epoch": 0.19856996935648621, "grad_norm": 1.527429128809299, "learning_rate": 1.854673408974767e-05, "loss": 0.7269, "step": 1944 }, { "epoch": 0.19867211440245147, "grad_norm": 1.5286100575086587, "learning_rate": 1.8545016048153918e-05, "loss": 0.7642, "step": 1945 }, { "epoch": 0.19877425944841676, "grad_norm": 1.5833916586056414, "learning_rate": 1.854329707130263e-05, "loss": 0.7759, "step": 1946 }, { "epoch": 0.19887640449438201, "grad_norm": 1.5565213204671897, "learning_rate": 1.8541577159381964e-05, "loss": 0.6472, "step": 1947 }, { "epoch": 0.1989785495403473, "grad_norm": 1.3948486246636957, "learning_rate": 1.8539856312580157e-05, "loss": 0.7066, "step": 1948 }, { "epoch": 0.19908069458631256, "grad_norm": 1.5923729631280517, "learning_rate": 1.853813453108556e-05, "loss": 0.7033, "step": 1949 }, { "epoch": 0.19918283963227784, "grad_norm": 1.5486725694228305, "learning_rate": 1.853641181508662e-05, "loss": 0.8215, "step": 1950 }, { "epoch": 0.1992849846782431, "grad_norm": 1.494804139996211, "learning_rate": 1.8534688164771894e-05, "loss": 0.7935, "step": 1951 }, { "epoch": 0.19938712972420838, "grad_norm": 1.4768290753090216, "learning_rate": 1.8532963580330034e-05, "loss": 0.7194, "step": 1952 }, { "epoch": 0.19948927477017364, "grad_norm": 1.308830779646724, "learning_rate": 1.8531238061949795e-05, "loss": 0.6173, "step": 1953 }, { "epoch": 0.19959141981613893, "grad_norm": 2.628761458649275, "learning_rate": 1.8529511609820038e-05, "loss": 0.6455, "step": 1954 }, { "epoch": 0.19969356486210418, "grad_norm": 1.5133101579122967, "learning_rate": 1.8527784224129724e-05, "loss": 0.8339, "step": 1955 }, { "epoch": 0.19979570990806947, "grad_norm": 1.4048530607589202, "learning_rate": 1.852605590506792e-05, "loss": 0.7987, "step": 1956 }, { "epoch": 0.19989785495403473, "grad_norm": 1.39960794402212, "learning_rate": 1.852432665282379e-05, "loss": 0.7656, "step": 1957 }, { "epoch": 0.2, "grad_norm": 1.6460628434217563, "learning_rate": 1.85225964675866e-05, "loss": 0.7192, "step": 1958 }, { "epoch": 0.20010214504596527, "grad_norm": 1.6355988495570337, "learning_rate": 1.852086534954572e-05, "loss": 0.7113, "step": 1959 }, { "epoch": 0.20020429009193055, "grad_norm": 1.3818621921642458, "learning_rate": 1.8519133298890616e-05, "loss": 0.6766, "step": 1960 }, { "epoch": 0.2003064351378958, "grad_norm": 1.4465293767329117, "learning_rate": 1.8517400315810878e-05, "loss": 0.8144, "step": 1961 }, { "epoch": 0.2004085801838611, "grad_norm": 1.5246992590946782, "learning_rate": 1.8515666400496164e-05, "loss": 0.6801, "step": 1962 }, { "epoch": 0.20051072522982635, "grad_norm": 1.4651214133224106, "learning_rate": 1.8513931553136263e-05, "loss": 0.7262, "step": 1963 }, { "epoch": 0.2006128702757916, "grad_norm": 1.5502974719813756, "learning_rate": 1.8512195773921056e-05, "loss": 0.74, "step": 1964 }, { "epoch": 0.2007150153217569, "grad_norm": 1.4982588487837019, "learning_rate": 1.851045906304052e-05, "loss": 0.761, "step": 1965 }, { "epoch": 0.20081716036772215, "grad_norm": 1.5258121740907602, "learning_rate": 1.8508721420684743e-05, "loss": 0.7761, "step": 1966 }, { "epoch": 0.20091930541368744, "grad_norm": 1.5314986028424151, "learning_rate": 1.8506982847043906e-05, "loss": 0.7799, "step": 1967 }, { "epoch": 0.2010214504596527, "grad_norm": 1.49753058932652, "learning_rate": 1.8505243342308302e-05, "loss": 0.7539, "step": 1968 }, { "epoch": 0.20112359550561798, "grad_norm": 1.5102195609725788, "learning_rate": 1.8503502906668318e-05, "loss": 0.813, "step": 1969 }, { "epoch": 0.20122574055158324, "grad_norm": 1.4294148616171312, "learning_rate": 1.850176154031445e-05, "loss": 0.8452, "step": 1970 }, { "epoch": 0.20132788559754852, "grad_norm": 1.6134728165619554, "learning_rate": 1.8500019243437287e-05, "loss": 0.7918, "step": 1971 }, { "epoch": 0.20143003064351378, "grad_norm": 1.4888672023968599, "learning_rate": 1.8498276016227525e-05, "loss": 0.6697, "step": 1972 }, { "epoch": 0.20153217568947907, "grad_norm": 1.4067934388596872, "learning_rate": 1.849653185887596e-05, "loss": 0.791, "step": 1973 }, { "epoch": 0.20163432073544432, "grad_norm": 1.5579291710654195, "learning_rate": 1.8494786771573495e-05, "loss": 0.7772, "step": 1974 }, { "epoch": 0.2017364657814096, "grad_norm": 1.493381037018715, "learning_rate": 1.8493040754511132e-05, "loss": 0.8466, "step": 1975 }, { "epoch": 0.20183861082737486, "grad_norm": 1.5590402182649785, "learning_rate": 1.8491293807879972e-05, "loss": 0.7136, "step": 1976 }, { "epoch": 0.20194075587334015, "grad_norm": 1.4019830053847468, "learning_rate": 1.8489545931871214e-05, "loss": 0.6857, "step": 1977 }, { "epoch": 0.2020429009193054, "grad_norm": 1.4784031950862793, "learning_rate": 1.8487797126676174e-05, "loss": 0.693, "step": 1978 }, { "epoch": 0.2021450459652707, "grad_norm": 1.4839370120001454, "learning_rate": 1.848604739248625e-05, "loss": 0.7291, "step": 1979 }, { "epoch": 0.20224719101123595, "grad_norm": 1.6531613286945974, "learning_rate": 1.8484296729492958e-05, "loss": 0.7761, "step": 1980 }, { "epoch": 0.20234933605720123, "grad_norm": 1.4980778473229213, "learning_rate": 1.8482545137887912e-05, "loss": 0.7785, "step": 1981 }, { "epoch": 0.2024514811031665, "grad_norm": 1.4518042801621294, "learning_rate": 1.8480792617862816e-05, "loss": 0.6263, "step": 1982 }, { "epoch": 0.20255362614913178, "grad_norm": 1.485100487599264, "learning_rate": 1.8479039169609488e-05, "loss": 0.7862, "step": 1983 }, { "epoch": 0.20265577119509703, "grad_norm": 1.4308328446326286, "learning_rate": 1.8477284793319843e-05, "loss": 0.7139, "step": 1984 }, { "epoch": 0.20275791624106232, "grad_norm": 1.328213752396637, "learning_rate": 1.8475529489185904e-05, "loss": 0.6588, "step": 1985 }, { "epoch": 0.20286006128702758, "grad_norm": 1.6186169013989817, "learning_rate": 1.8473773257399786e-05, "loss": 0.7592, "step": 1986 }, { "epoch": 0.20296220633299286, "grad_norm": 1.617177365724882, "learning_rate": 1.847201609815371e-05, "loss": 0.8496, "step": 1987 }, { "epoch": 0.20306435137895812, "grad_norm": 1.6298769061893112, "learning_rate": 1.8470258011639998e-05, "loss": 0.8767, "step": 1988 }, { "epoch": 0.2031664964249234, "grad_norm": 1.5049975488487637, "learning_rate": 1.846849899805107e-05, "loss": 0.7352, "step": 1989 }, { "epoch": 0.20326864147088866, "grad_norm": 1.5278642226644494, "learning_rate": 1.846673905757946e-05, "loss": 0.6461, "step": 1990 }, { "epoch": 0.20337078651685395, "grad_norm": 1.6295635029207831, "learning_rate": 1.846497819041779e-05, "loss": 0.8611, "step": 1991 }, { "epoch": 0.2034729315628192, "grad_norm": 1.377571698136443, "learning_rate": 1.8463216396758788e-05, "loss": 0.6533, "step": 1992 }, { "epoch": 0.20357507660878446, "grad_norm": 1.572635835252953, "learning_rate": 1.8461453676795284e-05, "loss": 0.7948, "step": 1993 }, { "epoch": 0.20367722165474975, "grad_norm": 1.8295475991324666, "learning_rate": 1.8459690030720206e-05, "loss": 0.9703, "step": 1994 }, { "epoch": 0.203779366700715, "grad_norm": 1.5020685263382414, "learning_rate": 1.845792545872659e-05, "loss": 0.7606, "step": 1995 }, { "epoch": 0.2038815117466803, "grad_norm": 1.4870910578830265, "learning_rate": 1.845615996100757e-05, "loss": 0.7735, "step": 1996 }, { "epoch": 0.20398365679264555, "grad_norm": 1.6040578783438644, "learning_rate": 1.845439353775637e-05, "loss": 0.8046, "step": 1997 }, { "epoch": 0.20408580183861083, "grad_norm": 1.4108683605792747, "learning_rate": 1.8452626189166345e-05, "loss": 0.8333, "step": 1998 }, { "epoch": 0.2041879468845761, "grad_norm": 1.5799295583486759, "learning_rate": 1.8450857915430918e-05, "loss": 0.8773, "step": 1999 }, { "epoch": 0.20429009193054137, "grad_norm": 1.5751828695429502, "learning_rate": 1.8449088716743633e-05, "loss": 0.7924, "step": 2000 }, { "epoch": 0.20439223697650663, "grad_norm": 1.5645695377719366, "learning_rate": 1.844731859329813e-05, "loss": 0.6189, "step": 2001 }, { "epoch": 0.20449438202247192, "grad_norm": 1.5015370914764101, "learning_rate": 1.8445547545288146e-05, "loss": 0.7861, "step": 2002 }, { "epoch": 0.20459652706843717, "grad_norm": 1.3632761947355794, "learning_rate": 1.844377557290753e-05, "loss": 0.7927, "step": 2003 }, { "epoch": 0.20469867211440246, "grad_norm": 1.5717071407541336, "learning_rate": 1.8442002676350224e-05, "loss": 0.8023, "step": 2004 }, { "epoch": 0.20480081716036772, "grad_norm": 1.5153615319188158, "learning_rate": 1.844022885581027e-05, "loss": 0.6348, "step": 2005 }, { "epoch": 0.204902962206333, "grad_norm": 1.5194370039645273, "learning_rate": 1.8438454111481808e-05, "loss": 0.7191, "step": 2006 }, { "epoch": 0.20500510725229826, "grad_norm": 1.3552030344916781, "learning_rate": 1.84366784435591e-05, "loss": 0.6666, "step": 2007 }, { "epoch": 0.20510725229826354, "grad_norm": 1.5506437213023405, "learning_rate": 1.8434901852236482e-05, "loss": 0.7843, "step": 2008 }, { "epoch": 0.2052093973442288, "grad_norm": 1.4449150711497472, "learning_rate": 1.8433124337708404e-05, "loss": 0.8281, "step": 2009 }, { "epoch": 0.20531154239019409, "grad_norm": 1.5864974878969003, "learning_rate": 1.8431345900169422e-05, "loss": 0.7422, "step": 2010 }, { "epoch": 0.20541368743615934, "grad_norm": 1.4693047016163825, "learning_rate": 1.8429566539814186e-05, "loss": 0.7514, "step": 2011 }, { "epoch": 0.20551583248212463, "grad_norm": 1.5056618554993821, "learning_rate": 1.842778625683744e-05, "loss": 0.7536, "step": 2012 }, { "epoch": 0.20561797752808988, "grad_norm": 1.4416577657806737, "learning_rate": 1.842600505143405e-05, "loss": 0.6774, "step": 2013 }, { "epoch": 0.20572012257405517, "grad_norm": 1.506422446381562, "learning_rate": 1.8424222923798956e-05, "loss": 0.7627, "step": 2014 }, { "epoch": 0.20582226762002043, "grad_norm": 1.4433964433851796, "learning_rate": 1.8422439874127226e-05, "loss": 0.6599, "step": 2015 }, { "epoch": 0.2059244126659857, "grad_norm": 1.580549495557907, "learning_rate": 1.8420655902614007e-05, "loss": 0.7836, "step": 2016 }, { "epoch": 0.20602655771195097, "grad_norm": 1.4897306924484361, "learning_rate": 1.841887100945456e-05, "loss": 0.8188, "step": 2017 }, { "epoch": 0.20612870275791625, "grad_norm": 1.5047856578520649, "learning_rate": 1.841708519484424e-05, "loss": 0.7897, "step": 2018 }, { "epoch": 0.2062308478038815, "grad_norm": 1.5094717210603286, "learning_rate": 1.841529845897851e-05, "loss": 0.8066, "step": 2019 }, { "epoch": 0.20633299284984677, "grad_norm": 1.5899988397134077, "learning_rate": 1.8413510802052926e-05, "loss": 0.8315, "step": 2020 }, { "epoch": 0.20643513789581205, "grad_norm": 1.5544695230386947, "learning_rate": 1.8411722224263152e-05, "loss": 0.8818, "step": 2021 }, { "epoch": 0.2065372829417773, "grad_norm": 1.4019880121267256, "learning_rate": 1.840993272580494e-05, "loss": 0.692, "step": 2022 }, { "epoch": 0.2066394279877426, "grad_norm": 1.423634387031326, "learning_rate": 1.840814230687416e-05, "loss": 0.7227, "step": 2023 }, { "epoch": 0.20674157303370785, "grad_norm": 1.487160879474208, "learning_rate": 1.8406350967666774e-05, "loss": 0.7468, "step": 2024 }, { "epoch": 0.20684371807967314, "grad_norm": 1.3286581571720821, "learning_rate": 1.8404558708378843e-05, "loss": 0.8188, "step": 2025 }, { "epoch": 0.2069458631256384, "grad_norm": 1.5704137611571891, "learning_rate": 1.8402765529206528e-05, "loss": 0.777, "step": 2026 }, { "epoch": 0.20704800817160368, "grad_norm": 1.6179467516882442, "learning_rate": 1.84009714303461e-05, "loss": 0.8077, "step": 2027 }, { "epoch": 0.20715015321756894, "grad_norm": 1.3004220069934909, "learning_rate": 1.839917641199392e-05, "loss": 0.6739, "step": 2028 }, { "epoch": 0.20725229826353422, "grad_norm": 1.500408916975731, "learning_rate": 1.8397380474346456e-05, "loss": 0.7216, "step": 2029 }, { "epoch": 0.20735444330949948, "grad_norm": 1.4209433267665676, "learning_rate": 1.839558361760027e-05, "loss": 0.7866, "step": 2030 }, { "epoch": 0.20745658835546477, "grad_norm": 1.5818601032682016, "learning_rate": 1.839378584195204e-05, "loss": 0.7316, "step": 2031 }, { "epoch": 0.20755873340143002, "grad_norm": 1.3869440640764208, "learning_rate": 1.839198714759852e-05, "loss": 0.7804, "step": 2032 }, { "epoch": 0.2076608784473953, "grad_norm": 1.6204912852391895, "learning_rate": 1.839018753473659e-05, "loss": 0.6945, "step": 2033 }, { "epoch": 0.20776302349336057, "grad_norm": 1.468773924920138, "learning_rate": 1.838838700356321e-05, "loss": 0.7717, "step": 2034 }, { "epoch": 0.20786516853932585, "grad_norm": 1.4269646212277507, "learning_rate": 1.838658555427545e-05, "loss": 0.7997, "step": 2035 }, { "epoch": 0.2079673135852911, "grad_norm": 1.3739776483999735, "learning_rate": 1.8384783187070487e-05, "loss": 0.7342, "step": 2036 }, { "epoch": 0.2080694586312564, "grad_norm": 1.3562759997073126, "learning_rate": 1.838297990214559e-05, "loss": 0.7419, "step": 2037 }, { "epoch": 0.20817160367722165, "grad_norm": 1.5417145194308144, "learning_rate": 1.838117569969812e-05, "loss": 0.7188, "step": 2038 }, { "epoch": 0.20827374872318694, "grad_norm": 1.3101003277730476, "learning_rate": 1.837937057992556e-05, "loss": 0.7758, "step": 2039 }, { "epoch": 0.2083758937691522, "grad_norm": 1.4675705583988556, "learning_rate": 1.8377564543025474e-05, "loss": 0.6654, "step": 2040 }, { "epoch": 0.20847803881511748, "grad_norm": 1.7065416919413976, "learning_rate": 1.8375757589195532e-05, "loss": 0.817, "step": 2041 }, { "epoch": 0.20858018386108274, "grad_norm": 1.4444808048060602, "learning_rate": 1.8373949718633518e-05, "loss": 0.7596, "step": 2042 }, { "epoch": 0.20868232890704802, "grad_norm": 1.3794600152584189, "learning_rate": 1.8372140931537294e-05, "loss": 0.7312, "step": 2043 }, { "epoch": 0.20878447395301328, "grad_norm": 1.571723845117633, "learning_rate": 1.837033122810483e-05, "loss": 0.8345, "step": 2044 }, { "epoch": 0.20888661899897856, "grad_norm": 1.3139337186162048, "learning_rate": 1.8368520608534216e-05, "loss": 0.7829, "step": 2045 }, { "epoch": 0.20898876404494382, "grad_norm": 1.4616660447801493, "learning_rate": 1.8366709073023608e-05, "loss": 0.7297, "step": 2046 }, { "epoch": 0.20909090909090908, "grad_norm": 1.4744081581011423, "learning_rate": 1.8364896621771287e-05, "loss": 0.7169, "step": 2047 }, { "epoch": 0.20919305413687436, "grad_norm": 1.510535600973433, "learning_rate": 1.8363083254975627e-05, "loss": 0.7373, "step": 2048 }, { "epoch": 0.20929519918283962, "grad_norm": 1.4004000289526342, "learning_rate": 1.83612689728351e-05, "loss": 0.684, "step": 2049 }, { "epoch": 0.2093973442288049, "grad_norm": 1.4307149467262503, "learning_rate": 1.8359453775548283e-05, "loss": 0.6795, "step": 2050 }, { "epoch": 0.20949948927477016, "grad_norm": 1.7064406437417563, "learning_rate": 1.8357637663313852e-05, "loss": 0.7316, "step": 2051 }, { "epoch": 0.20960163432073545, "grad_norm": 1.541461610898297, "learning_rate": 1.8355820636330577e-05, "loss": 0.793, "step": 2052 }, { "epoch": 0.2097037793667007, "grad_norm": 1.5889879491363323, "learning_rate": 1.835400269479733e-05, "loss": 0.8224, "step": 2053 }, { "epoch": 0.209805924412666, "grad_norm": 1.4265537252153697, "learning_rate": 1.8352183838913097e-05, "loss": 0.8203, "step": 2054 }, { "epoch": 0.20990806945863125, "grad_norm": 1.6309985673485417, "learning_rate": 1.8350364068876946e-05, "loss": 0.6241, "step": 2055 }, { "epoch": 0.21001021450459653, "grad_norm": 1.407549674262613, "learning_rate": 1.834854338488805e-05, "loss": 0.7287, "step": 2056 }, { "epoch": 0.2101123595505618, "grad_norm": 1.413590711742134, "learning_rate": 1.834672178714569e-05, "loss": 0.8517, "step": 2057 }, { "epoch": 0.21021450459652707, "grad_norm": 1.3353719348380286, "learning_rate": 1.8344899275849237e-05, "loss": 0.7017, "step": 2058 }, { "epoch": 0.21031664964249233, "grad_norm": 1.604792385605004, "learning_rate": 1.8343075851198163e-05, "loss": 0.7329, "step": 2059 }, { "epoch": 0.21041879468845762, "grad_norm": 1.3924637181215194, "learning_rate": 1.8341251513392052e-05, "loss": 0.6772, "step": 2060 }, { "epoch": 0.21052093973442287, "grad_norm": 1.5911575409181304, "learning_rate": 1.8339426262630572e-05, "loss": 0.7236, "step": 2061 }, { "epoch": 0.21062308478038816, "grad_norm": 1.4542117485798298, "learning_rate": 1.8337600099113496e-05, "loss": 0.6965, "step": 2062 }, { "epoch": 0.21072522982635342, "grad_norm": 1.4101642710176907, "learning_rate": 1.8335773023040703e-05, "loss": 0.757, "step": 2063 }, { "epoch": 0.2108273748723187, "grad_norm": 1.4393450520729103, "learning_rate": 1.833394503461217e-05, "loss": 0.7471, "step": 2064 }, { "epoch": 0.21092951991828396, "grad_norm": 1.5948754727264873, "learning_rate": 1.8332116134027972e-05, "loss": 0.723, "step": 2065 }, { "epoch": 0.21103166496424924, "grad_norm": 1.4668231794758515, "learning_rate": 1.8330286321488273e-05, "loss": 0.6852, "step": 2066 }, { "epoch": 0.2111338100102145, "grad_norm": 1.51108979980516, "learning_rate": 1.832845559719336e-05, "loss": 0.7959, "step": 2067 }, { "epoch": 0.21123595505617979, "grad_norm": 1.5123843416906917, "learning_rate": 1.8326623961343592e-05, "loss": 0.8118, "step": 2068 }, { "epoch": 0.21133810010214504, "grad_norm": 1.6035584156977898, "learning_rate": 1.8324791414139456e-05, "loss": 0.828, "step": 2069 }, { "epoch": 0.21144024514811033, "grad_norm": 1.3744926444855061, "learning_rate": 1.8322957955781525e-05, "loss": 0.7085, "step": 2070 }, { "epoch": 0.21154239019407559, "grad_norm": 1.388727978012941, "learning_rate": 1.8321123586470467e-05, "loss": 0.6965, "step": 2071 }, { "epoch": 0.21164453524004087, "grad_norm": 1.5973752776047894, "learning_rate": 1.8319288306407053e-05, "loss": 0.7312, "step": 2072 }, { "epoch": 0.21174668028600613, "grad_norm": 1.606928888842806, "learning_rate": 1.8317452115792163e-05, "loss": 0.7089, "step": 2073 }, { "epoch": 0.2118488253319714, "grad_norm": 1.3935734696945647, "learning_rate": 1.8315615014826762e-05, "loss": 0.7905, "step": 2074 }, { "epoch": 0.21195097037793667, "grad_norm": 1.6725542655152714, "learning_rate": 1.8313777003711927e-05, "loss": 0.755, "step": 2075 }, { "epoch": 0.21205311542390193, "grad_norm": 1.6531169744306426, "learning_rate": 1.8311938082648827e-05, "loss": 0.7662, "step": 2076 }, { "epoch": 0.2121552604698672, "grad_norm": 1.6492877696717072, "learning_rate": 1.8310098251838735e-05, "loss": 0.6538, "step": 2077 }, { "epoch": 0.21225740551583247, "grad_norm": 1.4807942407080226, "learning_rate": 1.8308257511483018e-05, "loss": 0.7229, "step": 2078 }, { "epoch": 0.21235955056179776, "grad_norm": 1.581636977746479, "learning_rate": 1.8306415861783148e-05, "loss": 0.7848, "step": 2079 }, { "epoch": 0.212461695607763, "grad_norm": 1.3967852371062568, "learning_rate": 1.83045733029407e-05, "loss": 0.785, "step": 2080 }, { "epoch": 0.2125638406537283, "grad_norm": 1.4523256904193949, "learning_rate": 1.8302729835157335e-05, "loss": 0.7177, "step": 2081 }, { "epoch": 0.21266598569969355, "grad_norm": 1.306571562523013, "learning_rate": 1.8300885458634824e-05, "loss": 0.7532, "step": 2082 }, { "epoch": 0.21276813074565884, "grad_norm": 1.6133269522473785, "learning_rate": 1.829904017357504e-05, "loss": 0.7049, "step": 2083 }, { "epoch": 0.2128702757916241, "grad_norm": 1.5373634117186494, "learning_rate": 1.8297193980179943e-05, "loss": 0.7188, "step": 2084 }, { "epoch": 0.21297242083758938, "grad_norm": 1.3887690358930573, "learning_rate": 1.8295346878651606e-05, "loss": 0.6514, "step": 2085 }, { "epoch": 0.21307456588355464, "grad_norm": 1.4938789958938496, "learning_rate": 1.8293498869192198e-05, "loss": 0.7768, "step": 2086 }, { "epoch": 0.21317671092951992, "grad_norm": 1.4819939852680848, "learning_rate": 1.8291649952003972e-05, "loss": 0.7177, "step": 2087 }, { "epoch": 0.21327885597548518, "grad_norm": 1.5502525209606006, "learning_rate": 1.8289800127289308e-05, "loss": 0.7997, "step": 2088 }, { "epoch": 0.21338100102145047, "grad_norm": 1.540853794114667, "learning_rate": 1.828794939525066e-05, "loss": 0.6858, "step": 2089 }, { "epoch": 0.21348314606741572, "grad_norm": 1.5431259225314777, "learning_rate": 1.82860977560906e-05, "loss": 0.7772, "step": 2090 }, { "epoch": 0.213585291113381, "grad_norm": 1.3593147927026836, "learning_rate": 1.8284245210011787e-05, "loss": 0.6476, "step": 2091 }, { "epoch": 0.21368743615934627, "grad_norm": 1.788935535671766, "learning_rate": 1.828239175721698e-05, "loss": 0.7655, "step": 2092 }, { "epoch": 0.21378958120531155, "grad_norm": 1.488968606739639, "learning_rate": 1.8280537397909048e-05, "loss": 0.8084, "step": 2093 }, { "epoch": 0.2138917262512768, "grad_norm": 1.4409127232229466, "learning_rate": 1.8278682132290946e-05, "loss": 0.7598, "step": 2094 }, { "epoch": 0.2139938712972421, "grad_norm": 1.5499899735415374, "learning_rate": 1.8276825960565733e-05, "loss": 0.8267, "step": 2095 }, { "epoch": 0.21409601634320735, "grad_norm": 1.428975859350018, "learning_rate": 1.8274968882936576e-05, "loss": 0.6844, "step": 2096 }, { "epoch": 0.21419816138917264, "grad_norm": 1.3985035190401145, "learning_rate": 1.8273110899606727e-05, "loss": 0.7164, "step": 2097 }, { "epoch": 0.2143003064351379, "grad_norm": 1.6055106408312223, "learning_rate": 1.8271252010779546e-05, "loss": 0.7214, "step": 2098 }, { "epoch": 0.21440245148110318, "grad_norm": 1.6122077685824703, "learning_rate": 1.8269392216658485e-05, "loss": 0.732, "step": 2099 }, { "epoch": 0.21450459652706844, "grad_norm": 1.5042028174754953, "learning_rate": 1.8267531517447104e-05, "loss": 0.8137, "step": 2100 }, { "epoch": 0.21460674157303372, "grad_norm": 1.3900401158657485, "learning_rate": 1.826566991334906e-05, "loss": 0.7466, "step": 2101 }, { "epoch": 0.21470888661899898, "grad_norm": 1.636502160804699, "learning_rate": 1.82638074045681e-05, "loss": 0.8565, "step": 2102 }, { "epoch": 0.21481103166496424, "grad_norm": 1.3915238660264562, "learning_rate": 1.8261943991308082e-05, "loss": 0.6745, "step": 2103 }, { "epoch": 0.21491317671092952, "grad_norm": 1.4444631272058663, "learning_rate": 1.826007967377296e-05, "loss": 0.8136, "step": 2104 }, { "epoch": 0.21501532175689478, "grad_norm": 1.6304712970290334, "learning_rate": 1.8258214452166773e-05, "loss": 0.8468, "step": 2105 }, { "epoch": 0.21511746680286006, "grad_norm": 1.4084882167106183, "learning_rate": 1.8256348326693684e-05, "loss": 0.7414, "step": 2106 }, { "epoch": 0.21521961184882532, "grad_norm": 1.7061448383206608, "learning_rate": 1.8254481297557935e-05, "loss": 0.7698, "step": 2107 }, { "epoch": 0.2153217568947906, "grad_norm": 1.7035587949568587, "learning_rate": 1.8252613364963875e-05, "loss": 0.8073, "step": 2108 }, { "epoch": 0.21542390194075586, "grad_norm": 1.4553185199455754, "learning_rate": 1.825074452911595e-05, "loss": 0.7455, "step": 2109 }, { "epoch": 0.21552604698672115, "grad_norm": 1.4525814825844223, "learning_rate": 1.8248874790218707e-05, "loss": 0.6685, "step": 2110 }, { "epoch": 0.2156281920326864, "grad_norm": 1.4069050366260896, "learning_rate": 1.824700414847679e-05, "loss": 0.8173, "step": 2111 }, { "epoch": 0.2157303370786517, "grad_norm": 1.3989181068482597, "learning_rate": 1.8245132604094936e-05, "loss": 0.6993, "step": 2112 }, { "epoch": 0.21583248212461695, "grad_norm": 1.4719824888977369, "learning_rate": 1.8243260157277995e-05, "loss": 0.7268, "step": 2113 }, { "epoch": 0.21593462717058223, "grad_norm": 1.489918526224448, "learning_rate": 1.8241386808230903e-05, "loss": 0.6918, "step": 2114 }, { "epoch": 0.2160367722165475, "grad_norm": 1.4178084805745839, "learning_rate": 1.82395125571587e-05, "loss": 0.8055, "step": 2115 }, { "epoch": 0.21613891726251278, "grad_norm": 1.4900179229326167, "learning_rate": 1.8237637404266526e-05, "loss": 0.7398, "step": 2116 }, { "epoch": 0.21624106230847803, "grad_norm": 1.4894374211654242, "learning_rate": 1.8235761349759612e-05, "loss": 0.8566, "step": 2117 }, { "epoch": 0.21634320735444332, "grad_norm": 1.5581749471972128, "learning_rate": 1.8233884393843298e-05, "loss": 0.7762, "step": 2118 }, { "epoch": 0.21644535240040857, "grad_norm": 1.4518022440033582, "learning_rate": 1.8232006536723024e-05, "loss": 0.7583, "step": 2119 }, { "epoch": 0.21654749744637386, "grad_norm": 1.4844528573118905, "learning_rate": 1.823012777860431e-05, "loss": 0.7391, "step": 2120 }, { "epoch": 0.21664964249233912, "grad_norm": 1.7865463264112236, "learning_rate": 1.8228248119692793e-05, "loss": 0.8152, "step": 2121 }, { "epoch": 0.2167517875383044, "grad_norm": 1.400114069474454, "learning_rate": 1.8226367560194207e-05, "loss": 0.709, "step": 2122 }, { "epoch": 0.21685393258426966, "grad_norm": 1.4013632179523992, "learning_rate": 1.8224486100314373e-05, "loss": 0.7756, "step": 2123 }, { "epoch": 0.21695607763023494, "grad_norm": 1.4682817331361064, "learning_rate": 1.8222603740259225e-05, "loss": 0.7522, "step": 2124 }, { "epoch": 0.2170582226762002, "grad_norm": 1.6217799793622887, "learning_rate": 1.822072048023478e-05, "loss": 0.8209, "step": 2125 }, { "epoch": 0.2171603677221655, "grad_norm": 1.445991947316743, "learning_rate": 1.821883632044717e-05, "loss": 0.7197, "step": 2126 }, { "epoch": 0.21726251276813074, "grad_norm": 1.3955558761203861, "learning_rate": 1.8216951261102617e-05, "loss": 0.7987, "step": 2127 }, { "epoch": 0.21736465781409603, "grad_norm": 1.4410819127944816, "learning_rate": 1.8215065302407434e-05, "loss": 0.716, "step": 2128 }, { "epoch": 0.2174668028600613, "grad_norm": 1.3906123715857208, "learning_rate": 1.821317844456805e-05, "loss": 0.7869, "step": 2129 }, { "epoch": 0.21756894790602654, "grad_norm": 1.7597452841216537, "learning_rate": 1.8211290687790982e-05, "loss": 0.8277, "step": 2130 }, { "epoch": 0.21767109295199183, "grad_norm": 1.684821468580705, "learning_rate": 1.8209402032282836e-05, "loss": 0.7409, "step": 2131 }, { "epoch": 0.2177732379979571, "grad_norm": 1.6270149238149112, "learning_rate": 1.8207512478250338e-05, "loss": 0.8205, "step": 2132 }, { "epoch": 0.21787538304392237, "grad_norm": 1.5866400383225503, "learning_rate": 1.8205622025900296e-05, "loss": 0.753, "step": 2133 }, { "epoch": 0.21797752808988763, "grad_norm": 1.521239109076847, "learning_rate": 1.8203730675439622e-05, "loss": 0.8686, "step": 2134 }, { "epoch": 0.21807967313585291, "grad_norm": 1.5062432591629473, "learning_rate": 1.8201838427075323e-05, "loss": 0.7432, "step": 2135 }, { "epoch": 0.21818181818181817, "grad_norm": 1.4352007878742952, "learning_rate": 1.819994528101451e-05, "loss": 0.65, "step": 2136 }, { "epoch": 0.21828396322778346, "grad_norm": 1.4154865926017972, "learning_rate": 1.8198051237464393e-05, "loss": 0.672, "step": 2137 }, { "epoch": 0.2183861082737487, "grad_norm": 1.4607241510790332, "learning_rate": 1.8196156296632263e-05, "loss": 0.8406, "step": 2138 }, { "epoch": 0.218488253319714, "grad_norm": 1.617681807204396, "learning_rate": 1.8194260458725533e-05, "loss": 0.7737, "step": 2139 }, { "epoch": 0.21859039836567926, "grad_norm": 1.2463906254703652, "learning_rate": 1.8192363723951705e-05, "loss": 0.7782, "step": 2140 }, { "epoch": 0.21869254341164454, "grad_norm": 1.4870730004981867, "learning_rate": 1.8190466092518375e-05, "loss": 0.7454, "step": 2141 }, { "epoch": 0.2187946884576098, "grad_norm": 1.5918394593422303, "learning_rate": 1.8188567564633237e-05, "loss": 0.7298, "step": 2142 }, { "epoch": 0.21889683350357508, "grad_norm": 1.5701632289869287, "learning_rate": 1.818666814050409e-05, "loss": 0.7444, "step": 2143 }, { "epoch": 0.21899897854954034, "grad_norm": 1.5344081849011026, "learning_rate": 1.8184767820338825e-05, "loss": 0.7035, "step": 2144 }, { "epoch": 0.21910112359550563, "grad_norm": 1.4940957162966149, "learning_rate": 1.8182866604345433e-05, "loss": 0.7385, "step": 2145 }, { "epoch": 0.21920326864147088, "grad_norm": 1.6896339591632972, "learning_rate": 1.8180964492732007e-05, "loss": 0.6489, "step": 2146 }, { "epoch": 0.21930541368743617, "grad_norm": 1.3852214206008133, "learning_rate": 1.817906148570673e-05, "loss": 0.704, "step": 2147 }, { "epoch": 0.21940755873340143, "grad_norm": 1.553754856312667, "learning_rate": 1.817715758347789e-05, "loss": 0.8483, "step": 2148 }, { "epoch": 0.2195097037793667, "grad_norm": 1.5647800268835241, "learning_rate": 1.817525278625387e-05, "loss": 0.7021, "step": 2149 }, { "epoch": 0.21961184882533197, "grad_norm": 1.6297907260472146, "learning_rate": 1.8173347094243145e-05, "loss": 0.8895, "step": 2150 }, { "epoch": 0.21971399387129725, "grad_norm": 1.3964193726162373, "learning_rate": 1.8171440507654306e-05, "loss": 0.6856, "step": 2151 }, { "epoch": 0.2198161389172625, "grad_norm": 1.481262439060626, "learning_rate": 1.8169533026696022e-05, "loss": 0.805, "step": 2152 }, { "epoch": 0.2199182839632278, "grad_norm": 1.4603100794421457, "learning_rate": 1.816762465157707e-05, "loss": 0.7575, "step": 2153 }, { "epoch": 0.22002042900919305, "grad_norm": 1.489964007505512, "learning_rate": 1.8165715382506322e-05, "loss": 0.8119, "step": 2154 }, { "epoch": 0.22012257405515834, "grad_norm": 1.5148362329959033, "learning_rate": 1.816380521969275e-05, "loss": 0.7456, "step": 2155 }, { "epoch": 0.2202247191011236, "grad_norm": 1.4194025002024102, "learning_rate": 1.8161894163345425e-05, "loss": 0.6658, "step": 2156 }, { "epoch": 0.22032686414708888, "grad_norm": 1.5293083797520495, "learning_rate": 1.8159982213673507e-05, "loss": 0.7905, "step": 2157 }, { "epoch": 0.22042900919305414, "grad_norm": 1.3632866001313433, "learning_rate": 1.8158069370886268e-05, "loss": 0.7123, "step": 2158 }, { "epoch": 0.2205311542390194, "grad_norm": 1.6215404876928532, "learning_rate": 1.815615563519306e-05, "loss": 0.7296, "step": 2159 }, { "epoch": 0.22063329928498468, "grad_norm": 1.4363715359576077, "learning_rate": 1.815424100680335e-05, "loss": 0.7896, "step": 2160 }, { "epoch": 0.22073544433094994, "grad_norm": 1.4995682322010773, "learning_rate": 1.8152325485926695e-05, "loss": 0.7912, "step": 2161 }, { "epoch": 0.22083758937691522, "grad_norm": 1.4691670166720254, "learning_rate": 1.8150409072772747e-05, "loss": 0.7641, "step": 2162 }, { "epoch": 0.22093973442288048, "grad_norm": 1.5246341366930463, "learning_rate": 1.814849176755126e-05, "loss": 0.7959, "step": 2163 }, { "epoch": 0.22104187946884576, "grad_norm": 1.4281279670554279, "learning_rate": 1.814657357047209e-05, "loss": 0.7247, "step": 2164 }, { "epoch": 0.22114402451481102, "grad_norm": 1.5555611075939852, "learning_rate": 1.814465448174517e-05, "loss": 0.7781, "step": 2165 }, { "epoch": 0.2212461695607763, "grad_norm": 1.6561648739818293, "learning_rate": 1.8142734501580563e-05, "loss": 0.6929, "step": 2166 }, { "epoch": 0.22134831460674156, "grad_norm": 1.494375267982028, "learning_rate": 1.81408136301884e-05, "loss": 0.7546, "step": 2167 }, { "epoch": 0.22145045965270685, "grad_norm": 1.461359056038184, "learning_rate": 1.8138891867778924e-05, "loss": 0.7025, "step": 2168 }, { "epoch": 0.2215526046986721, "grad_norm": 1.5122281699532736, "learning_rate": 1.813696921456248e-05, "loss": 0.7346, "step": 2169 }, { "epoch": 0.2216547497446374, "grad_norm": 1.3092740253981447, "learning_rate": 1.8135045670749495e-05, "loss": 0.6549, "step": 2170 }, { "epoch": 0.22175689479060265, "grad_norm": 1.509960393811027, "learning_rate": 1.8133121236550503e-05, "loss": 0.7185, "step": 2171 }, { "epoch": 0.22185903983656793, "grad_norm": 1.452398623609375, "learning_rate": 1.813119591217614e-05, "loss": 0.7273, "step": 2172 }, { "epoch": 0.2219611848825332, "grad_norm": 1.5024907486125059, "learning_rate": 1.812926969783713e-05, "loss": 0.7154, "step": 2173 }, { "epoch": 0.22206332992849848, "grad_norm": 1.5458364026483244, "learning_rate": 1.81273425937443e-05, "loss": 0.7987, "step": 2174 }, { "epoch": 0.22216547497446373, "grad_norm": 1.4018816479705711, "learning_rate": 1.812541460010857e-05, "loss": 0.592, "step": 2175 }, { "epoch": 0.22226762002042902, "grad_norm": 1.5922269384916061, "learning_rate": 1.8123485717140966e-05, "loss": 0.7026, "step": 2176 }, { "epoch": 0.22236976506639428, "grad_norm": 1.362015913345122, "learning_rate": 1.81215559450526e-05, "loss": 0.6748, "step": 2177 }, { "epoch": 0.22247191011235956, "grad_norm": 1.5506780225655552, "learning_rate": 1.8119625284054688e-05, "loss": 0.6892, "step": 2178 }, { "epoch": 0.22257405515832482, "grad_norm": 1.7001436218866008, "learning_rate": 1.8117693734358546e-05, "loss": 0.74, "step": 2179 }, { "epoch": 0.2226762002042901, "grad_norm": 1.6096451273504218, "learning_rate": 1.8115761296175577e-05, "loss": 0.8141, "step": 2180 }, { "epoch": 0.22277834525025536, "grad_norm": 1.3747471864596215, "learning_rate": 1.8113827969717294e-05, "loss": 0.6984, "step": 2181 }, { "epoch": 0.22288049029622065, "grad_norm": 1.4312242756424636, "learning_rate": 1.811189375519529e-05, "loss": 0.7605, "step": 2182 }, { "epoch": 0.2229826353421859, "grad_norm": 1.5555113687906015, "learning_rate": 1.810995865282128e-05, "loss": 0.87, "step": 2183 }, { "epoch": 0.2230847803881512, "grad_norm": 1.388715590990118, "learning_rate": 1.8108022662807056e-05, "loss": 0.6701, "step": 2184 }, { "epoch": 0.22318692543411645, "grad_norm": 1.5447230233770404, "learning_rate": 1.810608578536451e-05, "loss": 0.6952, "step": 2185 }, { "epoch": 0.2232890704800817, "grad_norm": 1.534131897551025, "learning_rate": 1.810414802070564e-05, "loss": 0.7598, "step": 2186 }, { "epoch": 0.223391215526047, "grad_norm": 1.6157871090779623, "learning_rate": 1.810220936904253e-05, "loss": 0.8224, "step": 2187 }, { "epoch": 0.22349336057201225, "grad_norm": 1.4937764987169526, "learning_rate": 1.810026983058737e-05, "loss": 0.8178, "step": 2188 }, { "epoch": 0.22359550561797753, "grad_norm": 1.5226654330374452, "learning_rate": 1.8098329405552445e-05, "loss": 0.8251, "step": 2189 }, { "epoch": 0.2236976506639428, "grad_norm": 1.4318903812085957, "learning_rate": 1.809638809415014e-05, "loss": 0.7453, "step": 2190 }, { "epoch": 0.22379979570990807, "grad_norm": 1.570671992897667, "learning_rate": 1.8094445896592917e-05, "loss": 0.77, "step": 2191 }, { "epoch": 0.22390194075587333, "grad_norm": 1.5508999517808788, "learning_rate": 1.809250281309337e-05, "loss": 0.7708, "step": 2192 }, { "epoch": 0.22400408580183861, "grad_norm": 1.5953760915182307, "learning_rate": 1.8090558843864157e-05, "loss": 0.755, "step": 2193 }, { "epoch": 0.22410623084780387, "grad_norm": 1.5282893215491953, "learning_rate": 1.808861398911805e-05, "loss": 0.8884, "step": 2194 }, { "epoch": 0.22420837589376916, "grad_norm": 1.4078613235692472, "learning_rate": 1.8086668249067917e-05, "loss": 0.7796, "step": 2195 }, { "epoch": 0.22431052093973441, "grad_norm": 1.4086118130852237, "learning_rate": 1.8084721623926723e-05, "loss": 0.7473, "step": 2196 }, { "epoch": 0.2244126659856997, "grad_norm": 1.5961317142602676, "learning_rate": 1.808277411390752e-05, "loss": 0.7479, "step": 2197 }, { "epoch": 0.22451481103166496, "grad_norm": 1.3118973019852176, "learning_rate": 1.8080825719223468e-05, "loss": 0.7609, "step": 2198 }, { "epoch": 0.22461695607763024, "grad_norm": 1.500695164054136, "learning_rate": 1.8078876440087825e-05, "loss": 0.8477, "step": 2199 }, { "epoch": 0.2247191011235955, "grad_norm": 1.4424821144357522, "learning_rate": 1.807692627671393e-05, "loss": 0.7675, "step": 2200 }, { "epoch": 0.22482124616956078, "grad_norm": 1.567971020963631, "learning_rate": 1.8074975229315238e-05, "loss": 0.7718, "step": 2201 }, { "epoch": 0.22492339121552604, "grad_norm": 1.4949306728392664, "learning_rate": 1.807302329810529e-05, "loss": 0.7225, "step": 2202 }, { "epoch": 0.22502553626149133, "grad_norm": 1.5428241516277315, "learning_rate": 1.8071070483297726e-05, "loss": 0.7814, "step": 2203 }, { "epoch": 0.22512768130745658, "grad_norm": 1.5234889319730684, "learning_rate": 1.8069116785106284e-05, "loss": 0.741, "step": 2204 }, { "epoch": 0.22522982635342187, "grad_norm": 1.5758575602871117, "learning_rate": 1.80671622037448e-05, "loss": 0.7757, "step": 2205 }, { "epoch": 0.22533197139938713, "grad_norm": 1.7847750150565742, "learning_rate": 1.8065206739427194e-05, "loss": 0.7292, "step": 2206 }, { "epoch": 0.2254341164453524, "grad_norm": 1.5244089832649776, "learning_rate": 1.80632503923675e-05, "loss": 0.7869, "step": 2207 }, { "epoch": 0.22553626149131767, "grad_norm": 1.5266363300135084, "learning_rate": 1.8061293162779844e-05, "loss": 0.678, "step": 2208 }, { "epoch": 0.22563840653728295, "grad_norm": 2.016420386121811, "learning_rate": 1.8059335050878444e-05, "loss": 0.7701, "step": 2209 }, { "epoch": 0.2257405515832482, "grad_norm": 1.513049220012714, "learning_rate": 1.8057376056877614e-05, "loss": 0.7414, "step": 2210 }, { "epoch": 0.2258426966292135, "grad_norm": 1.3410379113382804, "learning_rate": 1.8055416180991768e-05, "loss": 0.6121, "step": 2211 }, { "epoch": 0.22594484167517875, "grad_norm": 1.553020154456576, "learning_rate": 1.805345542343542e-05, "loss": 0.7998, "step": 2212 }, { "epoch": 0.226046986721144, "grad_norm": 1.3773108991464726, "learning_rate": 1.8051493784423168e-05, "loss": 0.7759, "step": 2213 }, { "epoch": 0.2261491317671093, "grad_norm": 1.474910656165063, "learning_rate": 1.804953126416972e-05, "loss": 0.6994, "step": 2214 }, { "epoch": 0.22625127681307455, "grad_norm": 1.3407411016043098, "learning_rate": 1.8047567862889878e-05, "loss": 0.6962, "step": 2215 }, { "epoch": 0.22635342185903984, "grad_norm": 1.729455997868836, "learning_rate": 1.804560358079853e-05, "loss": 0.8504, "step": 2216 }, { "epoch": 0.2264555669050051, "grad_norm": 1.5392278751420363, "learning_rate": 1.8043638418110677e-05, "loss": 0.728, "step": 2217 }, { "epoch": 0.22655771195097038, "grad_norm": 1.3839962961302597, "learning_rate": 1.80416723750414e-05, "loss": 0.7321, "step": 2218 }, { "epoch": 0.22665985699693564, "grad_norm": 1.495738662653201, "learning_rate": 1.8039705451805883e-05, "loss": 0.7508, "step": 2219 }, { "epoch": 0.22676200204290092, "grad_norm": 1.6459511767404105, "learning_rate": 1.8037737648619413e-05, "loss": 0.757, "step": 2220 }, { "epoch": 0.22686414708886618, "grad_norm": 1.4631819411596434, "learning_rate": 1.803576896569736e-05, "loss": 0.7744, "step": 2221 }, { "epoch": 0.22696629213483147, "grad_norm": 1.5913993505268982, "learning_rate": 1.803379940325521e-05, "loss": 0.7432, "step": 2222 }, { "epoch": 0.22706843718079672, "grad_norm": 1.566916547400519, "learning_rate": 1.803182896150852e-05, "loss": 0.8052, "step": 2223 }, { "epoch": 0.227170582226762, "grad_norm": 1.5511398490670008, "learning_rate": 1.8029857640672957e-05, "loss": 0.8809, "step": 2224 }, { "epoch": 0.22727272727272727, "grad_norm": 1.3257929635315593, "learning_rate": 1.8027885440964295e-05, "loss": 0.7429, "step": 2225 }, { "epoch": 0.22737487231869255, "grad_norm": 1.4638744637389853, "learning_rate": 1.802591236259838e-05, "loss": 0.6428, "step": 2226 }, { "epoch": 0.2274770173646578, "grad_norm": 1.5106584619626535, "learning_rate": 1.8023938405791173e-05, "loss": 0.6864, "step": 2227 }, { "epoch": 0.2275791624106231, "grad_norm": 1.479067116735724, "learning_rate": 1.802196357075872e-05, "loss": 0.8227, "step": 2228 }, { "epoch": 0.22768130745658835, "grad_norm": 1.3970166515304214, "learning_rate": 1.8019987857717178e-05, "loss": 0.746, "step": 2229 }, { "epoch": 0.22778345250255363, "grad_norm": 1.3318998198006518, "learning_rate": 1.801801126688278e-05, "loss": 0.6928, "step": 2230 }, { "epoch": 0.2278855975485189, "grad_norm": 1.502952300935393, "learning_rate": 1.801603379847187e-05, "loss": 0.6767, "step": 2231 }, { "epoch": 0.22798774259448418, "grad_norm": 1.5419401587002324, "learning_rate": 1.801405545270088e-05, "loss": 0.7674, "step": 2232 }, { "epoch": 0.22808988764044943, "grad_norm": 1.5914319304211697, "learning_rate": 1.8012076229786338e-05, "loss": 0.7798, "step": 2233 }, { "epoch": 0.22819203268641472, "grad_norm": 1.4889831167447773, "learning_rate": 1.8010096129944884e-05, "loss": 0.6627, "step": 2234 }, { "epoch": 0.22829417773237998, "grad_norm": 1.5239622940090982, "learning_rate": 1.8008115153393234e-05, "loss": 0.7905, "step": 2235 }, { "epoch": 0.22839632277834526, "grad_norm": 1.4660430251983063, "learning_rate": 1.8006133300348197e-05, "loss": 0.7685, "step": 2236 }, { "epoch": 0.22849846782431052, "grad_norm": 1.4604166406308885, "learning_rate": 1.800415057102671e-05, "loss": 0.8402, "step": 2237 }, { "epoch": 0.2286006128702758, "grad_norm": 1.5125380256688954, "learning_rate": 1.800216696564576e-05, "loss": 0.6569, "step": 2238 }, { "epoch": 0.22870275791624106, "grad_norm": 1.436990583324024, "learning_rate": 1.8000182484422474e-05, "loss": 0.739, "step": 2239 }, { "epoch": 0.22880490296220635, "grad_norm": 1.5639613248402557, "learning_rate": 1.7998197127574043e-05, "loss": 0.7093, "step": 2240 }, { "epoch": 0.2289070480081716, "grad_norm": 1.5102232644057905, "learning_rate": 1.799621089531777e-05, "loss": 0.7061, "step": 2241 }, { "epoch": 0.22900919305413686, "grad_norm": 1.6028482002374724, "learning_rate": 1.7994223787871048e-05, "loss": 0.6863, "step": 2242 }, { "epoch": 0.22911133810010215, "grad_norm": 1.4837161609534522, "learning_rate": 1.7992235805451367e-05, "loss": 0.7504, "step": 2243 }, { "epoch": 0.2292134831460674, "grad_norm": 1.439252436000204, "learning_rate": 1.7990246948276314e-05, "loss": 0.7402, "step": 2244 }, { "epoch": 0.2293156281920327, "grad_norm": 1.548878248967547, "learning_rate": 1.798825721656357e-05, "loss": 0.7152, "step": 2245 }, { "epoch": 0.22941777323799795, "grad_norm": 1.4043650632998554, "learning_rate": 1.798626661053091e-05, "loss": 0.8063, "step": 2246 }, { "epoch": 0.22951991828396323, "grad_norm": 1.385090671964129, "learning_rate": 1.7984275130396214e-05, "loss": 0.7733, "step": 2247 }, { "epoch": 0.2296220633299285, "grad_norm": 1.5282215285001788, "learning_rate": 1.7982282776377443e-05, "loss": 0.7861, "step": 2248 }, { "epoch": 0.22972420837589377, "grad_norm": 1.575765077090057, "learning_rate": 1.7980289548692667e-05, "loss": 0.7618, "step": 2249 }, { "epoch": 0.22982635342185903, "grad_norm": 1.4417347604966313, "learning_rate": 1.7978295447560043e-05, "loss": 0.7698, "step": 2250 }, { "epoch": 0.22992849846782432, "grad_norm": 1.543460035628267, "learning_rate": 1.797630047319783e-05, "loss": 0.7873, "step": 2251 }, { "epoch": 0.23003064351378957, "grad_norm": 1.542635570381685, "learning_rate": 1.7974304625824373e-05, "loss": 0.8224, "step": 2252 }, { "epoch": 0.23013278855975486, "grad_norm": 1.3440967047450516, "learning_rate": 1.7972307905658123e-05, "loss": 0.6651, "step": 2253 }, { "epoch": 0.23023493360572012, "grad_norm": 1.6517787884755837, "learning_rate": 1.7970310312917624e-05, "loss": 0.6768, "step": 2254 }, { "epoch": 0.2303370786516854, "grad_norm": 1.4379487366888686, "learning_rate": 1.796831184782151e-05, "loss": 0.6449, "step": 2255 }, { "epoch": 0.23043922369765066, "grad_norm": 1.2802216611143595, "learning_rate": 1.7966312510588517e-05, "loss": 0.645, "step": 2256 }, { "epoch": 0.23054136874361594, "grad_norm": 1.5774276613648448, "learning_rate": 1.7964312301437474e-05, "loss": 0.7485, "step": 2257 }, { "epoch": 0.2306435137895812, "grad_norm": 1.5859717961743545, "learning_rate": 1.7962311220587304e-05, "loss": 0.849, "step": 2258 }, { "epoch": 0.23074565883554649, "grad_norm": 1.440608682898191, "learning_rate": 1.7960309268257024e-05, "loss": 0.7194, "step": 2259 }, { "epoch": 0.23084780388151174, "grad_norm": 1.418599357047127, "learning_rate": 1.7958306444665755e-05, "loss": 0.776, "step": 2260 }, { "epoch": 0.23094994892747703, "grad_norm": 1.6003001298581863, "learning_rate": 1.79563027500327e-05, "loss": 0.7081, "step": 2261 }, { "epoch": 0.23105209397344229, "grad_norm": 1.5156679118681746, "learning_rate": 1.7954298184577173e-05, "loss": 0.8683, "step": 2262 }, { "epoch": 0.23115423901940757, "grad_norm": 1.5114724498732142, "learning_rate": 1.795229274851857e-05, "loss": 0.7587, "step": 2263 }, { "epoch": 0.23125638406537283, "grad_norm": 1.559938467075922, "learning_rate": 1.7950286442076383e-05, "loss": 0.8863, "step": 2264 }, { "epoch": 0.2313585291113381, "grad_norm": 1.3752438335468948, "learning_rate": 1.7948279265470214e-05, "loss": 0.6629, "step": 2265 }, { "epoch": 0.23146067415730337, "grad_norm": 1.5562406263921862, "learning_rate": 1.794627121891974e-05, "loss": 0.7745, "step": 2266 }, { "epoch": 0.23156281920326865, "grad_norm": 1.5226662327447575, "learning_rate": 1.794426230264475e-05, "loss": 0.9143, "step": 2267 }, { "epoch": 0.2316649642492339, "grad_norm": 1.381626866864444, "learning_rate": 1.7942252516865117e-05, "loss": 0.67, "step": 2268 }, { "epoch": 0.23176710929519917, "grad_norm": 1.4066136790909638, "learning_rate": 1.7940241861800814e-05, "loss": 0.7225, "step": 2269 }, { "epoch": 0.23186925434116445, "grad_norm": 1.2364683906554712, "learning_rate": 1.793823033767191e-05, "loss": 0.6689, "step": 2270 }, { "epoch": 0.2319713993871297, "grad_norm": 1.5745694456952424, "learning_rate": 1.7936217944698566e-05, "loss": 0.759, "step": 2271 }, { "epoch": 0.232073544433095, "grad_norm": 1.5910801588719057, "learning_rate": 1.793420468310104e-05, "loss": 0.761, "step": 2272 }, { "epoch": 0.23217568947906025, "grad_norm": 1.5634218543931175, "learning_rate": 1.7932190553099687e-05, "loss": 0.7146, "step": 2273 }, { "epoch": 0.23227783452502554, "grad_norm": 1.445991166567057, "learning_rate": 1.7930175554914955e-05, "loss": 0.8187, "step": 2274 }, { "epoch": 0.2323799795709908, "grad_norm": 1.613578782633479, "learning_rate": 1.7928159688767382e-05, "loss": 0.7272, "step": 2275 }, { "epoch": 0.23248212461695608, "grad_norm": 1.4775973486812264, "learning_rate": 1.792614295487761e-05, "loss": 0.7676, "step": 2276 }, { "epoch": 0.23258426966292134, "grad_norm": 1.4112900050313055, "learning_rate": 1.792412535346637e-05, "loss": 0.7633, "step": 2277 }, { "epoch": 0.23268641470888662, "grad_norm": 1.5181607025987163, "learning_rate": 1.7922106884754488e-05, "loss": 0.7006, "step": 2278 }, { "epoch": 0.23278855975485188, "grad_norm": 1.5488822933025903, "learning_rate": 1.7920087548962893e-05, "loss": 0.8707, "step": 2279 }, { "epoch": 0.23289070480081717, "grad_norm": 1.428331501273192, "learning_rate": 1.79180673463126e-05, "loss": 0.7661, "step": 2280 }, { "epoch": 0.23299284984678242, "grad_norm": 1.2971838353715497, "learning_rate": 1.7916046277024716e-05, "loss": 0.6081, "step": 2281 }, { "epoch": 0.2330949948927477, "grad_norm": 1.334761741405998, "learning_rate": 1.791402434132045e-05, "loss": 0.6518, "step": 2282 }, { "epoch": 0.23319713993871297, "grad_norm": 1.4348481025212665, "learning_rate": 1.7912001539421115e-05, "loss": 0.7731, "step": 2283 }, { "epoch": 0.23329928498467825, "grad_norm": 1.5022604650174964, "learning_rate": 1.7909977871548093e-05, "loss": 0.8634, "step": 2284 }, { "epoch": 0.2334014300306435, "grad_norm": 1.495464531484559, "learning_rate": 1.7907953337922886e-05, "loss": 0.7308, "step": 2285 }, { "epoch": 0.2335035750766088, "grad_norm": 1.6540739460663558, "learning_rate": 1.7905927938767078e-05, "loss": 0.8601, "step": 2286 }, { "epoch": 0.23360572012257405, "grad_norm": 1.4413272571248266, "learning_rate": 1.7903901674302346e-05, "loss": 0.7354, "step": 2287 }, { "epoch": 0.23370786516853934, "grad_norm": 1.4878559597083703, "learning_rate": 1.790187454475047e-05, "loss": 0.7418, "step": 2288 }, { "epoch": 0.2338100102145046, "grad_norm": 1.422485039213835, "learning_rate": 1.7899846550333318e-05, "loss": 0.7895, "step": 2289 }, { "epoch": 0.23391215526046988, "grad_norm": 1.4873503032403526, "learning_rate": 1.789781769127286e-05, "loss": 0.7935, "step": 2290 }, { "epoch": 0.23401430030643514, "grad_norm": 1.4863173869585053, "learning_rate": 1.7895787967791155e-05, "loss": 0.7646, "step": 2291 }, { "epoch": 0.23411644535240042, "grad_norm": 1.6957628828034594, "learning_rate": 1.7893757380110352e-05, "loss": 0.695, "step": 2292 }, { "epoch": 0.23421859039836568, "grad_norm": 1.4699294198663664, "learning_rate": 1.7891725928452704e-05, "loss": 0.8275, "step": 2293 }, { "epoch": 0.23432073544433096, "grad_norm": 1.3278889630107682, "learning_rate": 1.7889693613040556e-05, "loss": 0.6059, "step": 2294 }, { "epoch": 0.23442288049029622, "grad_norm": 1.5211357248757373, "learning_rate": 1.788766043409634e-05, "loss": 0.7526, "step": 2295 }, { "epoch": 0.23452502553626148, "grad_norm": 1.4327442554026883, "learning_rate": 1.78856263918426e-05, "loss": 0.6395, "step": 2296 }, { "epoch": 0.23462717058222676, "grad_norm": 1.7133308758286023, "learning_rate": 1.7883591486501953e-05, "loss": 0.8447, "step": 2297 }, { "epoch": 0.23472931562819202, "grad_norm": 1.519156984194039, "learning_rate": 1.7881555718297124e-05, "loss": 0.7098, "step": 2298 }, { "epoch": 0.2348314606741573, "grad_norm": 1.3168533700859788, "learning_rate": 1.7879519087450933e-05, "loss": 0.6878, "step": 2299 }, { "epoch": 0.23493360572012256, "grad_norm": 1.3947351799650776, "learning_rate": 1.7877481594186285e-05, "loss": 0.6934, "step": 2300 }, { "epoch": 0.23503575076608785, "grad_norm": 1.570408057358551, "learning_rate": 1.7875443238726186e-05, "loss": 0.7915, "step": 2301 }, { "epoch": 0.2351378958120531, "grad_norm": 1.557453309794812, "learning_rate": 1.787340402129374e-05, "loss": 0.7653, "step": 2302 }, { "epoch": 0.2352400408580184, "grad_norm": 1.5698234406783456, "learning_rate": 1.787136394211213e-05, "loss": 0.8094, "step": 2303 }, { "epoch": 0.23534218590398365, "grad_norm": 1.422834337062916, "learning_rate": 1.7869323001404657e-05, "loss": 0.7707, "step": 2304 }, { "epoch": 0.23544433094994893, "grad_norm": 1.573601072098912, "learning_rate": 1.7867281199394692e-05, "loss": 0.7173, "step": 2305 }, { "epoch": 0.2355464759959142, "grad_norm": 1.468374520029808, "learning_rate": 1.786523853630572e-05, "loss": 0.8133, "step": 2306 }, { "epoch": 0.23564862104187947, "grad_norm": 1.3964703783396522, "learning_rate": 1.7863195012361313e-05, "loss": 0.8023, "step": 2307 }, { "epoch": 0.23575076608784473, "grad_norm": 1.5276908490215264, "learning_rate": 1.7861150627785124e-05, "loss": 0.7689, "step": 2308 }, { "epoch": 0.23585291113381002, "grad_norm": 1.3439849746765093, "learning_rate": 1.7859105382800925e-05, "loss": 0.7287, "step": 2309 }, { "epoch": 0.23595505617977527, "grad_norm": 1.6359460599042688, "learning_rate": 1.7857059277632562e-05, "loss": 0.8252, "step": 2310 }, { "epoch": 0.23605720122574056, "grad_norm": 1.5300452464971621, "learning_rate": 1.7855012312503984e-05, "loss": 0.7194, "step": 2311 }, { "epoch": 0.23615934627170582, "grad_norm": 1.2742065030103453, "learning_rate": 1.7852964487639234e-05, "loss": 0.7506, "step": 2312 }, { "epoch": 0.2362614913176711, "grad_norm": 1.4162841743178576, "learning_rate": 1.785091580326245e-05, "loss": 0.7138, "step": 2313 }, { "epoch": 0.23636363636363636, "grad_norm": 1.4645735268805768, "learning_rate": 1.7848866259597856e-05, "loss": 0.7468, "step": 2314 }, { "epoch": 0.23646578140960164, "grad_norm": 1.5580889777555762, "learning_rate": 1.784681585686978e-05, "loss": 0.8557, "step": 2315 }, { "epoch": 0.2365679264555669, "grad_norm": 1.3356311348412555, "learning_rate": 1.784476459530264e-05, "loss": 0.6268, "step": 2316 }, { "epoch": 0.2366700715015322, "grad_norm": 1.6289846518430162, "learning_rate": 1.7842712475120944e-05, "loss": 0.8419, "step": 2317 }, { "epoch": 0.23677221654749744, "grad_norm": 1.4988142177307007, "learning_rate": 1.78406594965493e-05, "loss": 0.7524, "step": 2318 }, { "epoch": 0.23687436159346273, "grad_norm": 1.57127692249938, "learning_rate": 1.783860565981241e-05, "loss": 0.9007, "step": 2319 }, { "epoch": 0.236976506639428, "grad_norm": 1.4793839755248703, "learning_rate": 1.7836550965135067e-05, "loss": 0.7987, "step": 2320 }, { "epoch": 0.23707865168539327, "grad_norm": 1.5007739995668181, "learning_rate": 1.7834495412742157e-05, "loss": 0.7539, "step": 2321 }, { "epoch": 0.23718079673135853, "grad_norm": 1.3279366150972207, "learning_rate": 1.783243900285866e-05, "loss": 0.7629, "step": 2322 }, { "epoch": 0.2372829417773238, "grad_norm": 1.4215120029599968, "learning_rate": 1.7830381735709656e-05, "loss": 0.6693, "step": 2323 }, { "epoch": 0.23738508682328907, "grad_norm": 1.4520168704811396, "learning_rate": 1.7828323611520315e-05, "loss": 0.7403, "step": 2324 }, { "epoch": 0.23748723186925433, "grad_norm": 1.6298136376119485, "learning_rate": 1.7826264630515894e-05, "loss": 0.7234, "step": 2325 }, { "epoch": 0.2375893769152196, "grad_norm": 1.4576078641144812, "learning_rate": 1.782420479292175e-05, "loss": 0.6412, "step": 2326 }, { "epoch": 0.23769152196118487, "grad_norm": 1.4276453595865672, "learning_rate": 1.7822144098963342e-05, "loss": 0.7456, "step": 2327 }, { "epoch": 0.23779366700715016, "grad_norm": 1.5220145879582514, "learning_rate": 1.7820082548866206e-05, "loss": 0.7072, "step": 2328 }, { "epoch": 0.2378958120531154, "grad_norm": 1.498515970486518, "learning_rate": 1.7818020142855982e-05, "loss": 0.6709, "step": 2329 }, { "epoch": 0.2379979570990807, "grad_norm": 1.5185129348834838, "learning_rate": 1.7815956881158404e-05, "loss": 0.7443, "step": 2330 }, { "epoch": 0.23810010214504596, "grad_norm": 1.397257004228778, "learning_rate": 1.781389276399929e-05, "loss": 0.6209, "step": 2331 }, { "epoch": 0.23820224719101124, "grad_norm": 1.4470808556333505, "learning_rate": 1.781182779160457e-05, "loss": 0.7866, "step": 2332 }, { "epoch": 0.2383043922369765, "grad_norm": 1.6450900329554976, "learning_rate": 1.780976196420025e-05, "loss": 0.7973, "step": 2333 }, { "epoch": 0.23840653728294178, "grad_norm": 1.5119696538564331, "learning_rate": 1.7807695282012436e-05, "loss": 0.7195, "step": 2334 }, { "epoch": 0.23850868232890704, "grad_norm": 1.5274218770247723, "learning_rate": 1.780562774526733e-05, "loss": 0.8483, "step": 2335 }, { "epoch": 0.23861082737487233, "grad_norm": 1.5547254156086003, "learning_rate": 1.7803559354191226e-05, "loss": 0.8588, "step": 2336 }, { "epoch": 0.23871297242083758, "grad_norm": 1.452495943115992, "learning_rate": 1.7801490109010506e-05, "loss": 0.7402, "step": 2337 }, { "epoch": 0.23881511746680287, "grad_norm": 1.5766398233516086, "learning_rate": 1.7799420009951657e-05, "loss": 0.7572, "step": 2338 }, { "epoch": 0.23891726251276812, "grad_norm": 1.5893488076973672, "learning_rate": 1.7797349057241244e-05, "loss": 0.7321, "step": 2339 }, { "epoch": 0.2390194075587334, "grad_norm": 1.529135594533242, "learning_rate": 1.7795277251105942e-05, "loss": 0.7683, "step": 2340 }, { "epoch": 0.23912155260469867, "grad_norm": 1.5571252938205509, "learning_rate": 1.7793204591772506e-05, "loss": 0.7857, "step": 2341 }, { "epoch": 0.23922369765066395, "grad_norm": 1.4694894733231714, "learning_rate": 1.7791131079467792e-05, "loss": 0.7661, "step": 2342 }, { "epoch": 0.2393258426966292, "grad_norm": 1.4898869219799205, "learning_rate": 1.778905671441875e-05, "loss": 0.7899, "step": 2343 }, { "epoch": 0.2394279877425945, "grad_norm": 1.4120202773707085, "learning_rate": 1.778698149685242e-05, "loss": 0.7782, "step": 2344 }, { "epoch": 0.23953013278855975, "grad_norm": 1.532063777137145, "learning_rate": 1.7784905426995933e-05, "loss": 0.8282, "step": 2345 }, { "epoch": 0.23963227783452504, "grad_norm": 1.4789475295666736, "learning_rate": 1.7782828505076516e-05, "loss": 0.7144, "step": 2346 }, { "epoch": 0.2397344228804903, "grad_norm": 1.4529151144171197, "learning_rate": 1.778075073132149e-05, "loss": 0.7181, "step": 2347 }, { "epoch": 0.23983656792645558, "grad_norm": 1.4809647561212889, "learning_rate": 1.7778672105958272e-05, "loss": 0.7772, "step": 2348 }, { "epoch": 0.23993871297242084, "grad_norm": 1.5143417817580695, "learning_rate": 1.7776592629214362e-05, "loss": 0.7565, "step": 2349 }, { "epoch": 0.24004085801838612, "grad_norm": 1.3095464468008544, "learning_rate": 1.777451230131737e-05, "loss": 0.6469, "step": 2350 }, { "epoch": 0.24014300306435138, "grad_norm": 1.3558318111966303, "learning_rate": 1.777243112249498e-05, "loss": 0.6463, "step": 2351 }, { "epoch": 0.24024514811031664, "grad_norm": 1.6386088260531508, "learning_rate": 1.7770349092974984e-05, "loss": 0.7223, "step": 2352 }, { "epoch": 0.24034729315628192, "grad_norm": 1.3108908501220131, "learning_rate": 1.7768266212985256e-05, "loss": 0.7616, "step": 2353 }, { "epoch": 0.24044943820224718, "grad_norm": 1.5526747792606155, "learning_rate": 1.7766182482753774e-05, "loss": 0.7811, "step": 2354 }, { "epoch": 0.24055158324821246, "grad_norm": 1.4225897167292565, "learning_rate": 1.7764097902508604e-05, "loss": 0.6207, "step": 2355 }, { "epoch": 0.24065372829417772, "grad_norm": 1.5294448049579061, "learning_rate": 1.77620124724779e-05, "loss": 0.7056, "step": 2356 }, { "epoch": 0.240755873340143, "grad_norm": 1.5466285318949227, "learning_rate": 1.7759926192889918e-05, "loss": 0.7202, "step": 2357 }, { "epoch": 0.24085801838610826, "grad_norm": 1.3081101799256516, "learning_rate": 1.7757839063972998e-05, "loss": 0.6699, "step": 2358 }, { "epoch": 0.24096016343207355, "grad_norm": 1.5767085023541487, "learning_rate": 1.775575108595558e-05, "loss": 0.7507, "step": 2359 }, { "epoch": 0.2410623084780388, "grad_norm": 1.4661620916689682, "learning_rate": 1.7753662259066198e-05, "loss": 0.7489, "step": 2360 }, { "epoch": 0.2411644535240041, "grad_norm": 1.6677185033743713, "learning_rate": 1.775157258353347e-05, "loss": 0.8029, "step": 2361 }, { "epoch": 0.24126659856996935, "grad_norm": 1.5180751787437001, "learning_rate": 1.7749482059586112e-05, "loss": 0.7026, "step": 2362 }, { "epoch": 0.24136874361593463, "grad_norm": 1.3320274527157165, "learning_rate": 1.774739068745294e-05, "loss": 0.6834, "step": 2363 }, { "epoch": 0.2414708886618999, "grad_norm": 1.4041688775491048, "learning_rate": 1.7745298467362852e-05, "loss": 0.7352, "step": 2364 }, { "epoch": 0.24157303370786518, "grad_norm": 1.480708371358585, "learning_rate": 1.774320539954484e-05, "loss": 0.7907, "step": 2365 }, { "epoch": 0.24167517875383043, "grad_norm": 1.5085178274648747, "learning_rate": 1.7741111484227996e-05, "loss": 0.8016, "step": 2366 }, { "epoch": 0.24177732379979572, "grad_norm": 1.4210421800690174, "learning_rate": 1.7739016721641498e-05, "loss": 0.6579, "step": 2367 }, { "epoch": 0.24187946884576098, "grad_norm": 1.551289673957261, "learning_rate": 1.773692111201462e-05, "loss": 0.7459, "step": 2368 }, { "epoch": 0.24198161389172626, "grad_norm": 1.464698914506377, "learning_rate": 1.773482465557673e-05, "loss": 0.6947, "step": 2369 }, { "epoch": 0.24208375893769152, "grad_norm": 1.4269626820144585, "learning_rate": 1.773272735255728e-05, "loss": 0.7747, "step": 2370 }, { "epoch": 0.2421859039836568, "grad_norm": 1.494376630773842, "learning_rate": 1.7730629203185825e-05, "loss": 0.6532, "step": 2371 }, { "epoch": 0.24228804902962206, "grad_norm": 1.6400547181897738, "learning_rate": 1.772853020769201e-05, "loss": 0.6818, "step": 2372 }, { "epoch": 0.24239019407558735, "grad_norm": 1.6105616815516357, "learning_rate": 1.7726430366305574e-05, "loss": 0.6569, "step": 2373 }, { "epoch": 0.2424923391215526, "grad_norm": 1.4792272073054589, "learning_rate": 1.772432967925634e-05, "loss": 0.8057, "step": 2374 }, { "epoch": 0.2425944841675179, "grad_norm": 1.4203686544907335, "learning_rate": 1.7722228146774233e-05, "loss": 0.8411, "step": 2375 }, { "epoch": 0.24269662921348314, "grad_norm": 1.4812832404025646, "learning_rate": 1.7720125769089262e-05, "loss": 0.8277, "step": 2376 }, { "epoch": 0.24279877425944843, "grad_norm": 1.6338901346513521, "learning_rate": 1.771802254643154e-05, "loss": 0.69, "step": 2377 }, { "epoch": 0.2429009193054137, "grad_norm": 1.4664711521019633, "learning_rate": 1.7715918479031267e-05, "loss": 0.7233, "step": 2378 }, { "epoch": 0.24300306435137894, "grad_norm": 1.4304005434065687, "learning_rate": 1.7713813567118728e-05, "loss": 0.7249, "step": 2379 }, { "epoch": 0.24310520939734423, "grad_norm": 1.5198919714298205, "learning_rate": 1.7711707810924313e-05, "loss": 0.74, "step": 2380 }, { "epoch": 0.2432073544433095, "grad_norm": 1.3856765039225727, "learning_rate": 1.7709601210678493e-05, "loss": 0.8445, "step": 2381 }, { "epoch": 0.24330949948927477, "grad_norm": 1.3648789124432636, "learning_rate": 1.770749376661184e-05, "loss": 0.7381, "step": 2382 }, { "epoch": 0.24341164453524003, "grad_norm": 1.4066400860429227, "learning_rate": 1.7705385478955014e-05, "loss": 0.853, "step": 2383 }, { "epoch": 0.24351378958120531, "grad_norm": 1.4449899374944386, "learning_rate": 1.770327634793877e-05, "loss": 0.7248, "step": 2384 }, { "epoch": 0.24361593462717057, "grad_norm": 1.4233804420031348, "learning_rate": 1.7701166373793955e-05, "loss": 0.655, "step": 2385 }, { "epoch": 0.24371807967313586, "grad_norm": 1.7629905098275387, "learning_rate": 1.7699055556751502e-05, "loss": 0.8323, "step": 2386 }, { "epoch": 0.24382022471910111, "grad_norm": 1.438484558975726, "learning_rate": 1.7696943897042444e-05, "loss": 0.7486, "step": 2387 }, { "epoch": 0.2439223697650664, "grad_norm": 1.4987952575529664, "learning_rate": 1.7694831394897904e-05, "loss": 0.7808, "step": 2388 }, { "epoch": 0.24402451481103166, "grad_norm": 1.4549700150458358, "learning_rate": 1.7692718050549097e-05, "loss": 0.7289, "step": 2389 }, { "epoch": 0.24412665985699694, "grad_norm": 1.507981833163515, "learning_rate": 1.769060386422733e-05, "loss": 0.8092, "step": 2390 }, { "epoch": 0.2442288049029622, "grad_norm": 1.5320845843661672, "learning_rate": 1.7688488836164e-05, "loss": 0.6939, "step": 2391 }, { "epoch": 0.24433094994892748, "grad_norm": 1.5029724377291633, "learning_rate": 1.7686372966590598e-05, "loss": 0.7223, "step": 2392 }, { "epoch": 0.24443309499489274, "grad_norm": 1.5426524861163202, "learning_rate": 1.768425625573871e-05, "loss": 0.6834, "step": 2393 }, { "epoch": 0.24453524004085803, "grad_norm": 1.3584803287585723, "learning_rate": 1.7682138703840014e-05, "loss": 0.6999, "step": 2394 }, { "epoch": 0.24463738508682328, "grad_norm": 1.450883767963888, "learning_rate": 1.768002031112627e-05, "loss": 0.7316, "step": 2395 }, { "epoch": 0.24473953013278857, "grad_norm": 1.480473785598229, "learning_rate": 1.767790107782934e-05, "loss": 0.7828, "step": 2396 }, { "epoch": 0.24484167517875383, "grad_norm": 1.6405185914311855, "learning_rate": 1.7675781004181177e-05, "loss": 0.8154, "step": 2397 }, { "epoch": 0.2449438202247191, "grad_norm": 1.4920254930923424, "learning_rate": 1.7673660090413825e-05, "loss": 0.7102, "step": 2398 }, { "epoch": 0.24504596527068437, "grad_norm": 1.3355352090232555, "learning_rate": 1.7671538336759418e-05, "loss": 0.7116, "step": 2399 }, { "epoch": 0.24514811031664965, "grad_norm": 1.5751864691561008, "learning_rate": 1.766941574345018e-05, "loss": 0.82, "step": 2400 }, { "epoch": 0.2452502553626149, "grad_norm": 1.3606555163608096, "learning_rate": 1.766729231071844e-05, "loss": 0.7304, "step": 2401 }, { "epoch": 0.2453524004085802, "grad_norm": 1.395238870403432, "learning_rate": 1.76651680387966e-05, "loss": 0.7966, "step": 2402 }, { "epoch": 0.24545454545454545, "grad_norm": 1.6160266469726008, "learning_rate": 1.7663042927917165e-05, "loss": 0.8005, "step": 2403 }, { "epoch": 0.24555669050051074, "grad_norm": 1.363140090600953, "learning_rate": 1.7660916978312733e-05, "loss": 0.7167, "step": 2404 }, { "epoch": 0.245658835546476, "grad_norm": 1.4651707930250255, "learning_rate": 1.7658790190215987e-05, "loss": 0.8045, "step": 2405 }, { "epoch": 0.24576098059244128, "grad_norm": 1.5965999833709852, "learning_rate": 1.7656662563859702e-05, "loss": 0.7401, "step": 2406 }, { "epoch": 0.24586312563840654, "grad_norm": 1.8824709320875674, "learning_rate": 1.765453409947676e-05, "loss": 0.8479, "step": 2407 }, { "epoch": 0.2459652706843718, "grad_norm": 1.5366865341769178, "learning_rate": 1.7652404797300114e-05, "loss": 0.6067, "step": 2408 }, { "epoch": 0.24606741573033708, "grad_norm": 1.2967898002305431, "learning_rate": 1.7650274657562815e-05, "loss": 0.7205, "step": 2409 }, { "epoch": 0.24616956077630234, "grad_norm": 1.471879112081625, "learning_rate": 1.7648143680498012e-05, "loss": 0.7807, "step": 2410 }, { "epoch": 0.24627170582226762, "grad_norm": 1.480730862721299, "learning_rate": 1.7646011866338946e-05, "loss": 0.7316, "step": 2411 }, { "epoch": 0.24637385086823288, "grad_norm": 1.5574511585398474, "learning_rate": 1.7643879215318938e-05, "loss": 0.6395, "step": 2412 }, { "epoch": 0.24647599591419816, "grad_norm": 1.4742350271716305, "learning_rate": 1.764174572767141e-05, "loss": 0.7625, "step": 2413 }, { "epoch": 0.24657814096016342, "grad_norm": 1.5590627604033827, "learning_rate": 1.7639611403629878e-05, "loss": 0.8724, "step": 2414 }, { "epoch": 0.2466802860061287, "grad_norm": 1.5810763809914643, "learning_rate": 1.763747624342794e-05, "loss": 0.7716, "step": 2415 }, { "epoch": 0.24678243105209396, "grad_norm": 1.534494376720642, "learning_rate": 1.76353402472993e-05, "loss": 0.8068, "step": 2416 }, { "epoch": 0.24688457609805925, "grad_norm": 1.4421118626501486, "learning_rate": 1.7633203415477725e-05, "loss": 0.7254, "step": 2417 }, { "epoch": 0.2469867211440245, "grad_norm": 1.6607433446557796, "learning_rate": 1.7631065748197113e-05, "loss": 0.7762, "step": 2418 }, { "epoch": 0.2470888661899898, "grad_norm": 1.4768915718985887, "learning_rate": 1.762892724569142e-05, "loss": 0.7165, "step": 2419 }, { "epoch": 0.24719101123595505, "grad_norm": 1.2793392497238998, "learning_rate": 1.7626787908194716e-05, "loss": 0.7239, "step": 2420 }, { "epoch": 0.24729315628192033, "grad_norm": 1.4151451700371307, "learning_rate": 1.7624647735941143e-05, "loss": 0.6944, "step": 2421 }, { "epoch": 0.2473953013278856, "grad_norm": 1.4532080003554693, "learning_rate": 1.762250672916495e-05, "loss": 0.8138, "step": 2422 }, { "epoch": 0.24749744637385088, "grad_norm": 1.4760857758709363, "learning_rate": 1.7620364888100475e-05, "loss": 0.768, "step": 2423 }, { "epoch": 0.24759959141981613, "grad_norm": 1.497074189235693, "learning_rate": 1.7618222212982135e-05, "loss": 0.7517, "step": 2424 }, { "epoch": 0.24770173646578142, "grad_norm": 1.5447997021489788, "learning_rate": 1.7616078704044454e-05, "loss": 0.7279, "step": 2425 }, { "epoch": 0.24780388151174668, "grad_norm": 1.505465684130928, "learning_rate": 1.7613934361522035e-05, "loss": 0.8398, "step": 2426 }, { "epoch": 0.24790602655771196, "grad_norm": 1.3911482459316213, "learning_rate": 1.7611789185649584e-05, "loss": 0.7155, "step": 2427 }, { "epoch": 0.24800817160367722, "grad_norm": 1.4600345477847263, "learning_rate": 1.7609643176661887e-05, "loss": 0.7057, "step": 2428 }, { "epoch": 0.2481103166496425, "grad_norm": 1.4654499777225087, "learning_rate": 1.760749633479383e-05, "loss": 0.7498, "step": 2429 }, { "epoch": 0.24821246169560776, "grad_norm": 1.4845547691744059, "learning_rate": 1.7605348660280384e-05, "loss": 0.7896, "step": 2430 }, { "epoch": 0.24831460674157305, "grad_norm": 1.3791372973965634, "learning_rate": 1.760320015335661e-05, "loss": 0.7521, "step": 2431 }, { "epoch": 0.2484167517875383, "grad_norm": 1.4443624014500467, "learning_rate": 1.760105081425767e-05, "loss": 0.7074, "step": 2432 }, { "epoch": 0.2485188968335036, "grad_norm": 1.5242569887136201, "learning_rate": 1.7598900643218807e-05, "loss": 0.8413, "step": 2433 }, { "epoch": 0.24862104187946885, "grad_norm": 1.424725355238924, "learning_rate": 1.7596749640475362e-05, "loss": 0.7151, "step": 2434 }, { "epoch": 0.2487231869254341, "grad_norm": 1.5444500017297091, "learning_rate": 1.7594597806262755e-05, "loss": 0.8003, "step": 2435 }, { "epoch": 0.2488253319713994, "grad_norm": 1.4427705615013624, "learning_rate": 1.7592445140816515e-05, "loss": 0.8018, "step": 2436 }, { "epoch": 0.24892747701736465, "grad_norm": 1.5563090770848207, "learning_rate": 1.759029164437225e-05, "loss": 0.667, "step": 2437 }, { "epoch": 0.24902962206332993, "grad_norm": 1.4311162344212127, "learning_rate": 1.7588137317165656e-05, "loss": 0.7, "step": 2438 }, { "epoch": 0.2491317671092952, "grad_norm": 1.5559287357976523, "learning_rate": 1.7585982159432534e-05, "loss": 0.8859, "step": 2439 }, { "epoch": 0.24923391215526047, "grad_norm": 1.4447791248480248, "learning_rate": 1.758382617140876e-05, "loss": 0.6832, "step": 2440 }, { "epoch": 0.24933605720122573, "grad_norm": 1.5569893188803954, "learning_rate": 1.7581669353330314e-05, "loss": 0.7232, "step": 2441 }, { "epoch": 0.24943820224719102, "grad_norm": 1.6071322627415818, "learning_rate": 1.757951170543326e-05, "loss": 0.7723, "step": 2442 }, { "epoch": 0.24954034729315627, "grad_norm": 1.3331138364392787, "learning_rate": 1.7577353227953748e-05, "loss": 0.727, "step": 2443 }, { "epoch": 0.24964249233912156, "grad_norm": 1.4416978045306965, "learning_rate": 1.7575193921128037e-05, "loss": 0.8391, "step": 2444 }, { "epoch": 0.24974463738508682, "grad_norm": 1.7060462707902826, "learning_rate": 1.7573033785192454e-05, "loss": 0.6732, "step": 2445 }, { "epoch": 0.2498467824310521, "grad_norm": 1.4897470731280371, "learning_rate": 1.757087282038343e-05, "loss": 0.7137, "step": 2446 }, { "epoch": 0.24994892747701736, "grad_norm": 1.388446458758296, "learning_rate": 1.756871102693748e-05, "loss": 0.7705, "step": 2447 }, { "epoch": 0.25005107252298264, "grad_norm": 1.4888803986269923, "learning_rate": 1.7566548405091223e-05, "loss": 0.7483, "step": 2448 }, { "epoch": 0.2501532175689479, "grad_norm": 1.6574932603710408, "learning_rate": 1.756438495508135e-05, "loss": 0.8423, "step": 2449 }, { "epoch": 0.25025536261491316, "grad_norm": 1.6806067033295211, "learning_rate": 1.7562220677144664e-05, "loss": 0.6819, "step": 2450 }, { "epoch": 0.25035750766087844, "grad_norm": 1.410017732171322, "learning_rate": 1.7560055571518034e-05, "loss": 0.7458, "step": 2451 }, { "epoch": 0.2504596527068437, "grad_norm": 1.7129430986357244, "learning_rate": 1.755788963843844e-05, "loss": 0.7836, "step": 2452 }, { "epoch": 0.250561797752809, "grad_norm": 1.5937253828973257, "learning_rate": 1.755572287814294e-05, "loss": 0.7841, "step": 2453 }, { "epoch": 0.25066394279877424, "grad_norm": 1.5264025692588492, "learning_rate": 1.755355529086869e-05, "loss": 0.6556, "step": 2454 }, { "epoch": 0.2507660878447395, "grad_norm": 1.4927912298365436, "learning_rate": 1.7551386876852933e-05, "loss": 0.708, "step": 2455 }, { "epoch": 0.2508682328907048, "grad_norm": 1.4801661756968634, "learning_rate": 1.7549217636333005e-05, "loss": 0.7421, "step": 2456 }, { "epoch": 0.2509703779366701, "grad_norm": 1.4716682950343924, "learning_rate": 1.7547047569546328e-05, "loss": 0.7962, "step": 2457 }, { "epoch": 0.2510725229826353, "grad_norm": 1.531938351706911, "learning_rate": 1.7544876676730417e-05, "loss": 0.7895, "step": 2458 }, { "epoch": 0.2511746680286006, "grad_norm": 1.3902575555486032, "learning_rate": 1.7542704958122882e-05, "loss": 0.6955, "step": 2459 }, { "epoch": 0.2512768130745659, "grad_norm": 1.5196329047156716, "learning_rate": 1.7540532413961413e-05, "loss": 0.7982, "step": 2460 }, { "epoch": 0.2513789581205312, "grad_norm": 1.541687848125844, "learning_rate": 1.75383590444838e-05, "loss": 0.7273, "step": 2461 }, { "epoch": 0.2514811031664964, "grad_norm": 1.4611238266361886, "learning_rate": 1.7536184849927922e-05, "loss": 0.6934, "step": 2462 }, { "epoch": 0.2515832482124617, "grad_norm": 1.3373169727863152, "learning_rate": 1.7534009830531742e-05, "loss": 0.7496, "step": 2463 }, { "epoch": 0.251685393258427, "grad_norm": 1.6330527183887573, "learning_rate": 1.7531833986533318e-05, "loss": 0.9134, "step": 2464 }, { "epoch": 0.2517875383043922, "grad_norm": 1.3707177336426284, "learning_rate": 1.7529657318170798e-05, "loss": 0.7355, "step": 2465 }, { "epoch": 0.2518896833503575, "grad_norm": 1.6421972849918223, "learning_rate": 1.752747982568242e-05, "loss": 0.7167, "step": 2466 }, { "epoch": 0.2519918283963228, "grad_norm": 1.5093522175105039, "learning_rate": 1.752530150930651e-05, "loss": 0.8964, "step": 2467 }, { "epoch": 0.25209397344228807, "grad_norm": 1.5119352181772094, "learning_rate": 1.7523122369281488e-05, "loss": 0.7688, "step": 2468 }, { "epoch": 0.2521961184882533, "grad_norm": 1.1717346113327243, "learning_rate": 1.7520942405845864e-05, "loss": 0.5172, "step": 2469 }, { "epoch": 0.2522982635342186, "grad_norm": 1.4396644136059134, "learning_rate": 1.7518761619238234e-05, "loss": 0.6575, "step": 2470 }, { "epoch": 0.25240040858018387, "grad_norm": 1.4496678115893424, "learning_rate": 1.7516580009697287e-05, "loss": 0.8468, "step": 2471 }, { "epoch": 0.25250255362614915, "grad_norm": 1.5165708480545144, "learning_rate": 1.7514397577461803e-05, "loss": 0.6896, "step": 2472 }, { "epoch": 0.2526046986721144, "grad_norm": 1.5602780340089109, "learning_rate": 1.7512214322770646e-05, "loss": 0.7057, "step": 2473 }, { "epoch": 0.25270684371807967, "grad_norm": 1.5192559223739266, "learning_rate": 1.751003024586278e-05, "loss": 0.8266, "step": 2474 }, { "epoch": 0.25280898876404495, "grad_norm": 1.4647564757868894, "learning_rate": 1.750784534697725e-05, "loss": 0.7663, "step": 2475 }, { "epoch": 0.25291113381001024, "grad_norm": 1.3355296825148013, "learning_rate": 1.75056596263532e-05, "loss": 0.7194, "step": 2476 }, { "epoch": 0.25301327885597547, "grad_norm": 1.6077019398415318, "learning_rate": 1.7503473084229846e-05, "loss": 0.8057, "step": 2477 }, { "epoch": 0.25311542390194075, "grad_norm": 1.33279044991065, "learning_rate": 1.7501285720846523e-05, "loss": 0.7312, "step": 2478 }, { "epoch": 0.25321756894790604, "grad_norm": 1.6253531646833466, "learning_rate": 1.749909753644263e-05, "loss": 0.8413, "step": 2479 }, { "epoch": 0.2533197139938713, "grad_norm": 1.5339948129886503, "learning_rate": 1.7496908531257666e-05, "loss": 0.805, "step": 2480 }, { "epoch": 0.25342185903983655, "grad_norm": 4.492418736885847, "learning_rate": 1.749471870553122e-05, "loss": 0.7006, "step": 2481 }, { "epoch": 0.25352400408580184, "grad_norm": 1.5480504229660053, "learning_rate": 1.7492528059502966e-05, "loss": 0.7453, "step": 2482 }, { "epoch": 0.2536261491317671, "grad_norm": 1.4121637706640053, "learning_rate": 1.749033659341268e-05, "loss": 0.7526, "step": 2483 }, { "epoch": 0.2537282941777324, "grad_norm": 1.460684151437332, "learning_rate": 1.7488144307500214e-05, "loss": 0.7504, "step": 2484 }, { "epoch": 0.25383043922369763, "grad_norm": 1.566965089252394, "learning_rate": 1.7485951202005514e-05, "loss": 0.7228, "step": 2485 }, { "epoch": 0.2539325842696629, "grad_norm": 1.4683808569093877, "learning_rate": 1.7483757277168617e-05, "loss": 0.9251, "step": 2486 }, { "epoch": 0.2540347293156282, "grad_norm": 1.304300866353276, "learning_rate": 1.748156253322965e-05, "loss": 0.6368, "step": 2487 }, { "epoch": 0.2541368743615935, "grad_norm": 1.3569700616182394, "learning_rate": 1.7479366970428833e-05, "loss": 0.6261, "step": 2488 }, { "epoch": 0.2542390194075587, "grad_norm": 1.4100159812192496, "learning_rate": 1.7477170589006468e-05, "loss": 0.736, "step": 2489 }, { "epoch": 0.254341164453524, "grad_norm": 1.453620145974583, "learning_rate": 1.7474973389202953e-05, "loss": 0.6989, "step": 2490 }, { "epoch": 0.2544433094994893, "grad_norm": 1.4765189719442993, "learning_rate": 1.747277537125877e-05, "loss": 0.6685, "step": 2491 }, { "epoch": 0.2545454545454545, "grad_norm": 1.7161609927060864, "learning_rate": 1.7470576535414492e-05, "loss": 0.7923, "step": 2492 }, { "epoch": 0.2546475995914198, "grad_norm": 1.62423539457153, "learning_rate": 1.746837688191079e-05, "loss": 0.8328, "step": 2493 }, { "epoch": 0.2547497446373851, "grad_norm": 1.5176620634628604, "learning_rate": 1.746617641098841e-05, "loss": 0.6953, "step": 2494 }, { "epoch": 0.2548518896833504, "grad_norm": 1.4704562717589216, "learning_rate": 1.74639751228882e-05, "loss": 0.6608, "step": 2495 }, { "epoch": 0.2549540347293156, "grad_norm": 1.5147407390165644, "learning_rate": 1.746177301785109e-05, "loss": 0.8198, "step": 2496 }, { "epoch": 0.2550561797752809, "grad_norm": 1.5104917487397456, "learning_rate": 1.74595700961181e-05, "loss": 0.6858, "step": 2497 }, { "epoch": 0.2551583248212462, "grad_norm": 1.4198018408635444, "learning_rate": 1.7457366357930343e-05, "loss": 0.8058, "step": 2498 }, { "epoch": 0.25526046986721146, "grad_norm": 1.8597769830263262, "learning_rate": 1.7455161803529025e-05, "loss": 0.6807, "step": 2499 }, { "epoch": 0.2553626149131767, "grad_norm": 1.6423930319217643, "learning_rate": 1.7452956433155427e-05, "loss": 0.7743, "step": 2500 }, { "epoch": 0.255464759959142, "grad_norm": 1.4570993699965709, "learning_rate": 1.745075024705093e-05, "loss": 0.8229, "step": 2501 }, { "epoch": 0.25556690500510726, "grad_norm": 1.359563072389055, "learning_rate": 1.744854324545701e-05, "loss": 0.6327, "step": 2502 }, { "epoch": 0.25566905005107254, "grad_norm": 1.5241561813698241, "learning_rate": 1.7446335428615217e-05, "loss": 0.686, "step": 2503 }, { "epoch": 0.2557711950970378, "grad_norm": 1.6263860495979683, "learning_rate": 1.74441267967672e-05, "loss": 0.7349, "step": 2504 }, { "epoch": 0.25587334014300306, "grad_norm": 1.463234343645937, "learning_rate": 1.74419173501547e-05, "loss": 0.6576, "step": 2505 }, { "epoch": 0.25597548518896834, "grad_norm": 1.3907928379952301, "learning_rate": 1.743970708901953e-05, "loss": 0.7154, "step": 2506 }, { "epoch": 0.25607763023493363, "grad_norm": 1.2870484780553566, "learning_rate": 1.743749601360362e-05, "loss": 0.7361, "step": 2507 }, { "epoch": 0.25617977528089886, "grad_norm": 1.6068122594285188, "learning_rate": 1.743528412414896e-05, "loss": 0.8294, "step": 2508 }, { "epoch": 0.25628192032686414, "grad_norm": 1.469824547644569, "learning_rate": 1.7433071420897653e-05, "loss": 0.7936, "step": 2509 }, { "epoch": 0.25638406537282943, "grad_norm": 1.4497261693289538, "learning_rate": 1.7430857904091873e-05, "loss": 0.7387, "step": 2510 }, { "epoch": 0.2564862104187947, "grad_norm": 1.6397995786431518, "learning_rate": 1.7428643573973895e-05, "loss": 0.6616, "step": 2511 }, { "epoch": 0.25658835546475994, "grad_norm": 1.4034425486798285, "learning_rate": 1.7426428430786083e-05, "loss": 0.7603, "step": 2512 }, { "epoch": 0.25669050051072523, "grad_norm": 1.6043854670748998, "learning_rate": 1.7424212474770875e-05, "loss": 0.7474, "step": 2513 }, { "epoch": 0.2567926455566905, "grad_norm": 1.6530455811890246, "learning_rate": 1.742199570617082e-05, "loss": 0.8297, "step": 2514 }, { "epoch": 0.2568947906026558, "grad_norm": 1.4434252528521987, "learning_rate": 1.7419778125228538e-05, "loss": 0.6857, "step": 2515 }, { "epoch": 0.256996935648621, "grad_norm": 1.5153853694350543, "learning_rate": 1.7417559732186747e-05, "loss": 0.872, "step": 2516 }, { "epoch": 0.2570990806945863, "grad_norm": 1.569630800106155, "learning_rate": 1.7415340527288247e-05, "loss": 0.9354, "step": 2517 }, { "epoch": 0.2572012257405516, "grad_norm": 1.5462828477202166, "learning_rate": 1.741312051077594e-05, "loss": 0.8827, "step": 2518 }, { "epoch": 0.2573033707865168, "grad_norm": 1.6126111620149348, "learning_rate": 1.7410899682892802e-05, "loss": 0.8399, "step": 2519 }, { "epoch": 0.2574055158324821, "grad_norm": 1.420590093068615, "learning_rate": 1.7408678043881905e-05, "loss": 0.6294, "step": 2520 }, { "epoch": 0.2575076608784474, "grad_norm": 1.5996841334286362, "learning_rate": 1.740645559398641e-05, "loss": 0.7573, "step": 2521 }, { "epoch": 0.2576098059244127, "grad_norm": 1.4190033206039538, "learning_rate": 1.7404232333449566e-05, "loss": 0.7002, "step": 2522 }, { "epoch": 0.2577119509703779, "grad_norm": 1.6258667617437892, "learning_rate": 1.7402008262514706e-05, "loss": 0.8791, "step": 2523 }, { "epoch": 0.2578140960163432, "grad_norm": 1.3590453456515896, "learning_rate": 1.7399783381425264e-05, "loss": 0.6989, "step": 2524 }, { "epoch": 0.2579162410623085, "grad_norm": 1.4830960215986995, "learning_rate": 1.7397557690424748e-05, "loss": 0.706, "step": 2525 }, { "epoch": 0.25801838610827377, "grad_norm": 1.5192457907690529, "learning_rate": 1.7395331189756763e-05, "loss": 0.7171, "step": 2526 }, { "epoch": 0.258120531154239, "grad_norm": 1.3622636626355842, "learning_rate": 1.7393103879665e-05, "loss": 0.7405, "step": 2527 }, { "epoch": 0.2582226762002043, "grad_norm": 1.3863364065659285, "learning_rate": 1.7390875760393245e-05, "loss": 0.7437, "step": 2528 }, { "epoch": 0.25832482124616957, "grad_norm": 1.6038163906735787, "learning_rate": 1.7388646832185358e-05, "loss": 0.8477, "step": 2529 }, { "epoch": 0.25842696629213485, "grad_norm": 1.5744136045514094, "learning_rate": 1.7386417095285308e-05, "loss": 0.7987, "step": 2530 }, { "epoch": 0.2585291113381001, "grad_norm": 1.55256592535351, "learning_rate": 1.7384186549937124e-05, "loss": 0.7301, "step": 2531 }, { "epoch": 0.25863125638406537, "grad_norm": 1.4174166781686748, "learning_rate": 1.7381955196384962e-05, "loss": 0.7502, "step": 2532 }, { "epoch": 0.25873340143003065, "grad_norm": 1.469171656177889, "learning_rate": 1.737972303487303e-05, "loss": 0.7369, "step": 2533 }, { "epoch": 0.25883554647599594, "grad_norm": 1.6808143844062435, "learning_rate": 1.7377490065645643e-05, "loss": 0.8497, "step": 2534 }, { "epoch": 0.25893769152196117, "grad_norm": 1.437318569631455, "learning_rate": 1.7375256288947203e-05, "loss": 0.7336, "step": 2535 }, { "epoch": 0.25903983656792645, "grad_norm": 1.4314490253269458, "learning_rate": 1.7373021705022197e-05, "loss": 0.6936, "step": 2536 }, { "epoch": 0.25914198161389174, "grad_norm": 1.4687293827111565, "learning_rate": 1.73707863141152e-05, "loss": 0.6611, "step": 2537 }, { "epoch": 0.259244126659857, "grad_norm": 1.4261926985299205, "learning_rate": 1.736855011647088e-05, "loss": 0.7058, "step": 2538 }, { "epoch": 0.25934627170582225, "grad_norm": 1.3145077188043062, "learning_rate": 1.7366313112333993e-05, "loss": 0.6085, "step": 2539 }, { "epoch": 0.25944841675178754, "grad_norm": 1.5570003248760094, "learning_rate": 1.7364075301949374e-05, "loss": 0.7837, "step": 2540 }, { "epoch": 0.2595505617977528, "grad_norm": 1.4893310902576453, "learning_rate": 1.7361836685561954e-05, "loss": 0.7056, "step": 2541 }, { "epoch": 0.2596527068437181, "grad_norm": 1.5764267476701108, "learning_rate": 1.735959726341675e-05, "loss": 0.7784, "step": 2542 }, { "epoch": 0.25975485188968334, "grad_norm": 1.4574646136279008, "learning_rate": 1.7357357035758875e-05, "loss": 0.6259, "step": 2543 }, { "epoch": 0.2598569969356486, "grad_norm": 1.4553885613493278, "learning_rate": 1.735511600283352e-05, "loss": 0.7996, "step": 2544 }, { "epoch": 0.2599591419816139, "grad_norm": 1.3823775467417545, "learning_rate": 1.7352874164885964e-05, "loss": 0.752, "step": 2545 }, { "epoch": 0.26006128702757914, "grad_norm": 1.5586763941157193, "learning_rate": 1.735063152216158e-05, "loss": 0.7232, "step": 2546 }, { "epoch": 0.2601634320735444, "grad_norm": 1.4248645634957147, "learning_rate": 1.734838807490583e-05, "loss": 0.7109, "step": 2547 }, { "epoch": 0.2602655771195097, "grad_norm": 1.3727171900547974, "learning_rate": 1.7346143823364252e-05, "loss": 0.7235, "step": 2548 }, { "epoch": 0.260367722165475, "grad_norm": 1.5174154516143281, "learning_rate": 1.734389876778249e-05, "loss": 0.7048, "step": 2549 }, { "epoch": 0.2604698672114402, "grad_norm": 1.512344286849908, "learning_rate": 1.734165290840626e-05, "loss": 0.7521, "step": 2550 }, { "epoch": 0.2605720122574055, "grad_norm": 1.372586099134527, "learning_rate": 1.7339406245481378e-05, "loss": 0.7023, "step": 2551 }, { "epoch": 0.2606741573033708, "grad_norm": 1.4599542776826786, "learning_rate": 1.7337158779253743e-05, "loss": 0.7809, "step": 2552 }, { "epoch": 0.2607763023493361, "grad_norm": 1.3738173405216818, "learning_rate": 1.7334910509969335e-05, "loss": 0.7816, "step": 2553 }, { "epoch": 0.2608784473953013, "grad_norm": 1.4101160596615534, "learning_rate": 1.7332661437874235e-05, "loss": 0.7248, "step": 2554 }, { "epoch": 0.2609805924412666, "grad_norm": 1.5954432771459077, "learning_rate": 1.73304115632146e-05, "loss": 0.8169, "step": 2555 }, { "epoch": 0.2610827374872319, "grad_norm": 1.433861268340218, "learning_rate": 1.732816088623669e-05, "loss": 0.7619, "step": 2556 }, { "epoch": 0.26118488253319716, "grad_norm": 1.4602216140665614, "learning_rate": 1.732590940718683e-05, "loss": 0.7005, "step": 2557 }, { "epoch": 0.2612870275791624, "grad_norm": 1.4914068586334843, "learning_rate": 1.7323657126311454e-05, "loss": 0.7101, "step": 2558 }, { "epoch": 0.2613891726251277, "grad_norm": 1.5912432249550088, "learning_rate": 1.7321404043857076e-05, "loss": 0.7397, "step": 2559 }, { "epoch": 0.26149131767109296, "grad_norm": 1.5427259462402998, "learning_rate": 1.7319150160070292e-05, "loss": 0.6991, "step": 2560 }, { "epoch": 0.26159346271705824, "grad_norm": 1.4267636595591144, "learning_rate": 1.7316895475197796e-05, "loss": 0.7526, "step": 2561 }, { "epoch": 0.2616956077630235, "grad_norm": 1.5120069113897876, "learning_rate": 1.7314639989486364e-05, "loss": 0.6901, "step": 2562 }, { "epoch": 0.26179775280898876, "grad_norm": 1.4606811726735793, "learning_rate": 1.7312383703182857e-05, "loss": 0.7241, "step": 2563 }, { "epoch": 0.26189989785495404, "grad_norm": 1.3575072357885103, "learning_rate": 1.7310126616534232e-05, "loss": 0.774, "step": 2564 }, { "epoch": 0.26200204290091933, "grad_norm": 1.6002884476183385, "learning_rate": 1.7307868729787524e-05, "loss": 0.792, "step": 2565 }, { "epoch": 0.26210418794688456, "grad_norm": 1.5225220314153924, "learning_rate": 1.730561004318986e-05, "loss": 0.7879, "step": 2566 }, { "epoch": 0.26220633299284984, "grad_norm": 1.4257587427082887, "learning_rate": 1.7303350556988457e-05, "loss": 0.8273, "step": 2567 }, { "epoch": 0.26230847803881513, "grad_norm": 1.4072810140894876, "learning_rate": 1.7301090271430622e-05, "loss": 0.8085, "step": 2568 }, { "epoch": 0.2624106230847804, "grad_norm": 1.7163092285645056, "learning_rate": 1.729882918676374e-05, "loss": 0.869, "step": 2569 }, { "epoch": 0.26251276813074564, "grad_norm": 1.6170774137739983, "learning_rate": 1.729656730323528e-05, "loss": 0.7057, "step": 2570 }, { "epoch": 0.26261491317671093, "grad_norm": 1.4418590387031005, "learning_rate": 1.729430462109282e-05, "loss": 0.7984, "step": 2571 }, { "epoch": 0.2627170582226762, "grad_norm": 1.4177237403308627, "learning_rate": 1.7292041140584005e-05, "loss": 0.6945, "step": 2572 }, { "epoch": 0.26281920326864144, "grad_norm": 1.4062452927225306, "learning_rate": 1.7289776861956576e-05, "loss": 0.7476, "step": 2573 }, { "epoch": 0.26292134831460673, "grad_norm": 1.4449053220202175, "learning_rate": 1.7287511785458358e-05, "loss": 0.7807, "step": 2574 }, { "epoch": 0.263023493360572, "grad_norm": 1.3600144363567668, "learning_rate": 1.728524591133727e-05, "loss": 0.6867, "step": 2575 }, { "epoch": 0.2631256384065373, "grad_norm": 1.7884559326480263, "learning_rate": 1.7282979239841312e-05, "loss": 0.825, "step": 2576 }, { "epoch": 0.26322778345250253, "grad_norm": 1.595011626536254, "learning_rate": 1.7280711771218564e-05, "loss": 0.7214, "step": 2577 }, { "epoch": 0.2633299284984678, "grad_norm": 1.4815314908237394, "learning_rate": 1.7278443505717214e-05, "loss": 0.7101, "step": 2578 }, { "epoch": 0.2634320735444331, "grad_norm": 1.548533378429834, "learning_rate": 1.7276174443585518e-05, "loss": 0.7109, "step": 2579 }, { "epoch": 0.2635342185903984, "grad_norm": 1.5134585592924366, "learning_rate": 1.727390458507183e-05, "loss": 0.7409, "step": 2580 }, { "epoch": 0.2636363636363636, "grad_norm": 1.5098391169665875, "learning_rate": 1.7271633930424584e-05, "loss": 0.6816, "step": 2581 }, { "epoch": 0.2637385086823289, "grad_norm": 1.3022167865283196, "learning_rate": 1.7269362479892304e-05, "loss": 0.7948, "step": 2582 }, { "epoch": 0.2638406537282942, "grad_norm": 1.6514932505623174, "learning_rate": 1.7267090233723606e-05, "loss": 0.7503, "step": 2583 }, { "epoch": 0.26394279877425947, "grad_norm": 1.4960113748500468, "learning_rate": 1.7264817192167186e-05, "loss": 0.7785, "step": 2584 }, { "epoch": 0.2640449438202247, "grad_norm": 1.347994350926256, "learning_rate": 1.7262543355471834e-05, "loss": 0.7211, "step": 2585 }, { "epoch": 0.26414708886619, "grad_norm": 1.3989797706569274, "learning_rate": 1.7260268723886416e-05, "loss": 0.5961, "step": 2586 }, { "epoch": 0.26424923391215527, "grad_norm": 1.4640591504571363, "learning_rate": 1.7257993297659897e-05, "loss": 0.6993, "step": 2587 }, { "epoch": 0.26435137895812055, "grad_norm": 1.3710118829241082, "learning_rate": 1.725571707704132e-05, "loss": 0.8234, "step": 2588 }, { "epoch": 0.2644535240040858, "grad_norm": 1.4887185599174333, "learning_rate": 1.7253440062279825e-05, "loss": 0.6585, "step": 2589 }, { "epoch": 0.26455566905005107, "grad_norm": 1.4462067989209562, "learning_rate": 1.7251162253624624e-05, "loss": 0.6664, "step": 2590 }, { "epoch": 0.26465781409601635, "grad_norm": 1.4415258812394312, "learning_rate": 1.7248883651325033e-05, "loss": 0.7796, "step": 2591 }, { "epoch": 0.26475995914198164, "grad_norm": 1.4975523439984146, "learning_rate": 1.7246604255630443e-05, "loss": 0.7454, "step": 2592 }, { "epoch": 0.26486210418794687, "grad_norm": 1.405537713584991, "learning_rate": 1.7244324066790336e-05, "loss": 0.7534, "step": 2593 }, { "epoch": 0.26496424923391215, "grad_norm": 1.3375152933428638, "learning_rate": 1.7242043085054278e-05, "loss": 0.6631, "step": 2594 }, { "epoch": 0.26506639427987744, "grad_norm": 1.4754694364773744, "learning_rate": 1.7239761310671923e-05, "loss": 0.7421, "step": 2595 }, { "epoch": 0.2651685393258427, "grad_norm": 1.4324979778724882, "learning_rate": 1.723747874389302e-05, "loss": 0.8013, "step": 2596 }, { "epoch": 0.26527068437180795, "grad_norm": 1.541976736218398, "learning_rate": 1.7235195384967388e-05, "loss": 0.6678, "step": 2597 }, { "epoch": 0.26537282941777324, "grad_norm": 1.5804864344753153, "learning_rate": 1.7232911234144947e-05, "loss": 0.6474, "step": 2598 }, { "epoch": 0.2654749744637385, "grad_norm": 1.647174256251865, "learning_rate": 1.7230626291675702e-05, "loss": 0.8284, "step": 2599 }, { "epoch": 0.26557711950970375, "grad_norm": 1.6204698327624225, "learning_rate": 1.7228340557809734e-05, "loss": 0.8262, "step": 2600 }, { "epoch": 0.26567926455566904, "grad_norm": 1.3633740671849883, "learning_rate": 1.7226054032797223e-05, "loss": 0.7736, "step": 2601 }, { "epoch": 0.2657814096016343, "grad_norm": 1.2677299921300325, "learning_rate": 1.7223766716888432e-05, "loss": 0.6868, "step": 2602 }, { "epoch": 0.2658835546475996, "grad_norm": 1.4575284242366675, "learning_rate": 1.7221478610333708e-05, "loss": 0.7536, "step": 2603 }, { "epoch": 0.26598569969356484, "grad_norm": 1.66703457406756, "learning_rate": 1.7219189713383477e-05, "loss": 0.8388, "step": 2604 }, { "epoch": 0.2660878447395301, "grad_norm": 1.5989751549480957, "learning_rate": 1.7216900026288272e-05, "loss": 0.8566, "step": 2605 }, { "epoch": 0.2661899897854954, "grad_norm": 1.6842324880039306, "learning_rate": 1.72146095492987e-05, "loss": 0.7016, "step": 2606 }, { "epoch": 0.2662921348314607, "grad_norm": 1.4566409704351093, "learning_rate": 1.7212318282665442e-05, "loss": 0.7062, "step": 2607 }, { "epoch": 0.2663942798774259, "grad_norm": 1.429521751121697, "learning_rate": 1.72100262266393e-05, "loss": 0.7792, "step": 2608 }, { "epoch": 0.2664964249233912, "grad_norm": 1.4589532508338408, "learning_rate": 1.7207733381471122e-05, "loss": 0.654, "step": 2609 }, { "epoch": 0.2665985699693565, "grad_norm": 1.3996357210499535, "learning_rate": 1.7205439747411867e-05, "loss": 0.6873, "step": 2610 }, { "epoch": 0.2667007150153218, "grad_norm": 1.4891202337744267, "learning_rate": 1.720314532471258e-05, "loss": 0.7736, "step": 2611 }, { "epoch": 0.266802860061287, "grad_norm": 1.5208262563013906, "learning_rate": 1.7200850113624384e-05, "loss": 0.7282, "step": 2612 }, { "epoch": 0.2669050051072523, "grad_norm": 1.3052087803141619, "learning_rate": 1.719855411439849e-05, "loss": 0.6592, "step": 2613 }, { "epoch": 0.2670071501532176, "grad_norm": 1.472917562630221, "learning_rate": 1.7196257327286195e-05, "loss": 0.8273, "step": 2614 }, { "epoch": 0.26710929519918286, "grad_norm": 1.4479760356290818, "learning_rate": 1.7193959752538886e-05, "loss": 0.7423, "step": 2615 }, { "epoch": 0.2672114402451481, "grad_norm": 1.2954904787858723, "learning_rate": 1.719166139040804e-05, "loss": 0.6636, "step": 2616 }, { "epoch": 0.2673135852911134, "grad_norm": 1.539730300261252, "learning_rate": 1.7189362241145202e-05, "loss": 0.6679, "step": 2617 }, { "epoch": 0.26741573033707866, "grad_norm": 1.5069173701769514, "learning_rate": 1.7187062305002025e-05, "loss": 0.857, "step": 2618 }, { "epoch": 0.26751787538304395, "grad_norm": 1.495559316813588, "learning_rate": 1.7184761582230233e-05, "loss": 0.6719, "step": 2619 }, { "epoch": 0.2676200204290092, "grad_norm": 1.5727692731142129, "learning_rate": 1.7182460073081644e-05, "loss": 0.7304, "step": 2620 }, { "epoch": 0.26772216547497446, "grad_norm": 1.3280373947043984, "learning_rate": 1.718015777780816e-05, "loss": 0.6062, "step": 2621 }, { "epoch": 0.26782431052093975, "grad_norm": 1.4141652113642578, "learning_rate": 1.7177854696661774e-05, "loss": 0.6872, "step": 2622 }, { "epoch": 0.26792645556690503, "grad_norm": 1.6295738361306191, "learning_rate": 1.7175550829894545e-05, "loss": 0.6925, "step": 2623 }, { "epoch": 0.26802860061287026, "grad_norm": 1.6093269761681401, "learning_rate": 1.717324617775865e-05, "loss": 0.8464, "step": 2624 }, { "epoch": 0.26813074565883555, "grad_norm": 1.4136816011681543, "learning_rate": 1.7170940740506318e-05, "loss": 0.7431, "step": 2625 }, { "epoch": 0.26823289070480083, "grad_norm": 1.3353829216752438, "learning_rate": 1.7168634518389896e-05, "loss": 0.6679, "step": 2626 }, { "epoch": 0.2683350357507661, "grad_norm": 1.3911946856564807, "learning_rate": 1.7166327511661788e-05, "loss": 0.7022, "step": 2627 }, { "epoch": 0.26843718079673135, "grad_norm": 1.2940846490331492, "learning_rate": 1.716401972057451e-05, "loss": 0.6863, "step": 2628 }, { "epoch": 0.26853932584269663, "grad_norm": 1.497118255198858, "learning_rate": 1.716171114538064e-05, "loss": 0.7402, "step": 2629 }, { "epoch": 0.2686414708886619, "grad_norm": 1.5833943878999488, "learning_rate": 1.7159401786332862e-05, "loss": 0.7949, "step": 2630 }, { "epoch": 0.26874361593462714, "grad_norm": 1.66428270474977, "learning_rate": 1.7157091643683932e-05, "loss": 0.7643, "step": 2631 }, { "epoch": 0.26884576098059243, "grad_norm": 1.559043388225005, "learning_rate": 1.7154780717686695e-05, "loss": 0.8236, "step": 2632 }, { "epoch": 0.2689479060265577, "grad_norm": 1.4788791377011152, "learning_rate": 1.715246900859409e-05, "loss": 0.8109, "step": 2633 }, { "epoch": 0.269050051072523, "grad_norm": 1.5054228764923097, "learning_rate": 1.7150156516659127e-05, "loss": 0.6948, "step": 2634 }, { "epoch": 0.26915219611848823, "grad_norm": 1.484573646124468, "learning_rate": 1.7147843242134915e-05, "loss": 0.6386, "step": 2635 }, { "epoch": 0.2692543411644535, "grad_norm": 1.4800926169361108, "learning_rate": 1.7145529185274644e-05, "loss": 0.7017, "step": 2636 }, { "epoch": 0.2693564862104188, "grad_norm": 1.6286395649407879, "learning_rate": 1.7143214346331586e-05, "loss": 0.9433, "step": 2637 }, { "epoch": 0.2694586312563841, "grad_norm": 1.7125761880265544, "learning_rate": 1.71408987255591e-05, "loss": 0.8322, "step": 2638 }, { "epoch": 0.2695607763023493, "grad_norm": 1.573273062007928, "learning_rate": 1.7138582323210635e-05, "loss": 0.7032, "step": 2639 }, { "epoch": 0.2696629213483146, "grad_norm": 1.4996973945044365, "learning_rate": 1.7136265139539725e-05, "loss": 0.7459, "step": 2640 }, { "epoch": 0.2697650663942799, "grad_norm": 1.3248726143376297, "learning_rate": 1.7133947174799984e-05, "loss": 0.7095, "step": 2641 }, { "epoch": 0.26986721144024517, "grad_norm": 1.493091223523191, "learning_rate": 1.7131628429245117e-05, "loss": 0.7093, "step": 2642 }, { "epoch": 0.2699693564862104, "grad_norm": 1.5553983240600513, "learning_rate": 1.712930890312891e-05, "loss": 0.7343, "step": 2643 }, { "epoch": 0.2700715015321757, "grad_norm": 1.593448444578975, "learning_rate": 1.712698859670524e-05, "loss": 0.8282, "step": 2644 }, { "epoch": 0.27017364657814097, "grad_norm": 1.4520453075067568, "learning_rate": 1.712466751022806e-05, "loss": 0.7499, "step": 2645 }, { "epoch": 0.27027579162410625, "grad_norm": 1.5074051470023935, "learning_rate": 1.7122345643951418e-05, "loss": 0.8329, "step": 2646 }, { "epoch": 0.2703779366700715, "grad_norm": 1.3920831840632624, "learning_rate": 1.7120022998129445e-05, "loss": 0.6624, "step": 2647 }, { "epoch": 0.27048008171603677, "grad_norm": 1.5244277965039217, "learning_rate": 1.7117699573016353e-05, "loss": 0.8023, "step": 2648 }, { "epoch": 0.27058222676200205, "grad_norm": 1.546285309414115, "learning_rate": 1.7115375368866444e-05, "loss": 0.7926, "step": 2649 }, { "epoch": 0.27068437180796734, "grad_norm": 1.6483171700518657, "learning_rate": 1.7113050385934107e-05, "loss": 0.6854, "step": 2650 }, { "epoch": 0.27078651685393257, "grad_norm": 1.3923591212877138, "learning_rate": 1.7110724624473808e-05, "loss": 0.7254, "step": 2651 }, { "epoch": 0.27088866189989785, "grad_norm": 1.4816820257265522, "learning_rate": 1.71083980847401e-05, "loss": 0.8438, "step": 2652 }, { "epoch": 0.27099080694586314, "grad_norm": 1.4187506873532743, "learning_rate": 1.7106070766987636e-05, "loss": 0.6166, "step": 2653 }, { "epoch": 0.2710929519918284, "grad_norm": 1.3642969069045667, "learning_rate": 1.710374267147113e-05, "loss": 0.8211, "step": 2654 }, { "epoch": 0.27119509703779365, "grad_norm": 1.3410571875961252, "learning_rate": 1.7101413798445404e-05, "loss": 0.7595, "step": 2655 }, { "epoch": 0.27129724208375894, "grad_norm": 1.4583230738658919, "learning_rate": 1.7099084148165344e-05, "loss": 0.721, "step": 2656 }, { "epoch": 0.2713993871297242, "grad_norm": 1.42047014785119, "learning_rate": 1.709675372088594e-05, "loss": 0.7737, "step": 2657 }, { "epoch": 0.27150153217568945, "grad_norm": 1.3974156350464202, "learning_rate": 1.709442251686226e-05, "loss": 0.7159, "step": 2658 }, { "epoch": 0.27160367722165474, "grad_norm": 1.3054891804741153, "learning_rate": 1.709209053634945e-05, "loss": 0.6308, "step": 2659 }, { "epoch": 0.27170582226762, "grad_norm": 1.420709002055265, "learning_rate": 1.7089757779602747e-05, "loss": 0.7175, "step": 2660 }, { "epoch": 0.2718079673135853, "grad_norm": 1.5848739371794853, "learning_rate": 1.7087424246877474e-05, "loss": 0.7118, "step": 2661 }, { "epoch": 0.27191011235955054, "grad_norm": 1.4871111146499125, "learning_rate": 1.708508993842904e-05, "loss": 0.8389, "step": 2662 }, { "epoch": 0.2720122574055158, "grad_norm": 1.3652945290603586, "learning_rate": 1.7082754854512932e-05, "loss": 0.7052, "step": 2663 }, { "epoch": 0.2721144024514811, "grad_norm": 1.5253014922218116, "learning_rate": 1.7080418995384733e-05, "loss": 0.7414, "step": 2664 }, { "epoch": 0.2722165474974464, "grad_norm": 1.4386115755708755, "learning_rate": 1.70780823613001e-05, "loss": 0.789, "step": 2665 }, { "epoch": 0.2723186925434116, "grad_norm": 1.4781905312151657, "learning_rate": 1.7075744952514774e-05, "loss": 0.7083, "step": 2666 }, { "epoch": 0.2724208375893769, "grad_norm": 1.417197515595689, "learning_rate": 1.7073406769284594e-05, "loss": 0.7992, "step": 2667 }, { "epoch": 0.2725229826353422, "grad_norm": 1.6280921591438862, "learning_rate": 1.7071067811865477e-05, "loss": 0.7997, "step": 2668 }, { "epoch": 0.2726251276813075, "grad_norm": 1.6097314594668686, "learning_rate": 1.7068728080513417e-05, "loss": 0.8193, "step": 2669 }, { "epoch": 0.2727272727272727, "grad_norm": 1.5960715787940696, "learning_rate": 1.7066387575484502e-05, "loss": 0.7556, "step": 2670 }, { "epoch": 0.272829417773238, "grad_norm": 1.3707855115982637, "learning_rate": 1.70640462970349e-05, "loss": 0.7026, "step": 2671 }, { "epoch": 0.2729315628192033, "grad_norm": 1.3990838439606879, "learning_rate": 1.706170424542087e-05, "loss": 0.6821, "step": 2672 }, { "epoch": 0.27303370786516856, "grad_norm": 1.224444433666637, "learning_rate": 1.7059361420898743e-05, "loss": 0.6499, "step": 2673 }, { "epoch": 0.2731358529111338, "grad_norm": 1.6150672438730833, "learning_rate": 1.705701782372495e-05, "loss": 0.8249, "step": 2674 }, { "epoch": 0.2732379979570991, "grad_norm": 1.555482420963003, "learning_rate": 1.7054673454155997e-05, "loss": 0.7855, "step": 2675 }, { "epoch": 0.27334014300306436, "grad_norm": 1.400417303694646, "learning_rate": 1.7052328312448475e-05, "loss": 0.7424, "step": 2676 }, { "epoch": 0.27344228804902965, "grad_norm": 1.4776822238448912, "learning_rate": 1.7049982398859065e-05, "loss": 0.6699, "step": 2677 }, { "epoch": 0.2735444330949949, "grad_norm": 1.5096942864103626, "learning_rate": 1.7047635713644528e-05, "loss": 0.8032, "step": 2678 }, { "epoch": 0.27364657814096016, "grad_norm": 1.4634104240886805, "learning_rate": 1.7045288257061704e-05, "loss": 0.704, "step": 2679 }, { "epoch": 0.27374872318692545, "grad_norm": 1.5095164623655783, "learning_rate": 1.7042940029367532e-05, "loss": 0.8195, "step": 2680 }, { "epoch": 0.27385086823289073, "grad_norm": 1.4061515363450419, "learning_rate": 1.7040591030819022e-05, "loss": 0.687, "step": 2681 }, { "epoch": 0.27395301327885596, "grad_norm": 1.288834386184951, "learning_rate": 1.703824126167328e-05, "loss": 0.7711, "step": 2682 }, { "epoch": 0.27405515832482125, "grad_norm": 1.4172085059463584, "learning_rate": 1.7035890722187477e-05, "loss": 0.7508, "step": 2683 }, { "epoch": 0.27415730337078653, "grad_norm": 1.551307937035291, "learning_rate": 1.7033539412618892e-05, "loss": 0.7151, "step": 2684 }, { "epoch": 0.27425944841675176, "grad_norm": 1.437928189223863, "learning_rate": 1.7031187333224874e-05, "loss": 0.7296, "step": 2685 }, { "epoch": 0.27436159346271705, "grad_norm": 1.506923697623631, "learning_rate": 1.702883448426286e-05, "loss": 0.7073, "step": 2686 }, { "epoch": 0.27446373850868233, "grad_norm": 1.3984482978267556, "learning_rate": 1.702648086599037e-05, "loss": 0.6967, "step": 2687 }, { "epoch": 0.2745658835546476, "grad_norm": 1.7439214007364532, "learning_rate": 1.702412647866501e-05, "loss": 0.8031, "step": 2688 }, { "epoch": 0.27466802860061285, "grad_norm": 1.4680059828849006, "learning_rate": 1.7021771322544465e-05, "loss": 0.7142, "step": 2689 }, { "epoch": 0.27477017364657813, "grad_norm": 1.5954742591347104, "learning_rate": 1.701941539788652e-05, "loss": 0.8175, "step": 2690 }, { "epoch": 0.2748723186925434, "grad_norm": 1.4159407648291686, "learning_rate": 1.7017058704949017e-05, "loss": 0.7699, "step": 2691 }, { "epoch": 0.2749744637385087, "grad_norm": 1.4131290065180206, "learning_rate": 1.7014701243989908e-05, "loss": 0.8113, "step": 2692 }, { "epoch": 0.27507660878447393, "grad_norm": 1.3746077997788342, "learning_rate": 1.7012343015267214e-05, "loss": 0.6559, "step": 2693 }, { "epoch": 0.2751787538304392, "grad_norm": 1.593375505766051, "learning_rate": 1.700998401903905e-05, "loss": 0.8184, "step": 2694 }, { "epoch": 0.2752808988764045, "grad_norm": 1.3793637290999876, "learning_rate": 1.70076242555636e-05, "loss": 0.7029, "step": 2695 }, { "epoch": 0.2753830439223698, "grad_norm": 1.295422466292458, "learning_rate": 1.7005263725099156e-05, "loss": 0.6995, "step": 2696 }, { "epoch": 0.275485188968335, "grad_norm": 1.3863095979657845, "learning_rate": 1.7002902427904065e-05, "loss": 0.6945, "step": 2697 }, { "epoch": 0.2755873340143003, "grad_norm": 1.438489973693737, "learning_rate": 1.7000540364236782e-05, "loss": 0.7397, "step": 2698 }, { "epoch": 0.2756894790602656, "grad_norm": 1.653902437420994, "learning_rate": 1.6998177534355834e-05, "loss": 0.7013, "step": 2699 }, { "epoch": 0.27579162410623087, "grad_norm": 1.5227313030545238, "learning_rate": 1.699581393851983e-05, "loss": 0.8071, "step": 2700 }, { "epoch": 0.2758937691521961, "grad_norm": 1.5969077189900034, "learning_rate": 1.6993449576987476e-05, "loss": 0.7325, "step": 2701 }, { "epoch": 0.2759959141981614, "grad_norm": 1.4940562249976852, "learning_rate": 1.699108445001754e-05, "loss": 0.7686, "step": 2702 }, { "epoch": 0.27609805924412667, "grad_norm": 1.3970824598559681, "learning_rate": 1.69887185578689e-05, "loss": 0.7098, "step": 2703 }, { "epoch": 0.27620020429009196, "grad_norm": 1.5457069519026894, "learning_rate": 1.6986351900800495e-05, "loss": 0.7898, "step": 2704 }, { "epoch": 0.2763023493360572, "grad_norm": 1.4859900593318567, "learning_rate": 1.6983984479071364e-05, "loss": 0.6659, "step": 2705 }, { "epoch": 0.27640449438202247, "grad_norm": 1.4855798408977536, "learning_rate": 1.6981616292940616e-05, "loss": 0.7984, "step": 2706 }, { "epoch": 0.27650663942798775, "grad_norm": 1.469142541559895, "learning_rate": 1.6979247342667457e-05, "loss": 0.8047, "step": 2707 }, { "epoch": 0.27660878447395304, "grad_norm": 1.58748545612327, "learning_rate": 1.6976877628511163e-05, "loss": 0.7323, "step": 2708 }, { "epoch": 0.27671092951991827, "grad_norm": 1.4804273531573104, "learning_rate": 1.697450715073111e-05, "loss": 0.7613, "step": 2709 }, { "epoch": 0.27681307456588355, "grad_norm": 1.4239506375111246, "learning_rate": 1.6972135909586742e-05, "loss": 0.6764, "step": 2710 }, { "epoch": 0.27691521961184884, "grad_norm": 1.5947370392060487, "learning_rate": 1.696976390533759e-05, "loss": 0.7802, "step": 2711 }, { "epoch": 0.27701736465781407, "grad_norm": 1.3708948045360412, "learning_rate": 1.696739113824328e-05, "loss": 0.7691, "step": 2712 }, { "epoch": 0.27711950970377935, "grad_norm": 1.4575766926668987, "learning_rate": 1.6965017608563507e-05, "loss": 0.7276, "step": 2713 }, { "epoch": 0.27722165474974464, "grad_norm": 1.4488488809750262, "learning_rate": 1.696264331655806e-05, "loss": 0.8104, "step": 2714 }, { "epoch": 0.2773237997957099, "grad_norm": 1.3600543908031333, "learning_rate": 1.6960268262486796e-05, "loss": 0.7655, "step": 2715 }, { "epoch": 0.27742594484167515, "grad_norm": 1.4940221924143973, "learning_rate": 1.6957892446609682e-05, "loss": 0.7464, "step": 2716 }, { "epoch": 0.27752808988764044, "grad_norm": 1.540959286939293, "learning_rate": 1.695551586918674e-05, "loss": 0.8456, "step": 2717 }, { "epoch": 0.2776302349336057, "grad_norm": 1.4490763101433775, "learning_rate": 1.6953138530478093e-05, "loss": 0.6973, "step": 2718 }, { "epoch": 0.277732379979571, "grad_norm": 1.483751867549589, "learning_rate": 1.695076043074394e-05, "loss": 0.8316, "step": 2719 }, { "epoch": 0.27783452502553624, "grad_norm": 1.4781664239902208, "learning_rate": 1.694838157024457e-05, "loss": 0.7753, "step": 2720 }, { "epoch": 0.2779366700715015, "grad_norm": 1.717470494869726, "learning_rate": 1.6946001949240347e-05, "loss": 0.7369, "step": 2721 }, { "epoch": 0.2780388151174668, "grad_norm": 1.5457589847756297, "learning_rate": 1.694362156799172e-05, "loss": 0.7292, "step": 2722 }, { "epoch": 0.2781409601634321, "grad_norm": 1.4433376967960552, "learning_rate": 1.694124042675923e-05, "loss": 0.6502, "step": 2723 }, { "epoch": 0.2782431052093973, "grad_norm": 1.5798180280547671, "learning_rate": 1.6938858525803488e-05, "loss": 0.734, "step": 2724 }, { "epoch": 0.2783452502553626, "grad_norm": 1.597946201710472, "learning_rate": 1.69364758653852e-05, "loss": 0.8071, "step": 2725 }, { "epoch": 0.2784473953013279, "grad_norm": 1.4802526806978218, "learning_rate": 1.6934092445765145e-05, "loss": 0.6918, "step": 2726 }, { "epoch": 0.2785495403472932, "grad_norm": 1.4752357688489037, "learning_rate": 1.693170826720419e-05, "loss": 0.835, "step": 2727 }, { "epoch": 0.2786516853932584, "grad_norm": 1.514618837048056, "learning_rate": 1.692932332996329e-05, "loss": 0.716, "step": 2728 }, { "epoch": 0.2787538304392237, "grad_norm": 1.393579411627894, "learning_rate": 1.6926937634303472e-05, "loss": 0.7181, "step": 2729 }, { "epoch": 0.278855975485189, "grad_norm": 1.4781618717206944, "learning_rate": 1.6924551180485858e-05, "loss": 0.7838, "step": 2730 }, { "epoch": 0.27895812053115426, "grad_norm": 1.3615241376444494, "learning_rate": 1.6922163968771644e-05, "loss": 0.7139, "step": 2731 }, { "epoch": 0.2790602655771195, "grad_norm": 1.5784290867441937, "learning_rate": 1.6919775999422108e-05, "loss": 0.8609, "step": 2732 }, { "epoch": 0.2791624106230848, "grad_norm": 1.530901473593101, "learning_rate": 1.6917387272698618e-05, "loss": 0.6067, "step": 2733 }, { "epoch": 0.27926455566905006, "grad_norm": 1.8603584015824977, "learning_rate": 1.6914997788862622e-05, "loss": 0.7243, "step": 2734 }, { "epoch": 0.27936670071501535, "grad_norm": 1.374737236816398, "learning_rate": 1.691260754817565e-05, "loss": 0.6409, "step": 2735 }, { "epoch": 0.2794688457609806, "grad_norm": 1.4264357872517461, "learning_rate": 1.691021655089932e-05, "loss": 0.7245, "step": 2736 }, { "epoch": 0.27957099080694586, "grad_norm": 1.6736113763237774, "learning_rate": 1.690782479729532e-05, "loss": 0.6807, "step": 2737 }, { "epoch": 0.27967313585291115, "grad_norm": 1.3318814019281573, "learning_rate": 1.6905432287625433e-05, "loss": 0.6771, "step": 2738 }, { "epoch": 0.2797752808988764, "grad_norm": 1.3191784959238555, "learning_rate": 1.690303902215152e-05, "loss": 0.7805, "step": 2739 }, { "epoch": 0.27987742594484166, "grad_norm": 1.3930927106326545, "learning_rate": 1.690064500113553e-05, "loss": 0.6925, "step": 2740 }, { "epoch": 0.27997957099080695, "grad_norm": 1.4133192311260205, "learning_rate": 1.6898250224839485e-05, "loss": 0.6053, "step": 2741 }, { "epoch": 0.28008171603677223, "grad_norm": 1.5948243352157303, "learning_rate": 1.6895854693525494e-05, "loss": 0.6746, "step": 2742 }, { "epoch": 0.28018386108273746, "grad_norm": 1.470731987805263, "learning_rate": 1.6893458407455752e-05, "loss": 0.7222, "step": 2743 }, { "epoch": 0.28028600612870275, "grad_norm": 1.5703532000444576, "learning_rate": 1.6891061366892533e-05, "loss": 0.7096, "step": 2744 }, { "epoch": 0.28038815117466803, "grad_norm": 1.4130981859295657, "learning_rate": 1.6888663572098198e-05, "loss": 0.7633, "step": 2745 }, { "epoch": 0.2804902962206333, "grad_norm": 1.5002437232087638, "learning_rate": 1.688626502333518e-05, "loss": 0.7772, "step": 2746 }, { "epoch": 0.28059244126659855, "grad_norm": 1.6962806075028674, "learning_rate": 1.6883865720866008e-05, "loss": 0.7236, "step": 2747 }, { "epoch": 0.28069458631256383, "grad_norm": 1.4650366414531233, "learning_rate": 1.6881465664953286e-05, "loss": 0.7184, "step": 2748 }, { "epoch": 0.2807967313585291, "grad_norm": 1.4770889921460757, "learning_rate": 1.6879064855859702e-05, "loss": 0.786, "step": 2749 }, { "epoch": 0.2808988764044944, "grad_norm": 1.5048470136593244, "learning_rate": 1.6876663293848024e-05, "loss": 0.7877, "step": 2750 }, { "epoch": 0.28100102145045963, "grad_norm": 1.4932386816512246, "learning_rate": 1.6874260979181105e-05, "loss": 0.727, "step": 2751 }, { "epoch": 0.2811031664964249, "grad_norm": 1.5782043999091189, "learning_rate": 1.6871857912121882e-05, "loss": 0.8501, "step": 2752 }, { "epoch": 0.2812053115423902, "grad_norm": 1.4413373233862905, "learning_rate": 1.6869454092933368e-05, "loss": 0.8206, "step": 2753 }, { "epoch": 0.2813074565883555, "grad_norm": 1.550982880637789, "learning_rate": 1.686704952187867e-05, "loss": 0.7871, "step": 2754 }, { "epoch": 0.2814096016343207, "grad_norm": 1.4327917308453353, "learning_rate": 1.686464419922096e-05, "loss": 0.7159, "step": 2755 }, { "epoch": 0.281511746680286, "grad_norm": 1.599050719649285, "learning_rate": 1.686223812522351e-05, "loss": 0.8334, "step": 2756 }, { "epoch": 0.2816138917262513, "grad_norm": 1.4682349846466691, "learning_rate": 1.6859831300149664e-05, "loss": 0.7098, "step": 2757 }, { "epoch": 0.28171603677221657, "grad_norm": 1.490262393051143, "learning_rate": 1.685742372426285e-05, "loss": 0.752, "step": 2758 }, { "epoch": 0.2818181818181818, "grad_norm": 1.3606932546575097, "learning_rate": 1.685501539782658e-05, "loss": 0.8104, "step": 2759 }, { "epoch": 0.2819203268641471, "grad_norm": 1.5520105288143495, "learning_rate": 1.6852606321104442e-05, "loss": 0.6835, "step": 2760 }, { "epoch": 0.28202247191011237, "grad_norm": 1.5069558708340467, "learning_rate": 1.6850196494360116e-05, "loss": 0.7498, "step": 2761 }, { "epoch": 0.28212461695607766, "grad_norm": 1.5041780553464301, "learning_rate": 1.684778591785736e-05, "loss": 0.7839, "step": 2762 }, { "epoch": 0.2822267620020429, "grad_norm": 1.607590745019505, "learning_rate": 1.6845374591860005e-05, "loss": 0.774, "step": 2763 }, { "epoch": 0.28232890704800817, "grad_norm": 1.5616933909339754, "learning_rate": 1.6842962516631985e-05, "loss": 0.7749, "step": 2764 }, { "epoch": 0.28243105209397346, "grad_norm": 1.4241140101972511, "learning_rate": 1.6840549692437295e-05, "loss": 0.7377, "step": 2765 }, { "epoch": 0.2825331971399387, "grad_norm": 1.612642885127295, "learning_rate": 1.6838136119540024e-05, "loss": 0.6413, "step": 2766 }, { "epoch": 0.28263534218590397, "grad_norm": 1.491721739930825, "learning_rate": 1.6835721798204333e-05, "loss": 0.6679, "step": 2767 }, { "epoch": 0.28273748723186926, "grad_norm": 1.5983074541336582, "learning_rate": 1.6833306728694476e-05, "loss": 0.8002, "step": 2768 }, { "epoch": 0.28283963227783454, "grad_norm": 1.7189214631960457, "learning_rate": 1.683089091127478e-05, "loss": 0.7432, "step": 2769 }, { "epoch": 0.28294177732379977, "grad_norm": 1.3779697795289045, "learning_rate": 1.6828474346209662e-05, "loss": 0.7266, "step": 2770 }, { "epoch": 0.28304392236976506, "grad_norm": 1.577932266003647, "learning_rate": 1.6826057033763618e-05, "loss": 0.8496, "step": 2771 }, { "epoch": 0.28314606741573034, "grad_norm": 1.4520125099088317, "learning_rate": 1.682363897420122e-05, "loss": 0.7282, "step": 2772 }, { "epoch": 0.2832482124616956, "grad_norm": 1.4909023084109942, "learning_rate": 1.6821220167787126e-05, "loss": 0.6602, "step": 2773 }, { "epoch": 0.28335035750766085, "grad_norm": 1.4305616272001505, "learning_rate": 1.6818800614786077e-05, "loss": 0.7517, "step": 2774 }, { "epoch": 0.28345250255362614, "grad_norm": 1.4998628821363957, "learning_rate": 1.6816380315462895e-05, "loss": 0.7301, "step": 2775 }, { "epoch": 0.2835546475995914, "grad_norm": 1.5605652540760686, "learning_rate": 1.6813959270082486e-05, "loss": 0.8421, "step": 2776 }, { "epoch": 0.2836567926455567, "grad_norm": 1.4986014312341027, "learning_rate": 1.681153747890983e-05, "loss": 0.7782, "step": 2777 }, { "epoch": 0.28375893769152194, "grad_norm": 1.5154072467185085, "learning_rate": 1.6809114942209997e-05, "loss": 0.7447, "step": 2778 }, { "epoch": 0.2838610827374872, "grad_norm": 1.399010552162622, "learning_rate": 1.6806691660248137e-05, "loss": 0.694, "step": 2779 }, { "epoch": 0.2839632277834525, "grad_norm": 1.6053481361047428, "learning_rate": 1.6804267633289476e-05, "loss": 0.5971, "step": 2780 }, { "epoch": 0.2840653728294178, "grad_norm": 1.5852817182313759, "learning_rate": 1.6801842861599326e-05, "loss": 0.8626, "step": 2781 }, { "epoch": 0.284167517875383, "grad_norm": 1.6499951491544196, "learning_rate": 1.679941734544308e-05, "loss": 0.6536, "step": 2782 }, { "epoch": 0.2842696629213483, "grad_norm": 1.510953994438809, "learning_rate": 1.6796991085086212e-05, "loss": 0.7424, "step": 2783 }, { "epoch": 0.2843718079673136, "grad_norm": 1.4623172630468966, "learning_rate": 1.6794564080794277e-05, "loss": 0.8547, "step": 2784 }, { "epoch": 0.2844739530132789, "grad_norm": 1.2099953594852613, "learning_rate": 1.6792136332832916e-05, "loss": 0.7187, "step": 2785 }, { "epoch": 0.2845760980592441, "grad_norm": 1.3752156860319218, "learning_rate": 1.6789707841467845e-05, "loss": 0.6105, "step": 2786 }, { "epoch": 0.2846782431052094, "grad_norm": 1.499864856012898, "learning_rate": 1.678727860696486e-05, "loss": 0.7716, "step": 2787 }, { "epoch": 0.2847803881511747, "grad_norm": 1.5226392424192683, "learning_rate": 1.6784848629589852e-05, "loss": 0.7838, "step": 2788 }, { "epoch": 0.28488253319713996, "grad_norm": 1.4383104587668107, "learning_rate": 1.6782417909608777e-05, "loss": 0.6304, "step": 2789 }, { "epoch": 0.2849846782431052, "grad_norm": 1.4503944562449598, "learning_rate": 1.6779986447287678e-05, "loss": 0.772, "step": 2790 }, { "epoch": 0.2850868232890705, "grad_norm": 1.3926352495480518, "learning_rate": 1.677755424289268e-05, "loss": 0.7395, "step": 2791 }, { "epoch": 0.28518896833503576, "grad_norm": 1.5801671649376114, "learning_rate": 1.6775121296689992e-05, "loss": 0.8078, "step": 2792 }, { "epoch": 0.28529111338100105, "grad_norm": 1.4833249963724287, "learning_rate": 1.6772687608945905e-05, "loss": 0.7824, "step": 2793 }, { "epoch": 0.2853932584269663, "grad_norm": 1.3790783660149224, "learning_rate": 1.6770253179926782e-05, "loss": 0.7207, "step": 2794 }, { "epoch": 0.28549540347293156, "grad_norm": 1.6019423520873124, "learning_rate": 1.676781800989907e-05, "loss": 0.7156, "step": 2795 }, { "epoch": 0.28559754851889685, "grad_norm": 1.3761208340859388, "learning_rate": 1.6765382099129307e-05, "loss": 0.6948, "step": 2796 }, { "epoch": 0.2856996935648621, "grad_norm": 1.6908249161054503, "learning_rate": 1.6762945447884105e-05, "loss": 0.9073, "step": 2797 }, { "epoch": 0.28580183861082736, "grad_norm": 1.4317232212413435, "learning_rate": 1.676050805643015e-05, "loss": 0.6277, "step": 2798 }, { "epoch": 0.28590398365679265, "grad_norm": 1.6827361390946232, "learning_rate": 1.6758069925034222e-05, "loss": 0.7688, "step": 2799 }, { "epoch": 0.28600612870275793, "grad_norm": 1.3534526114191314, "learning_rate": 1.6755631053963176e-05, "loss": 0.7132, "step": 2800 }, { "epoch": 0.28610827374872316, "grad_norm": 1.4179859355922673, "learning_rate": 1.6753191443483943e-05, "loss": 0.627, "step": 2801 }, { "epoch": 0.28621041879468845, "grad_norm": 1.4546346739533043, "learning_rate": 1.6750751093863548e-05, "loss": 0.7547, "step": 2802 }, { "epoch": 0.28631256384065373, "grad_norm": 1.434871454560573, "learning_rate": 1.6748310005369082e-05, "loss": 0.7767, "step": 2803 }, { "epoch": 0.286414708886619, "grad_norm": 1.5766168604663289, "learning_rate": 1.6745868178267724e-05, "loss": 0.7929, "step": 2804 }, { "epoch": 0.28651685393258425, "grad_norm": 1.5406590020667785, "learning_rate": 1.674342561282674e-05, "loss": 0.7354, "step": 2805 }, { "epoch": 0.28661899897854953, "grad_norm": 1.5162257771877374, "learning_rate": 1.674098230931346e-05, "loss": 0.848, "step": 2806 }, { "epoch": 0.2867211440245148, "grad_norm": 1.616532947570184, "learning_rate": 1.6738538267995315e-05, "loss": 0.7164, "step": 2807 }, { "epoch": 0.2868232890704801, "grad_norm": 1.2143075154378131, "learning_rate": 1.67360934891398e-05, "loss": 0.6611, "step": 2808 }, { "epoch": 0.28692543411644533, "grad_norm": 1.514482053352889, "learning_rate": 1.6733647973014503e-05, "loss": 0.6677, "step": 2809 }, { "epoch": 0.2870275791624106, "grad_norm": 1.4564734791511453, "learning_rate": 1.6731201719887087e-05, "loss": 0.7029, "step": 2810 }, { "epoch": 0.2871297242083759, "grad_norm": 1.5047961948856832, "learning_rate": 1.672875473002529e-05, "loss": 0.7842, "step": 2811 }, { "epoch": 0.2872318692543412, "grad_norm": 1.5307217867772727, "learning_rate": 1.672630700369694e-05, "loss": 0.7329, "step": 2812 }, { "epoch": 0.2873340143003064, "grad_norm": 1.3558646712150189, "learning_rate": 1.6723858541169946e-05, "loss": 0.793, "step": 2813 }, { "epoch": 0.2874361593462717, "grad_norm": 1.4610971768613235, "learning_rate": 1.672140934271229e-05, "loss": 0.748, "step": 2814 }, { "epoch": 0.287538304392237, "grad_norm": 1.5049190345467856, "learning_rate": 1.6718959408592036e-05, "loss": 0.7263, "step": 2815 }, { "epoch": 0.2876404494382023, "grad_norm": 1.419989670804442, "learning_rate": 1.6716508739077335e-05, "loss": 0.7889, "step": 2816 }, { "epoch": 0.2877425944841675, "grad_norm": 1.3592013552854598, "learning_rate": 1.6714057334436416e-05, "loss": 0.6861, "step": 2817 }, { "epoch": 0.2878447395301328, "grad_norm": 1.56967823676569, "learning_rate": 1.671160519493758e-05, "loss": 0.7581, "step": 2818 }, { "epoch": 0.28794688457609807, "grad_norm": 1.5269493584796219, "learning_rate": 1.670915232084922e-05, "loss": 0.7908, "step": 2819 }, { "epoch": 0.28804902962206336, "grad_norm": 1.5289571934557498, "learning_rate": 1.6706698712439807e-05, "loss": 0.7294, "step": 2820 }, { "epoch": 0.2881511746680286, "grad_norm": 1.8668822609717257, "learning_rate": 1.6704244369977885e-05, "loss": 0.8379, "step": 2821 }, { "epoch": 0.28825331971399387, "grad_norm": 1.5497375525912822, "learning_rate": 1.6701789293732083e-05, "loss": 0.7145, "step": 2822 }, { "epoch": 0.28835546475995916, "grad_norm": 1.4010942243117177, "learning_rate": 1.669933348397111e-05, "loss": 0.6806, "step": 2823 }, { "epoch": 0.2884576098059244, "grad_norm": 1.4415451416714418, "learning_rate": 1.6696876940963765e-05, "loss": 0.6197, "step": 2824 }, { "epoch": 0.28855975485188967, "grad_norm": 1.3158274213835415, "learning_rate": 1.6694419664978912e-05, "loss": 0.6913, "step": 2825 }, { "epoch": 0.28866189989785496, "grad_norm": 1.5253776230452414, "learning_rate": 1.66919616562855e-05, "loss": 0.8631, "step": 2826 }, { "epoch": 0.28876404494382024, "grad_norm": 1.4430692042349824, "learning_rate": 1.6689502915152562e-05, "loss": 0.7671, "step": 2827 }, { "epoch": 0.28886618998978547, "grad_norm": 1.6100101822440054, "learning_rate": 1.6687043441849206e-05, "loss": 0.7941, "step": 2828 }, { "epoch": 0.28896833503575076, "grad_norm": 1.5831921937611138, "learning_rate": 1.6684583236644627e-05, "loss": 0.7736, "step": 2829 }, { "epoch": 0.28907048008171604, "grad_norm": 1.5337898189615202, "learning_rate": 1.6682122299808092e-05, "loss": 0.7236, "step": 2830 }, { "epoch": 0.2891726251276813, "grad_norm": 1.48549946348166, "learning_rate": 1.6679660631608955e-05, "loss": 0.7448, "step": 2831 }, { "epoch": 0.28927477017364656, "grad_norm": 1.5325980198451972, "learning_rate": 1.6677198232316646e-05, "loss": 0.8228, "step": 2832 }, { "epoch": 0.28937691521961184, "grad_norm": 1.485924272785635, "learning_rate": 1.6674735102200675e-05, "loss": 0.673, "step": 2833 }, { "epoch": 0.2894790602655771, "grad_norm": 1.3985405167411264, "learning_rate": 1.667227124153064e-05, "loss": 0.8041, "step": 2834 }, { "epoch": 0.2895812053115424, "grad_norm": 1.4530551162690877, "learning_rate": 1.66698066505762e-05, "loss": 0.693, "step": 2835 }, { "epoch": 0.28968335035750764, "grad_norm": 1.4663825897174396, "learning_rate": 1.6667341329607118e-05, "loss": 0.6589, "step": 2836 }, { "epoch": 0.2897854954034729, "grad_norm": 1.600620126936236, "learning_rate": 1.6664875278893216e-05, "loss": 0.7195, "step": 2837 }, { "epoch": 0.2898876404494382, "grad_norm": 1.4188925545365707, "learning_rate": 1.666240849870441e-05, "loss": 0.6847, "step": 2838 }, { "epoch": 0.2899897854954035, "grad_norm": 1.6044867972793402, "learning_rate": 1.665994098931069e-05, "loss": 0.8431, "step": 2839 }, { "epoch": 0.2900919305413687, "grad_norm": 1.4330934703744602, "learning_rate": 1.6657472750982126e-05, "loss": 0.765, "step": 2840 }, { "epoch": 0.290194075587334, "grad_norm": 1.4179666155909492, "learning_rate": 1.6655003783988868e-05, "loss": 0.7772, "step": 2841 }, { "epoch": 0.2902962206332993, "grad_norm": 1.504545090179601, "learning_rate": 1.6652534088601147e-05, "loss": 0.7939, "step": 2842 }, { "epoch": 0.2903983656792646, "grad_norm": 1.4304168775649606, "learning_rate": 1.6650063665089268e-05, "loss": 0.7651, "step": 2843 }, { "epoch": 0.2905005107252298, "grad_norm": 1.430929012414795, "learning_rate": 1.6647592513723627e-05, "loss": 0.6768, "step": 2844 }, { "epoch": 0.2906026557711951, "grad_norm": 1.2893322075298452, "learning_rate": 1.6645120634774692e-05, "loss": 0.6647, "step": 2845 }, { "epoch": 0.2907048008171604, "grad_norm": 1.5628170490246414, "learning_rate": 1.664264802851301e-05, "loss": 0.7141, "step": 2846 }, { "epoch": 0.29080694586312567, "grad_norm": 1.346970084072366, "learning_rate": 1.664017469520921e-05, "loss": 0.6637, "step": 2847 }, { "epoch": 0.2909090909090909, "grad_norm": 1.4616828889998055, "learning_rate": 1.6637700635133996e-05, "loss": 0.7907, "step": 2848 }, { "epoch": 0.2910112359550562, "grad_norm": 1.475142222493101, "learning_rate": 1.663522584855816e-05, "loss": 0.7789, "step": 2849 }, { "epoch": 0.29111338100102147, "grad_norm": 1.4167016524832552, "learning_rate": 1.663275033575257e-05, "loss": 0.652, "step": 2850 }, { "epoch": 0.2912155260469867, "grad_norm": 1.4353201818073666, "learning_rate": 1.6630274096988167e-05, "loss": 0.7383, "step": 2851 }, { "epoch": 0.291317671092952, "grad_norm": 1.5940225832741983, "learning_rate": 1.6627797132535983e-05, "loss": 0.779, "step": 2852 }, { "epoch": 0.29141981613891726, "grad_norm": 1.5541596991353728, "learning_rate": 1.662531944266712e-05, "loss": 0.7335, "step": 2853 }, { "epoch": 0.29152196118488255, "grad_norm": 1.5354036339263717, "learning_rate": 1.6622841027652764e-05, "loss": 0.8779, "step": 2854 }, { "epoch": 0.2916241062308478, "grad_norm": 1.4311257985244856, "learning_rate": 1.6620361887764178e-05, "loss": 0.6242, "step": 2855 }, { "epoch": 0.29172625127681306, "grad_norm": 1.5221237709693678, "learning_rate": 1.6617882023272708e-05, "loss": 0.8523, "step": 2856 }, { "epoch": 0.29182839632277835, "grad_norm": 1.5549215781204933, "learning_rate": 1.6615401434449775e-05, "loss": 0.7678, "step": 2857 }, { "epoch": 0.29193054136874363, "grad_norm": 1.4353851932861221, "learning_rate": 1.6612920121566876e-05, "loss": 0.7897, "step": 2858 }, { "epoch": 0.29203268641470886, "grad_norm": 1.4366036317612432, "learning_rate": 1.6610438084895602e-05, "loss": 0.6792, "step": 2859 }, { "epoch": 0.29213483146067415, "grad_norm": 1.3971259359523729, "learning_rate": 1.6607955324707608e-05, "loss": 0.7179, "step": 2860 }, { "epoch": 0.29223697650663943, "grad_norm": 1.5717274838097084, "learning_rate": 1.6605471841274634e-05, "loss": 0.7502, "step": 2861 }, { "epoch": 0.2923391215526047, "grad_norm": 1.5162169985543725, "learning_rate": 1.66029876348685e-05, "loss": 0.8161, "step": 2862 }, { "epoch": 0.29244126659856995, "grad_norm": 1.5004810658208525, "learning_rate": 1.6600502705761103e-05, "loss": 0.7393, "step": 2863 }, { "epoch": 0.29254341164453523, "grad_norm": 1.3031910464098717, "learning_rate": 1.659801705422442e-05, "loss": 0.6812, "step": 2864 }, { "epoch": 0.2926455566905005, "grad_norm": 1.3906044798076644, "learning_rate": 1.6595530680530504e-05, "loss": 0.7528, "step": 2865 }, { "epoch": 0.2927477017364658, "grad_norm": 1.4079983387515254, "learning_rate": 1.6593043584951497e-05, "loss": 0.8166, "step": 2866 }, { "epoch": 0.29284984678243103, "grad_norm": 1.5311361489857431, "learning_rate": 1.659055576775961e-05, "loss": 0.6136, "step": 2867 }, { "epoch": 0.2929519918283963, "grad_norm": 1.4206634724934915, "learning_rate": 1.6588067229227137e-05, "loss": 0.6952, "step": 2868 }, { "epoch": 0.2930541368743616, "grad_norm": 1.6360590197487377, "learning_rate": 1.658557796962645e-05, "loss": 0.7068, "step": 2869 }, { "epoch": 0.2931562819203269, "grad_norm": 1.3888543748557232, "learning_rate": 1.6583087989229997e-05, "loss": 0.7586, "step": 2870 }, { "epoch": 0.2932584269662921, "grad_norm": 1.5474050189858701, "learning_rate": 1.658059728831031e-05, "loss": 0.7932, "step": 2871 }, { "epoch": 0.2933605720122574, "grad_norm": 1.3922608176119244, "learning_rate": 1.657810586714e-05, "loss": 0.6973, "step": 2872 }, { "epoch": 0.2934627170582227, "grad_norm": 1.4537355441413866, "learning_rate": 1.6575613725991753e-05, "loss": 0.8902, "step": 2873 }, { "epoch": 0.293564862104188, "grad_norm": 1.574747421874692, "learning_rate": 1.657312086513833e-05, "loss": 0.8187, "step": 2874 }, { "epoch": 0.2936670071501532, "grad_norm": 1.5186059133226095, "learning_rate": 1.657062728485259e-05, "loss": 0.8286, "step": 2875 }, { "epoch": 0.2937691521961185, "grad_norm": 1.513854892585937, "learning_rate": 1.6568132985407444e-05, "loss": 0.8191, "step": 2876 }, { "epoch": 0.2938712972420838, "grad_norm": 1.517462268187125, "learning_rate": 1.65656379670759e-05, "loss": 0.7219, "step": 2877 }, { "epoch": 0.293973442288049, "grad_norm": 1.3258942406376375, "learning_rate": 1.656314223013104e-05, "loss": 0.7145, "step": 2878 }, { "epoch": 0.2940755873340143, "grad_norm": 1.3961400543503997, "learning_rate": 1.656064577484602e-05, "loss": 0.6767, "step": 2879 }, { "epoch": 0.2941777323799796, "grad_norm": 1.465867172098062, "learning_rate": 1.6558148601494082e-05, "loss": 0.7762, "step": 2880 }, { "epoch": 0.29427987742594486, "grad_norm": 1.330529008426249, "learning_rate": 1.6555650710348543e-05, "loss": 0.7321, "step": 2881 }, { "epoch": 0.2943820224719101, "grad_norm": 1.419509622184691, "learning_rate": 1.6553152101682797e-05, "loss": 0.6949, "step": 2882 }, { "epoch": 0.2944841675178754, "grad_norm": 1.4367340656489234, "learning_rate": 1.655065277577032e-05, "loss": 0.7875, "step": 2883 }, { "epoch": 0.29458631256384066, "grad_norm": 1.3834987931234641, "learning_rate": 1.6548152732884664e-05, "loss": 0.8609, "step": 2884 }, { "epoch": 0.29468845760980594, "grad_norm": 1.3949974541994266, "learning_rate": 1.654565197329946e-05, "loss": 0.7451, "step": 2885 }, { "epoch": 0.2947906026557712, "grad_norm": 1.3476796189888747, "learning_rate": 1.6543150497288426e-05, "loss": 0.7388, "step": 2886 }, { "epoch": 0.29489274770173646, "grad_norm": 1.4456774935562005, "learning_rate": 1.6540648305125334e-05, "loss": 0.674, "step": 2887 }, { "epoch": 0.29499489274770174, "grad_norm": 1.4494321871032114, "learning_rate": 1.653814539708406e-05, "loss": 0.7845, "step": 2888 }, { "epoch": 0.295097037793667, "grad_norm": 1.4666687648116359, "learning_rate": 1.653564177343855e-05, "loss": 0.7289, "step": 2889 }, { "epoch": 0.29519918283963226, "grad_norm": 1.470239545966603, "learning_rate": 1.6533137434462827e-05, "loss": 0.7662, "step": 2890 }, { "epoch": 0.29530132788559754, "grad_norm": 1.5251565658143804, "learning_rate": 1.6530632380430992e-05, "loss": 0.7309, "step": 2891 }, { "epoch": 0.2954034729315628, "grad_norm": 1.5111177474206396, "learning_rate": 1.652812661161722e-05, "loss": 0.7451, "step": 2892 }, { "epoch": 0.2955056179775281, "grad_norm": 1.415504315145862, "learning_rate": 1.6525620128295772e-05, "loss": 0.764, "step": 2893 }, { "epoch": 0.29560776302349334, "grad_norm": 1.4078136694284162, "learning_rate": 1.652311293074099e-05, "loss": 0.8005, "step": 2894 }, { "epoch": 0.2957099080694586, "grad_norm": 1.584088411795648, "learning_rate": 1.6520605019227275e-05, "loss": 0.631, "step": 2895 }, { "epoch": 0.2958120531154239, "grad_norm": 1.5858650341567242, "learning_rate": 1.6518096394029132e-05, "loss": 0.7102, "step": 2896 }, { "epoch": 0.2959141981613892, "grad_norm": 1.5211052965394134, "learning_rate": 1.6515587055421128e-05, "loss": 0.7589, "step": 2897 }, { "epoch": 0.2960163432073544, "grad_norm": 1.6303217709368616, "learning_rate": 1.6513077003677912e-05, "loss": 0.8261, "step": 2898 }, { "epoch": 0.2961184882533197, "grad_norm": 1.414917464994601, "learning_rate": 1.651056623907421e-05, "loss": 0.6939, "step": 2899 }, { "epoch": 0.296220633299285, "grad_norm": 1.4411507421356449, "learning_rate": 1.6508054761884828e-05, "loss": 0.7422, "step": 2900 }, { "epoch": 0.2963227783452503, "grad_norm": 1.4247201475106037, "learning_rate": 1.6505542572384643e-05, "loss": 0.6344, "step": 2901 }, { "epoch": 0.2964249233912155, "grad_norm": 1.5294782380518392, "learning_rate": 1.6503029670848624e-05, "loss": 0.7614, "step": 2902 }, { "epoch": 0.2965270684371808, "grad_norm": 1.6520815281035013, "learning_rate": 1.6500516057551802e-05, "loss": 0.761, "step": 2903 }, { "epoch": 0.2966292134831461, "grad_norm": 1.4375623372915718, "learning_rate": 1.6498001732769303e-05, "loss": 0.715, "step": 2904 }, { "epoch": 0.2967313585291113, "grad_norm": 1.5830050638800264, "learning_rate": 1.6495486696776312e-05, "loss": 0.7341, "step": 2905 }, { "epoch": 0.2968335035750766, "grad_norm": 1.5135091164641334, "learning_rate": 1.6492970949848108e-05, "loss": 0.7902, "step": 2906 }, { "epoch": 0.2969356486210419, "grad_norm": 1.3243270245147984, "learning_rate": 1.6490454492260036e-05, "loss": 0.6196, "step": 2907 }, { "epoch": 0.29703779366700717, "grad_norm": 1.689875193454914, "learning_rate": 1.648793732428753e-05, "loss": 0.8381, "step": 2908 }, { "epoch": 0.2971399387129724, "grad_norm": 1.4909728774563242, "learning_rate": 1.648541944620609e-05, "loss": 0.753, "step": 2909 }, { "epoch": 0.2972420837589377, "grad_norm": 1.3620683061481003, "learning_rate": 1.64829008582913e-05, "loss": 0.7772, "step": 2910 }, { "epoch": 0.29734422880490297, "grad_norm": 1.3653853428049207, "learning_rate": 1.6480381560818824e-05, "loss": 0.6945, "step": 2911 }, { "epoch": 0.29744637385086825, "grad_norm": 1.3394932694923543, "learning_rate": 1.6477861554064397e-05, "loss": 0.6226, "step": 2912 }, { "epoch": 0.2975485188968335, "grad_norm": 1.4128449121502766, "learning_rate": 1.6475340838303843e-05, "loss": 0.7495, "step": 2913 }, { "epoch": 0.29765066394279877, "grad_norm": 1.480706003822705, "learning_rate": 1.6472819413813045e-05, "loss": 0.7392, "step": 2914 }, { "epoch": 0.29775280898876405, "grad_norm": 1.62549317355222, "learning_rate": 1.6470297280867983e-05, "loss": 0.7241, "step": 2915 }, { "epoch": 0.29785495403472934, "grad_norm": 1.3821823662257937, "learning_rate": 1.6467774439744704e-05, "loss": 0.7084, "step": 2916 }, { "epoch": 0.29795709908069457, "grad_norm": 1.531755530094324, "learning_rate": 1.6465250890719335e-05, "loss": 0.7546, "step": 2917 }, { "epoch": 0.29805924412665985, "grad_norm": 1.5087001997849436, "learning_rate": 1.6462726634068077e-05, "loss": 0.7577, "step": 2918 }, { "epoch": 0.29816138917262514, "grad_norm": 1.5390813658444604, "learning_rate": 1.646020167006721e-05, "loss": 0.6994, "step": 2919 }, { "epoch": 0.2982635342185904, "grad_norm": 1.3335420469670134, "learning_rate": 1.6457675998993102e-05, "loss": 0.6778, "step": 2920 }, { "epoch": 0.29836567926455565, "grad_norm": 1.4470621367305514, "learning_rate": 1.645514962112218e-05, "loss": 0.7507, "step": 2921 }, { "epoch": 0.29846782431052093, "grad_norm": 1.4432926123560361, "learning_rate": 1.6452622536730968e-05, "loss": 0.7649, "step": 2922 }, { "epoch": 0.2985699693564862, "grad_norm": 1.4303968036103398, "learning_rate": 1.6450094746096043e-05, "loss": 0.6432, "step": 2923 }, { "epoch": 0.2986721144024515, "grad_norm": 1.6533297432775904, "learning_rate": 1.644756624949408e-05, "loss": 0.7014, "step": 2924 }, { "epoch": 0.29877425944841673, "grad_norm": 1.4914067670574191, "learning_rate": 1.644503704720183e-05, "loss": 0.7978, "step": 2925 }, { "epoch": 0.298876404494382, "grad_norm": 1.5182971302416068, "learning_rate": 1.644250713949611e-05, "loss": 0.7905, "step": 2926 }, { "epoch": 0.2989785495403473, "grad_norm": 1.5203266543109104, "learning_rate": 1.643997652665382e-05, "loss": 0.6518, "step": 2927 }, { "epoch": 0.2990806945863126, "grad_norm": 1.3971185682428557, "learning_rate": 1.643744520895194e-05, "loss": 0.8061, "step": 2928 }, { "epoch": 0.2991828396322778, "grad_norm": 1.487988272554379, "learning_rate": 1.643491318666752e-05, "loss": 0.7341, "step": 2929 }, { "epoch": 0.2992849846782431, "grad_norm": 1.4657399898270884, "learning_rate": 1.64323804600777e-05, "loss": 0.7759, "step": 2930 }, { "epoch": 0.2993871297242084, "grad_norm": 1.5206389787821928, "learning_rate": 1.6429847029459676e-05, "loss": 0.7856, "step": 2931 }, { "epoch": 0.2994892747701736, "grad_norm": 1.515864313254794, "learning_rate": 1.6427312895090744e-05, "loss": 0.68, "step": 2932 }, { "epoch": 0.2995914198161389, "grad_norm": 1.5513670496790655, "learning_rate": 1.6424778057248262e-05, "loss": 0.7494, "step": 2933 }, { "epoch": 0.2996935648621042, "grad_norm": 3.664115278414078, "learning_rate": 1.6422242516209673e-05, "loss": 0.7683, "step": 2934 }, { "epoch": 0.2997957099080695, "grad_norm": 1.4280942068603537, "learning_rate": 1.641970627225249e-05, "loss": 0.7742, "step": 2935 }, { "epoch": 0.2998978549540347, "grad_norm": 1.5342558870774012, "learning_rate": 1.6417169325654306e-05, "loss": 0.617, "step": 2936 }, { "epoch": 0.3, "grad_norm": 1.4298730766335166, "learning_rate": 1.6414631676692794e-05, "loss": 0.7013, "step": 2937 }, { "epoch": 0.3001021450459653, "grad_norm": 1.5346043016438722, "learning_rate": 1.6412093325645702e-05, "loss": 0.7234, "step": 2938 }, { "epoch": 0.30020429009193056, "grad_norm": 1.4712538220212281, "learning_rate": 1.640955427279085e-05, "loss": 0.652, "step": 2939 }, { "epoch": 0.3003064351378958, "grad_norm": 1.5475529667094963, "learning_rate": 1.6407014518406145e-05, "loss": 0.6731, "step": 2940 }, { "epoch": 0.3004085801838611, "grad_norm": 1.5750543388915255, "learning_rate": 1.6404474062769557e-05, "loss": 0.7743, "step": 2941 }, { "epoch": 0.30051072522982636, "grad_norm": 1.5187609471980226, "learning_rate": 1.640193290615915e-05, "loss": 0.8586, "step": 2942 }, { "epoch": 0.30061287027579164, "grad_norm": 1.3686523624579994, "learning_rate": 1.639939104885305e-05, "loss": 0.7381, "step": 2943 }, { "epoch": 0.3007150153217569, "grad_norm": 1.5309711593386164, "learning_rate": 1.6396848491129462e-05, "loss": 0.8211, "step": 2944 }, { "epoch": 0.30081716036772216, "grad_norm": 1.4832227316100668, "learning_rate": 1.6394305233266674e-05, "loss": 0.8156, "step": 2945 }, { "epoch": 0.30091930541368744, "grad_norm": 1.4688256340602475, "learning_rate": 1.639176127554305e-05, "loss": 0.7078, "step": 2946 }, { "epoch": 0.30102145045965273, "grad_norm": 1.610395113232806, "learning_rate": 1.638921661823702e-05, "loss": 0.7888, "step": 2947 }, { "epoch": 0.30112359550561796, "grad_norm": 1.3610819588055967, "learning_rate": 1.638667126162711e-05, "loss": 0.7219, "step": 2948 }, { "epoch": 0.30122574055158324, "grad_norm": 1.4864985365765986, "learning_rate": 1.63841252059919e-05, "loss": 0.6854, "step": 2949 }, { "epoch": 0.30132788559754853, "grad_norm": 1.4277915296352843, "learning_rate": 1.6381578451610062e-05, "loss": 0.7831, "step": 2950 }, { "epoch": 0.3014300306435138, "grad_norm": 1.526164367582635, "learning_rate": 1.637903099876034e-05, "loss": 0.7593, "step": 2951 }, { "epoch": 0.30153217568947904, "grad_norm": 2.09754533939028, "learning_rate": 1.6376482847721553e-05, "loss": 0.6572, "step": 2952 }, { "epoch": 0.30163432073544433, "grad_norm": 1.4758808431190351, "learning_rate": 1.6373933998772597e-05, "loss": 0.6381, "step": 2953 }, { "epoch": 0.3017364657814096, "grad_norm": 1.4599549892955195, "learning_rate": 1.637138445219245e-05, "loss": 0.7031, "step": 2954 }, { "epoch": 0.3018386108273749, "grad_norm": 1.6082262121873285, "learning_rate": 1.6368834208260155e-05, "loss": 0.6932, "step": 2955 }, { "epoch": 0.3019407558733401, "grad_norm": 1.5667207671001133, "learning_rate": 1.6366283267254842e-05, "loss": 0.6257, "step": 2956 }, { "epoch": 0.3020429009193054, "grad_norm": 1.6467063384506067, "learning_rate": 1.636373162945571e-05, "loss": 0.8482, "step": 2957 }, { "epoch": 0.3021450459652707, "grad_norm": 1.4985411284293428, "learning_rate": 1.636117929514205e-05, "loss": 0.7767, "step": 2958 }, { "epoch": 0.302247191011236, "grad_norm": 1.6396412584855686, "learning_rate": 1.6358626264593195e-05, "loss": 0.6589, "step": 2959 }, { "epoch": 0.3023493360572012, "grad_norm": 1.343234381827057, "learning_rate": 1.635607253808859e-05, "loss": 0.6706, "step": 2960 }, { "epoch": 0.3024514811031665, "grad_norm": 1.5207641040280568, "learning_rate": 1.6353518115907742e-05, "loss": 0.7944, "step": 2961 }, { "epoch": 0.3025536261491318, "grad_norm": 1.4443279280110644, "learning_rate": 1.6350962998330232e-05, "loss": 0.6582, "step": 2962 }, { "epoch": 0.302655771195097, "grad_norm": 1.418962614044098, "learning_rate": 1.6348407185635714e-05, "loss": 0.783, "step": 2963 }, { "epoch": 0.3027579162410623, "grad_norm": 1.5383864193453312, "learning_rate": 1.634585067810393e-05, "loss": 0.7429, "step": 2964 }, { "epoch": 0.3028600612870276, "grad_norm": 1.4980263988136335, "learning_rate": 1.634329347601469e-05, "loss": 0.7364, "step": 2965 }, { "epoch": 0.30296220633299287, "grad_norm": 1.5604354609325344, "learning_rate": 1.634073557964788e-05, "loss": 0.8035, "step": 2966 }, { "epoch": 0.3030643513789581, "grad_norm": 1.4885223804742989, "learning_rate": 1.6338176989283464e-05, "loss": 0.6796, "step": 2967 }, { "epoch": 0.3031664964249234, "grad_norm": 1.4462283813799703, "learning_rate": 1.633561770520148e-05, "loss": 0.7429, "step": 2968 }, { "epoch": 0.30326864147088867, "grad_norm": 1.4203512695129104, "learning_rate": 1.6333057727682048e-05, "loss": 0.7339, "step": 2969 }, { "epoch": 0.30337078651685395, "grad_norm": 1.637186513350223, "learning_rate": 1.6330497057005355e-05, "loss": 0.8423, "step": 2970 }, { "epoch": 0.3034729315628192, "grad_norm": 1.5181812093359124, "learning_rate": 1.632793569345167e-05, "loss": 0.8474, "step": 2971 }, { "epoch": 0.30357507660878447, "grad_norm": 1.6511547667884172, "learning_rate": 1.6325373637301332e-05, "loss": 0.7353, "step": 2972 }, { "epoch": 0.30367722165474975, "grad_norm": 1.4546755011640309, "learning_rate": 1.6322810888834765e-05, "loss": 0.7728, "step": 2973 }, { "epoch": 0.30377936670071504, "grad_norm": 1.3495997974083354, "learning_rate": 1.6320247448332464e-05, "loss": 0.7342, "step": 2974 }, { "epoch": 0.30388151174668027, "grad_norm": 1.6508505696324733, "learning_rate": 1.631768331607499e-05, "loss": 0.7263, "step": 2975 }, { "epoch": 0.30398365679264555, "grad_norm": 1.4621332829880127, "learning_rate": 1.6315118492343e-05, "loss": 0.6282, "step": 2976 }, { "epoch": 0.30408580183861084, "grad_norm": 1.3829854064490696, "learning_rate": 1.631255297741721e-05, "loss": 0.6467, "step": 2977 }, { "epoch": 0.3041879468845761, "grad_norm": 1.404607283843521, "learning_rate": 1.6309986771578415e-05, "loss": 0.626, "step": 2978 }, { "epoch": 0.30429009193054135, "grad_norm": 1.3418539116756683, "learning_rate": 1.6307419875107496e-05, "loss": 0.7031, "step": 2979 }, { "epoch": 0.30439223697650664, "grad_norm": 1.4783172199146684, "learning_rate": 1.6304852288285393e-05, "loss": 0.7902, "step": 2980 }, { "epoch": 0.3044943820224719, "grad_norm": 1.3635804445266602, "learning_rate": 1.6302284011393136e-05, "loss": 0.674, "step": 2981 }, { "epoch": 0.3045965270684372, "grad_norm": 1.4085882732638786, "learning_rate": 1.629971504471182e-05, "loss": 0.7481, "step": 2982 }, { "epoch": 0.30469867211440244, "grad_norm": 1.6269960130940964, "learning_rate": 1.6297145388522625e-05, "loss": 0.7944, "step": 2983 }, { "epoch": 0.3048008171603677, "grad_norm": 1.3859331620215272, "learning_rate": 1.6294575043106794e-05, "loss": 0.7034, "step": 2984 }, { "epoch": 0.304902962206333, "grad_norm": 1.5176137032442059, "learning_rate": 1.629200400874566e-05, "loss": 0.86, "step": 2985 }, { "epoch": 0.3050051072522983, "grad_norm": 1.6833596373373285, "learning_rate": 1.6289432285720623e-05, "loss": 0.8431, "step": 2986 }, { "epoch": 0.3051072522982635, "grad_norm": 1.5367363345133567, "learning_rate": 1.628685987431316e-05, "loss": 0.741, "step": 2987 }, { "epoch": 0.3052093973442288, "grad_norm": 1.5686612982424375, "learning_rate": 1.628428677480482e-05, "loss": 0.7529, "step": 2988 }, { "epoch": 0.3053115423901941, "grad_norm": 1.510191242769554, "learning_rate": 1.628171298747723e-05, "loss": 0.7072, "step": 2989 }, { "epoch": 0.3054136874361593, "grad_norm": 1.5263761390227724, "learning_rate": 1.62791385126121e-05, "loss": 0.6769, "step": 2990 }, { "epoch": 0.3055158324821246, "grad_norm": 1.5666469252613624, "learning_rate": 1.62765633504912e-05, "loss": 0.7939, "step": 2991 }, { "epoch": 0.3056179775280899, "grad_norm": 1.4969327017590897, "learning_rate": 1.6273987501396388e-05, "loss": 0.7237, "step": 2992 }, { "epoch": 0.3057201225740552, "grad_norm": 1.4968080794642804, "learning_rate": 1.627141096560959e-05, "loss": 0.7024, "step": 2993 }, { "epoch": 0.3058222676200204, "grad_norm": 1.3776636117539025, "learning_rate": 1.626883374341281e-05, "loss": 0.7211, "step": 2994 }, { "epoch": 0.3059244126659857, "grad_norm": 1.5870745275850817, "learning_rate": 1.6266255835088123e-05, "loss": 0.7908, "step": 2995 }, { "epoch": 0.306026557711951, "grad_norm": 1.6836189174380305, "learning_rate": 1.626367724091769e-05, "loss": 0.7055, "step": 2996 }, { "epoch": 0.30612870275791626, "grad_norm": 1.3947974755432426, "learning_rate": 1.6261097961183736e-05, "loss": 0.7129, "step": 2997 }, { "epoch": 0.3062308478038815, "grad_norm": 1.3550227539447222, "learning_rate": 1.6258517996168565e-05, "loss": 0.7175, "step": 2998 }, { "epoch": 0.3063329928498468, "grad_norm": 1.426185052638176, "learning_rate": 1.6255937346154555e-05, "loss": 0.8229, "step": 2999 }, { "epoch": 0.30643513789581206, "grad_norm": 1.512170598824874, "learning_rate": 1.625335601142416e-05, "loss": 0.7188, "step": 3000 }, { "epoch": 0.30653728294177734, "grad_norm": 1.6163562700855492, "learning_rate": 1.625077399225991e-05, "loss": 0.7765, "step": 3001 }, { "epoch": 0.3066394279877426, "grad_norm": 1.3935909309383394, "learning_rate": 1.624819128894441e-05, "loss": 0.723, "step": 3002 }, { "epoch": 0.30674157303370786, "grad_norm": 1.4123189558202773, "learning_rate": 1.6245607901760334e-05, "loss": 0.6323, "step": 3003 }, { "epoch": 0.30684371807967314, "grad_norm": 1.4388189324756848, "learning_rate": 1.6243023830990438e-05, "loss": 0.6966, "step": 3004 }, { "epoch": 0.30694586312563843, "grad_norm": 1.4993678765796332, "learning_rate": 1.6240439076917552e-05, "loss": 0.8046, "step": 3005 }, { "epoch": 0.30704800817160366, "grad_norm": 1.4645296271586108, "learning_rate": 1.6237853639824576e-05, "loss": 0.7098, "step": 3006 }, { "epoch": 0.30715015321756894, "grad_norm": 1.4900076437308027, "learning_rate": 1.6235267519994485e-05, "loss": 0.6922, "step": 3007 }, { "epoch": 0.30725229826353423, "grad_norm": 1.4743236454455042, "learning_rate": 1.6232680717710342e-05, "loss": 0.7221, "step": 3008 }, { "epoch": 0.3073544433094995, "grad_norm": 1.4861671267874368, "learning_rate": 1.6230093233255265e-05, "loss": 0.7873, "step": 3009 }, { "epoch": 0.30745658835546474, "grad_norm": 1.5262693390613993, "learning_rate": 1.622750506691246e-05, "loss": 0.7265, "step": 3010 }, { "epoch": 0.30755873340143003, "grad_norm": 1.332127647759973, "learning_rate": 1.6224916218965198e-05, "loss": 0.6794, "step": 3011 }, { "epoch": 0.3076608784473953, "grad_norm": 1.6221497597829193, "learning_rate": 1.6222326689696838e-05, "loss": 0.8461, "step": 3012 }, { "epoch": 0.3077630234933606, "grad_norm": 1.40873259258424, "learning_rate": 1.62197364793908e-05, "loss": 0.7601, "step": 3013 }, { "epoch": 0.30786516853932583, "grad_norm": 1.326302968075467, "learning_rate": 1.6217145588330587e-05, "loss": 0.663, "step": 3014 }, { "epoch": 0.3079673135852911, "grad_norm": 1.4398039992373803, "learning_rate": 1.621455401679977e-05, "loss": 0.7456, "step": 3015 }, { "epoch": 0.3080694586312564, "grad_norm": 1.3998465509450893, "learning_rate": 1.6211961765082e-05, "loss": 0.697, "step": 3016 }, { "epoch": 0.30817160367722163, "grad_norm": 1.4994353348988576, "learning_rate": 1.6209368833461006e-05, "loss": 0.7334, "step": 3017 }, { "epoch": 0.3082737487231869, "grad_norm": 1.5527164831227749, "learning_rate": 1.6206775222220578e-05, "loss": 0.7359, "step": 3018 }, { "epoch": 0.3083758937691522, "grad_norm": 1.3536278539386644, "learning_rate": 1.620418093164459e-05, "loss": 0.652, "step": 3019 }, { "epoch": 0.3084780388151175, "grad_norm": 1.3253847080521448, "learning_rate": 1.6201585962016995e-05, "loss": 0.7919, "step": 3020 }, { "epoch": 0.3085801838610827, "grad_norm": 1.409819090707468, "learning_rate": 1.619899031362181e-05, "loss": 0.6603, "step": 3021 }, { "epoch": 0.308682328907048, "grad_norm": 1.5450983462175814, "learning_rate": 1.619639398674313e-05, "loss": 0.646, "step": 3022 }, { "epoch": 0.3087844739530133, "grad_norm": 1.484686154347848, "learning_rate": 1.619379698166512e-05, "loss": 0.685, "step": 3023 }, { "epoch": 0.30888661899897857, "grad_norm": 1.3334198958772887, "learning_rate": 1.6191199298672032e-05, "loss": 0.7167, "step": 3024 }, { "epoch": 0.3089887640449438, "grad_norm": 1.6874450681500093, "learning_rate": 1.6188600938048185e-05, "loss": 0.7868, "step": 3025 }, { "epoch": 0.3090909090909091, "grad_norm": 1.4703910097891506, "learning_rate": 1.6186001900077962e-05, "loss": 0.7859, "step": 3026 }, { "epoch": 0.30919305413687437, "grad_norm": 1.431432071895974, "learning_rate": 1.6183402185045833e-05, "loss": 0.7748, "step": 3027 }, { "epoch": 0.30929519918283965, "grad_norm": 1.5968137604601693, "learning_rate": 1.6180801793236342e-05, "loss": 0.7329, "step": 3028 }, { "epoch": 0.3093973442288049, "grad_norm": 1.630846402382367, "learning_rate": 1.61782007249341e-05, "loss": 0.8414, "step": 3029 }, { "epoch": 0.30949948927477017, "grad_norm": 1.6655821816248195, "learning_rate": 1.61755989804238e-05, "loss": 0.7824, "step": 3030 }, { "epoch": 0.30960163432073545, "grad_norm": 1.687518245748029, "learning_rate": 1.6172996559990197e-05, "loss": 0.7468, "step": 3031 }, { "epoch": 0.30970377936670074, "grad_norm": 1.3909140287869024, "learning_rate": 1.6170393463918137e-05, "loss": 0.767, "step": 3032 }, { "epoch": 0.30980592441266597, "grad_norm": 1.3877852826241506, "learning_rate": 1.6167789692492522e-05, "loss": 0.7865, "step": 3033 }, { "epoch": 0.30990806945863125, "grad_norm": 1.4526652866708634, "learning_rate": 1.6165185245998346e-05, "loss": 0.7429, "step": 3034 }, { "epoch": 0.31001021450459654, "grad_norm": 1.3996391509884138, "learning_rate": 1.6162580124720653e-05, "loss": 0.729, "step": 3035 }, { "epoch": 0.3101123595505618, "grad_norm": 1.5455310168130485, "learning_rate": 1.615997432894459e-05, "loss": 0.7655, "step": 3036 }, { "epoch": 0.31021450459652705, "grad_norm": 1.460210174628382, "learning_rate": 1.6157367858955358e-05, "loss": 0.7029, "step": 3037 }, { "epoch": 0.31031664964249234, "grad_norm": 1.4486431775649355, "learning_rate": 1.615476071503823e-05, "loss": 0.7245, "step": 3038 }, { "epoch": 0.3104187946884576, "grad_norm": 1.4501729806051957, "learning_rate": 1.615215289747857e-05, "loss": 0.8189, "step": 3039 }, { "epoch": 0.3105209397344229, "grad_norm": 1.3530291419608678, "learning_rate": 1.6149544406561797e-05, "loss": 0.6959, "step": 3040 }, { "epoch": 0.31062308478038814, "grad_norm": 1.4748528992186238, "learning_rate": 1.614693524257342e-05, "loss": 0.696, "step": 3041 }, { "epoch": 0.3107252298263534, "grad_norm": 1.569515928356824, "learning_rate": 1.614432540579901e-05, "loss": 0.6905, "step": 3042 }, { "epoch": 0.3108273748723187, "grad_norm": 1.3845131031411568, "learning_rate": 1.6141714896524208e-05, "loss": 0.7513, "step": 3043 }, { "epoch": 0.31092951991828394, "grad_norm": 1.4869464889987924, "learning_rate": 1.6139103715034746e-05, "loss": 0.6353, "step": 3044 }, { "epoch": 0.3110316649642492, "grad_norm": 1.48500908825774, "learning_rate": 1.6136491861616414e-05, "loss": 0.7292, "step": 3045 }, { "epoch": 0.3111338100102145, "grad_norm": 1.537539942001847, "learning_rate": 1.6133879336555085e-05, "loss": 0.7759, "step": 3046 }, { "epoch": 0.3112359550561798, "grad_norm": 1.5202636033182637, "learning_rate": 1.61312661401367e-05, "loss": 0.7287, "step": 3047 }, { "epoch": 0.311338100102145, "grad_norm": 1.4807668749227374, "learning_rate": 1.6128652272647274e-05, "loss": 0.7319, "step": 3048 }, { "epoch": 0.3114402451481103, "grad_norm": 1.430217392925012, "learning_rate": 1.61260377343729e-05, "loss": 0.7888, "step": 3049 }, { "epoch": 0.3115423901940756, "grad_norm": 1.7903840588151976, "learning_rate": 1.6123422525599735e-05, "loss": 0.8025, "step": 3050 }, { "epoch": 0.3116445352400409, "grad_norm": 1.6264293020015002, "learning_rate": 1.6120806646614018e-05, "loss": 0.7091, "step": 3051 }, { "epoch": 0.3117466802860061, "grad_norm": 1.4875781678219508, "learning_rate": 1.611819009770206e-05, "loss": 0.701, "step": 3052 }, { "epoch": 0.3118488253319714, "grad_norm": 1.4465137806625734, "learning_rate": 1.6115572879150243e-05, "loss": 0.6744, "step": 3053 }, { "epoch": 0.3119509703779367, "grad_norm": 1.2894489678481411, "learning_rate": 1.6112954991245023e-05, "loss": 0.7584, "step": 3054 }, { "epoch": 0.31205311542390196, "grad_norm": 1.2740205157182116, "learning_rate": 1.6110336434272927e-05, "loss": 0.7173, "step": 3055 }, { "epoch": 0.3121552604698672, "grad_norm": 1.4450953260891202, "learning_rate": 1.6107717208520563e-05, "loss": 0.6146, "step": 3056 }, { "epoch": 0.3122574055158325, "grad_norm": 1.4179392588351547, "learning_rate": 1.6105097314274605e-05, "loss": 0.7363, "step": 3057 }, { "epoch": 0.31235955056179776, "grad_norm": 1.4884520098184097, "learning_rate": 1.6102476751821804e-05, "loss": 0.7006, "step": 3058 }, { "epoch": 0.31246169560776305, "grad_norm": 1.5072974325170947, "learning_rate": 1.6099855521448975e-05, "loss": 0.6804, "step": 3059 }, { "epoch": 0.3125638406537283, "grad_norm": 1.5448349331432443, "learning_rate": 1.609723362344302e-05, "loss": 0.7348, "step": 3060 }, { "epoch": 0.31266598569969356, "grad_norm": 1.2587571267707682, "learning_rate": 1.6094611058090905e-05, "loss": 0.591, "step": 3061 }, { "epoch": 0.31276813074565885, "grad_norm": 1.4009027161346672, "learning_rate": 1.6091987825679672e-05, "loss": 0.7295, "step": 3062 }, { "epoch": 0.31287027579162413, "grad_norm": 1.4802466221493724, "learning_rate": 1.6089363926496436e-05, "loss": 0.7569, "step": 3063 }, { "epoch": 0.31297242083758936, "grad_norm": 1.4710691017784918, "learning_rate": 1.6086739360828385e-05, "loss": 0.7824, "step": 3064 }, { "epoch": 0.31307456588355465, "grad_norm": 1.3428302905445242, "learning_rate": 1.608411412896278e-05, "loss": 0.8099, "step": 3065 }, { "epoch": 0.31317671092951993, "grad_norm": 1.4396268825799485, "learning_rate": 1.608148823118695e-05, "loss": 0.7202, "step": 3066 }, { "epoch": 0.3132788559754852, "grad_norm": 1.4235875429270053, "learning_rate": 1.6078861667788307e-05, "loss": 0.7336, "step": 3067 }, { "epoch": 0.31338100102145044, "grad_norm": 1.4175319425862563, "learning_rate": 1.607623443905432e-05, "loss": 0.749, "step": 3068 }, { "epoch": 0.31348314606741573, "grad_norm": 1.298328559908578, "learning_rate": 1.6073606545272555e-05, "loss": 0.7409, "step": 3069 }, { "epoch": 0.313585291113381, "grad_norm": 1.5564802144213863, "learning_rate": 1.6070977986730625e-05, "loss": 0.6281, "step": 3070 }, { "epoch": 0.31368743615934624, "grad_norm": 1.5205225055890226, "learning_rate": 1.6068348763716237e-05, "loss": 0.7095, "step": 3071 }, { "epoch": 0.31378958120531153, "grad_norm": 1.3776727449209483, "learning_rate": 1.606571887651715e-05, "loss": 0.7872, "step": 3072 }, { "epoch": 0.3138917262512768, "grad_norm": 1.3588663605504723, "learning_rate": 1.6063088325421218e-05, "loss": 0.6551, "step": 3073 }, { "epoch": 0.3139938712972421, "grad_norm": 1.517960542665752, "learning_rate": 1.6060457110716346e-05, "loss": 0.7017, "step": 3074 }, { "epoch": 0.31409601634320733, "grad_norm": 1.6318436163581094, "learning_rate": 1.6057825232690538e-05, "loss": 0.8613, "step": 3075 }, { "epoch": 0.3141981613891726, "grad_norm": 1.5475269555410534, "learning_rate": 1.605519269163183e-05, "loss": 0.7734, "step": 3076 }, { "epoch": 0.3143003064351379, "grad_norm": 1.4042244684580985, "learning_rate": 1.6052559487828382e-05, "loss": 0.7653, "step": 3077 }, { "epoch": 0.3144024514811032, "grad_norm": 1.5003909503481574, "learning_rate": 1.6049925621568384e-05, "loss": 0.7562, "step": 3078 }, { "epoch": 0.3145045965270684, "grad_norm": 1.3464103901206892, "learning_rate": 1.6047291093140116e-05, "loss": 0.708, "step": 3079 }, { "epoch": 0.3146067415730337, "grad_norm": 1.564135381951507, "learning_rate": 1.604465590283193e-05, "loss": 0.7882, "step": 3080 }, { "epoch": 0.314708886618999, "grad_norm": 1.5776893622208517, "learning_rate": 1.604202005093225e-05, "loss": 0.6244, "step": 3081 }, { "epoch": 0.31481103166496427, "grad_norm": 1.6314371353896648, "learning_rate": 1.6039383537729577e-05, "loss": 0.7916, "step": 3082 }, { "epoch": 0.3149131767109295, "grad_norm": 1.5453943080862158, "learning_rate": 1.603674636351247e-05, "loss": 0.7008, "step": 3083 }, { "epoch": 0.3150153217568948, "grad_norm": 1.3540603718937378, "learning_rate": 1.6034108528569574e-05, "loss": 0.7427, "step": 3084 }, { "epoch": 0.31511746680286007, "grad_norm": 1.44405631108969, "learning_rate": 1.60314700331896e-05, "loss": 0.724, "step": 3085 }, { "epoch": 0.31521961184882535, "grad_norm": 1.5229590803299635, "learning_rate": 1.6028830877661334e-05, "loss": 0.7232, "step": 3086 }, { "epoch": 0.3153217568947906, "grad_norm": 1.4930109491698582, "learning_rate": 1.6026191062273636e-05, "loss": 0.7456, "step": 3087 }, { "epoch": 0.31542390194075587, "grad_norm": 1.4591704762135231, "learning_rate": 1.602355058731543e-05, "loss": 0.6834, "step": 3088 }, { "epoch": 0.31552604698672115, "grad_norm": 1.3818664334612503, "learning_rate": 1.602090945307572e-05, "loss": 0.7314, "step": 3089 }, { "epoch": 0.31562819203268644, "grad_norm": 1.4070299100122066, "learning_rate": 1.6018267659843584e-05, "loss": 0.7394, "step": 3090 }, { "epoch": 0.31573033707865167, "grad_norm": 1.6474723453163884, "learning_rate": 1.6015625207908162e-05, "loss": 0.7134, "step": 3091 }, { "epoch": 0.31583248212461695, "grad_norm": 1.6297677820582663, "learning_rate": 1.6012982097558675e-05, "loss": 0.7906, "step": 3092 }, { "epoch": 0.31593462717058224, "grad_norm": 1.5734812516448629, "learning_rate": 1.601033832908441e-05, "loss": 0.7983, "step": 3093 }, { "epoch": 0.3160367722165475, "grad_norm": 1.5401753830626395, "learning_rate": 1.6007693902774735e-05, "loss": 0.7228, "step": 3094 }, { "epoch": 0.31613891726251275, "grad_norm": 1.490498790217978, "learning_rate": 1.600504881891908e-05, "loss": 0.7451, "step": 3095 }, { "epoch": 0.31624106230847804, "grad_norm": 1.582059730670658, "learning_rate": 1.6002403077806952e-05, "loss": 0.7293, "step": 3096 }, { "epoch": 0.3163432073544433, "grad_norm": 1.374902956599118, "learning_rate": 1.599975667972793e-05, "loss": 0.6923, "step": 3097 }, { "epoch": 0.31644535240040855, "grad_norm": 1.3376025654143355, "learning_rate": 1.599710962497166e-05, "loss": 0.7181, "step": 3098 }, { "epoch": 0.31654749744637384, "grad_norm": 1.5256944847953056, "learning_rate": 1.599446191382787e-05, "loss": 0.7699, "step": 3099 }, { "epoch": 0.3166496424923391, "grad_norm": 1.4698314193472413, "learning_rate": 1.5991813546586346e-05, "loss": 0.7787, "step": 3100 }, { "epoch": 0.3167517875383044, "grad_norm": 1.5174651017065346, "learning_rate": 1.5989164523536964e-05, "loss": 0.7487, "step": 3101 }, { "epoch": 0.31685393258426964, "grad_norm": 1.48289533624744, "learning_rate": 1.5986514844969655e-05, "loss": 0.6847, "step": 3102 }, { "epoch": 0.3169560776302349, "grad_norm": 1.5531717099186495, "learning_rate": 1.5983864511174425e-05, "loss": 0.6584, "step": 3103 }, { "epoch": 0.3170582226762002, "grad_norm": 1.6455938973451356, "learning_rate": 1.5981213522441358e-05, "loss": 0.7722, "step": 3104 }, { "epoch": 0.3171603677221655, "grad_norm": 1.6543100617449766, "learning_rate": 1.5978561879060608e-05, "loss": 0.7815, "step": 3105 }, { "epoch": 0.3172625127681307, "grad_norm": 1.4813002408413227, "learning_rate": 1.59759095813224e-05, "loss": 0.7035, "step": 3106 }, { "epoch": 0.317364657814096, "grad_norm": 1.4549448443312227, "learning_rate": 1.5973256629517026e-05, "loss": 0.7501, "step": 3107 }, { "epoch": 0.3174668028600613, "grad_norm": 1.2605158717334182, "learning_rate": 1.597060302393485e-05, "loss": 0.7577, "step": 3108 }, { "epoch": 0.3175689479060266, "grad_norm": 1.3808301198882824, "learning_rate": 1.5967948764866324e-05, "loss": 0.78, "step": 3109 }, { "epoch": 0.3176710929519918, "grad_norm": 1.7306359890667031, "learning_rate": 1.5965293852601944e-05, "loss": 0.7736, "step": 3110 }, { "epoch": 0.3177732379979571, "grad_norm": 1.4330875879895189, "learning_rate": 1.59626382874323e-05, "loss": 0.7955, "step": 3111 }, { "epoch": 0.3178753830439224, "grad_norm": 1.5819126773600796, "learning_rate": 1.595998206964804e-05, "loss": 0.7288, "step": 3112 }, { "epoch": 0.31797752808988766, "grad_norm": 1.5042557934951588, "learning_rate": 1.5957325199539894e-05, "loss": 0.7439, "step": 3113 }, { "epoch": 0.3180796731358529, "grad_norm": 1.4669119195743343, "learning_rate": 1.5954667677398656e-05, "loss": 0.735, "step": 3114 }, { "epoch": 0.3181818181818182, "grad_norm": 1.4778851228347591, "learning_rate": 1.5952009503515195e-05, "loss": 0.7017, "step": 3115 }, { "epoch": 0.31828396322778346, "grad_norm": 1.5231365885489327, "learning_rate": 1.5949350678180446e-05, "loss": 0.7967, "step": 3116 }, { "epoch": 0.31838610827374875, "grad_norm": 1.366553037593949, "learning_rate": 1.594669120168542e-05, "loss": 0.6538, "step": 3117 }, { "epoch": 0.318488253319714, "grad_norm": 1.4441295369191471, "learning_rate": 1.5944031074321205e-05, "loss": 0.7573, "step": 3118 }, { "epoch": 0.31859039836567926, "grad_norm": 1.4822607635545413, "learning_rate": 1.5941370296378943e-05, "loss": 0.708, "step": 3119 }, { "epoch": 0.31869254341164455, "grad_norm": 1.4286238142859087, "learning_rate": 1.5938708868149867e-05, "loss": 0.697, "step": 3120 }, { "epoch": 0.31879468845760983, "grad_norm": 1.3537816954976416, "learning_rate": 1.5936046789925268e-05, "loss": 0.6319, "step": 3121 }, { "epoch": 0.31889683350357506, "grad_norm": 1.6901698259549744, "learning_rate": 1.5933384061996515e-05, "loss": 0.7934, "step": 3122 }, { "epoch": 0.31899897854954035, "grad_norm": 1.3963072846062035, "learning_rate": 1.593072068465504e-05, "loss": 0.6693, "step": 3123 }, { "epoch": 0.31910112359550563, "grad_norm": 1.4364062624366685, "learning_rate": 1.5928056658192353e-05, "loss": 0.815, "step": 3124 }, { "epoch": 0.3192032686414709, "grad_norm": 1.4041114324768975, "learning_rate": 1.5925391982900038e-05, "loss": 0.7075, "step": 3125 }, { "epoch": 0.31930541368743615, "grad_norm": 1.457495197868525, "learning_rate": 1.592272665906974e-05, "loss": 0.7757, "step": 3126 }, { "epoch": 0.31940755873340143, "grad_norm": 1.5165108904143538, "learning_rate": 1.5920060686993184e-05, "loss": 0.7771, "step": 3127 }, { "epoch": 0.3195097037793667, "grad_norm": 1.4989976046027713, "learning_rate": 1.591739406696216e-05, "loss": 0.7439, "step": 3128 }, { "epoch": 0.31961184882533195, "grad_norm": 1.5262232841654173, "learning_rate": 1.5914726799268532e-05, "loss": 0.6633, "step": 3129 }, { "epoch": 0.31971399387129723, "grad_norm": 1.4092019469654895, "learning_rate": 1.5912058884204234e-05, "loss": 0.7546, "step": 3130 }, { "epoch": 0.3198161389172625, "grad_norm": 1.4703629863413072, "learning_rate": 1.5909390322061273e-05, "loss": 0.819, "step": 3131 }, { "epoch": 0.3199182839632278, "grad_norm": 1.447766714788458, "learning_rate": 1.590672111313172e-05, "loss": 0.7853, "step": 3132 }, { "epoch": 0.32002042900919303, "grad_norm": 1.2620085582897298, "learning_rate": 1.590405125770773e-05, "loss": 0.6594, "step": 3133 }, { "epoch": 0.3201225740551583, "grad_norm": 1.4127196902528627, "learning_rate": 1.5901380756081516e-05, "loss": 0.7225, "step": 3134 }, { "epoch": 0.3202247191011236, "grad_norm": 1.5254939412977586, "learning_rate": 1.5898709608545358e-05, "loss": 0.707, "step": 3135 }, { "epoch": 0.3203268641470889, "grad_norm": 1.4381238734409696, "learning_rate": 1.5896037815391628e-05, "loss": 0.7337, "step": 3136 }, { "epoch": 0.3204290091930541, "grad_norm": 1.4088205886503185, "learning_rate": 1.589336537691275e-05, "loss": 0.6336, "step": 3137 }, { "epoch": 0.3205311542390194, "grad_norm": 1.5079191076378837, "learning_rate": 1.5890692293401223e-05, "loss": 0.7211, "step": 3138 }, { "epoch": 0.3206332992849847, "grad_norm": 1.4218699664081809, "learning_rate": 1.5888018565149615e-05, "loss": 0.7349, "step": 3139 }, { "epoch": 0.32073544433094997, "grad_norm": 1.5307806227362002, "learning_rate": 1.5885344192450577e-05, "loss": 0.7286, "step": 3140 }, { "epoch": 0.3208375893769152, "grad_norm": 1.512777391543793, "learning_rate": 1.588266917559681e-05, "loss": 0.9063, "step": 3141 }, { "epoch": 0.3209397344228805, "grad_norm": 1.3185820465752274, "learning_rate": 1.58799935148811e-05, "loss": 0.6893, "step": 3142 }, { "epoch": 0.32104187946884577, "grad_norm": 1.614629945443676, "learning_rate": 1.5877317210596305e-05, "loss": 0.7478, "step": 3143 }, { "epoch": 0.32114402451481106, "grad_norm": 1.4018372136618655, "learning_rate": 1.5874640263035343e-05, "loss": 0.717, "step": 3144 }, { "epoch": 0.3212461695607763, "grad_norm": 1.4789247198622901, "learning_rate": 1.587196267249121e-05, "loss": 0.7336, "step": 3145 }, { "epoch": 0.32134831460674157, "grad_norm": 1.3913728821159912, "learning_rate": 1.5869284439256965e-05, "loss": 0.6261, "step": 3146 }, { "epoch": 0.32145045965270685, "grad_norm": 1.3837854021565528, "learning_rate": 1.586660556362575e-05, "loss": 0.7622, "step": 3147 }, { "epoch": 0.32155260469867214, "grad_norm": 1.3077053121375404, "learning_rate": 1.586392604589076e-05, "loss": 0.6457, "step": 3148 }, { "epoch": 0.32165474974463737, "grad_norm": 1.4909438059921996, "learning_rate": 1.586124588634528e-05, "loss": 0.7323, "step": 3149 }, { "epoch": 0.32175689479060265, "grad_norm": 1.6009696292917666, "learning_rate": 1.585856508528265e-05, "loss": 0.7531, "step": 3150 }, { "epoch": 0.32185903983656794, "grad_norm": 1.615000614156137, "learning_rate": 1.585588364299629e-05, "loss": 0.754, "step": 3151 }, { "epoch": 0.3219611848825332, "grad_norm": 1.644114762029356, "learning_rate": 1.585320155977968e-05, "loss": 0.7645, "step": 3152 }, { "epoch": 0.32206332992849845, "grad_norm": 1.4614037724000324, "learning_rate": 1.5850518835926373e-05, "loss": 0.7749, "step": 3153 }, { "epoch": 0.32216547497446374, "grad_norm": 1.4831604856505511, "learning_rate": 1.5847835471730002e-05, "loss": 0.6788, "step": 3154 }, { "epoch": 0.322267620020429, "grad_norm": 1.5093990636490668, "learning_rate": 1.5845151467484265e-05, "loss": 0.8108, "step": 3155 }, { "epoch": 0.32236976506639425, "grad_norm": 1.381916304040464, "learning_rate": 1.5842466823482917e-05, "loss": 0.7196, "step": 3156 }, { "epoch": 0.32247191011235954, "grad_norm": 1.4095040515783297, "learning_rate": 1.5839781540019803e-05, "loss": 0.7394, "step": 3157 }, { "epoch": 0.3225740551583248, "grad_norm": 1.7946142818552229, "learning_rate": 1.5837095617388828e-05, "loss": 0.6953, "step": 3158 }, { "epoch": 0.3226762002042901, "grad_norm": 1.801415091775609, "learning_rate": 1.5834409055883964e-05, "loss": 0.9012, "step": 3159 }, { "epoch": 0.32277834525025534, "grad_norm": 1.5968695004440543, "learning_rate": 1.5831721855799257e-05, "loss": 0.8781, "step": 3160 }, { "epoch": 0.3228804902962206, "grad_norm": 1.3775667428999565, "learning_rate": 1.582903401742883e-05, "loss": 0.7462, "step": 3161 }, { "epoch": 0.3229826353421859, "grad_norm": 1.3513245425781428, "learning_rate": 1.582634554106686e-05, "loss": 0.5911, "step": 3162 }, { "epoch": 0.3230847803881512, "grad_norm": 1.5372303101442195, "learning_rate": 1.582365642700761e-05, "loss": 0.7475, "step": 3163 }, { "epoch": 0.3231869254341164, "grad_norm": 1.4062154194413885, "learning_rate": 1.58209666755454e-05, "loss": 0.6229, "step": 3164 }, { "epoch": 0.3232890704800817, "grad_norm": 1.5092287280348367, "learning_rate": 1.581827628697463e-05, "loss": 0.6711, "step": 3165 }, { "epoch": 0.323391215526047, "grad_norm": 1.4453441331123345, "learning_rate": 1.5815585261589755e-05, "loss": 0.7218, "step": 3166 }, { "epoch": 0.3234933605720123, "grad_norm": 1.6067039256901203, "learning_rate": 1.581289359968532e-05, "loss": 0.7231, "step": 3167 }, { "epoch": 0.3235955056179775, "grad_norm": 1.2805422802578352, "learning_rate": 1.5810201301555922e-05, "loss": 0.6406, "step": 3168 }, { "epoch": 0.3236976506639428, "grad_norm": 1.4944750854197961, "learning_rate": 1.5807508367496238e-05, "loss": 0.7755, "step": 3169 }, { "epoch": 0.3237997957099081, "grad_norm": 1.439145131178109, "learning_rate": 1.5804814797801014e-05, "loss": 0.7648, "step": 3170 }, { "epoch": 0.32390194075587336, "grad_norm": 1.490692409040485, "learning_rate": 1.5802120592765055e-05, "loss": 0.7744, "step": 3171 }, { "epoch": 0.3240040858018386, "grad_norm": 1.4527040626391075, "learning_rate": 1.579942575268325e-05, "loss": 0.72, "step": 3172 }, { "epoch": 0.3241062308478039, "grad_norm": 1.5082261146308416, "learning_rate": 1.5796730277850554e-05, "loss": 0.6905, "step": 3173 }, { "epoch": 0.32420837589376916, "grad_norm": 1.488172649732497, "learning_rate": 1.5794034168561984e-05, "loss": 0.7455, "step": 3174 }, { "epoch": 0.32431052093973445, "grad_norm": 1.4513689627174102, "learning_rate": 1.5791337425112626e-05, "loss": 0.7022, "step": 3175 }, { "epoch": 0.3244126659856997, "grad_norm": 1.4647076374182229, "learning_rate": 1.5788640047797645e-05, "loss": 0.7983, "step": 3176 }, { "epoch": 0.32451481103166496, "grad_norm": 1.5571821743789693, "learning_rate": 1.5785942036912275e-05, "loss": 0.7128, "step": 3177 }, { "epoch": 0.32461695607763025, "grad_norm": 1.5453019596632513, "learning_rate": 1.5783243392751806e-05, "loss": 0.7323, "step": 3178 }, { "epoch": 0.32471910112359553, "grad_norm": 1.6056266796390353, "learning_rate": 1.5780544115611615e-05, "loss": 0.7751, "step": 3179 }, { "epoch": 0.32482124616956076, "grad_norm": 1.4771976341250417, "learning_rate": 1.5777844205787133e-05, "loss": 0.7415, "step": 3180 }, { "epoch": 0.32492339121552605, "grad_norm": 1.5650452541116633, "learning_rate": 1.577514366357387e-05, "loss": 0.7624, "step": 3181 }, { "epoch": 0.32502553626149133, "grad_norm": 1.4604677141952278, "learning_rate": 1.5772442489267406e-05, "loss": 0.6682, "step": 3182 }, { "epoch": 0.32512768130745656, "grad_norm": 1.515884128103966, "learning_rate": 1.576974068316338e-05, "loss": 0.7131, "step": 3183 }, { "epoch": 0.32522982635342185, "grad_norm": 1.4612145656279392, "learning_rate": 1.5767038245557505e-05, "loss": 0.7247, "step": 3184 }, { "epoch": 0.32533197139938713, "grad_norm": 1.3295340565880491, "learning_rate": 1.576433517674557e-05, "loss": 0.7107, "step": 3185 }, { "epoch": 0.3254341164453524, "grad_norm": 1.4142487280569698, "learning_rate": 1.5761631477023426e-05, "loss": 0.7974, "step": 3186 }, { "epoch": 0.32553626149131765, "grad_norm": 1.379941691974214, "learning_rate": 1.5758927146686997e-05, "loss": 0.5921, "step": 3187 }, { "epoch": 0.32563840653728293, "grad_norm": 1.4172497954989012, "learning_rate": 1.5756222186032268e-05, "loss": 0.7809, "step": 3188 }, { "epoch": 0.3257405515832482, "grad_norm": 1.5045828061375472, "learning_rate": 1.5753516595355303e-05, "loss": 0.8268, "step": 3189 }, { "epoch": 0.3258426966292135, "grad_norm": 1.59257601836005, "learning_rate": 1.575081037495223e-05, "loss": 0.7619, "step": 3190 }, { "epoch": 0.32594484167517873, "grad_norm": 1.402149768876966, "learning_rate": 1.5748103525119245e-05, "loss": 0.7387, "step": 3191 }, { "epoch": 0.326046986721144, "grad_norm": 1.4853098463676446, "learning_rate": 1.5745396046152612e-05, "loss": 0.7495, "step": 3192 }, { "epoch": 0.3261491317671093, "grad_norm": 1.4255981624160279, "learning_rate": 1.5742687938348674e-05, "loss": 0.6476, "step": 3193 }, { "epoch": 0.3262512768130746, "grad_norm": 1.4539988230119991, "learning_rate": 1.573997920200383e-05, "loss": 0.8233, "step": 3194 }, { "epoch": 0.3263534218590398, "grad_norm": 1.6198400467018736, "learning_rate": 1.5737269837414554e-05, "loss": 0.742, "step": 3195 }, { "epoch": 0.3264555669050051, "grad_norm": 1.5534621823727732, "learning_rate": 1.5734559844877385e-05, "loss": 0.6889, "step": 3196 }, { "epoch": 0.3265577119509704, "grad_norm": 1.3695887739846708, "learning_rate": 1.573184922468894e-05, "loss": 0.7033, "step": 3197 }, { "epoch": 0.32665985699693567, "grad_norm": 1.5309164790822922, "learning_rate": 1.5729137977145895e-05, "loss": 0.6969, "step": 3198 }, { "epoch": 0.3267620020429009, "grad_norm": 1.8111785168776477, "learning_rate": 1.5726426102544994e-05, "loss": 0.6838, "step": 3199 }, { "epoch": 0.3268641470888662, "grad_norm": 1.4252885476313923, "learning_rate": 1.5723713601183056e-05, "loss": 0.726, "step": 3200 }, { "epoch": 0.32696629213483147, "grad_norm": 1.6040566064955546, "learning_rate": 1.5721000473356967e-05, "loss": 0.7839, "step": 3201 }, { "epoch": 0.32706843718079676, "grad_norm": 1.7360121551456873, "learning_rate": 1.571828671936368e-05, "loss": 0.7981, "step": 3202 }, { "epoch": 0.327170582226762, "grad_norm": 1.4428765490399944, "learning_rate": 1.5715572339500217e-05, "loss": 0.7501, "step": 3203 }, { "epoch": 0.32727272727272727, "grad_norm": 1.2929382342232392, "learning_rate": 1.5712857334063668e-05, "loss": 0.6532, "step": 3204 }, { "epoch": 0.32737487231869256, "grad_norm": 1.376749520144248, "learning_rate": 1.5710141703351195e-05, "loss": 0.665, "step": 3205 }, { "epoch": 0.32747701736465784, "grad_norm": 1.504068715685511, "learning_rate": 1.570742544766002e-05, "loss": 0.7442, "step": 3206 }, { "epoch": 0.32757916241062307, "grad_norm": 1.3437591305543541, "learning_rate": 1.5704708567287442e-05, "loss": 0.7324, "step": 3207 }, { "epoch": 0.32768130745658836, "grad_norm": 1.6581196350180274, "learning_rate": 1.570199106253083e-05, "loss": 0.7784, "step": 3208 }, { "epoch": 0.32778345250255364, "grad_norm": 1.5000147236590053, "learning_rate": 1.569927293368761e-05, "loss": 0.7552, "step": 3209 }, { "epoch": 0.32788559754851887, "grad_norm": 1.4104842674995575, "learning_rate": 1.5696554181055287e-05, "loss": 0.6991, "step": 3210 }, { "epoch": 0.32798774259448416, "grad_norm": 1.4944582596542075, "learning_rate": 1.5693834804931424e-05, "loss": 0.7273, "step": 3211 }, { "epoch": 0.32808988764044944, "grad_norm": 1.4642661036929416, "learning_rate": 1.5691114805613668e-05, "loss": 0.8251, "step": 3212 }, { "epoch": 0.3281920326864147, "grad_norm": 1.4080630930759865, "learning_rate": 1.5688394183399717e-05, "loss": 0.8083, "step": 3213 }, { "epoch": 0.32829417773237995, "grad_norm": 1.5462047125626246, "learning_rate": 1.5685672938587347e-05, "loss": 0.827, "step": 3214 }, { "epoch": 0.32839632277834524, "grad_norm": 1.4190457700193895, "learning_rate": 1.56829510714744e-05, "loss": 0.6672, "step": 3215 }, { "epoch": 0.3284984678243105, "grad_norm": 1.4664530886055338, "learning_rate": 1.5680228582358786e-05, "loss": 0.7056, "step": 3216 }, { "epoch": 0.3286006128702758, "grad_norm": 1.4884559542880456, "learning_rate": 1.567750547153849e-05, "loss": 0.6796, "step": 3217 }, { "epoch": 0.32870275791624104, "grad_norm": 1.5411691071699969, "learning_rate": 1.5674781739311545e-05, "loss": 0.8391, "step": 3218 }, { "epoch": 0.3288049029622063, "grad_norm": 1.4377979561367276, "learning_rate": 1.5672057385976076e-05, "loss": 0.6357, "step": 3219 }, { "epoch": 0.3289070480081716, "grad_norm": 1.417739240547038, "learning_rate": 1.5669332411830258e-05, "loss": 0.6553, "step": 3220 }, { "epoch": 0.3290091930541369, "grad_norm": 1.4295232627732297, "learning_rate": 1.566660681717235e-05, "loss": 0.7158, "step": 3221 }, { "epoch": 0.3291113381001021, "grad_norm": 1.3991895310723974, "learning_rate": 1.566388060230066e-05, "loss": 0.6807, "step": 3222 }, { "epoch": 0.3292134831460674, "grad_norm": 1.4739461852517082, "learning_rate": 1.5661153767513582e-05, "loss": 0.8427, "step": 3223 }, { "epoch": 0.3293156281920327, "grad_norm": 1.5325490107618203, "learning_rate": 1.565842631310956e-05, "loss": 0.746, "step": 3224 }, { "epoch": 0.329417773237998, "grad_norm": 1.4394424834094535, "learning_rate": 1.5655698239387128e-05, "loss": 0.7761, "step": 3225 }, { "epoch": 0.3295199182839632, "grad_norm": 1.4830921796079124, "learning_rate": 1.5652969546644872e-05, "loss": 0.8025, "step": 3226 }, { "epoch": 0.3296220633299285, "grad_norm": 1.4858218100554632, "learning_rate": 1.5650240235181443e-05, "loss": 0.6648, "step": 3227 }, { "epoch": 0.3297242083758938, "grad_norm": 1.4170608906685958, "learning_rate": 1.564751030529557e-05, "loss": 0.7561, "step": 3228 }, { "epoch": 0.32982635342185906, "grad_norm": 1.5587312785237493, "learning_rate": 1.5644779757286045e-05, "loss": 0.7219, "step": 3229 }, { "epoch": 0.3299284984678243, "grad_norm": 1.5105058862432446, "learning_rate": 1.564204859145173e-05, "loss": 0.6899, "step": 3230 }, { "epoch": 0.3300306435137896, "grad_norm": 1.3537566807077885, "learning_rate": 1.563931680809155e-05, "loss": 0.7471, "step": 3231 }, { "epoch": 0.33013278855975486, "grad_norm": 1.5907046914699265, "learning_rate": 1.5636584407504503e-05, "loss": 0.7846, "step": 3232 }, { "epoch": 0.33023493360572015, "grad_norm": 1.5175111835997452, "learning_rate": 1.563385138998965e-05, "loss": 0.6581, "step": 3233 }, { "epoch": 0.3303370786516854, "grad_norm": 1.4820670821889848, "learning_rate": 1.5631117755846124e-05, "loss": 0.6944, "step": 3234 }, { "epoch": 0.33043922369765066, "grad_norm": 1.475688515608224, "learning_rate": 1.562838350537312e-05, "loss": 0.7611, "step": 3235 }, { "epoch": 0.33054136874361595, "grad_norm": 1.4861096018849942, "learning_rate": 1.5625648638869907e-05, "loss": 0.7882, "step": 3236 }, { "epoch": 0.3306435137895812, "grad_norm": 1.6703513440105708, "learning_rate": 1.5622913156635814e-05, "loss": 0.7852, "step": 3237 }, { "epoch": 0.33074565883554646, "grad_norm": 1.4250526659507556, "learning_rate": 1.562017705897024e-05, "loss": 0.7116, "step": 3238 }, { "epoch": 0.33084780388151175, "grad_norm": 1.4899933030397654, "learning_rate": 1.5617440346172662e-05, "loss": 0.8083, "step": 3239 }, { "epoch": 0.33094994892747703, "grad_norm": 1.5058450739569742, "learning_rate": 1.5614703018542605e-05, "loss": 0.7349, "step": 3240 }, { "epoch": 0.33105209397344226, "grad_norm": 1.5296706420979667, "learning_rate": 1.5611965076379675e-05, "loss": 0.8003, "step": 3241 }, { "epoch": 0.33115423901940755, "grad_norm": 1.389387504490224, "learning_rate": 1.5609226519983542e-05, "loss": 0.7483, "step": 3242 }, { "epoch": 0.33125638406537283, "grad_norm": 1.4157852037769132, "learning_rate": 1.5606487349653945e-05, "loss": 0.747, "step": 3243 }, { "epoch": 0.3313585291113381, "grad_norm": 1.3594504359038992, "learning_rate": 1.5603747565690682e-05, "loss": 0.7737, "step": 3244 }, { "epoch": 0.33146067415730335, "grad_norm": 1.4805687007310917, "learning_rate": 1.560100716839363e-05, "loss": 0.7246, "step": 3245 }, { "epoch": 0.33156281920326863, "grad_norm": 1.4705456276609332, "learning_rate": 1.5598266158062724e-05, "loss": 0.78, "step": 3246 }, { "epoch": 0.3316649642492339, "grad_norm": 1.4741283280059332, "learning_rate": 1.559552453499797e-05, "loss": 0.7354, "step": 3247 }, { "epoch": 0.3317671092951992, "grad_norm": 1.582734547492715, "learning_rate": 1.5592782299499437e-05, "loss": 0.7321, "step": 3248 }, { "epoch": 0.33186925434116443, "grad_norm": 1.420538424313716, "learning_rate": 1.559003945186727e-05, "loss": 0.7086, "step": 3249 }, { "epoch": 0.3319713993871297, "grad_norm": 1.3546936074253384, "learning_rate": 1.5587295992401675e-05, "loss": 0.7241, "step": 3250 }, { "epoch": 0.332073544433095, "grad_norm": 1.4814248709663673, "learning_rate": 1.558455192140292e-05, "loss": 0.7578, "step": 3251 }, { "epoch": 0.3321756894790603, "grad_norm": 1.482691396240518, "learning_rate": 1.5581807239171353e-05, "loss": 0.7862, "step": 3252 }, { "epoch": 0.3322778345250255, "grad_norm": 1.5750308813678915, "learning_rate": 1.557906194600738e-05, "loss": 0.8405, "step": 3253 }, { "epoch": 0.3323799795709908, "grad_norm": 1.5687637368943848, "learning_rate": 1.557631604221147e-05, "loss": 0.8054, "step": 3254 }, { "epoch": 0.3324821246169561, "grad_norm": 1.3853995155285455, "learning_rate": 1.5573569528084163e-05, "loss": 0.6503, "step": 3255 }, { "epoch": 0.3325842696629214, "grad_norm": 1.4712951004815336, "learning_rate": 1.5570822403926072e-05, "loss": 0.7611, "step": 3256 }, { "epoch": 0.3326864147088866, "grad_norm": 1.3936765100370476, "learning_rate": 1.556807467003787e-05, "loss": 0.6976, "step": 3257 }, { "epoch": 0.3327885597548519, "grad_norm": 1.3901479111218875, "learning_rate": 1.55653263267203e-05, "loss": 0.6964, "step": 3258 }, { "epoch": 0.33289070480081717, "grad_norm": 1.4568241506181387, "learning_rate": 1.5562577374274165e-05, "loss": 0.7547, "step": 3259 }, { "epoch": 0.33299284984678246, "grad_norm": 1.4273555373360076, "learning_rate": 1.5559827813000343e-05, "loss": 0.7569, "step": 3260 }, { "epoch": 0.3330949948927477, "grad_norm": 1.3646348030683986, "learning_rate": 1.5557077643199775e-05, "loss": 0.6259, "step": 3261 }, { "epoch": 0.33319713993871297, "grad_norm": 1.4270874474809343, "learning_rate": 1.5554326865173468e-05, "loss": 0.7927, "step": 3262 }, { "epoch": 0.33329928498467826, "grad_norm": 1.4487679303899983, "learning_rate": 1.5551575479222497e-05, "loss": 0.6719, "step": 3263 }, { "epoch": 0.3334014300306435, "grad_norm": 1.4209759881235449, "learning_rate": 1.5548823485648003e-05, "loss": 0.6819, "step": 3264 }, { "epoch": 0.33350357507660877, "grad_norm": 1.355652353074514, "learning_rate": 1.5546070884751197e-05, "loss": 0.6353, "step": 3265 }, { "epoch": 0.33360572012257406, "grad_norm": 1.5064286591210039, "learning_rate": 1.5543317676833346e-05, "loss": 0.7923, "step": 3266 }, { "epoch": 0.33370786516853934, "grad_norm": 1.4405624564922124, "learning_rate": 1.5540563862195796e-05, "loss": 0.6525, "step": 3267 }, { "epoch": 0.33381001021450457, "grad_norm": 1.3540593012084174, "learning_rate": 1.553780944113995e-05, "loss": 0.7083, "step": 3268 }, { "epoch": 0.33391215526046986, "grad_norm": 1.5304081513299377, "learning_rate": 1.5535054413967282e-05, "loss": 0.7905, "step": 3269 }, { "epoch": 0.33401430030643514, "grad_norm": 1.3942962524055378, "learning_rate": 1.5532298780979334e-05, "loss": 0.776, "step": 3270 }, { "epoch": 0.3341164453524004, "grad_norm": 1.5065332611311364, "learning_rate": 1.552954254247771e-05, "loss": 0.6446, "step": 3271 }, { "epoch": 0.33421859039836566, "grad_norm": 1.3934988741390135, "learning_rate": 1.5526785698764083e-05, "loss": 0.7259, "step": 3272 }, { "epoch": 0.33432073544433094, "grad_norm": 1.621800375156446, "learning_rate": 1.552402825014019e-05, "loss": 0.7743, "step": 3273 }, { "epoch": 0.3344228804902962, "grad_norm": 1.5317575709755615, "learning_rate": 1.5521270196907833e-05, "loss": 0.7385, "step": 3274 }, { "epoch": 0.3345250255362615, "grad_norm": 1.3858287353078322, "learning_rate": 1.5518511539368887e-05, "loss": 0.6952, "step": 3275 }, { "epoch": 0.33462717058222674, "grad_norm": 1.3232904159298622, "learning_rate": 1.551575227782529e-05, "loss": 0.6959, "step": 3276 }, { "epoch": 0.334729315628192, "grad_norm": 1.3327894417159267, "learning_rate": 1.551299241257904e-05, "loss": 0.6131, "step": 3277 }, { "epoch": 0.3348314606741573, "grad_norm": 1.4482016217809337, "learning_rate": 1.551023194393221e-05, "loss": 0.8464, "step": 3278 }, { "epoch": 0.3349336057201226, "grad_norm": 1.5305604344737402, "learning_rate": 1.5507470872186937e-05, "loss": 0.82, "step": 3279 }, { "epoch": 0.3350357507660878, "grad_norm": 1.5473837823763479, "learning_rate": 1.550470919764541e-05, "loss": 0.6676, "step": 3280 }, { "epoch": 0.3351378958120531, "grad_norm": 1.4840177997712543, "learning_rate": 1.5501946920609913e-05, "loss": 0.7853, "step": 3281 }, { "epoch": 0.3352400408580184, "grad_norm": 1.4862972202392228, "learning_rate": 1.5499184041382766e-05, "loss": 0.7284, "step": 3282 }, { "epoch": 0.3353421859039837, "grad_norm": 1.389601148518838, "learning_rate": 1.5496420560266376e-05, "loss": 0.7483, "step": 3283 }, { "epoch": 0.3354443309499489, "grad_norm": 1.554107948782582, "learning_rate": 1.54936564775632e-05, "loss": 0.8009, "step": 3284 }, { "epoch": 0.3355464759959142, "grad_norm": 1.540788565703329, "learning_rate": 1.5490891793575776e-05, "loss": 0.7022, "step": 3285 }, { "epoch": 0.3356486210418795, "grad_norm": 1.3546561314558745, "learning_rate": 1.5488126508606703e-05, "loss": 0.6079, "step": 3286 }, { "epoch": 0.33575076608784477, "grad_norm": 1.624089662665287, "learning_rate": 1.548536062295863e-05, "loss": 0.7486, "step": 3287 }, { "epoch": 0.33585291113381, "grad_norm": 1.7169547667715563, "learning_rate": 1.5482594136934294e-05, "loss": 0.7682, "step": 3288 }, { "epoch": 0.3359550561797753, "grad_norm": 1.5872660302032644, "learning_rate": 1.5479827050836493e-05, "loss": 0.7647, "step": 3289 }, { "epoch": 0.33605720122574056, "grad_norm": 1.463373074227921, "learning_rate": 1.5477059364968075e-05, "loss": 0.7035, "step": 3290 }, { "epoch": 0.3361593462717058, "grad_norm": 1.5063153124677793, "learning_rate": 1.5474291079631974e-05, "loss": 0.717, "step": 3291 }, { "epoch": 0.3362614913176711, "grad_norm": 1.6154186070418297, "learning_rate": 1.5471522195131176e-05, "loss": 0.7438, "step": 3292 }, { "epoch": 0.33636363636363636, "grad_norm": 1.5740479828427727, "learning_rate": 1.5468752711768737e-05, "loss": 0.8456, "step": 3293 }, { "epoch": 0.33646578140960165, "grad_norm": 1.6122946783515235, "learning_rate": 1.5465982629847785e-05, "loss": 0.8598, "step": 3294 }, { "epoch": 0.3365679264555669, "grad_norm": 1.5752251537811204, "learning_rate": 1.54632119496715e-05, "loss": 0.7607, "step": 3295 }, { "epoch": 0.33667007150153216, "grad_norm": 1.51489808651215, "learning_rate": 1.5460440671543135e-05, "loss": 0.7611, "step": 3296 }, { "epoch": 0.33677221654749745, "grad_norm": 1.3844384735656063, "learning_rate": 1.5457668795766016e-05, "loss": 0.647, "step": 3297 }, { "epoch": 0.33687436159346273, "grad_norm": 1.404439985078323, "learning_rate": 1.5454896322643516e-05, "loss": 0.7321, "step": 3298 }, { "epoch": 0.33697650663942796, "grad_norm": 1.3924645822976185, "learning_rate": 1.5452123252479092e-05, "loss": 0.6265, "step": 3299 }, { "epoch": 0.33707865168539325, "grad_norm": 1.3772933745027827, "learning_rate": 1.5449349585576254e-05, "loss": 0.7704, "step": 3300 }, { "epoch": 0.33718079673135853, "grad_norm": 1.4226809755292762, "learning_rate": 1.5446575322238584e-05, "loss": 0.7358, "step": 3301 }, { "epoch": 0.3372829417773238, "grad_norm": 1.3402406284942294, "learning_rate": 1.5443800462769728e-05, "loss": 0.6951, "step": 3302 }, { "epoch": 0.33738508682328905, "grad_norm": 1.53859699252445, "learning_rate": 1.5441025007473394e-05, "loss": 0.7451, "step": 3303 }, { "epoch": 0.33748723186925433, "grad_norm": 1.5790097234292393, "learning_rate": 1.543824895665335e-05, "loss": 0.7098, "step": 3304 }, { "epoch": 0.3375893769152196, "grad_norm": 1.3331208242031245, "learning_rate": 1.543547231061345e-05, "loss": 0.6585, "step": 3305 }, { "epoch": 0.3376915219611849, "grad_norm": 1.60704440583945, "learning_rate": 1.5432695069657596e-05, "loss": 0.8641, "step": 3306 }, { "epoch": 0.33779366700715013, "grad_norm": 1.4445083378762913, "learning_rate": 1.5429917234089758e-05, "loss": 0.653, "step": 3307 }, { "epoch": 0.3378958120531154, "grad_norm": 1.3998612812053595, "learning_rate": 1.5427138804213962e-05, "loss": 0.7616, "step": 3308 }, { "epoch": 0.3379979570990807, "grad_norm": 1.5914737369902654, "learning_rate": 1.5424359780334326e-05, "loss": 0.7901, "step": 3309 }, { "epoch": 0.338100102145046, "grad_norm": 1.4992085523670196, "learning_rate": 1.5421580162755003e-05, "loss": 0.7541, "step": 3310 }, { "epoch": 0.3382022471910112, "grad_norm": 1.5175183486489991, "learning_rate": 1.541879995178023e-05, "loss": 0.7468, "step": 3311 }, { "epoch": 0.3383043922369765, "grad_norm": 1.5274448962791183, "learning_rate": 1.54160191477143e-05, "loss": 0.6931, "step": 3312 }, { "epoch": 0.3384065372829418, "grad_norm": 1.44161675607315, "learning_rate": 1.541323775086158e-05, "loss": 0.6184, "step": 3313 }, { "epoch": 0.3385086823289071, "grad_norm": 1.4060724041997152, "learning_rate": 1.5410455761526484e-05, "loss": 0.7778, "step": 3314 }, { "epoch": 0.3386108273748723, "grad_norm": 1.4299226949615216, "learning_rate": 1.5407673180013513e-05, "loss": 0.7344, "step": 3315 }, { "epoch": 0.3387129724208376, "grad_norm": 1.4765630577339524, "learning_rate": 1.5404890006627214e-05, "loss": 0.8285, "step": 3316 }, { "epoch": 0.3388151174668029, "grad_norm": 1.1260006448172057, "learning_rate": 1.5402106241672218e-05, "loss": 0.5868, "step": 3317 }, { "epoch": 0.33891726251276816, "grad_norm": 1.5121913297871572, "learning_rate": 1.5399321885453204e-05, "loss": 0.717, "step": 3318 }, { "epoch": 0.3390194075587334, "grad_norm": 1.3312734342530954, "learning_rate": 1.5396536938274915e-05, "loss": 0.7897, "step": 3319 }, { "epoch": 0.3391215526046987, "grad_norm": 1.48292191731946, "learning_rate": 1.5393751400442176e-05, "loss": 0.7083, "step": 3320 }, { "epoch": 0.33922369765066396, "grad_norm": 1.4965597775256376, "learning_rate": 1.539096527225986e-05, "loss": 0.7132, "step": 3321 }, { "epoch": 0.3393258426966292, "grad_norm": 1.4123325764417054, "learning_rate": 1.538817855403291e-05, "loss": 0.7032, "step": 3322 }, { "epoch": 0.3394279877425945, "grad_norm": 1.6268857247240145, "learning_rate": 1.538539124606634e-05, "loss": 0.7439, "step": 3323 }, { "epoch": 0.33953013278855976, "grad_norm": 1.4891674071196392, "learning_rate": 1.5382603348665215e-05, "loss": 0.7959, "step": 3324 }, { "epoch": 0.33963227783452504, "grad_norm": 1.463889003517186, "learning_rate": 1.5379814862134677e-05, "loss": 0.721, "step": 3325 }, { "epoch": 0.33973442288049027, "grad_norm": 1.285506973442181, "learning_rate": 1.537702578677993e-05, "loss": 0.6956, "step": 3326 }, { "epoch": 0.33983656792645556, "grad_norm": 1.5110286865269755, "learning_rate": 1.5374236122906233e-05, "loss": 0.754, "step": 3327 }, { "epoch": 0.33993871297242084, "grad_norm": 1.4863904476908825, "learning_rate": 1.537144587081892e-05, "loss": 0.6914, "step": 3328 }, { "epoch": 0.3400408580183861, "grad_norm": 1.5256055667223307, "learning_rate": 1.5368655030823388e-05, "loss": 0.7439, "step": 3329 }, { "epoch": 0.34014300306435136, "grad_norm": 1.4284593196147581, "learning_rate": 1.536586360322509e-05, "loss": 0.6974, "step": 3330 }, { "epoch": 0.34024514811031664, "grad_norm": 1.5563762394161158, "learning_rate": 1.536307158832956e-05, "loss": 0.7644, "step": 3331 }, { "epoch": 0.3403472931562819, "grad_norm": 1.256478386698582, "learning_rate": 1.5360278986442376e-05, "loss": 0.6987, "step": 3332 }, { "epoch": 0.3404494382022472, "grad_norm": 1.518868212352236, "learning_rate": 1.5357485797869192e-05, "loss": 0.7461, "step": 3333 }, { "epoch": 0.34055158324821244, "grad_norm": 1.5343462715601681, "learning_rate": 1.5354692022915733e-05, "loss": 0.7902, "step": 3334 }, { "epoch": 0.3406537282941777, "grad_norm": 1.3447880836313535, "learning_rate": 1.535189766188777e-05, "loss": 0.7819, "step": 3335 }, { "epoch": 0.340755873340143, "grad_norm": 1.5495100692126673, "learning_rate": 1.5349102715091144e-05, "loss": 0.7107, "step": 3336 }, { "epoch": 0.3408580183861083, "grad_norm": 1.3718131329451053, "learning_rate": 1.5346307182831775e-05, "loss": 0.7419, "step": 3337 }, { "epoch": 0.3409601634320735, "grad_norm": 1.4017797398540883, "learning_rate": 1.534351106541563e-05, "loss": 0.7271, "step": 3338 }, { "epoch": 0.3410623084780388, "grad_norm": 1.4604436569380952, "learning_rate": 1.5340714363148746e-05, "loss": 0.7384, "step": 3339 }, { "epoch": 0.3411644535240041, "grad_norm": 1.4253974854376215, "learning_rate": 1.5337917076337222e-05, "loss": 0.7415, "step": 3340 }, { "epoch": 0.3412665985699694, "grad_norm": 1.5726795663048634, "learning_rate": 1.533511920528723e-05, "loss": 0.7816, "step": 3341 }, { "epoch": 0.3413687436159346, "grad_norm": 1.4958733996899503, "learning_rate": 1.5332320750304994e-05, "loss": 0.8024, "step": 3342 }, { "epoch": 0.3414708886618999, "grad_norm": 1.3021417431399696, "learning_rate": 1.5329521711696805e-05, "loss": 0.6668, "step": 3343 }, { "epoch": 0.3415730337078652, "grad_norm": 1.465441727287025, "learning_rate": 1.532672208976902e-05, "loss": 0.9229, "step": 3344 }, { "epoch": 0.34167517875383047, "grad_norm": 1.553032757363597, "learning_rate": 1.532392188482806e-05, "loss": 0.7421, "step": 3345 }, { "epoch": 0.3417773237997957, "grad_norm": 1.66435108372262, "learning_rate": 1.5321121097180414e-05, "loss": 0.6508, "step": 3346 }, { "epoch": 0.341879468845761, "grad_norm": 1.365435676369416, "learning_rate": 1.5318319727132625e-05, "loss": 0.782, "step": 3347 }, { "epoch": 0.34198161389172627, "grad_norm": 1.445110387830733, "learning_rate": 1.5315517774991303e-05, "loss": 0.8075, "step": 3348 }, { "epoch": 0.3420837589376915, "grad_norm": 1.3394268781671446, "learning_rate": 1.5312715241063128e-05, "loss": 0.7988, "step": 3349 }, { "epoch": 0.3421859039836568, "grad_norm": 1.4619371548541915, "learning_rate": 1.530991212565484e-05, "loss": 0.7783, "step": 3350 }, { "epoch": 0.34228804902962207, "grad_norm": 1.4910610273297067, "learning_rate": 1.5307108429073237e-05, "loss": 0.7818, "step": 3351 }, { "epoch": 0.34239019407558735, "grad_norm": 1.3957346663597578, "learning_rate": 1.5304304151625185e-05, "loss": 0.723, "step": 3352 }, { "epoch": 0.3424923391215526, "grad_norm": 1.57406759961631, "learning_rate": 1.530149929361762e-05, "loss": 0.6834, "step": 3353 }, { "epoch": 0.34259448416751787, "grad_norm": 1.5363157234836475, "learning_rate": 1.529869385535753e-05, "loss": 0.7606, "step": 3354 }, { "epoch": 0.34269662921348315, "grad_norm": 1.4365716888318951, "learning_rate": 1.5295887837151977e-05, "loss": 0.7621, "step": 3355 }, { "epoch": 0.34279877425944844, "grad_norm": 1.313998023706264, "learning_rate": 1.5293081239308074e-05, "loss": 0.6554, "step": 3356 }, { "epoch": 0.34290091930541367, "grad_norm": 1.6567305420412044, "learning_rate": 1.5290274062133015e-05, "loss": 0.901, "step": 3357 }, { "epoch": 0.34300306435137895, "grad_norm": 1.5225169070614142, "learning_rate": 1.528746630593404e-05, "loss": 0.7319, "step": 3358 }, { "epoch": 0.34310520939734424, "grad_norm": 1.3491875636552484, "learning_rate": 1.528465797101846e-05, "loss": 0.7435, "step": 3359 }, { "epoch": 0.3432073544433095, "grad_norm": 1.476300789979567, "learning_rate": 1.528184905769365e-05, "loss": 0.7791, "step": 3360 }, { "epoch": 0.34330949948927475, "grad_norm": 1.5605263918426975, "learning_rate": 1.527903956626705e-05, "loss": 0.7029, "step": 3361 }, { "epoch": 0.34341164453524003, "grad_norm": 1.3666902001507952, "learning_rate": 1.527622949704616e-05, "loss": 0.6527, "step": 3362 }, { "epoch": 0.3435137895812053, "grad_norm": 1.3029056227785296, "learning_rate": 1.5273418850338542e-05, "loss": 0.688, "step": 3363 }, { "epoch": 0.3436159346271706, "grad_norm": 1.3867867853246734, "learning_rate": 1.527060762645182e-05, "loss": 0.7276, "step": 3364 }, { "epoch": 0.34371807967313583, "grad_norm": 1.4936931658326622, "learning_rate": 1.5267795825693693e-05, "loss": 0.8404, "step": 3365 }, { "epoch": 0.3438202247191011, "grad_norm": 1.4503041754999721, "learning_rate": 1.5264983448371907e-05, "loss": 0.6835, "step": 3366 }, { "epoch": 0.3439223697650664, "grad_norm": 1.436405493899439, "learning_rate": 1.526217049479428e-05, "loss": 0.7392, "step": 3367 }, { "epoch": 0.3440245148110317, "grad_norm": 1.6534927222554228, "learning_rate": 1.5259356965268695e-05, "loss": 0.8119, "step": 3368 }, { "epoch": 0.3441266598569969, "grad_norm": 1.5312410489326242, "learning_rate": 1.5256542860103091e-05, "loss": 0.811, "step": 3369 }, { "epoch": 0.3442288049029622, "grad_norm": 1.5191926596049206, "learning_rate": 1.5253728179605479e-05, "loss": 0.846, "step": 3370 }, { "epoch": 0.3443309499489275, "grad_norm": 1.4769420038099945, "learning_rate": 1.5250912924083915e-05, "loss": 0.8066, "step": 3371 }, { "epoch": 0.3444330949948928, "grad_norm": 1.4546985029209918, "learning_rate": 1.5248097093846545e-05, "loss": 0.79, "step": 3372 }, { "epoch": 0.344535240040858, "grad_norm": 1.3897632115074896, "learning_rate": 1.5245280689201556e-05, "loss": 0.815, "step": 3373 }, { "epoch": 0.3446373850868233, "grad_norm": 1.4671974502722975, "learning_rate": 1.5242463710457206e-05, "loss": 0.7651, "step": 3374 }, { "epoch": 0.3447395301327886, "grad_norm": 1.6341393402331448, "learning_rate": 1.5239646157921817e-05, "loss": 0.7912, "step": 3375 }, { "epoch": 0.3448416751787538, "grad_norm": 1.4799974358657892, "learning_rate": 1.523682803190377e-05, "loss": 0.7871, "step": 3376 }, { "epoch": 0.3449438202247191, "grad_norm": 1.4650289632212923, "learning_rate": 1.5234009332711512e-05, "loss": 0.6692, "step": 3377 }, { "epoch": 0.3450459652706844, "grad_norm": 1.51744225649683, "learning_rate": 1.523119006065355e-05, "loss": 0.8473, "step": 3378 }, { "epoch": 0.34514811031664966, "grad_norm": 1.3094245334154937, "learning_rate": 1.5228370216038455e-05, "loss": 0.7104, "step": 3379 }, { "epoch": 0.3452502553626149, "grad_norm": 1.5442241536259158, "learning_rate": 1.5225549799174863e-05, "loss": 0.7623, "step": 3380 }, { "epoch": 0.3453524004085802, "grad_norm": 1.4530108725648814, "learning_rate": 1.522272881037147e-05, "loss": 0.7884, "step": 3381 }, { "epoch": 0.34545454545454546, "grad_norm": 1.5058314385062694, "learning_rate": 1.5219907249937036e-05, "loss": 0.7337, "step": 3382 }, { "epoch": 0.34555669050051074, "grad_norm": 1.4492761932463916, "learning_rate": 1.5217085118180377e-05, "loss": 0.827, "step": 3383 }, { "epoch": 0.345658835546476, "grad_norm": 1.470222863519962, "learning_rate": 1.5214262415410384e-05, "loss": 0.7552, "step": 3384 }, { "epoch": 0.34576098059244126, "grad_norm": 1.5634339613105022, "learning_rate": 1.5211439141936e-05, "loss": 0.7548, "step": 3385 }, { "epoch": 0.34586312563840654, "grad_norm": 1.3837891548369847, "learning_rate": 1.5208615298066237e-05, "loss": 0.7685, "step": 3386 }, { "epoch": 0.34596527068437183, "grad_norm": 1.3664539290780435, "learning_rate": 1.5205790884110161e-05, "loss": 0.71, "step": 3387 }, { "epoch": 0.34606741573033706, "grad_norm": 1.5372413174893773, "learning_rate": 1.520296590037691e-05, "loss": 0.7299, "step": 3388 }, { "epoch": 0.34616956077630234, "grad_norm": 1.319893487694971, "learning_rate": 1.5200140347175683e-05, "loss": 0.7022, "step": 3389 }, { "epoch": 0.34627170582226763, "grad_norm": 1.4939101597645068, "learning_rate": 1.5197314224815732e-05, "loss": 0.7206, "step": 3390 }, { "epoch": 0.3463738508682329, "grad_norm": 1.4616953213066424, "learning_rate": 1.5194487533606382e-05, "loss": 0.7478, "step": 3391 }, { "epoch": 0.34647599591419814, "grad_norm": 1.3979791502407504, "learning_rate": 1.5191660273857013e-05, "loss": 0.7256, "step": 3392 }, { "epoch": 0.3465781409601634, "grad_norm": 1.5292771036050423, "learning_rate": 1.5188832445877075e-05, "loss": 0.7095, "step": 3393 }, { "epoch": 0.3466802860061287, "grad_norm": 1.2429152568794204, "learning_rate": 1.5186004049976075e-05, "loss": 0.7607, "step": 3394 }, { "epoch": 0.346782431052094, "grad_norm": 1.4826450261413437, "learning_rate": 1.5183175086463577e-05, "loss": 0.7012, "step": 3395 }, { "epoch": 0.3468845760980592, "grad_norm": 1.351328627507031, "learning_rate": 1.5180345555649221e-05, "loss": 0.6151, "step": 3396 }, { "epoch": 0.3469867211440245, "grad_norm": 1.5180780738584176, "learning_rate": 1.5177515457842695e-05, "loss": 0.7667, "step": 3397 }, { "epoch": 0.3470888661899898, "grad_norm": 1.4311420068513432, "learning_rate": 1.517468479335376e-05, "loss": 0.6755, "step": 3398 }, { "epoch": 0.3471910112359551, "grad_norm": 1.5549500688027087, "learning_rate": 1.517185356249223e-05, "loss": 0.7548, "step": 3399 }, { "epoch": 0.3472931562819203, "grad_norm": 1.4420062826177098, "learning_rate": 1.5169021765567982e-05, "loss": 0.7128, "step": 3400 }, { "epoch": 0.3473953013278856, "grad_norm": 1.57363409920895, "learning_rate": 1.5166189402890964e-05, "loss": 0.7371, "step": 3401 }, { "epoch": 0.3474974463738509, "grad_norm": 1.4636790509833857, "learning_rate": 1.516335647477118e-05, "loss": 0.7682, "step": 3402 }, { "epoch": 0.3475995914198161, "grad_norm": 1.398249307043079, "learning_rate": 1.5160522981518693e-05, "loss": 0.7503, "step": 3403 }, { "epoch": 0.3477017364657814, "grad_norm": 1.3814416886241143, "learning_rate": 1.5157688923443631e-05, "loss": 0.6841, "step": 3404 }, { "epoch": 0.3478038815117467, "grad_norm": 1.4062328275645273, "learning_rate": 1.5154854300856183e-05, "loss": 0.7348, "step": 3405 }, { "epoch": 0.34790602655771197, "grad_norm": 1.4644360574743611, "learning_rate": 1.5152019114066607e-05, "loss": 0.6388, "step": 3406 }, { "epoch": 0.3480081716036772, "grad_norm": 1.4108025819183712, "learning_rate": 1.5149183363385204e-05, "loss": 0.7791, "step": 3407 }, { "epoch": 0.3481103166496425, "grad_norm": 1.324044635313514, "learning_rate": 1.5146347049122359e-05, "loss": 0.7488, "step": 3408 }, { "epoch": 0.34821246169560777, "grad_norm": 1.2990363260539666, "learning_rate": 1.5143510171588503e-05, "loss": 0.5292, "step": 3409 }, { "epoch": 0.34831460674157305, "grad_norm": 1.3252633140563592, "learning_rate": 1.5140672731094132e-05, "loss": 0.6736, "step": 3410 }, { "epoch": 0.3484167517875383, "grad_norm": 1.5846164995459888, "learning_rate": 1.5137834727949816e-05, "loss": 0.7278, "step": 3411 }, { "epoch": 0.34851889683350357, "grad_norm": 1.556090445659398, "learning_rate": 1.5134996162466165e-05, "loss": 0.6273, "step": 3412 }, { "epoch": 0.34862104187946885, "grad_norm": 1.612785848448576, "learning_rate": 1.5132157034953868e-05, "loss": 0.7402, "step": 3413 }, { "epoch": 0.34872318692543414, "grad_norm": 1.6135834246552838, "learning_rate": 1.5129317345723666e-05, "loss": 0.7582, "step": 3414 }, { "epoch": 0.34882533197139937, "grad_norm": 1.4487798633456597, "learning_rate": 1.5126477095086369e-05, "loss": 0.6119, "step": 3415 }, { "epoch": 0.34892747701736465, "grad_norm": 1.430202752605433, "learning_rate": 1.512363628335284e-05, "loss": 0.7548, "step": 3416 }, { "epoch": 0.34902962206332994, "grad_norm": 1.4468497281872048, "learning_rate": 1.5120794910834011e-05, "loss": 0.8076, "step": 3417 }, { "epoch": 0.3491317671092952, "grad_norm": 1.352474348751627, "learning_rate": 1.511795297784087e-05, "loss": 0.7886, "step": 3418 }, { "epoch": 0.34923391215526045, "grad_norm": 1.4771702269152291, "learning_rate": 1.5115110484684468e-05, "loss": 0.7286, "step": 3419 }, { "epoch": 0.34933605720122574, "grad_norm": 1.546063194323747, "learning_rate": 1.5112267431675917e-05, "loss": 0.6583, "step": 3420 }, { "epoch": 0.349438202247191, "grad_norm": 1.5270215823614932, "learning_rate": 1.51094238191264e-05, "loss": 0.7424, "step": 3421 }, { "epoch": 0.3495403472931563, "grad_norm": 1.4887727674525129, "learning_rate": 1.5106579647347137e-05, "loss": 0.7102, "step": 3422 }, { "epoch": 0.34964249233912154, "grad_norm": 1.4410281802057714, "learning_rate": 1.5103734916649435e-05, "loss": 0.8522, "step": 3423 }, { "epoch": 0.3497446373850868, "grad_norm": 1.3465310216277304, "learning_rate": 1.510088962734465e-05, "loss": 0.5364, "step": 3424 }, { "epoch": 0.3498467824310521, "grad_norm": 1.35223319576278, "learning_rate": 1.5098043779744199e-05, "loss": 0.6654, "step": 3425 }, { "epoch": 0.3499489274770174, "grad_norm": 1.5292233619163258, "learning_rate": 1.5095197374159563e-05, "loss": 0.6185, "step": 3426 }, { "epoch": 0.3500510725229826, "grad_norm": 1.3571795257377226, "learning_rate": 1.509235041090228e-05, "loss": 0.8161, "step": 3427 }, { "epoch": 0.3501532175689479, "grad_norm": 1.4044316185486687, "learning_rate": 1.5089502890283956e-05, "loss": 0.6069, "step": 3428 }, { "epoch": 0.3502553626149132, "grad_norm": 1.4672762982420333, "learning_rate": 1.5086654812616252e-05, "loss": 0.7699, "step": 3429 }, { "epoch": 0.3503575076608784, "grad_norm": 1.530689324186679, "learning_rate": 1.5083806178210896e-05, "loss": 0.7831, "step": 3430 }, { "epoch": 0.3504596527068437, "grad_norm": 1.492672955090508, "learning_rate": 1.5080956987379667e-05, "loss": 0.764, "step": 3431 }, { "epoch": 0.350561797752809, "grad_norm": 1.4420435803893972, "learning_rate": 1.507810724043441e-05, "loss": 0.6622, "step": 3432 }, { "epoch": 0.3506639427987743, "grad_norm": 1.4629403640541534, "learning_rate": 1.5075256937687037e-05, "loss": 0.7666, "step": 3433 }, { "epoch": 0.3507660878447395, "grad_norm": 1.3296728703171432, "learning_rate": 1.5072406079449513e-05, "loss": 0.6275, "step": 3434 }, { "epoch": 0.3508682328907048, "grad_norm": 1.5129103232056698, "learning_rate": 1.5069554666033868e-05, "loss": 0.7632, "step": 3435 }, { "epoch": 0.3509703779366701, "grad_norm": 1.3253104284979316, "learning_rate": 1.5066702697752189e-05, "loss": 0.6696, "step": 3436 }, { "epoch": 0.35107252298263536, "grad_norm": 1.3599403749533974, "learning_rate": 1.5063850174916623e-05, "loss": 0.6437, "step": 3437 }, { "epoch": 0.3511746680286006, "grad_norm": 1.4580357630489664, "learning_rate": 1.5060997097839387e-05, "loss": 0.7275, "step": 3438 }, { "epoch": 0.3512768130745659, "grad_norm": 1.4802066664080362, "learning_rate": 1.5058143466832746e-05, "loss": 0.8175, "step": 3439 }, { "epoch": 0.35137895812053116, "grad_norm": 1.4531321987947696, "learning_rate": 1.5055289282209038e-05, "loss": 0.7293, "step": 3440 }, { "epoch": 0.35148110316649644, "grad_norm": 1.5941396051836583, "learning_rate": 1.5052434544280653e-05, "loss": 0.697, "step": 3441 }, { "epoch": 0.3515832482124617, "grad_norm": 1.4136744475621397, "learning_rate": 1.5049579253360036e-05, "loss": 0.6729, "step": 3442 }, { "epoch": 0.35168539325842696, "grad_norm": 1.546678772075708, "learning_rate": 1.5046723409759714e-05, "loss": 0.8121, "step": 3443 }, { "epoch": 0.35178753830439224, "grad_norm": 1.363543008520354, "learning_rate": 1.5043867013792247e-05, "loss": 0.6115, "step": 3444 }, { "epoch": 0.35188968335035753, "grad_norm": 1.5075641164706608, "learning_rate": 1.5041010065770283e-05, "loss": 0.7689, "step": 3445 }, { "epoch": 0.35199182839632276, "grad_norm": 1.4259825773755006, "learning_rate": 1.5038152566006509e-05, "loss": 0.7309, "step": 3446 }, { "epoch": 0.35209397344228804, "grad_norm": 1.508435323702614, "learning_rate": 1.5035294514813683e-05, "loss": 0.7875, "step": 3447 }, { "epoch": 0.35219611848825333, "grad_norm": 1.449852434100444, "learning_rate": 1.5032435912504614e-05, "loss": 0.7518, "step": 3448 }, { "epoch": 0.3522982635342186, "grad_norm": 1.3928249433419313, "learning_rate": 1.5029576759392188e-05, "loss": 0.7527, "step": 3449 }, { "epoch": 0.35240040858018384, "grad_norm": 1.3565495072430203, "learning_rate": 1.5026717055789335e-05, "loss": 0.6281, "step": 3450 }, { "epoch": 0.35250255362614913, "grad_norm": 1.4871685648001876, "learning_rate": 1.5023856802009051e-05, "loss": 0.66, "step": 3451 }, { "epoch": 0.3526046986721144, "grad_norm": 1.3942758923372949, "learning_rate": 1.5020995998364396e-05, "loss": 0.7584, "step": 3452 }, { "epoch": 0.3527068437180797, "grad_norm": 1.462698950660686, "learning_rate": 1.5018134645168485e-05, "loss": 0.7722, "step": 3453 }, { "epoch": 0.35280898876404493, "grad_norm": 1.4361997782887943, "learning_rate": 1.5015272742734492e-05, "loss": 0.6669, "step": 3454 }, { "epoch": 0.3529111338100102, "grad_norm": 1.4663552686239123, "learning_rate": 1.501241029137566e-05, "loss": 0.7998, "step": 3455 }, { "epoch": 0.3530132788559755, "grad_norm": 1.2869539340291634, "learning_rate": 1.5009547291405281e-05, "loss": 0.6173, "step": 3456 }, { "epoch": 0.35311542390194073, "grad_norm": 1.4809190532159078, "learning_rate": 1.5006683743136718e-05, "loss": 0.748, "step": 3457 }, { "epoch": 0.353217568947906, "grad_norm": 1.3587705125384204, "learning_rate": 1.5003819646883382e-05, "loss": 0.7565, "step": 3458 }, { "epoch": 0.3533197139938713, "grad_norm": 1.4941894834093674, "learning_rate": 1.5000955002958755e-05, "loss": 0.7689, "step": 3459 }, { "epoch": 0.3534218590398366, "grad_norm": 1.4420640472446187, "learning_rate": 1.4998089811676369e-05, "loss": 0.6285, "step": 3460 }, { "epoch": 0.3535240040858018, "grad_norm": 1.4642921926943364, "learning_rate": 1.4995224073349823e-05, "loss": 0.7587, "step": 3461 }, { "epoch": 0.3536261491317671, "grad_norm": 1.4832647330136208, "learning_rate": 1.4992357788292777e-05, "loss": 0.6677, "step": 3462 }, { "epoch": 0.3537282941777324, "grad_norm": 1.3651078885340238, "learning_rate": 1.4989490956818946e-05, "loss": 0.6582, "step": 3463 }, { "epoch": 0.35383043922369767, "grad_norm": 1.5129772279950182, "learning_rate": 1.4986623579242108e-05, "loss": 0.7867, "step": 3464 }, { "epoch": 0.3539325842696629, "grad_norm": 1.4015547421215944, "learning_rate": 1.4983755655876094e-05, "loss": 0.7968, "step": 3465 }, { "epoch": 0.3540347293156282, "grad_norm": 1.5685636800194525, "learning_rate": 1.4980887187034802e-05, "loss": 0.755, "step": 3466 }, { "epoch": 0.35413687436159347, "grad_norm": 1.5158432370555537, "learning_rate": 1.4978018173032194e-05, "loss": 0.72, "step": 3467 }, { "epoch": 0.35423901940755875, "grad_norm": 1.4901399747260724, "learning_rate": 1.4975148614182278e-05, "loss": 0.7535, "step": 3468 }, { "epoch": 0.354341164453524, "grad_norm": 1.5628745024265729, "learning_rate": 1.4972278510799132e-05, "loss": 0.7556, "step": 3469 }, { "epoch": 0.35444330949948927, "grad_norm": 1.5339507765919724, "learning_rate": 1.4969407863196892e-05, "loss": 0.7727, "step": 3470 }, { "epoch": 0.35454545454545455, "grad_norm": 1.5089756292586483, "learning_rate": 1.496653667168975e-05, "loss": 0.7992, "step": 3471 }, { "epoch": 0.35464759959141984, "grad_norm": 1.4500057358146876, "learning_rate": 1.496366493659196e-05, "loss": 0.7376, "step": 3472 }, { "epoch": 0.35474974463738507, "grad_norm": 1.4892080436987092, "learning_rate": 1.4960792658217833e-05, "loss": 0.7403, "step": 3473 }, { "epoch": 0.35485188968335035, "grad_norm": 1.5421938469781589, "learning_rate": 1.4957919836881749e-05, "loss": 0.6524, "step": 3474 }, { "epoch": 0.35495403472931564, "grad_norm": 1.497063599842979, "learning_rate": 1.4955046472898136e-05, "loss": 0.7861, "step": 3475 }, { "epoch": 0.3550561797752809, "grad_norm": 1.3199440293674702, "learning_rate": 1.4952172566581483e-05, "loss": 0.7108, "step": 3476 }, { "epoch": 0.35515832482124615, "grad_norm": 1.3792026544676559, "learning_rate": 1.494929811824634e-05, "loss": 0.6613, "step": 3477 }, { "epoch": 0.35526046986721144, "grad_norm": 1.5444835796379772, "learning_rate": 1.4946423128207323e-05, "loss": 0.8557, "step": 3478 }, { "epoch": 0.3553626149131767, "grad_norm": 1.5221103489030912, "learning_rate": 1.4943547596779101e-05, "loss": 0.7461, "step": 3479 }, { "epoch": 0.355464759959142, "grad_norm": 1.382138657837125, "learning_rate": 1.4940671524276397e-05, "loss": 0.7458, "step": 3480 }, { "epoch": 0.35556690500510724, "grad_norm": 1.3067897680407983, "learning_rate": 1.4937794911014006e-05, "loss": 0.7342, "step": 3481 }, { "epoch": 0.3556690500510725, "grad_norm": 1.450379527853015, "learning_rate": 1.493491775730677e-05, "loss": 0.7262, "step": 3482 }, { "epoch": 0.3557711950970378, "grad_norm": 1.5820857410791551, "learning_rate": 1.4932040063469596e-05, "loss": 0.7621, "step": 3483 }, { "epoch": 0.3558733401430031, "grad_norm": 1.595097436051451, "learning_rate": 1.4929161829817456e-05, "loss": 0.862, "step": 3484 }, { "epoch": 0.3559754851889683, "grad_norm": 1.433964036543034, "learning_rate": 1.4926283056665366e-05, "loss": 0.6741, "step": 3485 }, { "epoch": 0.3560776302349336, "grad_norm": 1.5682518553936355, "learning_rate": 1.4923403744328408e-05, "loss": 0.7717, "step": 3486 }, { "epoch": 0.3561797752808989, "grad_norm": 1.5500563506333622, "learning_rate": 1.4920523893121735e-05, "loss": 0.6815, "step": 3487 }, { "epoch": 0.3562819203268641, "grad_norm": 1.3639794747253813, "learning_rate": 1.4917643503360539e-05, "loss": 0.6409, "step": 3488 }, { "epoch": 0.3563840653728294, "grad_norm": 1.4646025986180748, "learning_rate": 1.4914762575360087e-05, "loss": 0.7432, "step": 3489 }, { "epoch": 0.3564862104187947, "grad_norm": 1.4506767383957861, "learning_rate": 1.4911881109435693e-05, "loss": 0.7821, "step": 3490 }, { "epoch": 0.35658835546476, "grad_norm": 1.3308058075038096, "learning_rate": 1.4908999105902742e-05, "loss": 0.7471, "step": 3491 }, { "epoch": 0.3566905005107252, "grad_norm": 1.5030790935971772, "learning_rate": 1.4906116565076667e-05, "loss": 0.8164, "step": 3492 }, { "epoch": 0.3567926455566905, "grad_norm": 1.36923071995333, "learning_rate": 1.4903233487272959e-05, "loss": 0.6974, "step": 3493 }, { "epoch": 0.3568947906026558, "grad_norm": 1.4601808354390002, "learning_rate": 1.4900349872807183e-05, "loss": 0.762, "step": 3494 }, { "epoch": 0.35699693564862106, "grad_norm": 1.5543228749845361, "learning_rate": 1.4897465721994943e-05, "loss": 0.7681, "step": 3495 }, { "epoch": 0.3570990806945863, "grad_norm": 1.6335611690412835, "learning_rate": 1.4894581035151921e-05, "loss": 0.791, "step": 3496 }, { "epoch": 0.3572012257405516, "grad_norm": 1.5247387578776723, "learning_rate": 1.4891695812593838e-05, "loss": 0.711, "step": 3497 }, { "epoch": 0.35730337078651686, "grad_norm": 1.4405323528418739, "learning_rate": 1.4888810054636491e-05, "loss": 0.7896, "step": 3498 }, { "epoch": 0.35740551583248215, "grad_norm": 1.533419544009103, "learning_rate": 1.4885923761595724e-05, "loss": 0.7727, "step": 3499 }, { "epoch": 0.3575076608784474, "grad_norm": 1.4683954123028158, "learning_rate": 1.4883036933787446e-05, "loss": 0.7563, "step": 3500 }, { "epoch": 0.35760980592441266, "grad_norm": 1.4881631049242774, "learning_rate": 1.4880149571527616e-05, "loss": 0.7897, "step": 3501 }, { "epoch": 0.35771195097037795, "grad_norm": 1.3734756063283173, "learning_rate": 1.4877261675132267e-05, "loss": 0.6308, "step": 3502 }, { "epoch": 0.35781409601634323, "grad_norm": 1.3623119703365125, "learning_rate": 1.4874373244917473e-05, "loss": 0.6912, "step": 3503 }, { "epoch": 0.35791624106230846, "grad_norm": 1.4789831179889628, "learning_rate": 1.4871484281199381e-05, "loss": 0.8306, "step": 3504 }, { "epoch": 0.35801838610827375, "grad_norm": 1.4084139593351896, "learning_rate": 1.4868594784294183e-05, "loss": 0.7265, "step": 3505 }, { "epoch": 0.35812053115423903, "grad_norm": 1.2346101749999052, "learning_rate": 1.4865704754518144e-05, "loss": 0.6024, "step": 3506 }, { "epoch": 0.3582226762002043, "grad_norm": 1.4930524368727602, "learning_rate": 1.4862814192187575e-05, "loss": 0.8381, "step": 3507 }, { "epoch": 0.35832482124616954, "grad_norm": 1.482995172744597, "learning_rate": 1.4859923097618854e-05, "loss": 0.7924, "step": 3508 }, { "epoch": 0.35842696629213483, "grad_norm": 1.5561701857128722, "learning_rate": 1.4857031471128407e-05, "loss": 0.8678, "step": 3509 }, { "epoch": 0.3585291113381001, "grad_norm": 1.476051750138663, "learning_rate": 1.4854139313032727e-05, "loss": 0.6984, "step": 3510 }, { "epoch": 0.3586312563840654, "grad_norm": 1.3657890636717835, "learning_rate": 1.4851246623648364e-05, "loss": 0.699, "step": 3511 }, { "epoch": 0.35873340143003063, "grad_norm": 1.5620935653976191, "learning_rate": 1.4848353403291924e-05, "loss": 0.7332, "step": 3512 }, { "epoch": 0.3588355464759959, "grad_norm": 1.4404267957674066, "learning_rate": 1.4845459652280069e-05, "loss": 0.7652, "step": 3513 }, { "epoch": 0.3589376915219612, "grad_norm": 1.4888897071950422, "learning_rate": 1.4842565370929528e-05, "loss": 0.7558, "step": 3514 }, { "epoch": 0.35903983656792643, "grad_norm": 1.4881428369182534, "learning_rate": 1.4839670559557076e-05, "loss": 0.7267, "step": 3515 }, { "epoch": 0.3591419816138917, "grad_norm": 1.4926361925075338, "learning_rate": 1.4836775218479558e-05, "loss": 0.7338, "step": 3516 }, { "epoch": 0.359244126659857, "grad_norm": 1.5546982153545426, "learning_rate": 1.4833879348013862e-05, "loss": 0.8417, "step": 3517 }, { "epoch": 0.3593462717058223, "grad_norm": 1.2873060988536296, "learning_rate": 1.483098294847695e-05, "loss": 0.6561, "step": 3518 }, { "epoch": 0.3594484167517875, "grad_norm": 1.4805183222159815, "learning_rate": 1.4828086020185837e-05, "loss": 0.7404, "step": 3519 }, { "epoch": 0.3595505617977528, "grad_norm": 1.3884670157696586, "learning_rate": 1.4825188563457586e-05, "loss": 0.7653, "step": 3520 }, { "epoch": 0.3596527068437181, "grad_norm": 1.3166762654571889, "learning_rate": 1.4822290578609329e-05, "loss": 0.7986, "step": 3521 }, { "epoch": 0.35975485188968337, "grad_norm": 1.4960164084815841, "learning_rate": 1.4819392065958253e-05, "loss": 0.7157, "step": 3522 }, { "epoch": 0.3598569969356486, "grad_norm": 1.4307481875637516, "learning_rate": 1.4816493025821603e-05, "loss": 0.6549, "step": 3523 }, { "epoch": 0.3599591419816139, "grad_norm": 1.315270486104617, "learning_rate": 1.4813593458516677e-05, "loss": 0.6173, "step": 3524 }, { "epoch": 0.36006128702757917, "grad_norm": 1.4996307917914726, "learning_rate": 1.4810693364360839e-05, "loss": 0.7079, "step": 3525 }, { "epoch": 0.36016343207354445, "grad_norm": 1.6254771963374428, "learning_rate": 1.4807792743671504e-05, "loss": 0.7053, "step": 3526 }, { "epoch": 0.3602655771195097, "grad_norm": 1.4042117546019866, "learning_rate": 1.480489159676615e-05, "loss": 0.812, "step": 3527 }, { "epoch": 0.36036772216547497, "grad_norm": 1.4381609526182655, "learning_rate": 1.4801989923962304e-05, "loss": 0.7484, "step": 3528 }, { "epoch": 0.36046986721144025, "grad_norm": 1.4427478658430821, "learning_rate": 1.4799087725577557e-05, "loss": 0.7837, "step": 3529 }, { "epoch": 0.36057201225740554, "grad_norm": 1.4767667404789748, "learning_rate": 1.4796185001929558e-05, "loss": 0.7183, "step": 3530 }, { "epoch": 0.36067415730337077, "grad_norm": 1.5139926169321702, "learning_rate": 1.4793281753336013e-05, "loss": 0.7475, "step": 3531 }, { "epoch": 0.36077630234933605, "grad_norm": 1.2692024736879741, "learning_rate": 1.4790377980114682e-05, "loss": 0.6582, "step": 3532 }, { "epoch": 0.36087844739530134, "grad_norm": 1.5150379757787118, "learning_rate": 1.4787473682583384e-05, "loss": 0.7402, "step": 3533 }, { "epoch": 0.3609805924412666, "grad_norm": 1.4497426092568104, "learning_rate": 1.478456886106e-05, "loss": 0.8044, "step": 3534 }, { "epoch": 0.36108273748723185, "grad_norm": 1.518901540540455, "learning_rate": 1.4781663515862465e-05, "loss": 0.791, "step": 3535 }, { "epoch": 0.36118488253319714, "grad_norm": 1.5533440110834615, "learning_rate": 1.477875764730877e-05, "loss": 0.759, "step": 3536 }, { "epoch": 0.3612870275791624, "grad_norm": 1.4167996124687194, "learning_rate": 1.4775851255716958e-05, "loss": 0.7571, "step": 3537 }, { "epoch": 0.3613891726251277, "grad_norm": 1.5586093911179746, "learning_rate": 1.4772944341405145e-05, "loss": 0.7743, "step": 3538 }, { "epoch": 0.36149131767109294, "grad_norm": 1.4781070385699004, "learning_rate": 1.4770036904691487e-05, "loss": 0.6837, "step": 3539 }, { "epoch": 0.3615934627170582, "grad_norm": 1.4783462457769279, "learning_rate": 1.4767128945894211e-05, "loss": 0.7563, "step": 3540 }, { "epoch": 0.3616956077630235, "grad_norm": 1.4093634812298927, "learning_rate": 1.476422046533159e-05, "loss": 0.7297, "step": 3541 }, { "epoch": 0.36179775280898874, "grad_norm": 1.3186063401576988, "learning_rate": 1.4761311463321959e-05, "loss": 0.6499, "step": 3542 }, { "epoch": 0.361899897854954, "grad_norm": 1.4834311932183237, "learning_rate": 1.4758401940183715e-05, "loss": 0.7136, "step": 3543 }, { "epoch": 0.3620020429009193, "grad_norm": 1.5848396428660996, "learning_rate": 1.4755491896235304e-05, "loss": 0.7469, "step": 3544 }, { "epoch": 0.3621041879468846, "grad_norm": 1.4501279764240527, "learning_rate": 1.4752581331795233e-05, "loss": 0.7052, "step": 3545 }, { "epoch": 0.3622063329928498, "grad_norm": 1.5569997587194149, "learning_rate": 1.4749670247182064e-05, "loss": 0.7531, "step": 3546 }, { "epoch": 0.3623084780388151, "grad_norm": 1.6030123379742232, "learning_rate": 1.4746758642714415e-05, "loss": 0.6892, "step": 3547 }, { "epoch": 0.3624106230847804, "grad_norm": 1.5653356367269906, "learning_rate": 1.4743846518710971e-05, "loss": 0.7479, "step": 3548 }, { "epoch": 0.3625127681307457, "grad_norm": 1.3774972060344115, "learning_rate": 1.4740933875490456e-05, "loss": 0.814, "step": 3549 }, { "epoch": 0.3626149131767109, "grad_norm": 1.3085954413459302, "learning_rate": 1.4738020713371668e-05, "loss": 0.8017, "step": 3550 }, { "epoch": 0.3627170582226762, "grad_norm": 1.52505904006268, "learning_rate": 1.473510703267345e-05, "loss": 0.7377, "step": 3551 }, { "epoch": 0.3628192032686415, "grad_norm": 1.4460463479772836, "learning_rate": 1.4732192833714712e-05, "loss": 0.653, "step": 3552 }, { "epoch": 0.36292134831460676, "grad_norm": 1.597986277757192, "learning_rate": 1.4729278116814406e-05, "loss": 0.752, "step": 3553 }, { "epoch": 0.363023493360572, "grad_norm": 1.3403146746156354, "learning_rate": 1.4726362882291555e-05, "loss": 0.6718, "step": 3554 }, { "epoch": 0.3631256384065373, "grad_norm": 1.3829788518123365, "learning_rate": 1.4723447130465236e-05, "loss": 0.7344, "step": 3555 }, { "epoch": 0.36322778345250256, "grad_norm": 1.602423752132864, "learning_rate": 1.4720530861654577e-05, "loss": 0.7567, "step": 3556 }, { "epoch": 0.36332992849846785, "grad_norm": 1.5575462470250936, "learning_rate": 1.4717614076178761e-05, "loss": 0.8836, "step": 3557 }, { "epoch": 0.3634320735444331, "grad_norm": 1.4794524056913083, "learning_rate": 1.471469677435704e-05, "loss": 0.7257, "step": 3558 }, { "epoch": 0.36353421859039836, "grad_norm": 1.3914877391548583, "learning_rate": 1.4711778956508708e-05, "loss": 0.8264, "step": 3559 }, { "epoch": 0.36363636363636365, "grad_norm": 1.395574635582073, "learning_rate": 1.4708860622953132e-05, "loss": 0.7423, "step": 3560 }, { "epoch": 0.36373850868232893, "grad_norm": 1.530605813155384, "learning_rate": 1.4705941774009708e-05, "loss": 0.879, "step": 3561 }, { "epoch": 0.36384065372829416, "grad_norm": 1.4115340550072433, "learning_rate": 1.4703022409997924e-05, "loss": 0.6706, "step": 3562 }, { "epoch": 0.36394279877425945, "grad_norm": 1.5215885550646886, "learning_rate": 1.47001025312373e-05, "loss": 0.804, "step": 3563 }, { "epoch": 0.36404494382022473, "grad_norm": 1.5038036353148072, "learning_rate": 1.4697182138047412e-05, "loss": 0.7966, "step": 3564 }, { "epoch": 0.36414708886619, "grad_norm": 1.407248670395217, "learning_rate": 1.4694261230747903e-05, "loss": 0.6933, "step": 3565 }, { "epoch": 0.36424923391215525, "grad_norm": 1.498742470689245, "learning_rate": 1.4691339809658473e-05, "loss": 0.7316, "step": 3566 }, { "epoch": 0.36435137895812053, "grad_norm": 1.35984646029236, "learning_rate": 1.468841787509887e-05, "loss": 0.7201, "step": 3567 }, { "epoch": 0.3644535240040858, "grad_norm": 1.350166528795013, "learning_rate": 1.4685495427388903e-05, "loss": 0.8239, "step": 3568 }, { "epoch": 0.36455566905005105, "grad_norm": 1.4629909454744277, "learning_rate": 1.468257246684843e-05, "loss": 0.85, "step": 3569 }, { "epoch": 0.36465781409601633, "grad_norm": 1.532535643249225, "learning_rate": 1.467964899379738e-05, "loss": 0.8106, "step": 3570 }, { "epoch": 0.3647599591419816, "grad_norm": 1.5350234660708026, "learning_rate": 1.4676725008555719e-05, "loss": 0.7743, "step": 3571 }, { "epoch": 0.3648621041879469, "grad_norm": 1.5712565211612515, "learning_rate": 1.4673800511443488e-05, "loss": 0.7415, "step": 3572 }, { "epoch": 0.36496424923391213, "grad_norm": 1.4503835070169362, "learning_rate": 1.467087550278077e-05, "loss": 0.7637, "step": 3573 }, { "epoch": 0.3650663942798774, "grad_norm": 1.245451434261055, "learning_rate": 1.4667949982887711e-05, "loss": 0.6575, "step": 3574 }, { "epoch": 0.3651685393258427, "grad_norm": 1.5660989426379437, "learning_rate": 1.466502395208451e-05, "loss": 0.7802, "step": 3575 }, { "epoch": 0.365270684371808, "grad_norm": 1.5882683599553815, "learning_rate": 1.4662097410691422e-05, "loss": 0.7925, "step": 3576 }, { "epoch": 0.3653728294177732, "grad_norm": 1.4219513286888783, "learning_rate": 1.4659170359028763e-05, "loss": 0.7209, "step": 3577 }, { "epoch": 0.3654749744637385, "grad_norm": 1.2313316789307804, "learning_rate": 1.4656242797416895e-05, "loss": 0.6937, "step": 3578 }, { "epoch": 0.3655771195097038, "grad_norm": 1.539023476740282, "learning_rate": 1.4653314726176249e-05, "loss": 0.7205, "step": 3579 }, { "epoch": 0.36567926455566907, "grad_norm": 1.4615364153418366, "learning_rate": 1.4650386145627298e-05, "loss": 0.8191, "step": 3580 }, { "epoch": 0.3657814096016343, "grad_norm": 1.523091558614056, "learning_rate": 1.4647457056090575e-05, "loss": 0.7378, "step": 3581 }, { "epoch": 0.3658835546475996, "grad_norm": 1.4125594920393905, "learning_rate": 1.464452745788668e-05, "loss": 0.6643, "step": 3582 }, { "epoch": 0.36598569969356487, "grad_norm": 1.5445807858286138, "learning_rate": 1.464159735133625e-05, "loss": 0.7748, "step": 3583 }, { "epoch": 0.36608784473953015, "grad_norm": 1.4142546214682299, "learning_rate": 1.4638666736759992e-05, "loss": 0.7141, "step": 3584 }, { "epoch": 0.3661899897854954, "grad_norm": 1.5960918702239622, "learning_rate": 1.4635735614478663e-05, "loss": 0.7577, "step": 3585 }, { "epoch": 0.36629213483146067, "grad_norm": 1.6719239579670662, "learning_rate": 1.4632803984813076e-05, "loss": 0.8539, "step": 3586 }, { "epoch": 0.36639427987742595, "grad_norm": 1.4515077378926515, "learning_rate": 1.4629871848084101e-05, "loss": 0.7558, "step": 3587 }, { "epoch": 0.36649642492339124, "grad_norm": 1.3695855978324316, "learning_rate": 1.4626939204612657e-05, "loss": 0.7564, "step": 3588 }, { "epoch": 0.36659856996935647, "grad_norm": 1.4320087213060648, "learning_rate": 1.4624006054719733e-05, "loss": 0.6628, "step": 3589 }, { "epoch": 0.36670071501532175, "grad_norm": 1.4735600735206476, "learning_rate": 1.4621072398726357e-05, "loss": 0.7483, "step": 3590 }, { "epoch": 0.36680286006128704, "grad_norm": 1.413726306613324, "learning_rate": 1.461813823695362e-05, "loss": 0.7098, "step": 3591 }, { "epoch": 0.3669050051072523, "grad_norm": 1.3723791641071754, "learning_rate": 1.4615203569722672e-05, "loss": 0.6798, "step": 3592 }, { "epoch": 0.36700715015321755, "grad_norm": 1.3787401723519346, "learning_rate": 1.4612268397354706e-05, "loss": 0.7892, "step": 3593 }, { "epoch": 0.36710929519918284, "grad_norm": 1.43741502588015, "learning_rate": 1.4609332720170988e-05, "loss": 0.6665, "step": 3594 }, { "epoch": 0.3672114402451481, "grad_norm": 1.3497835651672883, "learning_rate": 1.4606396538492824e-05, "loss": 0.6772, "step": 3595 }, { "epoch": 0.36731358529111335, "grad_norm": 1.2802686187741845, "learning_rate": 1.4603459852641586e-05, "loss": 0.6644, "step": 3596 }, { "epoch": 0.36741573033707864, "grad_norm": 1.3846898101306624, "learning_rate": 1.460052266293869e-05, "loss": 0.6732, "step": 3597 }, { "epoch": 0.3675178753830439, "grad_norm": 1.4850561884244704, "learning_rate": 1.4597584969705616e-05, "loss": 0.6702, "step": 3598 }, { "epoch": 0.3676200204290092, "grad_norm": 1.6417667206036424, "learning_rate": 1.45946467732639e-05, "loss": 0.7736, "step": 3599 }, { "epoch": 0.36772216547497444, "grad_norm": 1.508648257280374, "learning_rate": 1.4591708073935122e-05, "loss": 0.7441, "step": 3600 }, { "epoch": 0.3678243105209397, "grad_norm": 1.5581586534899627, "learning_rate": 1.458876887204093e-05, "loss": 0.8311, "step": 3601 }, { "epoch": 0.367926455566905, "grad_norm": 1.3647480128365639, "learning_rate": 1.4585829167903022e-05, "loss": 0.7277, "step": 3602 }, { "epoch": 0.3680286006128703, "grad_norm": 1.3015608574213278, "learning_rate": 1.4582888961843147e-05, "loss": 0.8404, "step": 3603 }, { "epoch": 0.3681307456588355, "grad_norm": 1.5796521861656745, "learning_rate": 1.4579948254183116e-05, "loss": 0.7277, "step": 3604 }, { "epoch": 0.3682328907048008, "grad_norm": 1.4794105921201846, "learning_rate": 1.4577007045244787e-05, "loss": 0.794, "step": 3605 }, { "epoch": 0.3683350357507661, "grad_norm": 1.340764716127591, "learning_rate": 1.457406533535008e-05, "loss": 0.6224, "step": 3606 }, { "epoch": 0.3684371807967314, "grad_norm": 1.4284995813516426, "learning_rate": 1.4571123124820969e-05, "loss": 0.7437, "step": 3607 }, { "epoch": 0.3685393258426966, "grad_norm": 1.4187360706387864, "learning_rate": 1.4568180413979478e-05, "loss": 0.7095, "step": 3608 }, { "epoch": 0.3686414708886619, "grad_norm": 1.4317089037253494, "learning_rate": 1.4565237203147688e-05, "loss": 0.752, "step": 3609 }, { "epoch": 0.3687436159346272, "grad_norm": 1.5682739572886992, "learning_rate": 1.4562293492647737e-05, "loss": 0.7201, "step": 3610 }, { "epoch": 0.36884576098059246, "grad_norm": 1.2986345471611553, "learning_rate": 1.4559349282801818e-05, "loss": 0.5799, "step": 3611 }, { "epoch": 0.3689479060265577, "grad_norm": 1.5179461423012208, "learning_rate": 1.455640457393217e-05, "loss": 0.8102, "step": 3612 }, { "epoch": 0.369050051072523, "grad_norm": 1.7483568923034332, "learning_rate": 1.45534593663611e-05, "loss": 0.7897, "step": 3613 }, { "epoch": 0.36915219611848826, "grad_norm": 1.577203999248307, "learning_rate": 1.4550513660410962e-05, "loss": 0.7017, "step": 3614 }, { "epoch": 0.36925434116445355, "grad_norm": 1.543514234227448, "learning_rate": 1.4547567456404162e-05, "loss": 0.7633, "step": 3615 }, { "epoch": 0.3693564862104188, "grad_norm": 1.5495580145609447, "learning_rate": 1.4544620754663165e-05, "loss": 0.6678, "step": 3616 }, { "epoch": 0.36945863125638406, "grad_norm": 1.3462967627681197, "learning_rate": 1.4541673555510491e-05, "loss": 0.7039, "step": 3617 }, { "epoch": 0.36956077630234935, "grad_norm": 1.709141253043606, "learning_rate": 1.4538725859268711e-05, "loss": 0.7238, "step": 3618 }, { "epoch": 0.36966292134831463, "grad_norm": 1.4772003166161658, "learning_rate": 1.4535777666260456e-05, "loss": 0.7611, "step": 3619 }, { "epoch": 0.36976506639427986, "grad_norm": 1.5657073474429466, "learning_rate": 1.4532828976808404e-05, "loss": 0.7501, "step": 3620 }, { "epoch": 0.36986721144024515, "grad_norm": 1.638456528154201, "learning_rate": 1.4529879791235291e-05, "loss": 0.9016, "step": 3621 }, { "epoch": 0.36996935648621043, "grad_norm": 1.389396222147818, "learning_rate": 1.4526930109863905e-05, "loss": 0.711, "step": 3622 }, { "epoch": 0.37007150153217566, "grad_norm": 1.4731595081452353, "learning_rate": 1.4523979933017101e-05, "loss": 0.7118, "step": 3623 }, { "epoch": 0.37017364657814095, "grad_norm": 1.5588683015656337, "learning_rate": 1.4521029261017765e-05, "loss": 0.7637, "step": 3624 }, { "epoch": 0.37027579162410623, "grad_norm": 1.3652004998121787, "learning_rate": 1.4518078094188856e-05, "loss": 0.6267, "step": 3625 }, { "epoch": 0.3703779366700715, "grad_norm": 1.4836445625923325, "learning_rate": 1.4515126432853384e-05, "loss": 0.6966, "step": 3626 }, { "epoch": 0.37048008171603675, "grad_norm": 1.6489376444744281, "learning_rate": 1.4512174277334403e-05, "loss": 0.865, "step": 3627 }, { "epoch": 0.37058222676200203, "grad_norm": 1.5178896456982356, "learning_rate": 1.4509221627955037e-05, "loss": 0.7282, "step": 3628 }, { "epoch": 0.3706843718079673, "grad_norm": 1.503620313065253, "learning_rate": 1.4506268485038445e-05, "loss": 0.8446, "step": 3629 }, { "epoch": 0.3707865168539326, "grad_norm": 1.4410003223255292, "learning_rate": 1.4503314848907857e-05, "loss": 0.7576, "step": 3630 }, { "epoch": 0.37088866189989783, "grad_norm": 1.3713676399909998, "learning_rate": 1.4500360719886551e-05, "loss": 0.6399, "step": 3631 }, { "epoch": 0.3709908069458631, "grad_norm": 1.5353132471547601, "learning_rate": 1.4497406098297858e-05, "loss": 0.7025, "step": 3632 }, { "epoch": 0.3710929519918284, "grad_norm": 1.5221069189221201, "learning_rate": 1.4494450984465156e-05, "loss": 0.7279, "step": 3633 }, { "epoch": 0.3711950970377937, "grad_norm": 1.5885983348957806, "learning_rate": 1.4491495378711895e-05, "loss": 0.858, "step": 3634 }, { "epoch": 0.3712972420837589, "grad_norm": 1.3536907145924706, "learning_rate": 1.4488539281361562e-05, "loss": 0.6201, "step": 3635 }, { "epoch": 0.3713993871297242, "grad_norm": 1.3276582062833509, "learning_rate": 1.4485582692737705e-05, "loss": 0.7055, "step": 3636 }, { "epoch": 0.3715015321756895, "grad_norm": 1.4870232981047469, "learning_rate": 1.4482625613163921e-05, "loss": 0.8094, "step": 3637 }, { "epoch": 0.37160367722165477, "grad_norm": 1.2694764332601938, "learning_rate": 1.447966804296387e-05, "loss": 0.6804, "step": 3638 }, { "epoch": 0.37170582226762, "grad_norm": 1.5513568999490308, "learning_rate": 1.4476709982461258e-05, "loss": 0.8177, "step": 3639 }, { "epoch": 0.3718079673135853, "grad_norm": 1.508325105598013, "learning_rate": 1.4473751431979845e-05, "loss": 0.7769, "step": 3640 }, { "epoch": 0.37191011235955057, "grad_norm": 1.4247597802181968, "learning_rate": 1.4470792391843449e-05, "loss": 0.6332, "step": 3641 }, { "epoch": 0.37201225740551586, "grad_norm": 1.5244592734311038, "learning_rate": 1.4467832862375934e-05, "loss": 0.7402, "step": 3642 }, { "epoch": 0.3721144024514811, "grad_norm": 1.54550819124998, "learning_rate": 1.446487284390123e-05, "loss": 0.8492, "step": 3643 }, { "epoch": 0.37221654749744637, "grad_norm": 1.3368328148037967, "learning_rate": 1.4461912336743304e-05, "loss": 0.6975, "step": 3644 }, { "epoch": 0.37231869254341166, "grad_norm": 1.4451872842812368, "learning_rate": 1.4458951341226192e-05, "loss": 0.7298, "step": 3645 }, { "epoch": 0.37242083758937694, "grad_norm": 1.4005427107381334, "learning_rate": 1.4455989857673978e-05, "loss": 0.7656, "step": 3646 }, { "epoch": 0.37252298263534217, "grad_norm": 1.5103998740457987, "learning_rate": 1.4453027886410792e-05, "loss": 0.788, "step": 3647 }, { "epoch": 0.37262512768130746, "grad_norm": 1.528519627260269, "learning_rate": 1.4450065427760827e-05, "loss": 0.6873, "step": 3648 }, { "epoch": 0.37272727272727274, "grad_norm": 1.4177724285951554, "learning_rate": 1.4447102482048324e-05, "loss": 0.7027, "step": 3649 }, { "epoch": 0.372829417773238, "grad_norm": 1.4671763159742377, "learning_rate": 1.4444139049597583e-05, "loss": 0.8318, "step": 3650 }, { "epoch": 0.37293156281920326, "grad_norm": 1.4804035151544732, "learning_rate": 1.4441175130732952e-05, "loss": 0.7582, "step": 3651 }, { "epoch": 0.37303370786516854, "grad_norm": 1.6098988641694498, "learning_rate": 1.4438210725778833e-05, "loss": 0.7635, "step": 3652 }, { "epoch": 0.3731358529111338, "grad_norm": 1.2607461152484556, "learning_rate": 1.4435245835059684e-05, "loss": 0.81, "step": 3653 }, { "epoch": 0.37323799795709905, "grad_norm": 1.310994512230082, "learning_rate": 1.443228045890001e-05, "loss": 0.6724, "step": 3654 }, { "epoch": 0.37334014300306434, "grad_norm": 1.425602610155195, "learning_rate": 1.442931459762438e-05, "loss": 0.7376, "step": 3655 }, { "epoch": 0.3734422880490296, "grad_norm": 1.5197204592130922, "learning_rate": 1.4426348251557402e-05, "loss": 0.7371, "step": 3656 }, { "epoch": 0.3735444330949949, "grad_norm": 1.4743155850125698, "learning_rate": 1.442338142102375e-05, "loss": 0.7005, "step": 3657 }, { "epoch": 0.37364657814096014, "grad_norm": 1.418190801576121, "learning_rate": 1.4420414106348144e-05, "loss": 0.8089, "step": 3658 }, { "epoch": 0.3737487231869254, "grad_norm": 1.45883140527573, "learning_rate": 1.4417446307855356e-05, "loss": 0.693, "step": 3659 }, { "epoch": 0.3738508682328907, "grad_norm": 1.467968645465181, "learning_rate": 1.4414478025870218e-05, "loss": 0.6693, "step": 3660 }, { "epoch": 0.373953013278856, "grad_norm": 1.5287750918175633, "learning_rate": 1.4411509260717607e-05, "loss": 0.7814, "step": 3661 }, { "epoch": 0.3740551583248212, "grad_norm": 1.5025478379114838, "learning_rate": 1.4408540012722456e-05, "loss": 0.6616, "step": 3662 }, { "epoch": 0.3741573033707865, "grad_norm": 1.5146175780449924, "learning_rate": 1.4405570282209756e-05, "loss": 0.8505, "step": 3663 }, { "epoch": 0.3742594484167518, "grad_norm": 1.3314386004605876, "learning_rate": 1.4402600069504537e-05, "loss": 0.7081, "step": 3664 }, { "epoch": 0.3743615934627171, "grad_norm": 1.49247692578184, "learning_rate": 1.4399629374931898e-05, "loss": 0.8108, "step": 3665 }, { "epoch": 0.3744637385086823, "grad_norm": 1.4406517821466518, "learning_rate": 1.4396658198816982e-05, "loss": 0.7396, "step": 3666 }, { "epoch": 0.3745658835546476, "grad_norm": 1.2781214740996365, "learning_rate": 1.4393686541484986e-05, "loss": 0.7179, "step": 3667 }, { "epoch": 0.3746680286006129, "grad_norm": 1.565929302486357, "learning_rate": 1.4390714403261159e-05, "loss": 0.7522, "step": 3668 }, { "epoch": 0.37477017364657816, "grad_norm": 1.4080543472205684, "learning_rate": 1.43877417844708e-05, "loss": 0.7937, "step": 3669 }, { "epoch": 0.3748723186925434, "grad_norm": 1.5997233268125852, "learning_rate": 1.4384768685439274e-05, "loss": 0.8142, "step": 3670 }, { "epoch": 0.3749744637385087, "grad_norm": 1.400126980126301, "learning_rate": 1.438179510649198e-05, "loss": 0.7449, "step": 3671 }, { "epoch": 0.37507660878447396, "grad_norm": 1.4296666331125398, "learning_rate": 1.4378821047954382e-05, "loss": 0.8201, "step": 3672 }, { "epoch": 0.37517875383043925, "grad_norm": 1.3828789512666668, "learning_rate": 1.4375846510151989e-05, "loss": 0.7891, "step": 3673 }, { "epoch": 0.3752808988764045, "grad_norm": 1.4930069049360375, "learning_rate": 1.4372871493410368e-05, "loss": 0.6832, "step": 3674 }, { "epoch": 0.37538304392236976, "grad_norm": 1.3683770365734156, "learning_rate": 1.436989599805514e-05, "loss": 0.7581, "step": 3675 }, { "epoch": 0.37548518896833505, "grad_norm": 1.4842508517401583, "learning_rate": 1.4366920024411968e-05, "loss": 0.713, "step": 3676 }, { "epoch": 0.37558733401430033, "grad_norm": 1.4433914854046919, "learning_rate": 1.4363943572806579e-05, "loss": 0.6982, "step": 3677 }, { "epoch": 0.37568947906026556, "grad_norm": 1.3743766785039202, "learning_rate": 1.4360966643564746e-05, "loss": 0.7392, "step": 3678 }, { "epoch": 0.37579162410623085, "grad_norm": 1.4702753621169795, "learning_rate": 1.43579892370123e-05, "loss": 0.7977, "step": 3679 }, { "epoch": 0.37589376915219613, "grad_norm": 1.6260752696094032, "learning_rate": 1.4355011353475115e-05, "loss": 0.762, "step": 3680 }, { "epoch": 0.37599591419816136, "grad_norm": 1.5748777799045441, "learning_rate": 1.435203299327912e-05, "loss": 0.7215, "step": 3681 }, { "epoch": 0.37609805924412665, "grad_norm": 1.4190204929655832, "learning_rate": 1.4349054156750303e-05, "loss": 0.6805, "step": 3682 }, { "epoch": 0.37620020429009193, "grad_norm": 1.384949434398776, "learning_rate": 1.43460748442147e-05, "loss": 0.6795, "step": 3683 }, { "epoch": 0.3763023493360572, "grad_norm": 1.4736325322077315, "learning_rate": 1.43430950559984e-05, "loss": 0.7586, "step": 3684 }, { "epoch": 0.37640449438202245, "grad_norm": 1.4384662897864549, "learning_rate": 1.4340114792427535e-05, "loss": 0.7204, "step": 3685 }, { "epoch": 0.37650663942798773, "grad_norm": 1.4445058368114152, "learning_rate": 1.4337134053828305e-05, "loss": 0.7119, "step": 3686 }, { "epoch": 0.376608784473953, "grad_norm": 1.5585012024048948, "learning_rate": 1.4334152840526951e-05, "loss": 0.7159, "step": 3687 }, { "epoch": 0.3767109295199183, "grad_norm": 1.39463092304519, "learning_rate": 1.4331171152849769e-05, "loss": 0.7594, "step": 3688 }, { "epoch": 0.37681307456588353, "grad_norm": 1.6271617254435795, "learning_rate": 1.4328188991123103e-05, "loss": 0.7371, "step": 3689 }, { "epoch": 0.3769152196118488, "grad_norm": 1.4373512172275276, "learning_rate": 1.4325206355673357e-05, "loss": 0.6563, "step": 3690 }, { "epoch": 0.3770173646578141, "grad_norm": 1.4965718250069318, "learning_rate": 1.432222324682698e-05, "loss": 0.671, "step": 3691 }, { "epoch": 0.3771195097037794, "grad_norm": 1.5189566042131477, "learning_rate": 1.431923966491048e-05, "loss": 0.742, "step": 3692 }, { "epoch": 0.3772216547497446, "grad_norm": 1.5988516777730521, "learning_rate": 1.4316255610250402e-05, "loss": 0.9064, "step": 3693 }, { "epoch": 0.3773237997957099, "grad_norm": 1.4724745399528418, "learning_rate": 1.4313271083173363e-05, "loss": 0.733, "step": 3694 }, { "epoch": 0.3774259448416752, "grad_norm": 1.667581844666148, "learning_rate": 1.4310286084006015e-05, "loss": 0.7644, "step": 3695 }, { "epoch": 0.3775280898876405, "grad_norm": 1.5978535770090205, "learning_rate": 1.4307300613075072e-05, "loss": 0.7012, "step": 3696 }, { "epoch": 0.3776302349336057, "grad_norm": 1.3162020021484282, "learning_rate": 1.4304314670707292e-05, "loss": 0.7035, "step": 3697 }, { "epoch": 0.377732379979571, "grad_norm": 1.4983221085175242, "learning_rate": 1.4301328257229494e-05, "loss": 0.7335, "step": 3698 }, { "epoch": 0.37783452502553627, "grad_norm": 1.3503137815561121, "learning_rate": 1.4298341372968538e-05, "loss": 0.6871, "step": 3699 }, { "epoch": 0.37793667007150156, "grad_norm": 1.4911447894232301, "learning_rate": 1.4295354018251342e-05, "loss": 0.6425, "step": 3700 }, { "epoch": 0.3780388151174668, "grad_norm": 1.5193988873355548, "learning_rate": 1.429236619340487e-05, "loss": 0.8458, "step": 3701 }, { "epoch": 0.37814096016343207, "grad_norm": 1.4355064530714263, "learning_rate": 1.428937789875615e-05, "loss": 0.7544, "step": 3702 }, { "epoch": 0.37824310520939736, "grad_norm": 1.6579765946523064, "learning_rate": 1.4286389134632244e-05, "loss": 0.7322, "step": 3703 }, { "epoch": 0.37834525025536264, "grad_norm": 1.4106138978328489, "learning_rate": 1.428339990136028e-05, "loss": 0.7751, "step": 3704 }, { "epoch": 0.37844739530132787, "grad_norm": 1.5234619484207599, "learning_rate": 1.4280410199267427e-05, "loss": 0.78, "step": 3705 }, { "epoch": 0.37854954034729316, "grad_norm": 1.2624715661817962, "learning_rate": 1.4277420028680913e-05, "loss": 0.6238, "step": 3706 }, { "epoch": 0.37865168539325844, "grad_norm": 1.4745822614060566, "learning_rate": 1.4274429389928015e-05, "loss": 0.7816, "step": 3707 }, { "epoch": 0.37875383043922367, "grad_norm": 1.4355075811812978, "learning_rate": 1.4271438283336057e-05, "loss": 0.693, "step": 3708 }, { "epoch": 0.37885597548518896, "grad_norm": 1.4016615716543896, "learning_rate": 1.4268446709232418e-05, "loss": 0.7281, "step": 3709 }, { "epoch": 0.37895812053115424, "grad_norm": 1.5860334399469556, "learning_rate": 1.4265454667944529e-05, "loss": 0.804, "step": 3710 }, { "epoch": 0.3790602655771195, "grad_norm": 1.5650443676433952, "learning_rate": 1.4262462159799874e-05, "loss": 0.8108, "step": 3711 }, { "epoch": 0.37916241062308476, "grad_norm": 1.5100485779065302, "learning_rate": 1.4259469185125977e-05, "loss": 0.7249, "step": 3712 }, { "epoch": 0.37926455566905004, "grad_norm": 1.4061241920262901, "learning_rate": 1.425647574425043e-05, "loss": 0.6421, "step": 3713 }, { "epoch": 0.3793667007150153, "grad_norm": 1.4017877872572864, "learning_rate": 1.4253481837500862e-05, "loss": 0.7803, "step": 3714 }, { "epoch": 0.3794688457609806, "grad_norm": 1.380132325265422, "learning_rate": 1.4250487465204958e-05, "loss": 0.7704, "step": 3715 }, { "epoch": 0.37957099080694584, "grad_norm": 1.4108780735824247, "learning_rate": 1.4247492627690456e-05, "loss": 0.7464, "step": 3716 }, { "epoch": 0.3796731358529111, "grad_norm": 1.5049028566881042, "learning_rate": 1.424449732528514e-05, "loss": 0.7133, "step": 3717 }, { "epoch": 0.3797752808988764, "grad_norm": 1.493439067978785, "learning_rate": 1.424150155831685e-05, "loss": 0.7596, "step": 3718 }, { "epoch": 0.3798774259448417, "grad_norm": 1.4141203789591381, "learning_rate": 1.4238505327113475e-05, "loss": 0.7841, "step": 3719 }, { "epoch": 0.3799795709908069, "grad_norm": 1.5226858009081776, "learning_rate": 1.4235508632002952e-05, "loss": 0.7644, "step": 3720 }, { "epoch": 0.3800817160367722, "grad_norm": 1.4257253786178932, "learning_rate": 1.4232511473313273e-05, "loss": 0.7498, "step": 3721 }, { "epoch": 0.3801838610827375, "grad_norm": 1.3304964414600713, "learning_rate": 1.4229513851372479e-05, "loss": 0.6852, "step": 3722 }, { "epoch": 0.3802860061287028, "grad_norm": 1.4914631982178206, "learning_rate": 1.4226515766508662e-05, "loss": 0.7117, "step": 3723 }, { "epoch": 0.380388151174668, "grad_norm": 1.3990165680896844, "learning_rate": 1.4223517219049964e-05, "loss": 0.6958, "step": 3724 }, { "epoch": 0.3804902962206333, "grad_norm": 1.4560786132387258, "learning_rate": 1.4220518209324574e-05, "loss": 0.7568, "step": 3725 }, { "epoch": 0.3805924412665986, "grad_norm": 1.5442021922467273, "learning_rate": 1.4217518737660743e-05, "loss": 0.7148, "step": 3726 }, { "epoch": 0.38069458631256387, "grad_norm": 1.468354984985535, "learning_rate": 1.4214518804386761e-05, "loss": 0.7848, "step": 3727 }, { "epoch": 0.3807967313585291, "grad_norm": 1.5635493779056633, "learning_rate": 1.4211518409830973e-05, "loss": 0.8243, "step": 3728 }, { "epoch": 0.3808988764044944, "grad_norm": 1.564700102969133, "learning_rate": 1.4208517554321772e-05, "loss": 0.7712, "step": 3729 }, { "epoch": 0.38100102145045966, "grad_norm": 1.431261789372016, "learning_rate": 1.4205516238187606e-05, "loss": 0.7831, "step": 3730 }, { "epoch": 0.38110316649642495, "grad_norm": 1.7771771344287863, "learning_rate": 1.4202514461756974e-05, "loss": 0.7545, "step": 3731 }, { "epoch": 0.3812053115423902, "grad_norm": 1.2728338330729665, "learning_rate": 1.4199512225358416e-05, "loss": 0.7209, "step": 3732 }, { "epoch": 0.38130745658835546, "grad_norm": 1.4464226967437996, "learning_rate": 1.419650952932053e-05, "loss": 0.6923, "step": 3733 }, { "epoch": 0.38140960163432075, "grad_norm": 1.605981382305307, "learning_rate": 1.4193506373971968e-05, "loss": 0.7107, "step": 3734 }, { "epoch": 0.381511746680286, "grad_norm": 1.6557637613588452, "learning_rate": 1.4190502759641422e-05, "loss": 0.7477, "step": 3735 }, { "epoch": 0.38161389172625126, "grad_norm": 1.4419692472749346, "learning_rate": 1.4187498686657644e-05, "loss": 0.7653, "step": 3736 }, { "epoch": 0.38171603677221655, "grad_norm": 1.6109487716279298, "learning_rate": 1.4184494155349424e-05, "loss": 0.7467, "step": 3737 }, { "epoch": 0.38181818181818183, "grad_norm": 1.4581239679879419, "learning_rate": 1.4181489166045622e-05, "loss": 0.6508, "step": 3738 }, { "epoch": 0.38192032686414706, "grad_norm": 1.405300183414637, "learning_rate": 1.4178483719075124e-05, "loss": 0.697, "step": 3739 }, { "epoch": 0.38202247191011235, "grad_norm": 1.515876877148843, "learning_rate": 1.4175477814766888e-05, "loss": 0.7262, "step": 3740 }, { "epoch": 0.38212461695607763, "grad_norm": 1.4641851800028836, "learning_rate": 1.4172471453449902e-05, "loss": 0.7379, "step": 3741 }, { "epoch": 0.3822267620020429, "grad_norm": 1.459256633376696, "learning_rate": 1.4169464635453223e-05, "loss": 0.7473, "step": 3742 }, { "epoch": 0.38232890704800815, "grad_norm": 1.543179187681333, "learning_rate": 1.4166457361105947e-05, "loss": 0.6657, "step": 3743 }, { "epoch": 0.38243105209397343, "grad_norm": 1.5318158907408823, "learning_rate": 1.4163449630737219e-05, "loss": 0.7223, "step": 3744 }, { "epoch": 0.3825331971399387, "grad_norm": 1.4931845726201447, "learning_rate": 1.4160441444676239e-05, "loss": 0.8146, "step": 3745 }, { "epoch": 0.382635342185904, "grad_norm": 1.373586899921609, "learning_rate": 1.4157432803252256e-05, "loss": 0.7108, "step": 3746 }, { "epoch": 0.38273748723186923, "grad_norm": 1.3674300852249217, "learning_rate": 1.4154423706794565e-05, "loss": 0.6478, "step": 3747 }, { "epoch": 0.3828396322778345, "grad_norm": 1.386548544355511, "learning_rate": 1.4151414155632517e-05, "loss": 0.6511, "step": 3748 }, { "epoch": 0.3829417773237998, "grad_norm": 1.3586852007376644, "learning_rate": 1.4148404150095503e-05, "loss": 0.6245, "step": 3749 }, { "epoch": 0.3830439223697651, "grad_norm": 1.4769244178200376, "learning_rate": 1.414539369051298e-05, "loss": 0.6111, "step": 3750 }, { "epoch": 0.3831460674157303, "grad_norm": 1.6297641512463896, "learning_rate": 1.4142382777214438e-05, "loss": 0.7673, "step": 3751 }, { "epoch": 0.3832482124616956, "grad_norm": 1.7546460296680568, "learning_rate": 1.4139371410529425e-05, "loss": 0.7863, "step": 3752 }, { "epoch": 0.3833503575076609, "grad_norm": 1.442053202609649, "learning_rate": 1.4136359590787534e-05, "loss": 0.7213, "step": 3753 }, { "epoch": 0.3834525025536262, "grad_norm": 1.6829229290533938, "learning_rate": 1.4133347318318416e-05, "loss": 0.7498, "step": 3754 }, { "epoch": 0.3835546475995914, "grad_norm": 1.314189114712145, "learning_rate": 1.4130334593451763e-05, "loss": 0.6791, "step": 3755 }, { "epoch": 0.3836567926455567, "grad_norm": 1.4865539650770723, "learning_rate": 1.4127321416517319e-05, "loss": 0.826, "step": 3756 }, { "epoch": 0.383758937691522, "grad_norm": 1.4364678363513173, "learning_rate": 1.4124307787844879e-05, "loss": 0.8683, "step": 3757 }, { "epoch": 0.38386108273748726, "grad_norm": 1.5967542217465929, "learning_rate": 1.412129370776429e-05, "loss": 0.765, "step": 3758 }, { "epoch": 0.3839632277834525, "grad_norm": 1.5899493539223315, "learning_rate": 1.4118279176605439e-05, "loss": 0.732, "step": 3759 }, { "epoch": 0.3840653728294178, "grad_norm": 1.5266697886543328, "learning_rate": 1.4115264194698275e-05, "loss": 0.7243, "step": 3760 }, { "epoch": 0.38416751787538306, "grad_norm": 1.47361758322714, "learning_rate": 1.4112248762372782e-05, "loss": 0.7139, "step": 3761 }, { "epoch": 0.3842696629213483, "grad_norm": 1.3812918779790728, "learning_rate": 1.4109232879959008e-05, "loss": 0.7044, "step": 3762 }, { "epoch": 0.3843718079673136, "grad_norm": 1.5062525420602184, "learning_rate": 1.410621654778704e-05, "loss": 0.6855, "step": 3763 }, { "epoch": 0.38447395301327886, "grad_norm": 1.5263955181166646, "learning_rate": 1.4103199766187015e-05, "loss": 0.6596, "step": 3764 }, { "epoch": 0.38457609805924414, "grad_norm": 1.4341766171056096, "learning_rate": 1.4100182535489127e-05, "loss": 0.6153, "step": 3765 }, { "epoch": 0.38467824310520937, "grad_norm": 1.504410121728268, "learning_rate": 1.409716485602361e-05, "loss": 0.7123, "step": 3766 }, { "epoch": 0.38478038815117466, "grad_norm": 1.5097753849551514, "learning_rate": 1.4094146728120755e-05, "loss": 0.6825, "step": 3767 }, { "epoch": 0.38488253319713994, "grad_norm": 1.2836896028166562, "learning_rate": 1.4091128152110896e-05, "loss": 0.6557, "step": 3768 }, { "epoch": 0.3849846782431052, "grad_norm": 1.4172337011280671, "learning_rate": 1.4088109128324412e-05, "loss": 0.7636, "step": 3769 }, { "epoch": 0.38508682328907046, "grad_norm": 1.4493320130582017, "learning_rate": 1.4085089657091748e-05, "loss": 0.7121, "step": 3770 }, { "epoch": 0.38518896833503574, "grad_norm": 1.4293336287887488, "learning_rate": 1.4082069738743379e-05, "loss": 0.6615, "step": 3771 }, { "epoch": 0.385291113381001, "grad_norm": 1.561262664771609, "learning_rate": 1.407904937360984e-05, "loss": 0.7523, "step": 3772 }, { "epoch": 0.3853932584269663, "grad_norm": 1.5752742650109106, "learning_rate": 1.4076028562021712e-05, "loss": 0.6941, "step": 3773 }, { "epoch": 0.38549540347293154, "grad_norm": 1.6157365084836919, "learning_rate": 1.4073007304309625e-05, "loss": 0.7265, "step": 3774 }, { "epoch": 0.3855975485188968, "grad_norm": 1.2843072739037762, "learning_rate": 1.4069985600804259e-05, "loss": 0.6216, "step": 3775 }, { "epoch": 0.3856996935648621, "grad_norm": 1.3638624381058624, "learning_rate": 1.4066963451836336e-05, "loss": 0.7802, "step": 3776 }, { "epoch": 0.3858018386108274, "grad_norm": 1.4913842153789156, "learning_rate": 1.4063940857736635e-05, "loss": 0.67, "step": 3777 }, { "epoch": 0.3859039836567926, "grad_norm": 1.4256038551502161, "learning_rate": 1.4060917818835984e-05, "loss": 0.682, "step": 3778 }, { "epoch": 0.3860061287027579, "grad_norm": 1.3237038158235426, "learning_rate": 1.4057894335465254e-05, "loss": 0.6387, "step": 3779 }, { "epoch": 0.3861082737487232, "grad_norm": 1.5701135455518098, "learning_rate": 1.4054870407955368e-05, "loss": 0.7722, "step": 3780 }, { "epoch": 0.3862104187946885, "grad_norm": 1.3586204222088831, "learning_rate": 1.4051846036637291e-05, "loss": 0.718, "step": 3781 }, { "epoch": 0.3863125638406537, "grad_norm": 1.4477910154981761, "learning_rate": 1.4048821221842053e-05, "loss": 0.6886, "step": 3782 }, { "epoch": 0.386414708886619, "grad_norm": 1.3975691930435843, "learning_rate": 1.4045795963900712e-05, "loss": 0.7267, "step": 3783 }, { "epoch": 0.3865168539325843, "grad_norm": 1.5426801824516347, "learning_rate": 1.4042770263144394e-05, "loss": 0.7604, "step": 3784 }, { "epoch": 0.38661899897854957, "grad_norm": 1.3444052619075637, "learning_rate": 1.4039744119904255e-05, "loss": 0.6736, "step": 3785 }, { "epoch": 0.3867211440245148, "grad_norm": 1.4464471907102965, "learning_rate": 1.4036717534511512e-05, "loss": 0.7979, "step": 3786 }, { "epoch": 0.3868232890704801, "grad_norm": 1.5506339656209958, "learning_rate": 1.4033690507297431e-05, "loss": 0.7148, "step": 3787 }, { "epoch": 0.38692543411644537, "grad_norm": 1.3616792913693565, "learning_rate": 1.4030663038593313e-05, "loss": 0.5903, "step": 3788 }, { "epoch": 0.3870275791624106, "grad_norm": 1.477234958466394, "learning_rate": 1.4027635128730524e-05, "loss": 0.7175, "step": 3789 }, { "epoch": 0.3871297242083759, "grad_norm": 1.483024065976932, "learning_rate": 1.4024606778040468e-05, "loss": 0.8525, "step": 3790 }, { "epoch": 0.38723186925434117, "grad_norm": 1.4399692570865557, "learning_rate": 1.4021577986854597e-05, "loss": 0.6785, "step": 3791 }, { "epoch": 0.38733401430030645, "grad_norm": 1.543952091241829, "learning_rate": 1.401854875550442e-05, "loss": 0.7644, "step": 3792 }, { "epoch": 0.3874361593462717, "grad_norm": 1.367701794259396, "learning_rate": 1.4015519084321483e-05, "loss": 0.6241, "step": 3793 }, { "epoch": 0.38753830439223697, "grad_norm": 1.4167830025298678, "learning_rate": 1.401248897363739e-05, "loss": 0.7812, "step": 3794 }, { "epoch": 0.38764044943820225, "grad_norm": 1.5361223845566874, "learning_rate": 1.4009458423783786e-05, "loss": 0.7346, "step": 3795 }, { "epoch": 0.38774259448416754, "grad_norm": 1.4929830568668665, "learning_rate": 1.4006427435092367e-05, "loss": 0.8091, "step": 3796 }, { "epoch": 0.38784473953013276, "grad_norm": 1.4505281031501869, "learning_rate": 1.4003396007894877e-05, "loss": 0.6086, "step": 3797 }, { "epoch": 0.38794688457609805, "grad_norm": 1.3364063002693791, "learning_rate": 1.4000364142523104e-05, "loss": 0.5811, "step": 3798 }, { "epoch": 0.38804902962206334, "grad_norm": 1.471310892213482, "learning_rate": 1.3997331839308897e-05, "loss": 0.8029, "step": 3799 }, { "epoch": 0.3881511746680286, "grad_norm": 1.4016025835588313, "learning_rate": 1.3994299098584132e-05, "loss": 0.6538, "step": 3800 }, { "epoch": 0.38825331971399385, "grad_norm": 1.4803810223368623, "learning_rate": 1.3991265920680755e-05, "loss": 0.6896, "step": 3801 }, { "epoch": 0.38835546475995913, "grad_norm": 1.3342000455648262, "learning_rate": 1.3988232305930742e-05, "loss": 0.7344, "step": 3802 }, { "epoch": 0.3884576098059244, "grad_norm": 1.4028968859425381, "learning_rate": 1.3985198254666123e-05, "loss": 0.7669, "step": 3803 }, { "epoch": 0.3885597548518897, "grad_norm": 1.4105008365856497, "learning_rate": 1.3982163767218988e-05, "loss": 0.7082, "step": 3804 }, { "epoch": 0.38866189989785493, "grad_norm": 1.4716133457961391, "learning_rate": 1.397912884392145e-05, "loss": 0.6897, "step": 3805 }, { "epoch": 0.3887640449438202, "grad_norm": 1.4736651135307453, "learning_rate": 1.397609348510569e-05, "loss": 0.6549, "step": 3806 }, { "epoch": 0.3888661899897855, "grad_norm": 1.3449189314402574, "learning_rate": 1.397305769110393e-05, "loss": 0.7241, "step": 3807 }, { "epoch": 0.3889683350357508, "grad_norm": 1.560656152061021, "learning_rate": 1.3970021462248438e-05, "loss": 0.685, "step": 3808 }, { "epoch": 0.389070480081716, "grad_norm": 1.5500911803409483, "learning_rate": 1.3966984798871533e-05, "loss": 0.7301, "step": 3809 }, { "epoch": 0.3891726251276813, "grad_norm": 1.5774516460888948, "learning_rate": 1.3963947701305576e-05, "loss": 0.8131, "step": 3810 }, { "epoch": 0.3892747701736466, "grad_norm": 1.4057606200694768, "learning_rate": 1.3960910169882986e-05, "loss": 0.7497, "step": 3811 }, { "epoch": 0.3893769152196119, "grad_norm": 1.6710326193218412, "learning_rate": 1.3957872204936217e-05, "loss": 0.7145, "step": 3812 }, { "epoch": 0.3894790602655771, "grad_norm": 1.5103702633037224, "learning_rate": 1.3954833806797777e-05, "loss": 0.7026, "step": 3813 }, { "epoch": 0.3895812053115424, "grad_norm": 1.4687401064318544, "learning_rate": 1.3951794975800223e-05, "loss": 0.7619, "step": 3814 }, { "epoch": 0.3896833503575077, "grad_norm": 1.3630355205105802, "learning_rate": 1.3948755712276156e-05, "loss": 0.7237, "step": 3815 }, { "epoch": 0.38978549540347296, "grad_norm": 1.415145336045743, "learning_rate": 1.3945716016558227e-05, "loss": 0.6942, "step": 3816 }, { "epoch": 0.3898876404494382, "grad_norm": 1.492937099869908, "learning_rate": 1.3942675888979126e-05, "loss": 0.6405, "step": 3817 }, { "epoch": 0.3899897854954035, "grad_norm": 1.447914265191576, "learning_rate": 1.3939635329871606e-05, "loss": 0.7372, "step": 3818 }, { "epoch": 0.39009193054136876, "grad_norm": 1.582482153145002, "learning_rate": 1.3936594339568453e-05, "loss": 0.7709, "step": 3819 }, { "epoch": 0.390194075587334, "grad_norm": 1.4372404652014534, "learning_rate": 1.3933552918402504e-05, "loss": 0.7453, "step": 3820 }, { "epoch": 0.3902962206332993, "grad_norm": 1.5969613261130475, "learning_rate": 1.3930511066706647e-05, "loss": 0.6739, "step": 3821 }, { "epoch": 0.39039836567926456, "grad_norm": 1.2944315027282458, "learning_rate": 1.3927468784813816e-05, "loss": 0.6752, "step": 3822 }, { "epoch": 0.39050051072522984, "grad_norm": 1.4333652858388763, "learning_rate": 1.3924426073056988e-05, "loss": 0.6743, "step": 3823 }, { "epoch": 0.3906026557711951, "grad_norm": 1.6869741803673837, "learning_rate": 1.3921382931769193e-05, "loss": 0.755, "step": 3824 }, { "epoch": 0.39070480081716036, "grad_norm": 1.5869567302424308, "learning_rate": 1.3918339361283498e-05, "loss": 0.7461, "step": 3825 }, { "epoch": 0.39080694586312564, "grad_norm": 1.4032276389396365, "learning_rate": 1.391529536193303e-05, "loss": 0.8414, "step": 3826 }, { "epoch": 0.39090909090909093, "grad_norm": 1.5070123528417991, "learning_rate": 1.3912250934050955e-05, "loss": 0.7143, "step": 3827 }, { "epoch": 0.39101123595505616, "grad_norm": 1.4976870547125534, "learning_rate": 1.390920607797049e-05, "loss": 0.7817, "step": 3828 }, { "epoch": 0.39111338100102144, "grad_norm": 1.5725928830087903, "learning_rate": 1.3906160794024892e-05, "loss": 0.9126, "step": 3829 }, { "epoch": 0.39121552604698673, "grad_norm": 1.5718699984987594, "learning_rate": 1.390311508254747e-05, "loss": 0.6704, "step": 3830 }, { "epoch": 0.391317671092952, "grad_norm": 1.4603811231893467, "learning_rate": 1.3900068943871585e-05, "loss": 0.7165, "step": 3831 }, { "epoch": 0.39141981613891724, "grad_norm": 1.7015118894806842, "learning_rate": 1.3897022378330631e-05, "loss": 0.8638, "step": 3832 }, { "epoch": 0.3915219611848825, "grad_norm": 1.4157809966635893, "learning_rate": 1.389397538625806e-05, "loss": 0.7468, "step": 3833 }, { "epoch": 0.3916241062308478, "grad_norm": 1.3652585344398818, "learning_rate": 1.3890927967987368e-05, "loss": 0.5949, "step": 3834 }, { "epoch": 0.3917262512768131, "grad_norm": 1.3374366716300028, "learning_rate": 1.3887880123852097e-05, "loss": 0.7264, "step": 3835 }, { "epoch": 0.3918283963227783, "grad_norm": 1.2945434004372032, "learning_rate": 1.3884831854185833e-05, "loss": 0.582, "step": 3836 }, { "epoch": 0.3919305413687436, "grad_norm": 1.5209896190377057, "learning_rate": 1.3881783159322212e-05, "loss": 0.7425, "step": 3837 }, { "epoch": 0.3920326864147089, "grad_norm": 1.4302410886237777, "learning_rate": 1.3878734039594919e-05, "loss": 0.7638, "step": 3838 }, { "epoch": 0.3921348314606742, "grad_norm": 1.3544835154354662, "learning_rate": 1.3875684495337677e-05, "loss": 0.6926, "step": 3839 }, { "epoch": 0.3922369765066394, "grad_norm": 1.5641842309190743, "learning_rate": 1.3872634526884263e-05, "loss": 0.7424, "step": 3840 }, { "epoch": 0.3923391215526047, "grad_norm": 1.428636050913717, "learning_rate": 1.3869584134568498e-05, "loss": 0.675, "step": 3841 }, { "epoch": 0.39244126659857, "grad_norm": 1.5404072261839439, "learning_rate": 1.3866533318724251e-05, "loss": 0.7564, "step": 3842 }, { "epoch": 0.39254341164453527, "grad_norm": 1.5180082883217445, "learning_rate": 1.3863482079685434e-05, "loss": 0.7947, "step": 3843 }, { "epoch": 0.3926455566905005, "grad_norm": 1.533991228673674, "learning_rate": 1.3860430417786007e-05, "loss": 0.8351, "step": 3844 }, { "epoch": 0.3927477017364658, "grad_norm": 1.3786754232558622, "learning_rate": 1.3857378333359974e-05, "loss": 0.7524, "step": 3845 }, { "epoch": 0.39284984678243107, "grad_norm": 1.4671042680694282, "learning_rate": 1.3854325826741394e-05, "loss": 0.6452, "step": 3846 }, { "epoch": 0.3929519918283963, "grad_norm": 1.459733720941895, "learning_rate": 1.385127289826436e-05, "loss": 0.8106, "step": 3847 }, { "epoch": 0.3930541368743616, "grad_norm": 1.419738861353782, "learning_rate": 1.384821954826302e-05, "loss": 0.6574, "step": 3848 }, { "epoch": 0.39315628192032687, "grad_norm": 1.4382932929675243, "learning_rate": 1.3845165777071563e-05, "loss": 0.8002, "step": 3849 }, { "epoch": 0.39325842696629215, "grad_norm": 1.498302725620065, "learning_rate": 1.3842111585024228e-05, "loss": 0.8133, "step": 3850 }, { "epoch": 0.3933605720122574, "grad_norm": 1.3826833795166764, "learning_rate": 1.3839056972455298e-05, "loss": 0.7271, "step": 3851 }, { "epoch": 0.39346271705822267, "grad_norm": 1.5736391347419851, "learning_rate": 1.3836001939699103e-05, "loss": 0.7228, "step": 3852 }, { "epoch": 0.39356486210418795, "grad_norm": 1.4211492796325893, "learning_rate": 1.3832946487090013e-05, "loss": 0.7225, "step": 3853 }, { "epoch": 0.39366700715015324, "grad_norm": 1.4849278164492081, "learning_rate": 1.3829890614962458e-05, "loss": 0.7395, "step": 3854 }, { "epoch": 0.39376915219611847, "grad_norm": 1.4827839462755892, "learning_rate": 1.3826834323650899e-05, "loss": 0.8246, "step": 3855 }, { "epoch": 0.39387129724208375, "grad_norm": 1.4550382409605542, "learning_rate": 1.3823777613489853e-05, "loss": 0.8323, "step": 3856 }, { "epoch": 0.39397344228804904, "grad_norm": 1.2993347507634034, "learning_rate": 1.3820720484813874e-05, "loss": 0.6335, "step": 3857 }, { "epoch": 0.3940755873340143, "grad_norm": 1.6127229718692306, "learning_rate": 1.381766293795757e-05, "loss": 0.7798, "step": 3858 }, { "epoch": 0.39417773237997955, "grad_norm": 1.4971257082698561, "learning_rate": 1.381460497325559e-05, "loss": 0.8368, "step": 3859 }, { "epoch": 0.39427987742594484, "grad_norm": 1.4936932312978695, "learning_rate": 1.3811546591042632e-05, "loss": 0.6884, "step": 3860 }, { "epoch": 0.3943820224719101, "grad_norm": 1.447778982344869, "learning_rate": 1.3808487791653438e-05, "loss": 0.7363, "step": 3861 }, { "epoch": 0.3944841675178754, "grad_norm": 1.4496123710529063, "learning_rate": 1.3805428575422795e-05, "loss": 0.6958, "step": 3862 }, { "epoch": 0.39458631256384064, "grad_norm": 1.5031828740202973, "learning_rate": 1.3802368942685536e-05, "loss": 0.7102, "step": 3863 }, { "epoch": 0.3946884576098059, "grad_norm": 1.4712468047199487, "learning_rate": 1.3799308893776537e-05, "loss": 0.5528, "step": 3864 }, { "epoch": 0.3947906026557712, "grad_norm": 1.575318837743212, "learning_rate": 1.3796248429030727e-05, "loss": 0.8104, "step": 3865 }, { "epoch": 0.3948927477017365, "grad_norm": 1.6240248308077432, "learning_rate": 1.3793187548783073e-05, "loss": 0.7764, "step": 3866 }, { "epoch": 0.3949948927477017, "grad_norm": 1.5629759436729338, "learning_rate": 1.379012625336859e-05, "loss": 0.6918, "step": 3867 }, { "epoch": 0.395097037793667, "grad_norm": 1.4860945088833404, "learning_rate": 1.3787064543122344e-05, "loss": 0.7773, "step": 3868 }, { "epoch": 0.3951991828396323, "grad_norm": 1.3403475529031001, "learning_rate": 1.3784002418379432e-05, "loss": 0.8167, "step": 3869 }, { "epoch": 0.3953013278855976, "grad_norm": 1.3831819229421647, "learning_rate": 1.3780939879475013e-05, "loss": 0.7459, "step": 3870 }, { "epoch": 0.3954034729315628, "grad_norm": 1.6226970679988384, "learning_rate": 1.3777876926744279e-05, "loss": 0.851, "step": 3871 }, { "epoch": 0.3955056179775281, "grad_norm": 1.421789867750657, "learning_rate": 1.3774813560522477e-05, "loss": 0.6483, "step": 3872 }, { "epoch": 0.3956077630234934, "grad_norm": 1.3039819223380655, "learning_rate": 1.3771749781144893e-05, "loss": 0.6996, "step": 3873 }, { "epoch": 0.3957099080694586, "grad_norm": 1.5443144584540511, "learning_rate": 1.3768685588946855e-05, "loss": 0.7145, "step": 3874 }, { "epoch": 0.3958120531154239, "grad_norm": 1.5121372025367539, "learning_rate": 1.3765620984263747e-05, "loss": 0.7554, "step": 3875 }, { "epoch": 0.3959141981613892, "grad_norm": 1.5581868135274402, "learning_rate": 1.3762555967430988e-05, "loss": 0.8459, "step": 3876 }, { "epoch": 0.39601634320735446, "grad_norm": 1.4945122856245026, "learning_rate": 1.3759490538784051e-05, "loss": 0.6874, "step": 3877 }, { "epoch": 0.3961184882533197, "grad_norm": 1.3293848597846276, "learning_rate": 1.3756424698658442e-05, "loss": 0.6224, "step": 3878 }, { "epoch": 0.396220633299285, "grad_norm": 1.444593536172488, "learning_rate": 1.3753358447389722e-05, "loss": 0.7244, "step": 3879 }, { "epoch": 0.39632277834525026, "grad_norm": 1.4577429935459794, "learning_rate": 1.3750291785313498e-05, "loss": 0.7573, "step": 3880 }, { "epoch": 0.39642492339121554, "grad_norm": 1.4053607183030763, "learning_rate": 1.3747224712765413e-05, "loss": 0.7458, "step": 3881 }, { "epoch": 0.3965270684371808, "grad_norm": 1.5280314723356028, "learning_rate": 1.374415723008116e-05, "loss": 0.7045, "step": 3882 }, { "epoch": 0.39662921348314606, "grad_norm": 1.4203749531374863, "learning_rate": 1.3741089337596485e-05, "loss": 0.7055, "step": 3883 }, { "epoch": 0.39673135852911134, "grad_norm": 1.511675214775165, "learning_rate": 1.3738021035647162e-05, "loss": 0.7812, "step": 3884 }, { "epoch": 0.39683350357507663, "grad_norm": 1.4393952808548964, "learning_rate": 1.3734952324569022e-05, "loss": 0.7806, "step": 3885 }, { "epoch": 0.39693564862104186, "grad_norm": 1.5449155181200394, "learning_rate": 1.3731883204697933e-05, "loss": 0.7001, "step": 3886 }, { "epoch": 0.39703779366700714, "grad_norm": 1.4263016552807994, "learning_rate": 1.3728813676369824e-05, "loss": 0.6522, "step": 3887 }, { "epoch": 0.39713993871297243, "grad_norm": 1.574472780914036, "learning_rate": 1.3725743739920643e-05, "loss": 0.7653, "step": 3888 }, { "epoch": 0.3972420837589377, "grad_norm": 1.3321799224685371, "learning_rate": 1.3722673395686403e-05, "loss": 0.693, "step": 3889 }, { "epoch": 0.39734422880490294, "grad_norm": 1.5680343527042515, "learning_rate": 1.3719602644003157e-05, "loss": 0.8382, "step": 3890 }, { "epoch": 0.39744637385086823, "grad_norm": 1.4625928324280115, "learning_rate": 1.3716531485206996e-05, "loss": 0.737, "step": 3891 }, { "epoch": 0.3975485188968335, "grad_norm": 1.377147868448086, "learning_rate": 1.3713459919634065e-05, "loss": 0.7215, "step": 3892 }, { "epoch": 0.3976506639427988, "grad_norm": 1.443674178481787, "learning_rate": 1.3710387947620545e-05, "loss": 0.7047, "step": 3893 }, { "epoch": 0.39775280898876403, "grad_norm": 1.3803633943725935, "learning_rate": 1.3707315569502666e-05, "loss": 0.6387, "step": 3894 }, { "epoch": 0.3978549540347293, "grad_norm": 1.4414392107718101, "learning_rate": 1.3704242785616706e-05, "loss": 0.8225, "step": 3895 }, { "epoch": 0.3979570990806946, "grad_norm": 1.3682923726441567, "learning_rate": 1.3701169596298978e-05, "loss": 0.624, "step": 3896 }, { "epoch": 0.3980592441266599, "grad_norm": 1.4218465584577573, "learning_rate": 1.3698096001885847e-05, "loss": 0.7508, "step": 3897 }, { "epoch": 0.3981613891726251, "grad_norm": 1.4811466445923365, "learning_rate": 1.3695022002713718e-05, "loss": 0.7379, "step": 3898 }, { "epoch": 0.3982635342185904, "grad_norm": 1.405100123345751, "learning_rate": 1.3691947599119045e-05, "loss": 0.7282, "step": 3899 }, { "epoch": 0.3983656792645557, "grad_norm": 1.3975491629649694, "learning_rate": 1.3688872791438321e-05, "loss": 0.7171, "step": 3900 }, { "epoch": 0.3984678243105209, "grad_norm": 1.528797309142376, "learning_rate": 1.368579758000809e-05, "loss": 0.6896, "step": 3901 }, { "epoch": 0.3985699693564862, "grad_norm": 1.511922383903831, "learning_rate": 1.3682721965164927e-05, "loss": 0.7519, "step": 3902 }, { "epoch": 0.3986721144024515, "grad_norm": 1.5482907535340173, "learning_rate": 1.3679645947245468e-05, "loss": 0.7152, "step": 3903 }, { "epoch": 0.39877425944841677, "grad_norm": 1.4637116582041945, "learning_rate": 1.3676569526586383e-05, "loss": 0.7795, "step": 3904 }, { "epoch": 0.398876404494382, "grad_norm": 1.6253152936574335, "learning_rate": 1.3673492703524387e-05, "loss": 0.7342, "step": 3905 }, { "epoch": 0.3989785495403473, "grad_norm": 1.4816691856353705, "learning_rate": 1.3670415478396241e-05, "loss": 0.6209, "step": 3906 }, { "epoch": 0.39908069458631257, "grad_norm": 1.604340339585944, "learning_rate": 1.3667337851538753e-05, "loss": 0.7176, "step": 3907 }, { "epoch": 0.39918283963227785, "grad_norm": 1.4886907759318158, "learning_rate": 1.3664259823288764e-05, "loss": 0.6721, "step": 3908 }, { "epoch": 0.3992849846782431, "grad_norm": 1.4849185479657037, "learning_rate": 1.3661181393983171e-05, "loss": 0.7908, "step": 3909 }, { "epoch": 0.39938712972420837, "grad_norm": 1.5228324369368267, "learning_rate": 1.365810256395891e-05, "loss": 0.6371, "step": 3910 }, { "epoch": 0.39948927477017365, "grad_norm": 1.360855616566488, "learning_rate": 1.3655023333552957e-05, "loss": 0.7231, "step": 3911 }, { "epoch": 0.39959141981613894, "grad_norm": 1.385931559379355, "learning_rate": 1.3651943703102344e-05, "loss": 0.7833, "step": 3912 }, { "epoch": 0.39969356486210417, "grad_norm": 1.5691124569216457, "learning_rate": 1.3648863672944129e-05, "loss": 0.846, "step": 3913 }, { "epoch": 0.39979570990806945, "grad_norm": 1.5164288859050117, "learning_rate": 1.3645783243415427e-05, "loss": 0.6665, "step": 3914 }, { "epoch": 0.39989785495403474, "grad_norm": 1.4868281796022975, "learning_rate": 1.3642702414853395e-05, "loss": 0.6937, "step": 3915 }, { "epoch": 0.4, "grad_norm": 1.5487084566709683, "learning_rate": 1.3639621187595231e-05, "loss": 0.656, "step": 3916 }, { "epoch": 0.40010214504596525, "grad_norm": 1.3863966900893654, "learning_rate": 1.3636539561978177e-05, "loss": 0.704, "step": 3917 }, { "epoch": 0.40020429009193054, "grad_norm": 1.35704258014032, "learning_rate": 1.3633457538339514e-05, "loss": 0.7869, "step": 3918 }, { "epoch": 0.4003064351378958, "grad_norm": 1.4464960838959504, "learning_rate": 1.3630375117016581e-05, "loss": 0.8135, "step": 3919 }, { "epoch": 0.4004085801838611, "grad_norm": 1.4228264552800438, "learning_rate": 1.3627292298346745e-05, "loss": 0.7292, "step": 3920 }, { "epoch": 0.40051072522982634, "grad_norm": 1.458528323337574, "learning_rate": 1.3624209082667421e-05, "loss": 0.7324, "step": 3921 }, { "epoch": 0.4006128702757916, "grad_norm": 1.7129037280627757, "learning_rate": 1.3621125470316075e-05, "loss": 0.7734, "step": 3922 }, { "epoch": 0.4007150153217569, "grad_norm": 1.5492497224615298, "learning_rate": 1.3618041461630203e-05, "loss": 0.7483, "step": 3923 }, { "epoch": 0.4008171603677222, "grad_norm": 1.4286927839640557, "learning_rate": 1.3614957056947358e-05, "loss": 0.6844, "step": 3924 }, { "epoch": 0.4009193054136874, "grad_norm": 1.3623250262125992, "learning_rate": 1.3611872256605126e-05, "loss": 0.7252, "step": 3925 }, { "epoch": 0.4010214504596527, "grad_norm": 1.268783404075245, "learning_rate": 1.3608787060941143e-05, "loss": 0.6798, "step": 3926 }, { "epoch": 0.401123595505618, "grad_norm": 1.3210037544716697, "learning_rate": 1.3605701470293084e-05, "loss": 0.7512, "step": 3927 }, { "epoch": 0.4012257405515832, "grad_norm": 1.3830901556247912, "learning_rate": 1.3602615484998669e-05, "loss": 0.6789, "step": 3928 }, { "epoch": 0.4013278855975485, "grad_norm": 1.3599463721294547, "learning_rate": 1.3599529105395664e-05, "loss": 0.6441, "step": 3929 }, { "epoch": 0.4014300306435138, "grad_norm": 1.3537263893132887, "learning_rate": 1.3596442331821868e-05, "loss": 0.7625, "step": 3930 }, { "epoch": 0.4015321756894791, "grad_norm": 1.4744110922660467, "learning_rate": 1.3593355164615139e-05, "loss": 0.755, "step": 3931 }, { "epoch": 0.4016343207354443, "grad_norm": 1.4421667567694034, "learning_rate": 1.3590267604113363e-05, "loss": 0.829, "step": 3932 }, { "epoch": 0.4017364657814096, "grad_norm": 1.4935713368118413, "learning_rate": 1.3587179650654483e-05, "loss": 0.6835, "step": 3933 }, { "epoch": 0.4018386108273749, "grad_norm": 1.5524718741980919, "learning_rate": 1.3584091304576468e-05, "loss": 0.7731, "step": 3934 }, { "epoch": 0.40194075587334016, "grad_norm": 1.4621447786162751, "learning_rate": 1.3581002566217346e-05, "loss": 0.7162, "step": 3935 }, { "epoch": 0.4020429009193054, "grad_norm": 1.3211761698932112, "learning_rate": 1.3577913435915179e-05, "loss": 0.7729, "step": 3936 }, { "epoch": 0.4021450459652707, "grad_norm": 1.347435497718406, "learning_rate": 1.3574823914008075e-05, "loss": 0.6075, "step": 3937 }, { "epoch": 0.40224719101123596, "grad_norm": 1.4375418782120868, "learning_rate": 1.3571734000834184e-05, "loss": 0.7448, "step": 3938 }, { "epoch": 0.40234933605720125, "grad_norm": 1.5612880733078671, "learning_rate": 1.3568643696731701e-05, "loss": 0.7331, "step": 3939 }, { "epoch": 0.4024514811031665, "grad_norm": 1.2174957421342552, "learning_rate": 1.3565553002038857e-05, "loss": 0.6215, "step": 3940 }, { "epoch": 0.40255362614913176, "grad_norm": 1.40596212385612, "learning_rate": 1.3562461917093933e-05, "loss": 0.6735, "step": 3941 }, { "epoch": 0.40265577119509705, "grad_norm": 1.5559281528455997, "learning_rate": 1.3559370442235248e-05, "loss": 0.869, "step": 3942 }, { "epoch": 0.40275791624106233, "grad_norm": 1.4389679535163094, "learning_rate": 1.3556278577801174e-05, "loss": 0.6565, "step": 3943 }, { "epoch": 0.40286006128702756, "grad_norm": 1.30427387696139, "learning_rate": 1.3553186324130113e-05, "loss": 0.6137, "step": 3944 }, { "epoch": 0.40296220633299284, "grad_norm": 1.5912372970477873, "learning_rate": 1.355009368156051e-05, "loss": 0.8153, "step": 3945 }, { "epoch": 0.40306435137895813, "grad_norm": 1.4314962657769297, "learning_rate": 1.354700065043086e-05, "loss": 0.6701, "step": 3946 }, { "epoch": 0.4031664964249234, "grad_norm": 1.3398392432440458, "learning_rate": 1.3543907231079695e-05, "loss": 0.6382, "step": 3947 }, { "epoch": 0.40326864147088864, "grad_norm": 1.408398024456932, "learning_rate": 1.3540813423845598e-05, "loss": 0.701, "step": 3948 }, { "epoch": 0.40337078651685393, "grad_norm": 1.430391931582551, "learning_rate": 1.3537719229067182e-05, "loss": 0.6059, "step": 3949 }, { "epoch": 0.4034729315628192, "grad_norm": 1.40930955091006, "learning_rate": 1.353462464708311e-05, "loss": 0.6494, "step": 3950 }, { "epoch": 0.4035750766087845, "grad_norm": 1.707247250730458, "learning_rate": 1.353152967823209e-05, "loss": 0.699, "step": 3951 }, { "epoch": 0.40367722165474973, "grad_norm": 1.7545118283765881, "learning_rate": 1.352843432285286e-05, "loss": 0.7723, "step": 3952 }, { "epoch": 0.403779366700715, "grad_norm": 1.2924869944767148, "learning_rate": 1.3525338581284217e-05, "loss": 0.6966, "step": 3953 }, { "epoch": 0.4038815117466803, "grad_norm": 1.4809601285026897, "learning_rate": 1.3522242453864989e-05, "loss": 0.8494, "step": 3954 }, { "epoch": 0.40398365679264553, "grad_norm": 1.6202538099161883, "learning_rate": 1.3519145940934046e-05, "loss": 0.7635, "step": 3955 }, { "epoch": 0.4040858018386108, "grad_norm": 1.4840210725938705, "learning_rate": 1.3516049042830309e-05, "loss": 0.6187, "step": 3956 }, { "epoch": 0.4041879468845761, "grad_norm": 1.4016511165760077, "learning_rate": 1.3512951759892732e-05, "loss": 0.6902, "step": 3957 }, { "epoch": 0.4042900919305414, "grad_norm": 1.4490165364294925, "learning_rate": 1.3509854092460312e-05, "loss": 0.8288, "step": 3958 }, { "epoch": 0.4043922369765066, "grad_norm": 1.5945364482774207, "learning_rate": 1.3506756040872098e-05, "loss": 0.8579, "step": 3959 }, { "epoch": 0.4044943820224719, "grad_norm": 1.6134813830821937, "learning_rate": 1.3503657605467169e-05, "loss": 0.7971, "step": 3960 }, { "epoch": 0.4045965270684372, "grad_norm": 1.4130723425582792, "learning_rate": 1.3500558786584652e-05, "loss": 0.7422, "step": 3961 }, { "epoch": 0.40469867211440247, "grad_norm": 1.3897062579924135, "learning_rate": 1.349745958456371e-05, "loss": 0.6762, "step": 3962 }, { "epoch": 0.4048008171603677, "grad_norm": 1.4443174499233737, "learning_rate": 1.349435999974356e-05, "loss": 0.7461, "step": 3963 }, { "epoch": 0.404902962206333, "grad_norm": 1.3455979722265679, "learning_rate": 1.349126003246345e-05, "loss": 0.6902, "step": 3964 }, { "epoch": 0.40500510725229827, "grad_norm": 1.4714956855903425, "learning_rate": 1.3488159683062676e-05, "loss": 0.792, "step": 3965 }, { "epoch": 0.40510725229826355, "grad_norm": 1.3750750688392048, "learning_rate": 1.3485058951880567e-05, "loss": 0.7326, "step": 3966 }, { "epoch": 0.4052093973442288, "grad_norm": 1.4790583048513137, "learning_rate": 1.3481957839256507e-05, "loss": 0.6843, "step": 3967 }, { "epoch": 0.40531154239019407, "grad_norm": 1.5837593203258813, "learning_rate": 1.3478856345529912e-05, "loss": 0.8519, "step": 3968 }, { "epoch": 0.40541368743615935, "grad_norm": 1.5041052195877496, "learning_rate": 1.3475754471040241e-05, "loss": 0.7565, "step": 3969 }, { "epoch": 0.40551583248212464, "grad_norm": 1.410686834330223, "learning_rate": 1.3472652216126995e-05, "loss": 0.6298, "step": 3970 }, { "epoch": 0.40561797752808987, "grad_norm": 1.3890734150718758, "learning_rate": 1.3469549581129726e-05, "loss": 0.7079, "step": 3971 }, { "epoch": 0.40572012257405515, "grad_norm": 1.4950766854091868, "learning_rate": 1.3466446566388009e-05, "loss": 0.8109, "step": 3972 }, { "epoch": 0.40582226762002044, "grad_norm": 1.3807013259326402, "learning_rate": 1.3463343172241481e-05, "loss": 0.7695, "step": 3973 }, { "epoch": 0.4059244126659857, "grad_norm": 1.6045034408931116, "learning_rate": 1.3460239399029797e-05, "loss": 0.8856, "step": 3974 }, { "epoch": 0.40602655771195095, "grad_norm": 1.566836627955001, "learning_rate": 1.3457135247092681e-05, "loss": 0.7502, "step": 3975 }, { "epoch": 0.40612870275791624, "grad_norm": 1.5340444028732412, "learning_rate": 1.3454030716769877e-05, "loss": 0.8823, "step": 3976 }, { "epoch": 0.4062308478038815, "grad_norm": 1.7764155344383277, "learning_rate": 1.3450925808401183e-05, "loss": 0.7316, "step": 3977 }, { "epoch": 0.4063329928498468, "grad_norm": 1.315757738793011, "learning_rate": 1.3447820522326424e-05, "loss": 0.6989, "step": 3978 }, { "epoch": 0.40643513789581204, "grad_norm": 1.4030004981576072, "learning_rate": 1.3444714858885483e-05, "loss": 0.6856, "step": 3979 }, { "epoch": 0.4065372829417773, "grad_norm": 1.3876633012636734, "learning_rate": 1.3441608818418279e-05, "loss": 0.6768, "step": 3980 }, { "epoch": 0.4066394279877426, "grad_norm": 1.421451630320385, "learning_rate": 1.3438502401264761e-05, "loss": 0.673, "step": 3981 }, { "epoch": 0.4067415730337079, "grad_norm": 1.4119383108502743, "learning_rate": 1.3435395607764937e-05, "loss": 0.6616, "step": 3982 }, { "epoch": 0.4068437180796731, "grad_norm": 1.202245556386396, "learning_rate": 1.3432288438258842e-05, "loss": 0.7176, "step": 3983 }, { "epoch": 0.4069458631256384, "grad_norm": 1.6158847333721884, "learning_rate": 1.3429180893086563e-05, "loss": 0.7897, "step": 3984 }, { "epoch": 0.4070480081716037, "grad_norm": 1.5983544361641924, "learning_rate": 1.3426072972588218e-05, "loss": 0.7201, "step": 3985 }, { "epoch": 0.4071501532175689, "grad_norm": 1.393940110995502, "learning_rate": 1.3422964677103969e-05, "loss": 0.6928, "step": 3986 }, { "epoch": 0.4072522982635342, "grad_norm": 1.4549544872447078, "learning_rate": 1.341985600697403e-05, "loss": 0.824, "step": 3987 }, { "epoch": 0.4073544433094995, "grad_norm": 1.4000378655231784, "learning_rate": 1.341674696253864e-05, "loss": 0.7821, "step": 3988 }, { "epoch": 0.4074565883554648, "grad_norm": 1.318623792533593, "learning_rate": 1.3413637544138088e-05, "loss": 0.6568, "step": 3989 }, { "epoch": 0.40755873340143, "grad_norm": 1.4527997464739035, "learning_rate": 1.3410527752112699e-05, "loss": 0.6684, "step": 3990 }, { "epoch": 0.4076608784473953, "grad_norm": 1.423173170425724, "learning_rate": 1.3407417586802845e-05, "loss": 0.7722, "step": 3991 }, { "epoch": 0.4077630234933606, "grad_norm": 1.4800233853858624, "learning_rate": 1.3404307048548934e-05, "loss": 0.6447, "step": 3992 }, { "epoch": 0.40786516853932586, "grad_norm": 1.6048929331152626, "learning_rate": 1.340119613769142e-05, "loss": 0.8272, "step": 3993 }, { "epoch": 0.4079673135852911, "grad_norm": 1.5216292578030324, "learning_rate": 1.3398084854570788e-05, "loss": 0.6717, "step": 3994 }, { "epoch": 0.4080694586312564, "grad_norm": 1.3884734952559645, "learning_rate": 1.3394973199527575e-05, "loss": 0.6891, "step": 3995 }, { "epoch": 0.40817160367722166, "grad_norm": 1.4620912276892837, "learning_rate": 1.339186117290235e-05, "loss": 0.711, "step": 3996 }, { "epoch": 0.40827374872318695, "grad_norm": 1.6461523101663367, "learning_rate": 1.3388748775035732e-05, "loss": 0.7496, "step": 3997 }, { "epoch": 0.4083758937691522, "grad_norm": 1.4212794504962631, "learning_rate": 1.3385636006268367e-05, "loss": 0.6946, "step": 3998 }, { "epoch": 0.40847803881511746, "grad_norm": 1.5977174117370683, "learning_rate": 1.3382522866940955e-05, "loss": 0.7254, "step": 3999 }, { "epoch": 0.40858018386108275, "grad_norm": 1.4268446372800223, "learning_rate": 1.3379409357394231e-05, "loss": 0.6467, "step": 4000 }, { "epoch": 0.40868232890704803, "grad_norm": 1.3955436396136944, "learning_rate": 1.3376295477968968e-05, "loss": 0.7009, "step": 4001 }, { "epoch": 0.40878447395301326, "grad_norm": 1.4970253741797461, "learning_rate": 1.3373181229005985e-05, "loss": 0.7233, "step": 4002 }, { "epoch": 0.40888661899897855, "grad_norm": 1.6427893487077017, "learning_rate": 1.3370066610846136e-05, "loss": 0.7567, "step": 4003 }, { "epoch": 0.40898876404494383, "grad_norm": 1.6248064347821283, "learning_rate": 1.336695162383032e-05, "loss": 0.8051, "step": 4004 }, { "epoch": 0.4090909090909091, "grad_norm": 1.2635339919605855, "learning_rate": 1.3363836268299472e-05, "loss": 0.6449, "step": 4005 }, { "epoch": 0.40919305413687435, "grad_norm": 1.5407783019862278, "learning_rate": 1.3360720544594572e-05, "loss": 0.7464, "step": 4006 }, { "epoch": 0.40929519918283963, "grad_norm": 1.429155086244256, "learning_rate": 1.3357604453056636e-05, "loss": 0.8006, "step": 4007 }, { "epoch": 0.4093973442288049, "grad_norm": 1.4740954139759064, "learning_rate": 1.3354487994026726e-05, "loss": 0.6859, "step": 4008 }, { "epoch": 0.4094994892747702, "grad_norm": 1.4983981257284926, "learning_rate": 1.3351371167845938e-05, "loss": 0.6519, "step": 4009 }, { "epoch": 0.40960163432073543, "grad_norm": 1.4467496037837162, "learning_rate": 1.3348253974855407e-05, "loss": 0.6773, "step": 4010 }, { "epoch": 0.4097037793667007, "grad_norm": 1.464234118623483, "learning_rate": 1.3345136415396317e-05, "loss": 0.7006, "step": 4011 }, { "epoch": 0.409805924412666, "grad_norm": 1.617996398857369, "learning_rate": 1.3342018489809885e-05, "loss": 0.7233, "step": 4012 }, { "epoch": 0.40990806945863123, "grad_norm": 1.485882012502083, "learning_rate": 1.333890019843737e-05, "loss": 0.6574, "step": 4013 }, { "epoch": 0.4100102145045965, "grad_norm": 1.3351104660967639, "learning_rate": 1.333578154162007e-05, "loss": 0.666, "step": 4014 }, { "epoch": 0.4101123595505618, "grad_norm": 1.4480156211530573, "learning_rate": 1.3332662519699326e-05, "loss": 0.7046, "step": 4015 }, { "epoch": 0.4102145045965271, "grad_norm": 1.4492612273184573, "learning_rate": 1.3329543133016519e-05, "loss": 0.7284, "step": 4016 }, { "epoch": 0.4103166496424923, "grad_norm": 1.6242313864696112, "learning_rate": 1.3326423381913061e-05, "loss": 0.7703, "step": 4017 }, { "epoch": 0.4104187946884576, "grad_norm": 1.5514048505913542, "learning_rate": 1.3323303266730414e-05, "loss": 0.6317, "step": 4018 }, { "epoch": 0.4105209397344229, "grad_norm": 1.369421139176379, "learning_rate": 1.3320182787810081e-05, "loss": 0.7265, "step": 4019 }, { "epoch": 0.41062308478038817, "grad_norm": 1.4230252208871446, "learning_rate": 1.3317061945493595e-05, "loss": 0.7471, "step": 4020 }, { "epoch": 0.4107252298263534, "grad_norm": 1.704884280639212, "learning_rate": 1.3313940740122535e-05, "loss": 0.7548, "step": 4021 }, { "epoch": 0.4108273748723187, "grad_norm": 1.4497886501886585, "learning_rate": 1.331081917203852e-05, "loss": 0.7968, "step": 4022 }, { "epoch": 0.41092951991828397, "grad_norm": 1.574166629314942, "learning_rate": 1.3307697241583209e-05, "loss": 0.683, "step": 4023 }, { "epoch": 0.41103166496424925, "grad_norm": 1.4704711616671917, "learning_rate": 1.3304574949098298e-05, "loss": 0.7706, "step": 4024 }, { "epoch": 0.4111338100102145, "grad_norm": 1.4982989974075311, "learning_rate": 1.3301452294925524e-05, "loss": 0.6384, "step": 4025 }, { "epoch": 0.41123595505617977, "grad_norm": 1.5614672109440955, "learning_rate": 1.329832927940666e-05, "loss": 0.809, "step": 4026 }, { "epoch": 0.41133810010214505, "grad_norm": 1.517092556989473, "learning_rate": 1.329520590288353e-05, "loss": 0.7272, "step": 4027 }, { "epoch": 0.41144024514811034, "grad_norm": 1.3980918942386242, "learning_rate": 1.3292082165697981e-05, "loss": 0.6642, "step": 4028 }, { "epoch": 0.41154239019407557, "grad_norm": 1.4862855714792382, "learning_rate": 1.3288958068191915e-05, "loss": 0.6683, "step": 4029 }, { "epoch": 0.41164453524004085, "grad_norm": 1.4313906118644868, "learning_rate": 1.328583361070726e-05, "loss": 0.7199, "step": 4030 }, { "epoch": 0.41174668028600614, "grad_norm": 1.4230495103718672, "learning_rate": 1.3282708793585996e-05, "loss": 0.7442, "step": 4031 }, { "epoch": 0.4118488253319714, "grad_norm": 1.5117735105782308, "learning_rate": 1.3279583617170136e-05, "loss": 0.7102, "step": 4032 }, { "epoch": 0.41195097037793665, "grad_norm": 1.637452319174453, "learning_rate": 1.3276458081801727e-05, "loss": 0.8491, "step": 4033 }, { "epoch": 0.41205311542390194, "grad_norm": 1.4813784444032168, "learning_rate": 1.3273332187822862e-05, "loss": 0.8105, "step": 4034 }, { "epoch": 0.4121552604698672, "grad_norm": 1.5295130979732519, "learning_rate": 1.3270205935575677e-05, "loss": 0.8018, "step": 4035 }, { "epoch": 0.4122574055158325, "grad_norm": 1.4692277187154772, "learning_rate": 1.3267079325402341e-05, "loss": 0.8603, "step": 4036 }, { "epoch": 0.41235955056179774, "grad_norm": 1.4965906261863555, "learning_rate": 1.326395235764506e-05, "loss": 0.7362, "step": 4037 }, { "epoch": 0.412461695607763, "grad_norm": 1.6077418685398024, "learning_rate": 1.3260825032646083e-05, "loss": 0.78, "step": 4038 }, { "epoch": 0.4125638406537283, "grad_norm": 1.5059544814678323, "learning_rate": 1.3257697350747702e-05, "loss": 0.7208, "step": 4039 }, { "epoch": 0.41266598569969354, "grad_norm": 1.6701202522057155, "learning_rate": 1.325456931229224e-05, "loss": 0.7448, "step": 4040 }, { "epoch": 0.4127681307456588, "grad_norm": 1.4841410210905146, "learning_rate": 1.3251440917622067e-05, "loss": 0.7369, "step": 4041 }, { "epoch": 0.4128702757916241, "grad_norm": 1.5065484493022716, "learning_rate": 1.3248312167079583e-05, "loss": 0.6968, "step": 4042 }, { "epoch": 0.4129724208375894, "grad_norm": 1.4774057288347526, "learning_rate": 1.324518306100723e-05, "loss": 0.7776, "step": 4043 }, { "epoch": 0.4130745658835546, "grad_norm": 1.5848087520484264, "learning_rate": 1.32420535997475e-05, "loss": 0.8316, "step": 4044 }, { "epoch": 0.4131767109295199, "grad_norm": 1.4537341024289954, "learning_rate": 1.3238923783642905e-05, "loss": 0.7916, "step": 4045 }, { "epoch": 0.4132788559754852, "grad_norm": 1.505996164577858, "learning_rate": 1.323579361303601e-05, "loss": 0.6997, "step": 4046 }, { "epoch": 0.4133810010214505, "grad_norm": 1.525822819697486, "learning_rate": 1.3232663088269414e-05, "loss": 0.6872, "step": 4047 }, { "epoch": 0.4134831460674157, "grad_norm": 1.4401478573283029, "learning_rate": 1.3229532209685756e-05, "loss": 0.6956, "step": 4048 }, { "epoch": 0.413585291113381, "grad_norm": 1.3974748623576538, "learning_rate": 1.3226400977627709e-05, "loss": 0.702, "step": 4049 }, { "epoch": 0.4136874361593463, "grad_norm": 2.4503745115723503, "learning_rate": 1.3223269392437989e-05, "loss": 0.7215, "step": 4050 }, { "epoch": 0.41378958120531156, "grad_norm": 1.4791876318802741, "learning_rate": 1.3220137454459357e-05, "loss": 0.7362, "step": 4051 }, { "epoch": 0.4138917262512768, "grad_norm": 1.5036532983298305, "learning_rate": 1.3217005164034596e-05, "loss": 0.8564, "step": 4052 }, { "epoch": 0.4139938712972421, "grad_norm": 1.5496950064737762, "learning_rate": 1.3213872521506543e-05, "loss": 0.7661, "step": 4053 }, { "epoch": 0.41409601634320736, "grad_norm": 1.5364254294810693, "learning_rate": 1.3210739527218064e-05, "loss": 0.8544, "step": 4054 }, { "epoch": 0.41419816138917265, "grad_norm": 1.3950736600627378, "learning_rate": 1.320760618151207e-05, "loss": 0.6737, "step": 4055 }, { "epoch": 0.4143003064351379, "grad_norm": 1.382637759625659, "learning_rate": 1.3204472484731508e-05, "loss": 0.595, "step": 4056 }, { "epoch": 0.41440245148110316, "grad_norm": 1.4614796348015409, "learning_rate": 1.3201338437219362e-05, "loss": 0.7135, "step": 4057 }, { "epoch": 0.41450459652706845, "grad_norm": 1.4128923217460214, "learning_rate": 1.3198204039318654e-05, "loss": 0.6091, "step": 4058 }, { "epoch": 0.41460674157303373, "grad_norm": 1.4079889623854227, "learning_rate": 1.3195069291372451e-05, "loss": 0.7852, "step": 4059 }, { "epoch": 0.41470888661899896, "grad_norm": 1.3788582400195226, "learning_rate": 1.3191934193723848e-05, "loss": 0.7591, "step": 4060 }, { "epoch": 0.41481103166496425, "grad_norm": 1.4839946955713155, "learning_rate": 1.3188798746715985e-05, "loss": 0.784, "step": 4061 }, { "epoch": 0.41491317671092953, "grad_norm": 1.4310828363512647, "learning_rate": 1.3185662950692036e-05, "loss": 0.7725, "step": 4062 }, { "epoch": 0.4150153217568948, "grad_norm": 1.4338389284971307, "learning_rate": 1.3182526805995223e-05, "loss": 0.7388, "step": 4063 }, { "epoch": 0.41511746680286005, "grad_norm": 1.333643721706044, "learning_rate": 1.3179390312968793e-05, "loss": 0.6143, "step": 4064 }, { "epoch": 0.41521961184882533, "grad_norm": 1.4905184779583718, "learning_rate": 1.3176253471956043e-05, "loss": 0.6835, "step": 4065 }, { "epoch": 0.4153217568947906, "grad_norm": 1.3421428373345718, "learning_rate": 1.3173116283300293e-05, "loss": 0.7133, "step": 4066 }, { "epoch": 0.41542390194075585, "grad_norm": 1.4617748488641993, "learning_rate": 1.3169978747344919e-05, "loss": 0.6358, "step": 4067 }, { "epoch": 0.41552604698672113, "grad_norm": 1.2958353501005206, "learning_rate": 1.3166840864433322e-05, "loss": 0.6483, "step": 4068 }, { "epoch": 0.4156281920326864, "grad_norm": 1.380326311400662, "learning_rate": 1.3163702634908946e-05, "loss": 0.6627, "step": 4069 }, { "epoch": 0.4157303370786517, "grad_norm": 1.4701725629732483, "learning_rate": 1.316056405911527e-05, "loss": 0.7819, "step": 4070 }, { "epoch": 0.41583248212461693, "grad_norm": 1.3946076579594058, "learning_rate": 1.315742513739582e-05, "loss": 0.7495, "step": 4071 }, { "epoch": 0.4159346271705822, "grad_norm": 1.3511651739021993, "learning_rate": 1.3154285870094147e-05, "loss": 0.6703, "step": 4072 }, { "epoch": 0.4160367722165475, "grad_norm": 1.6026528476763535, "learning_rate": 1.315114625755385e-05, "loss": 0.8442, "step": 4073 }, { "epoch": 0.4161389172625128, "grad_norm": 1.5981961137947693, "learning_rate": 1.3148006300118554e-05, "loss": 0.7094, "step": 4074 }, { "epoch": 0.416241062308478, "grad_norm": 1.491200505025334, "learning_rate": 1.3144865998131939e-05, "loss": 0.7596, "step": 4075 }, { "epoch": 0.4163432073544433, "grad_norm": 1.4710762972812466, "learning_rate": 1.3141725351937709e-05, "loss": 0.7569, "step": 4076 }, { "epoch": 0.4164453524004086, "grad_norm": 1.7610737542326798, "learning_rate": 1.3138584361879607e-05, "loss": 0.7357, "step": 4077 }, { "epoch": 0.41654749744637387, "grad_norm": 1.5061838862324517, "learning_rate": 1.313544302830142e-05, "loss": 0.7304, "step": 4078 }, { "epoch": 0.4166496424923391, "grad_norm": 1.6595599660071645, "learning_rate": 1.3132301351546968e-05, "loss": 0.6979, "step": 4079 }, { "epoch": 0.4167517875383044, "grad_norm": 1.3876253947644883, "learning_rate": 1.3129159331960109e-05, "loss": 0.5958, "step": 4080 }, { "epoch": 0.41685393258426967, "grad_norm": 1.494077824775189, "learning_rate": 1.3126016969884739e-05, "loss": 0.7525, "step": 4081 }, { "epoch": 0.41695607763023496, "grad_norm": 1.359325684243245, "learning_rate": 1.312287426566479e-05, "loss": 0.6103, "step": 4082 }, { "epoch": 0.4170582226762002, "grad_norm": 1.2950168283734158, "learning_rate": 1.3119731219644238e-05, "loss": 0.6323, "step": 4083 }, { "epoch": 0.41716036772216547, "grad_norm": 1.5200803771793918, "learning_rate": 1.3116587832167089e-05, "loss": 0.8037, "step": 4084 }, { "epoch": 0.41726251276813076, "grad_norm": 1.397554587784112, "learning_rate": 1.3113444103577387e-05, "loss": 0.6801, "step": 4085 }, { "epoch": 0.41736465781409604, "grad_norm": 1.4499611613835213, "learning_rate": 1.3110300034219217e-05, "loss": 0.6486, "step": 4086 }, { "epoch": 0.41746680286006127, "grad_norm": 1.611375214389681, "learning_rate": 1.3107155624436696e-05, "loss": 0.7909, "step": 4087 }, { "epoch": 0.41756894790602656, "grad_norm": 1.4177099615406614, "learning_rate": 1.3104010874573987e-05, "loss": 0.6209, "step": 4088 }, { "epoch": 0.41767109295199184, "grad_norm": 1.5614550261294569, "learning_rate": 1.3100865784975281e-05, "loss": 0.7607, "step": 4089 }, { "epoch": 0.4177732379979571, "grad_norm": 1.4913476704737025, "learning_rate": 1.3097720355984812e-05, "loss": 0.7222, "step": 4090 }, { "epoch": 0.41787538304392235, "grad_norm": 1.2766152094934622, "learning_rate": 1.3094574587946847e-05, "loss": 0.6318, "step": 4091 }, { "epoch": 0.41797752808988764, "grad_norm": 1.5814835836471746, "learning_rate": 1.3091428481205697e-05, "loss": 0.7422, "step": 4092 }, { "epoch": 0.4180796731358529, "grad_norm": 1.5392623197466557, "learning_rate": 1.3088282036105701e-05, "loss": 0.7404, "step": 4093 }, { "epoch": 0.41818181818181815, "grad_norm": 1.3660922183503126, "learning_rate": 1.3085135252991238e-05, "loss": 0.6749, "step": 4094 }, { "epoch": 0.41828396322778344, "grad_norm": 1.500850860237259, "learning_rate": 1.3081988132206735e-05, "loss": 0.8192, "step": 4095 }, { "epoch": 0.4183861082737487, "grad_norm": 1.5414610143151695, "learning_rate": 1.3078840674096636e-05, "loss": 0.8525, "step": 4096 }, { "epoch": 0.418488253319714, "grad_norm": 1.6335581950671578, "learning_rate": 1.3075692879005436e-05, "loss": 0.7956, "step": 4097 }, { "epoch": 0.41859039836567924, "grad_norm": 1.325549804648626, "learning_rate": 1.3072544747277663e-05, "loss": 0.603, "step": 4098 }, { "epoch": 0.4186925434116445, "grad_norm": 1.4895319153991593, "learning_rate": 1.3069396279257882e-05, "loss": 0.7239, "step": 4099 }, { "epoch": 0.4187946884576098, "grad_norm": 1.4306054853927743, "learning_rate": 1.3066247475290696e-05, "loss": 0.7257, "step": 4100 }, { "epoch": 0.4188968335035751, "grad_norm": 1.2257462647611135, "learning_rate": 1.3063098335720743e-05, "loss": 0.6458, "step": 4101 }, { "epoch": 0.4189989785495403, "grad_norm": 1.3818102701848796, "learning_rate": 1.3059948860892696e-05, "loss": 0.7495, "step": 4102 }, { "epoch": 0.4191011235955056, "grad_norm": 1.5467326414267168, "learning_rate": 1.305679905115127e-05, "loss": 0.6762, "step": 4103 }, { "epoch": 0.4192032686414709, "grad_norm": 1.3934748653316449, "learning_rate": 1.3053648906841216e-05, "loss": 0.7351, "step": 4104 }, { "epoch": 0.4193054136874362, "grad_norm": 1.6369496700388027, "learning_rate": 1.3050498428307315e-05, "loss": 0.8069, "step": 4105 }, { "epoch": 0.4194075587334014, "grad_norm": 1.4390088599382682, "learning_rate": 1.3047347615894386e-05, "loss": 0.8243, "step": 4106 }, { "epoch": 0.4195097037793667, "grad_norm": 1.572644150538147, "learning_rate": 1.3044196469947296e-05, "loss": 0.7333, "step": 4107 }, { "epoch": 0.419611848825332, "grad_norm": 1.456051791935575, "learning_rate": 1.3041044990810933e-05, "loss": 0.6258, "step": 4108 }, { "epoch": 0.41971399387129726, "grad_norm": 1.454781171265891, "learning_rate": 1.3037893178830234e-05, "loss": 0.643, "step": 4109 }, { "epoch": 0.4198161389172625, "grad_norm": 1.7481206820761466, "learning_rate": 1.3034741034350162e-05, "loss": 0.8228, "step": 4110 }, { "epoch": 0.4199182839632278, "grad_norm": 1.6683891688452452, "learning_rate": 1.3031588557715721e-05, "loss": 0.7802, "step": 4111 }, { "epoch": 0.42002042900919306, "grad_norm": 1.469212460184517, "learning_rate": 1.302843574927196e-05, "loss": 0.613, "step": 4112 }, { "epoch": 0.42012257405515835, "grad_norm": 1.2670444782115955, "learning_rate": 1.3025282609363943e-05, "loss": 0.7991, "step": 4113 }, { "epoch": 0.4202247191011236, "grad_norm": 1.4568412502466304, "learning_rate": 1.3022129138336792e-05, "loss": 0.7549, "step": 4114 }, { "epoch": 0.42032686414708886, "grad_norm": 1.6850376317991482, "learning_rate": 1.3018975336535658e-05, "loss": 0.7966, "step": 4115 }, { "epoch": 0.42042900919305415, "grad_norm": 1.3926599918040983, "learning_rate": 1.3015821204305716e-05, "loss": 0.6788, "step": 4116 }, { "epoch": 0.42053115423901943, "grad_norm": 1.5124648792164923, "learning_rate": 1.3012666741992202e-05, "loss": 0.7618, "step": 4117 }, { "epoch": 0.42063329928498466, "grad_norm": 1.325928373671755, "learning_rate": 1.3009511949940359e-05, "loss": 0.7059, "step": 4118 }, { "epoch": 0.42073544433094995, "grad_norm": 1.528607455210471, "learning_rate": 1.3006356828495495e-05, "loss": 0.9157, "step": 4119 }, { "epoch": 0.42083758937691523, "grad_norm": 1.3301703344120657, "learning_rate": 1.3003201378002929e-05, "loss": 0.7041, "step": 4120 }, { "epoch": 0.42093973442288046, "grad_norm": 1.3850296142170697, "learning_rate": 1.3000045598808035e-05, "loss": 0.7068, "step": 4121 }, { "epoch": 0.42104187946884575, "grad_norm": 1.4007606860416288, "learning_rate": 1.299688949125621e-05, "loss": 0.7533, "step": 4122 }, { "epoch": 0.42114402451481103, "grad_norm": 1.5627785692444696, "learning_rate": 1.2993733055692897e-05, "loss": 0.7628, "step": 4123 }, { "epoch": 0.4212461695607763, "grad_norm": 1.2754867209599365, "learning_rate": 1.2990576292463563e-05, "loss": 0.7234, "step": 4124 }, { "epoch": 0.42134831460674155, "grad_norm": 1.6681084506778683, "learning_rate": 1.2987419201913724e-05, "loss": 0.7951, "step": 4125 }, { "epoch": 0.42145045965270683, "grad_norm": 1.4505808235332542, "learning_rate": 1.2984261784388923e-05, "loss": 0.7359, "step": 4126 }, { "epoch": 0.4215526046986721, "grad_norm": 1.4705792051374922, "learning_rate": 1.2981104040234742e-05, "loss": 0.7951, "step": 4127 }, { "epoch": 0.4216547497446374, "grad_norm": 1.3701889891001708, "learning_rate": 1.2977945969796796e-05, "loss": 0.631, "step": 4128 }, { "epoch": 0.42175689479060263, "grad_norm": 1.4463389266616375, "learning_rate": 1.2974787573420744e-05, "loss": 0.6531, "step": 4129 }, { "epoch": 0.4218590398365679, "grad_norm": 1.4530120591992444, "learning_rate": 1.2971628851452263e-05, "loss": 0.7059, "step": 4130 }, { "epoch": 0.4219611848825332, "grad_norm": 1.5219217795240638, "learning_rate": 1.2968469804237088e-05, "loss": 0.6238, "step": 4131 }, { "epoch": 0.4220633299284985, "grad_norm": 1.5017364988446407, "learning_rate": 1.2965310432120978e-05, "loss": 0.7114, "step": 4132 }, { "epoch": 0.4221654749744637, "grad_norm": 1.4389094641372986, "learning_rate": 1.2962150735449724e-05, "loss": 0.722, "step": 4133 }, { "epoch": 0.422267620020429, "grad_norm": 1.4698535732220495, "learning_rate": 1.2958990714569154e-05, "loss": 0.7758, "step": 4134 }, { "epoch": 0.4223697650663943, "grad_norm": 1.4454234873644298, "learning_rate": 1.2955830369825141e-05, "loss": 0.8351, "step": 4135 }, { "epoch": 0.42247191011235957, "grad_norm": 1.344756066930458, "learning_rate": 1.2952669701563588e-05, "loss": 0.7998, "step": 4136 }, { "epoch": 0.4225740551583248, "grad_norm": 1.418681941640728, "learning_rate": 1.2949508710130423e-05, "loss": 0.7328, "step": 4137 }, { "epoch": 0.4226762002042901, "grad_norm": 1.4906992595098487, "learning_rate": 1.2946347395871626e-05, "loss": 0.7859, "step": 4138 }, { "epoch": 0.42277834525025537, "grad_norm": 1.4599196904873275, "learning_rate": 1.2943185759133203e-05, "loss": 0.6149, "step": 4139 }, { "epoch": 0.42288049029622066, "grad_norm": 1.5352293689277754, "learning_rate": 1.2940023800261197e-05, "loss": 0.6974, "step": 4140 }, { "epoch": 0.4229826353421859, "grad_norm": 1.4747391792025863, "learning_rate": 1.2936861519601689e-05, "loss": 0.692, "step": 4141 }, { "epoch": 0.42308478038815117, "grad_norm": 1.592182718455755, "learning_rate": 1.2933698917500788e-05, "loss": 0.6323, "step": 4142 }, { "epoch": 0.42318692543411646, "grad_norm": 1.4931453626253548, "learning_rate": 1.2930535994304643e-05, "loss": 0.7523, "step": 4143 }, { "epoch": 0.42328907048008174, "grad_norm": 1.5729729186806827, "learning_rate": 1.2927372750359443e-05, "loss": 0.7543, "step": 4144 }, { "epoch": 0.42339121552604697, "grad_norm": 1.3550745282975585, "learning_rate": 1.2924209186011405e-05, "loss": 0.6482, "step": 4145 }, { "epoch": 0.42349336057201226, "grad_norm": 1.5746811100354057, "learning_rate": 1.2921045301606777e-05, "loss": 0.83, "step": 4146 }, { "epoch": 0.42359550561797754, "grad_norm": 1.4042866430321983, "learning_rate": 1.2917881097491858e-05, "loss": 0.7683, "step": 4147 }, { "epoch": 0.4236976506639428, "grad_norm": 1.441740187024938, "learning_rate": 1.2914716574012968e-05, "loss": 0.7477, "step": 4148 }, { "epoch": 0.42379979570990806, "grad_norm": 1.5703460896779449, "learning_rate": 1.2911551731516467e-05, "loss": 0.7446, "step": 4149 }, { "epoch": 0.42390194075587334, "grad_norm": 1.4185097964926956, "learning_rate": 1.290838657034874e-05, "loss": 0.7464, "step": 4150 }, { "epoch": 0.4240040858018386, "grad_norm": 1.4990641399195994, "learning_rate": 1.2905221090856232e-05, "loss": 0.6285, "step": 4151 }, { "epoch": 0.42410623084780386, "grad_norm": 1.529098470574368, "learning_rate": 1.2902055293385396e-05, "loss": 0.8503, "step": 4152 }, { "epoch": 0.42420837589376914, "grad_norm": 1.5725713886116188, "learning_rate": 1.2898889178282733e-05, "loss": 0.7078, "step": 4153 }, { "epoch": 0.4243105209397344, "grad_norm": 1.5295258079947283, "learning_rate": 1.2895722745894777e-05, "loss": 0.7121, "step": 4154 }, { "epoch": 0.4244126659856997, "grad_norm": 1.5095825940448466, "learning_rate": 1.2892555996568094e-05, "loss": 0.768, "step": 4155 }, { "epoch": 0.42451481103166494, "grad_norm": 1.5112433002257941, "learning_rate": 1.2889388930649291e-05, "loss": 0.7579, "step": 4156 }, { "epoch": 0.4246169560776302, "grad_norm": 1.514226228406768, "learning_rate": 1.2886221548485e-05, "loss": 0.7692, "step": 4157 }, { "epoch": 0.4247191011235955, "grad_norm": 1.4757653130565505, "learning_rate": 1.2883053850421899e-05, "loss": 0.7661, "step": 4158 }, { "epoch": 0.4248212461695608, "grad_norm": 1.357419997721736, "learning_rate": 1.2879885836806689e-05, "loss": 0.6896, "step": 4159 }, { "epoch": 0.424923391215526, "grad_norm": 1.3972495833831715, "learning_rate": 1.2876717507986114e-05, "loss": 0.7992, "step": 4160 }, { "epoch": 0.4250255362614913, "grad_norm": 1.4475067719021075, "learning_rate": 1.287354886430695e-05, "loss": 0.6546, "step": 4161 }, { "epoch": 0.4251276813074566, "grad_norm": 1.5324668861592707, "learning_rate": 1.2870379906116005e-05, "loss": 0.8312, "step": 4162 }, { "epoch": 0.4252298263534219, "grad_norm": 1.429368183012482, "learning_rate": 1.2867210633760126e-05, "loss": 0.7044, "step": 4163 }, { "epoch": 0.4253319713993871, "grad_norm": 1.4798231709875453, "learning_rate": 1.2864041047586189e-05, "loss": 0.6841, "step": 4164 }, { "epoch": 0.4254341164453524, "grad_norm": 1.5703651989985423, "learning_rate": 1.2860871147941109e-05, "loss": 0.6238, "step": 4165 }, { "epoch": 0.4255362614913177, "grad_norm": 1.27730640312045, "learning_rate": 1.2857700935171835e-05, "loss": 0.7165, "step": 4166 }, { "epoch": 0.42563840653728297, "grad_norm": 1.3485400667462677, "learning_rate": 1.2854530409625346e-05, "loss": 0.7165, "step": 4167 }, { "epoch": 0.4257405515832482, "grad_norm": 1.5053817130801825, "learning_rate": 1.285135957164866e-05, "loss": 0.8391, "step": 4168 }, { "epoch": 0.4258426966292135, "grad_norm": 1.373072070707707, "learning_rate": 1.2848188421588827e-05, "loss": 0.7164, "step": 4169 }, { "epoch": 0.42594484167517876, "grad_norm": 1.5248536954527256, "learning_rate": 1.2845016959792931e-05, "loss": 0.7186, "step": 4170 }, { "epoch": 0.42604698672114405, "grad_norm": 1.4049527114111642, "learning_rate": 1.284184518660809e-05, "loss": 0.6818, "step": 4171 }, { "epoch": 0.4261491317671093, "grad_norm": 1.3851174270655728, "learning_rate": 1.2838673102381458e-05, "loss": 0.694, "step": 4172 }, { "epoch": 0.42625127681307456, "grad_norm": 1.5562554404716402, "learning_rate": 1.2835500707460223e-05, "loss": 0.6424, "step": 4173 }, { "epoch": 0.42635342185903985, "grad_norm": 1.4227628680922861, "learning_rate": 1.2832328002191599e-05, "loss": 0.7868, "step": 4174 }, { "epoch": 0.42645556690500513, "grad_norm": 1.4643328018310775, "learning_rate": 1.2829154986922847e-05, "loss": 0.6896, "step": 4175 }, { "epoch": 0.42655771195097036, "grad_norm": 1.4745459924297115, "learning_rate": 1.2825981662001256e-05, "loss": 0.7656, "step": 4176 }, { "epoch": 0.42665985699693565, "grad_norm": 1.5029292321623076, "learning_rate": 1.2822808027774143e-05, "loss": 0.7277, "step": 4177 }, { "epoch": 0.42676200204290093, "grad_norm": 1.4823195118611412, "learning_rate": 1.281963408458887e-05, "loss": 0.7652, "step": 4178 }, { "epoch": 0.42686414708886616, "grad_norm": 1.3853250689794836, "learning_rate": 1.2816459832792822e-05, "loss": 0.75, "step": 4179 }, { "epoch": 0.42696629213483145, "grad_norm": 1.7521923192322615, "learning_rate": 1.2813285272733429e-05, "loss": 0.7804, "step": 4180 }, { "epoch": 0.42706843718079673, "grad_norm": 1.4189134851768206, "learning_rate": 1.2810110404758143e-05, "loss": 0.752, "step": 4181 }, { "epoch": 0.427170582226762, "grad_norm": 1.5064516408262625, "learning_rate": 1.2806935229214456e-05, "loss": 0.7408, "step": 4182 }, { "epoch": 0.42727272727272725, "grad_norm": 1.5434466837584009, "learning_rate": 1.28037597464499e-05, "loss": 0.7083, "step": 4183 }, { "epoch": 0.42737487231869253, "grad_norm": 1.4315682255893107, "learning_rate": 1.2800583956812025e-05, "loss": 0.7168, "step": 4184 }, { "epoch": 0.4274770173646578, "grad_norm": 1.551951380065014, "learning_rate": 1.2797407860648427e-05, "loss": 0.6986, "step": 4185 }, { "epoch": 0.4275791624106231, "grad_norm": 1.4989583080589504, "learning_rate": 1.2794231458306732e-05, "loss": 0.8221, "step": 4186 }, { "epoch": 0.42768130745658833, "grad_norm": 1.4211755988326256, "learning_rate": 1.2791054750134597e-05, "loss": 0.7164, "step": 4187 }, { "epoch": 0.4277834525025536, "grad_norm": 1.4687098144890882, "learning_rate": 1.2787877736479719e-05, "loss": 0.6884, "step": 4188 }, { "epoch": 0.4278855975485189, "grad_norm": 1.434373871437835, "learning_rate": 1.2784700417689817e-05, "loss": 0.706, "step": 4189 }, { "epoch": 0.4279877425944842, "grad_norm": 1.4761514426845452, "learning_rate": 1.2781522794112658e-05, "loss": 0.7788, "step": 4190 }, { "epoch": 0.4280898876404494, "grad_norm": 1.5371224217495576, "learning_rate": 1.2778344866096032e-05, "loss": 0.7553, "step": 4191 }, { "epoch": 0.4281920326864147, "grad_norm": 1.505334300874722, "learning_rate": 1.2775166633987765e-05, "loss": 0.6602, "step": 4192 }, { "epoch": 0.42829417773238, "grad_norm": 1.3980104338401964, "learning_rate": 1.2771988098135719e-05, "loss": 0.7006, "step": 4193 }, { "epoch": 0.4283963227783453, "grad_norm": 1.383802255642878, "learning_rate": 1.276880925888778e-05, "loss": 0.7447, "step": 4194 }, { "epoch": 0.4284984678243105, "grad_norm": 1.4202791704004998, "learning_rate": 1.2765630116591884e-05, "loss": 0.6968, "step": 4195 }, { "epoch": 0.4286006128702758, "grad_norm": 1.4590125606068858, "learning_rate": 1.2762450671595983e-05, "loss": 0.7108, "step": 4196 }, { "epoch": 0.4287027579162411, "grad_norm": 1.4020338981341036, "learning_rate": 1.275927092424807e-05, "loss": 0.6834, "step": 4197 }, { "epoch": 0.42880490296220636, "grad_norm": 1.5133927724807894, "learning_rate": 1.2756090874896171e-05, "loss": 0.7314, "step": 4198 }, { "epoch": 0.4289070480081716, "grad_norm": 1.4240112947773271, "learning_rate": 1.2752910523888347e-05, "loss": 0.6645, "step": 4199 }, { "epoch": 0.4290091930541369, "grad_norm": 1.4354208813429918, "learning_rate": 1.274972987157269e-05, "loss": 0.7336, "step": 4200 }, { "epoch": 0.42911133810010216, "grad_norm": 1.4461515262142526, "learning_rate": 1.2746548918297318e-05, "loss": 0.6986, "step": 4201 }, { "epoch": 0.42921348314606744, "grad_norm": 1.4815422836040038, "learning_rate": 1.2743367664410391e-05, "loss": 0.7797, "step": 4202 }, { "epoch": 0.42931562819203267, "grad_norm": 1.8068029698968613, "learning_rate": 1.2740186110260104e-05, "loss": 0.7516, "step": 4203 }, { "epoch": 0.42941777323799796, "grad_norm": 1.5583473027966743, "learning_rate": 1.2737004256194676e-05, "loss": 0.7247, "step": 4204 }, { "epoch": 0.42951991828396324, "grad_norm": 1.5807737796431178, "learning_rate": 1.2733822102562366e-05, "loss": 0.7589, "step": 4205 }, { "epoch": 0.42962206332992847, "grad_norm": 1.6256543319508912, "learning_rate": 1.2730639649711453e-05, "loss": 0.7787, "step": 4206 }, { "epoch": 0.42972420837589376, "grad_norm": 1.5856240579972127, "learning_rate": 1.2727456897990276e-05, "loss": 0.7561, "step": 4207 }, { "epoch": 0.42982635342185904, "grad_norm": 1.3395595041880035, "learning_rate": 1.2724273847747173e-05, "loss": 0.6978, "step": 4208 }, { "epoch": 0.4299284984678243, "grad_norm": 1.421668395737883, "learning_rate": 1.2721090499330542e-05, "loss": 0.7474, "step": 4209 }, { "epoch": 0.43003064351378956, "grad_norm": 1.4186737209031755, "learning_rate": 1.2717906853088793e-05, "loss": 0.7714, "step": 4210 }, { "epoch": 0.43013278855975484, "grad_norm": 1.531953079437075, "learning_rate": 1.2714722909370383e-05, "loss": 0.7227, "step": 4211 }, { "epoch": 0.4302349336057201, "grad_norm": 1.541353837000716, "learning_rate": 1.2711538668523802e-05, "loss": 0.7264, "step": 4212 }, { "epoch": 0.4303370786516854, "grad_norm": 1.5580233017897014, "learning_rate": 1.2708354130897555e-05, "loss": 0.7778, "step": 4213 }, { "epoch": 0.43043922369765064, "grad_norm": 1.4851710871351311, "learning_rate": 1.2705169296840203e-05, "loss": 0.7109, "step": 4214 }, { "epoch": 0.4305413687436159, "grad_norm": 1.383274129234747, "learning_rate": 1.2701984166700324e-05, "loss": 0.6398, "step": 4215 }, { "epoch": 0.4306435137895812, "grad_norm": 1.4384865579196828, "learning_rate": 1.2698798740826531e-05, "loss": 0.7613, "step": 4216 }, { "epoch": 0.4307456588355465, "grad_norm": 1.5110176748999187, "learning_rate": 1.2695613019567472e-05, "loss": 0.7787, "step": 4217 }, { "epoch": 0.4308478038815117, "grad_norm": 1.490556998129876, "learning_rate": 1.2692427003271823e-05, "loss": 0.7777, "step": 4218 }, { "epoch": 0.430949948927477, "grad_norm": 1.7190476961541812, "learning_rate": 1.2689240692288305e-05, "loss": 0.692, "step": 4219 }, { "epoch": 0.4310520939734423, "grad_norm": 1.3351591805615528, "learning_rate": 1.2686054086965653e-05, "loss": 0.6479, "step": 4220 }, { "epoch": 0.4311542390194076, "grad_norm": 1.2994211639198199, "learning_rate": 1.2682867187652645e-05, "loss": 0.6713, "step": 4221 }, { "epoch": 0.4312563840653728, "grad_norm": 1.6173038652989569, "learning_rate": 1.267967999469809e-05, "loss": 0.7172, "step": 4222 }, { "epoch": 0.4313585291113381, "grad_norm": 1.4013839991973194, "learning_rate": 1.267649250845083e-05, "loss": 0.7636, "step": 4223 }, { "epoch": 0.4314606741573034, "grad_norm": 1.4726324122435535, "learning_rate": 1.2673304729259737e-05, "loss": 0.7195, "step": 4224 }, { "epoch": 0.43156281920326867, "grad_norm": 1.305989334081915, "learning_rate": 1.267011665747371e-05, "loss": 0.6655, "step": 4225 }, { "epoch": 0.4316649642492339, "grad_norm": 1.4848371949272288, "learning_rate": 1.2666928293441692e-05, "loss": 0.794, "step": 4226 }, { "epoch": 0.4317671092951992, "grad_norm": 1.5279658758639154, "learning_rate": 1.2663739637512648e-05, "loss": 0.7679, "step": 4227 }, { "epoch": 0.43186925434116447, "grad_norm": 1.4891591759877514, "learning_rate": 1.2660550690035582e-05, "loss": 0.6606, "step": 4228 }, { "epoch": 0.43197139938712975, "grad_norm": 1.560479938170031, "learning_rate": 1.2657361451359524e-05, "loss": 0.7087, "step": 4229 }, { "epoch": 0.432073544433095, "grad_norm": 1.446457484049207, "learning_rate": 1.2654171921833536e-05, "loss": 0.6367, "step": 4230 }, { "epoch": 0.43217568947906027, "grad_norm": 1.4640792583203828, "learning_rate": 1.2650982101806717e-05, "loss": 0.7146, "step": 4231 }, { "epoch": 0.43227783452502555, "grad_norm": 1.305597056616421, "learning_rate": 1.2647791991628195e-05, "loss": 0.735, "step": 4232 }, { "epoch": 0.4323799795709908, "grad_norm": 1.6604632371241312, "learning_rate": 1.2644601591647127e-05, "loss": 0.7913, "step": 4233 }, { "epoch": 0.43248212461695607, "grad_norm": 1.3623833546588544, "learning_rate": 1.2641410902212707e-05, "loss": 0.6871, "step": 4234 }, { "epoch": 0.43258426966292135, "grad_norm": 1.4119739414357706, "learning_rate": 1.2638219923674158e-05, "loss": 0.7034, "step": 4235 }, { "epoch": 0.43268641470888664, "grad_norm": 1.3472515115276802, "learning_rate": 1.2635028656380735e-05, "loss": 0.7406, "step": 4236 }, { "epoch": 0.43278855975485186, "grad_norm": 1.4665677431941686, "learning_rate": 1.2631837100681724e-05, "loss": 0.7524, "step": 4237 }, { "epoch": 0.43289070480081715, "grad_norm": 1.5849462424163931, "learning_rate": 1.2628645256926438e-05, "loss": 0.7419, "step": 4238 }, { "epoch": 0.43299284984678243, "grad_norm": 1.5234466519793624, "learning_rate": 1.262545312546423e-05, "loss": 0.7857, "step": 4239 }, { "epoch": 0.4330949948927477, "grad_norm": 1.4564028783733853, "learning_rate": 1.2622260706644482e-05, "loss": 0.6648, "step": 4240 }, { "epoch": 0.43319713993871295, "grad_norm": 1.4957898544082597, "learning_rate": 1.261906800081661e-05, "loss": 0.7938, "step": 4241 }, { "epoch": 0.43329928498467823, "grad_norm": 1.3152159561122425, "learning_rate": 1.261587500833005e-05, "loss": 0.7076, "step": 4242 }, { "epoch": 0.4334014300306435, "grad_norm": 1.4533069707570487, "learning_rate": 1.2612681729534277e-05, "loss": 0.7397, "step": 4243 }, { "epoch": 0.4335035750766088, "grad_norm": 1.5302892140573456, "learning_rate": 1.2609488164778805e-05, "loss": 0.7107, "step": 4244 }, { "epoch": 0.43360572012257403, "grad_norm": 1.4286947601473468, "learning_rate": 1.2606294314413169e-05, "loss": 0.7125, "step": 4245 }, { "epoch": 0.4337078651685393, "grad_norm": 1.5247547188056398, "learning_rate": 1.2603100178786928e-05, "loss": 0.7492, "step": 4246 }, { "epoch": 0.4338100102145046, "grad_norm": 1.3594453166938771, "learning_rate": 1.25999057582497e-05, "loss": 0.6974, "step": 4247 }, { "epoch": 0.4339121552604699, "grad_norm": 1.5915995566954642, "learning_rate": 1.2596711053151103e-05, "loss": 0.8248, "step": 4248 }, { "epoch": 0.4340143003064351, "grad_norm": 1.4011718665018413, "learning_rate": 1.2593516063840805e-05, "loss": 0.6668, "step": 4249 }, { "epoch": 0.4341164453524004, "grad_norm": 1.6427006796073262, "learning_rate": 1.2590320790668493e-05, "loss": 0.6919, "step": 4250 }, { "epoch": 0.4342185903983657, "grad_norm": 1.4392778565463424, "learning_rate": 1.25871252339839e-05, "loss": 0.7708, "step": 4251 }, { "epoch": 0.434320735444331, "grad_norm": 1.4867793635511608, "learning_rate": 1.2583929394136783e-05, "loss": 0.7478, "step": 4252 }, { "epoch": 0.4344228804902962, "grad_norm": 1.4906834013008994, "learning_rate": 1.258073327147692e-05, "loss": 0.7746, "step": 4253 }, { "epoch": 0.4345250255362615, "grad_norm": 1.5456711155769758, "learning_rate": 1.2577536866354136e-05, "loss": 0.6246, "step": 4254 }, { "epoch": 0.4346271705822268, "grad_norm": 1.5987569158121842, "learning_rate": 1.2574340179118271e-05, "loss": 0.7534, "step": 4255 }, { "epoch": 0.43472931562819206, "grad_norm": 1.5285868518881474, "learning_rate": 1.2571143210119216e-05, "loss": 0.7932, "step": 4256 }, { "epoch": 0.4348314606741573, "grad_norm": 1.4742076762058434, "learning_rate": 1.2567945959706873e-05, "loss": 0.7, "step": 4257 }, { "epoch": 0.4349336057201226, "grad_norm": 1.4508402517673071, "learning_rate": 1.2564748428231186e-05, "loss": 0.7187, "step": 4258 }, { "epoch": 0.43503575076608786, "grad_norm": 1.4697177232710263, "learning_rate": 1.2561550616042126e-05, "loss": 0.7721, "step": 4259 }, { "epoch": 0.4351378958120531, "grad_norm": 1.4806895140853427, "learning_rate": 1.2558352523489696e-05, "loss": 0.8045, "step": 4260 }, { "epoch": 0.4352400408580184, "grad_norm": 1.4513621245151793, "learning_rate": 1.255515415092393e-05, "loss": 0.6891, "step": 4261 }, { "epoch": 0.43534218590398366, "grad_norm": 1.415982889032856, "learning_rate": 1.2551955498694893e-05, "loss": 0.755, "step": 4262 }, { "epoch": 0.43544433094994894, "grad_norm": 1.3861999338764512, "learning_rate": 1.2548756567152674e-05, "loss": 0.7952, "step": 4263 }, { "epoch": 0.4355464759959142, "grad_norm": 1.524254702554869, "learning_rate": 1.2545557356647405e-05, "loss": 0.7251, "step": 4264 }, { "epoch": 0.43564862104187946, "grad_norm": 1.6317547648668884, "learning_rate": 1.2542357867529236e-05, "loss": 0.7831, "step": 4265 }, { "epoch": 0.43575076608784474, "grad_norm": 1.647448086376945, "learning_rate": 1.2539158100148358e-05, "loss": 0.7037, "step": 4266 }, { "epoch": 0.43585291113381003, "grad_norm": 1.4759070532949015, "learning_rate": 1.2535958054854984e-05, "loss": 0.7594, "step": 4267 }, { "epoch": 0.43595505617977526, "grad_norm": 1.2660794496718846, "learning_rate": 1.2532757731999365e-05, "loss": 0.6832, "step": 4268 }, { "epoch": 0.43605720122574054, "grad_norm": 1.4173481738742841, "learning_rate": 1.2529557131931773e-05, "loss": 0.6088, "step": 4269 }, { "epoch": 0.43615934627170583, "grad_norm": 1.580060537762344, "learning_rate": 1.252635625500252e-05, "loss": 0.7747, "step": 4270 }, { "epoch": 0.4362614913176711, "grad_norm": 1.6104125873782262, "learning_rate": 1.2523155101561943e-05, "loss": 0.7463, "step": 4271 }, { "epoch": 0.43636363636363634, "grad_norm": 1.3927999737394492, "learning_rate": 1.2519953671960407e-05, "loss": 0.6961, "step": 4272 }, { "epoch": 0.4364657814096016, "grad_norm": 1.551204619477217, "learning_rate": 1.251675196654832e-05, "loss": 0.7178, "step": 4273 }, { "epoch": 0.4365679264555669, "grad_norm": 1.424812835566812, "learning_rate": 1.2513549985676098e-05, "loss": 0.7082, "step": 4274 }, { "epoch": 0.4366700715015322, "grad_norm": 1.5665642202278396, "learning_rate": 1.2510347729694208e-05, "loss": 0.686, "step": 4275 }, { "epoch": 0.4367722165474974, "grad_norm": 1.4117778375924306, "learning_rate": 1.2507145198953139e-05, "loss": 0.7138, "step": 4276 }, { "epoch": 0.4368743615934627, "grad_norm": 1.4872404928955392, "learning_rate": 1.2503942393803405e-05, "loss": 0.6918, "step": 4277 }, { "epoch": 0.436976506639428, "grad_norm": 1.4129641957705419, "learning_rate": 1.2500739314595562e-05, "loss": 0.69, "step": 4278 }, { "epoch": 0.4370786516853933, "grad_norm": 1.5922274940638164, "learning_rate": 1.249753596168018e-05, "loss": 0.7062, "step": 4279 }, { "epoch": 0.4371807967313585, "grad_norm": 1.4689943534505607, "learning_rate": 1.2494332335407879e-05, "loss": 0.7371, "step": 4280 }, { "epoch": 0.4372829417773238, "grad_norm": 1.3529574828601063, "learning_rate": 1.2491128436129292e-05, "loss": 0.6346, "step": 4281 }, { "epoch": 0.4373850868232891, "grad_norm": 1.4052686012092208, "learning_rate": 1.2487924264195084e-05, "loss": 0.725, "step": 4282 }, { "epoch": 0.43748723186925437, "grad_norm": 1.756972850170746, "learning_rate": 1.248471981995596e-05, "loss": 0.7604, "step": 4283 }, { "epoch": 0.4375893769152196, "grad_norm": 1.5635699228613305, "learning_rate": 1.2481515103762644e-05, "loss": 0.8931, "step": 4284 }, { "epoch": 0.4376915219611849, "grad_norm": 1.6618465365811392, "learning_rate": 1.2478310115965901e-05, "loss": 0.6541, "step": 4285 }, { "epoch": 0.43779366700715017, "grad_norm": 1.5157149535528396, "learning_rate": 1.2475104856916512e-05, "loss": 0.7138, "step": 4286 }, { "epoch": 0.4378958120531154, "grad_norm": 1.5692036851755695, "learning_rate": 1.2471899326965298e-05, "loss": 0.8129, "step": 4287 }, { "epoch": 0.4379979570990807, "grad_norm": 1.5866358561429488, "learning_rate": 1.2468693526463107e-05, "loss": 0.749, "step": 4288 }, { "epoch": 0.43810010214504597, "grad_norm": 1.469598574919986, "learning_rate": 1.2465487455760811e-05, "loss": 0.7657, "step": 4289 }, { "epoch": 0.43820224719101125, "grad_norm": 1.4751974073668996, "learning_rate": 1.2462281115209324e-05, "loss": 0.701, "step": 4290 }, { "epoch": 0.4383043922369765, "grad_norm": 1.5118602405768673, "learning_rate": 1.2459074505159577e-05, "loss": 0.7591, "step": 4291 }, { "epoch": 0.43840653728294177, "grad_norm": 1.386400343417803, "learning_rate": 1.2455867625962534e-05, "loss": 0.7637, "step": 4292 }, { "epoch": 0.43850868232890705, "grad_norm": 1.438132032564504, "learning_rate": 1.2452660477969197e-05, "loss": 0.7462, "step": 4293 }, { "epoch": 0.43861082737487234, "grad_norm": 1.578058093343192, "learning_rate": 1.244945306153058e-05, "loss": 0.7451, "step": 4294 }, { "epoch": 0.43871297242083757, "grad_norm": 1.4165549492823388, "learning_rate": 1.2446245376997747e-05, "loss": 0.7411, "step": 4295 }, { "epoch": 0.43881511746680285, "grad_norm": 1.6445460778641914, "learning_rate": 1.2443037424721775e-05, "loss": 0.8098, "step": 4296 }, { "epoch": 0.43891726251276814, "grad_norm": 1.5080588856632076, "learning_rate": 1.2439829205053781e-05, "loss": 0.6541, "step": 4297 }, { "epoch": 0.4390194075587334, "grad_norm": 1.5216849013003038, "learning_rate": 1.2436620718344906e-05, "loss": 0.6985, "step": 4298 }, { "epoch": 0.43912155260469865, "grad_norm": 1.5243592312663756, "learning_rate": 1.2433411964946314e-05, "loss": 0.6725, "step": 4299 }, { "epoch": 0.43922369765066394, "grad_norm": 1.3153444437401756, "learning_rate": 1.2430202945209213e-05, "loss": 0.6508, "step": 4300 }, { "epoch": 0.4393258426966292, "grad_norm": 1.322529874816043, "learning_rate": 1.2426993659484827e-05, "loss": 0.7185, "step": 4301 }, { "epoch": 0.4394279877425945, "grad_norm": 1.3363266600642454, "learning_rate": 1.2423784108124422e-05, "loss": 0.6425, "step": 4302 }, { "epoch": 0.43953013278855974, "grad_norm": 1.5189178699273036, "learning_rate": 1.2420574291479275e-05, "loss": 0.5936, "step": 4303 }, { "epoch": 0.439632277834525, "grad_norm": 1.5499711754372096, "learning_rate": 1.2417364209900711e-05, "loss": 0.7395, "step": 4304 }, { "epoch": 0.4397344228804903, "grad_norm": 1.5058191572520587, "learning_rate": 1.2414153863740073e-05, "loss": 0.8413, "step": 4305 }, { "epoch": 0.4398365679264556, "grad_norm": 1.3766694861285038, "learning_rate": 1.2410943253348733e-05, "loss": 0.6354, "step": 4306 }, { "epoch": 0.4399387129724208, "grad_norm": 1.512251402805886, "learning_rate": 1.2407732379078095e-05, "loss": 0.7682, "step": 4307 }, { "epoch": 0.4400408580183861, "grad_norm": 1.2428102591146084, "learning_rate": 1.2404521241279595e-05, "loss": 0.6779, "step": 4308 }, { "epoch": 0.4401430030643514, "grad_norm": 1.7695786688638393, "learning_rate": 1.2401309840304689e-05, "loss": 0.8618, "step": 4309 }, { "epoch": 0.4402451481103167, "grad_norm": 1.4813845762135314, "learning_rate": 1.2398098176504873e-05, "loss": 0.7545, "step": 4310 }, { "epoch": 0.4403472931562819, "grad_norm": 1.4063653331311956, "learning_rate": 1.239488625023166e-05, "loss": 0.622, "step": 4311 }, { "epoch": 0.4404494382022472, "grad_norm": 1.3263698740502583, "learning_rate": 1.2391674061836601e-05, "loss": 0.6706, "step": 4312 }, { "epoch": 0.4405515832482125, "grad_norm": 1.4644829744161356, "learning_rate": 1.238846161167127e-05, "loss": 0.6802, "step": 4313 }, { "epoch": 0.44065372829417776, "grad_norm": 1.4676019613216038, "learning_rate": 1.2385248900087272e-05, "loss": 0.7248, "step": 4314 }, { "epoch": 0.440755873340143, "grad_norm": 1.4555216495576928, "learning_rate": 1.2382035927436242e-05, "loss": 0.6482, "step": 4315 }, { "epoch": 0.4408580183861083, "grad_norm": 1.542980701200706, "learning_rate": 1.2378822694069838e-05, "loss": 0.8055, "step": 4316 }, { "epoch": 0.44096016343207356, "grad_norm": 1.5247808891023396, "learning_rate": 1.2375609200339757e-05, "loss": 0.6696, "step": 4317 }, { "epoch": 0.4410623084780388, "grad_norm": 1.4698196529612217, "learning_rate": 1.2372395446597711e-05, "loss": 0.8496, "step": 4318 }, { "epoch": 0.4411644535240041, "grad_norm": 1.3867930671224251, "learning_rate": 1.2369181433195451e-05, "loss": 0.7064, "step": 4319 }, { "epoch": 0.44126659856996936, "grad_norm": 1.633360646761823, "learning_rate": 1.2365967160484755e-05, "loss": 0.8671, "step": 4320 }, { "epoch": 0.44136874361593464, "grad_norm": 1.5241977124885135, "learning_rate": 1.2362752628817423e-05, "loss": 0.7087, "step": 4321 }, { "epoch": 0.4414708886618999, "grad_norm": 1.4411299567597549, "learning_rate": 1.235953783854529e-05, "loss": 0.6368, "step": 4322 }, { "epoch": 0.44157303370786516, "grad_norm": 1.5709462306510522, "learning_rate": 1.2356322790020214e-05, "loss": 0.732, "step": 4323 }, { "epoch": 0.44167517875383044, "grad_norm": 1.4522546422766027, "learning_rate": 1.2353107483594089e-05, "loss": 0.7425, "step": 4324 }, { "epoch": 0.44177732379979573, "grad_norm": 1.4020149706744478, "learning_rate": 1.234989191961883e-05, "loss": 0.6809, "step": 4325 }, { "epoch": 0.44187946884576096, "grad_norm": 1.5624484753992463, "learning_rate": 1.234667609844638e-05, "loss": 0.7472, "step": 4326 }, { "epoch": 0.44198161389172624, "grad_norm": 1.5846059399937262, "learning_rate": 1.2343460020428715e-05, "loss": 0.7475, "step": 4327 }, { "epoch": 0.44208375893769153, "grad_norm": 1.4182512117393002, "learning_rate": 1.2340243685917836e-05, "loss": 0.7199, "step": 4328 }, { "epoch": 0.4421859039836568, "grad_norm": 1.6064853051905523, "learning_rate": 1.2337027095265775e-05, "loss": 0.7312, "step": 4329 }, { "epoch": 0.44228804902962204, "grad_norm": 1.5858264427646682, "learning_rate": 1.2333810248824588e-05, "loss": 0.8269, "step": 4330 }, { "epoch": 0.44239019407558733, "grad_norm": 1.6260113523230295, "learning_rate": 1.2330593146946362e-05, "loss": 0.7912, "step": 4331 }, { "epoch": 0.4424923391215526, "grad_norm": 1.5447404557460256, "learning_rate": 1.232737578998321e-05, "loss": 0.7347, "step": 4332 }, { "epoch": 0.4425944841675179, "grad_norm": 1.4230145103673555, "learning_rate": 1.2324158178287274e-05, "loss": 0.7982, "step": 4333 }, { "epoch": 0.44269662921348313, "grad_norm": 1.4613193056367413, "learning_rate": 1.2320940312210725e-05, "loss": 0.6198, "step": 4334 }, { "epoch": 0.4427987742594484, "grad_norm": 1.404732768170166, "learning_rate": 1.2317722192105757e-05, "loss": 0.6204, "step": 4335 }, { "epoch": 0.4429009193054137, "grad_norm": 1.398786951959995, "learning_rate": 1.2314503818324597e-05, "loss": 0.6363, "step": 4336 }, { "epoch": 0.443003064351379, "grad_norm": 1.3095174777510055, "learning_rate": 1.2311285191219501e-05, "loss": 0.6024, "step": 4337 }, { "epoch": 0.4431052093973442, "grad_norm": 1.5102474032263602, "learning_rate": 1.2308066311142747e-05, "loss": 0.8179, "step": 4338 }, { "epoch": 0.4432073544433095, "grad_norm": 1.426766688907201, "learning_rate": 1.2304847178446643e-05, "loss": 0.7161, "step": 4339 }, { "epoch": 0.4433094994892748, "grad_norm": 1.4421464549915892, "learning_rate": 1.2301627793483527e-05, "loss": 0.7611, "step": 4340 }, { "epoch": 0.44341164453524007, "grad_norm": 1.5614923544265915, "learning_rate": 1.2298408156605763e-05, "loss": 0.7406, "step": 4341 }, { "epoch": 0.4435137895812053, "grad_norm": 1.578988848333134, "learning_rate": 1.2295188268165743e-05, "loss": 0.7601, "step": 4342 }, { "epoch": 0.4436159346271706, "grad_norm": 1.375854058276013, "learning_rate": 1.2291968128515878e-05, "loss": 0.7274, "step": 4343 }, { "epoch": 0.44371807967313587, "grad_norm": 1.4424386116312715, "learning_rate": 1.2288747738008626e-05, "loss": 0.7352, "step": 4344 }, { "epoch": 0.4438202247191011, "grad_norm": 1.4568709246195262, "learning_rate": 1.2285527096996455e-05, "loss": 0.7474, "step": 4345 }, { "epoch": 0.4439223697650664, "grad_norm": 1.4114088741971806, "learning_rate": 1.2282306205831866e-05, "loss": 0.7832, "step": 4346 }, { "epoch": 0.44402451481103167, "grad_norm": 1.6934672679577285, "learning_rate": 1.2279085064867387e-05, "loss": 0.6854, "step": 4347 }, { "epoch": 0.44412665985699695, "grad_norm": 1.3919749285446825, "learning_rate": 1.2275863674455576e-05, "loss": 0.7036, "step": 4348 }, { "epoch": 0.4442288049029622, "grad_norm": 1.3783320978581106, "learning_rate": 1.2272642034949013e-05, "loss": 0.6925, "step": 4349 }, { "epoch": 0.44433094994892747, "grad_norm": 1.4304731140633082, "learning_rate": 1.2269420146700312e-05, "loss": 0.7424, "step": 4350 }, { "epoch": 0.44443309499489275, "grad_norm": 1.4204502808265755, "learning_rate": 1.2266198010062112e-05, "loss": 0.6975, "step": 4351 }, { "epoch": 0.44453524004085804, "grad_norm": 1.5211648061553873, "learning_rate": 1.2262975625387074e-05, "loss": 0.7859, "step": 4352 }, { "epoch": 0.44463738508682327, "grad_norm": 1.43845791217103, "learning_rate": 1.2259752993027893e-05, "loss": 0.7281, "step": 4353 }, { "epoch": 0.44473953013278855, "grad_norm": 1.3810689485156413, "learning_rate": 1.2256530113337287e-05, "loss": 0.7739, "step": 4354 }, { "epoch": 0.44484167517875384, "grad_norm": 1.4576886326680158, "learning_rate": 1.2253306986667999e-05, "loss": 0.7645, "step": 4355 }, { "epoch": 0.4449438202247191, "grad_norm": 1.3302933881990837, "learning_rate": 1.2250083613372807e-05, "loss": 0.7041, "step": 4356 }, { "epoch": 0.44504596527068435, "grad_norm": 1.4774879036721376, "learning_rate": 1.224685999380451e-05, "loss": 0.7488, "step": 4357 }, { "epoch": 0.44514811031664964, "grad_norm": 1.4832276230968722, "learning_rate": 1.224363612831594e-05, "loss": 0.6589, "step": 4358 }, { "epoch": 0.4452502553626149, "grad_norm": 1.473863136827675, "learning_rate": 1.224041201725994e-05, "loss": 0.7756, "step": 4359 }, { "epoch": 0.4453524004085802, "grad_norm": 1.500343603157219, "learning_rate": 1.2237187660989396e-05, "loss": 0.7811, "step": 4360 }, { "epoch": 0.44545454545454544, "grad_norm": 1.3326896825393737, "learning_rate": 1.2233963059857222e-05, "loss": 0.7125, "step": 4361 }, { "epoch": 0.4455566905005107, "grad_norm": 1.5108023488563913, "learning_rate": 1.2230738214216344e-05, "loss": 0.6309, "step": 4362 }, { "epoch": 0.445658835546476, "grad_norm": 1.4666688851993972, "learning_rate": 1.222751312441973e-05, "loss": 0.6696, "step": 4363 }, { "epoch": 0.4457609805924413, "grad_norm": 1.5359144566587273, "learning_rate": 1.2224287790820367e-05, "loss": 0.7074, "step": 4364 }, { "epoch": 0.4458631256384065, "grad_norm": 1.5298543394776472, "learning_rate": 1.2221062213771261e-05, "loss": 0.7, "step": 4365 }, { "epoch": 0.4459652706843718, "grad_norm": 1.3351588777663383, "learning_rate": 1.2217836393625469e-05, "loss": 0.7323, "step": 4366 }, { "epoch": 0.4460674157303371, "grad_norm": 1.3951986652096973, "learning_rate": 1.2214610330736043e-05, "loss": 0.8777, "step": 4367 }, { "epoch": 0.4461695607763024, "grad_norm": 1.5284464624583667, "learning_rate": 1.2211384025456092e-05, "loss": 0.7218, "step": 4368 }, { "epoch": 0.4462717058222676, "grad_norm": 1.397552943358692, "learning_rate": 1.2208157478138728e-05, "loss": 0.7657, "step": 4369 }, { "epoch": 0.4463738508682329, "grad_norm": 1.4253191380478456, "learning_rate": 1.2204930689137103e-05, "loss": 0.7466, "step": 4370 }, { "epoch": 0.4464759959141982, "grad_norm": 1.699449729249936, "learning_rate": 1.2201703658804386e-05, "loss": 0.7295, "step": 4371 }, { "epoch": 0.4465781409601634, "grad_norm": 1.4557497957359848, "learning_rate": 1.2198476387493783e-05, "loss": 0.7694, "step": 4372 }, { "epoch": 0.4466802860061287, "grad_norm": 1.5786541820164806, "learning_rate": 1.2195248875558521e-05, "loss": 0.7621, "step": 4373 }, { "epoch": 0.446782431052094, "grad_norm": 1.4830492297264768, "learning_rate": 1.2192021123351846e-05, "loss": 0.7227, "step": 4374 }, { "epoch": 0.44688457609805926, "grad_norm": 1.4370125807475895, "learning_rate": 1.2188793131227049e-05, "loss": 0.7343, "step": 4375 }, { "epoch": 0.4469867211440245, "grad_norm": 1.4686824926980728, "learning_rate": 1.2185564899537425e-05, "loss": 0.735, "step": 4376 }, { "epoch": 0.4470888661899898, "grad_norm": 1.3952056934191304, "learning_rate": 1.2182336428636314e-05, "loss": 0.7218, "step": 4377 }, { "epoch": 0.44719101123595506, "grad_norm": 1.4037639559460904, "learning_rate": 1.217910771887707e-05, "loss": 0.6432, "step": 4378 }, { "epoch": 0.44729315628192035, "grad_norm": 1.5182739448173013, "learning_rate": 1.2175878770613077e-05, "loss": 0.6754, "step": 4379 }, { "epoch": 0.4473953013278856, "grad_norm": 1.4535938967674906, "learning_rate": 1.2172649584197746e-05, "loss": 0.7118, "step": 4380 }, { "epoch": 0.44749744637385086, "grad_norm": 1.4896893676586502, "learning_rate": 1.2169420159984517e-05, "loss": 0.7093, "step": 4381 }, { "epoch": 0.44759959141981615, "grad_norm": 1.321420613608895, "learning_rate": 1.2166190498326849e-05, "loss": 0.663, "step": 4382 }, { "epoch": 0.44770173646578143, "grad_norm": 1.437288504541632, "learning_rate": 1.216296059957823e-05, "loss": 0.741, "step": 4383 }, { "epoch": 0.44780388151174666, "grad_norm": 1.5730459794623028, "learning_rate": 1.2159730464092176e-05, "loss": 0.7918, "step": 4384 }, { "epoch": 0.44790602655771194, "grad_norm": 1.4530273916342542, "learning_rate": 1.215650009222223e-05, "loss": 0.7308, "step": 4385 }, { "epoch": 0.44800817160367723, "grad_norm": 1.4207751415847125, "learning_rate": 1.2153269484321956e-05, "loss": 0.7016, "step": 4386 }, { "epoch": 0.4481103166496425, "grad_norm": 1.4770457015481053, "learning_rate": 1.2150038640744942e-05, "loss": 0.6615, "step": 4387 }, { "epoch": 0.44821246169560774, "grad_norm": 1.2838986437172024, "learning_rate": 1.214680756184481e-05, "loss": 0.7217, "step": 4388 }, { "epoch": 0.44831460674157303, "grad_norm": 1.451117080800461, "learning_rate": 1.2143576247975207e-05, "loss": 0.8044, "step": 4389 }, { "epoch": 0.4484167517875383, "grad_norm": 1.516318690198013, "learning_rate": 1.2140344699489796e-05, "loss": 0.8021, "step": 4390 }, { "epoch": 0.4485188968335036, "grad_norm": 1.6220569601971606, "learning_rate": 1.2137112916742275e-05, "loss": 0.7667, "step": 4391 }, { "epoch": 0.44862104187946883, "grad_norm": 1.5677845537498745, "learning_rate": 1.2133880900086364e-05, "loss": 0.829, "step": 4392 }, { "epoch": 0.4487231869254341, "grad_norm": 1.4765287355672096, "learning_rate": 1.2130648649875812e-05, "loss": 0.7262, "step": 4393 }, { "epoch": 0.4488253319713994, "grad_norm": 1.5058052829512634, "learning_rate": 1.2127416166464387e-05, "loss": 0.7776, "step": 4394 }, { "epoch": 0.4489274770173647, "grad_norm": 1.3821313019612402, "learning_rate": 1.2124183450205886e-05, "loss": 0.6601, "step": 4395 }, { "epoch": 0.4490296220633299, "grad_norm": 1.4557119782506014, "learning_rate": 1.2120950501454138e-05, "loss": 0.6493, "step": 4396 }, { "epoch": 0.4491317671092952, "grad_norm": 1.5664360796236632, "learning_rate": 1.2117717320562986e-05, "loss": 0.821, "step": 4397 }, { "epoch": 0.4492339121552605, "grad_norm": 1.650589519160757, "learning_rate": 1.2114483907886307e-05, "loss": 0.7134, "step": 4398 }, { "epoch": 0.4493360572012257, "grad_norm": 1.3733277789530847, "learning_rate": 1.2111250263777991e-05, "loss": 0.7191, "step": 4399 }, { "epoch": 0.449438202247191, "grad_norm": 1.4600761141790548, "learning_rate": 1.2108016388591976e-05, "loss": 0.7335, "step": 4400 }, { "epoch": 0.4495403472931563, "grad_norm": 1.4035557072562581, "learning_rate": 1.2104782282682203e-05, "loss": 0.8445, "step": 4401 }, { "epoch": 0.44964249233912157, "grad_norm": 1.4187037891871126, "learning_rate": 1.2101547946402653e-05, "loss": 0.7001, "step": 4402 }, { "epoch": 0.4497446373850868, "grad_norm": 1.4077158538699681, "learning_rate": 1.2098313380107319e-05, "loss": 0.7009, "step": 4403 }, { "epoch": 0.4498467824310521, "grad_norm": 1.4544018778405412, "learning_rate": 1.2095078584150228e-05, "loss": 0.6942, "step": 4404 }, { "epoch": 0.44994892747701737, "grad_norm": 1.4039242708460282, "learning_rate": 1.2091843558885436e-05, "loss": 0.6463, "step": 4405 }, { "epoch": 0.45005107252298265, "grad_norm": 1.47201556512031, "learning_rate": 1.2088608304667014e-05, "loss": 0.7531, "step": 4406 }, { "epoch": 0.4501532175689479, "grad_norm": 1.399603494788756, "learning_rate": 1.2085372821849063e-05, "loss": 0.7037, "step": 4407 }, { "epoch": 0.45025536261491317, "grad_norm": 1.4921025181436893, "learning_rate": 1.2082137110785713e-05, "loss": 0.6334, "step": 4408 }, { "epoch": 0.45035750766087845, "grad_norm": 1.4480639255419936, "learning_rate": 1.2078901171831106e-05, "loss": 0.698, "step": 4409 }, { "epoch": 0.45045965270684374, "grad_norm": 1.4376241586795264, "learning_rate": 1.2075665005339426e-05, "loss": 0.6242, "step": 4410 }, { "epoch": 0.45056179775280897, "grad_norm": 1.443282262100317, "learning_rate": 1.2072428611664864e-05, "loss": 0.6801, "step": 4411 }, { "epoch": 0.45066394279877425, "grad_norm": 1.39450885496504, "learning_rate": 1.2069191991161658e-05, "loss": 0.7029, "step": 4412 }, { "epoch": 0.45076608784473954, "grad_norm": 1.599144149457271, "learning_rate": 1.2065955144184052e-05, "loss": 0.7726, "step": 4413 }, { "epoch": 0.4508682328907048, "grad_norm": 1.373473877396574, "learning_rate": 1.2062718071086317e-05, "loss": 0.6232, "step": 4414 }, { "epoch": 0.45097037793667005, "grad_norm": 1.5095594440109863, "learning_rate": 1.2059480772222756e-05, "loss": 0.6713, "step": 4415 }, { "epoch": 0.45107252298263534, "grad_norm": 1.44031859690779, "learning_rate": 1.2056243247947697e-05, "loss": 0.6613, "step": 4416 }, { "epoch": 0.4511746680286006, "grad_norm": 1.3256904663519213, "learning_rate": 1.2053005498615484e-05, "loss": 0.753, "step": 4417 }, { "epoch": 0.4512768130745659, "grad_norm": 1.3994283577238384, "learning_rate": 1.2049767524580493e-05, "loss": 0.8205, "step": 4418 }, { "epoch": 0.45137895812053114, "grad_norm": 1.556199031453398, "learning_rate": 1.2046529326197123e-05, "loss": 0.7343, "step": 4419 }, { "epoch": 0.4514811031664964, "grad_norm": 1.350704694924575, "learning_rate": 1.2043290903819796e-05, "loss": 0.7549, "step": 4420 }, { "epoch": 0.4515832482124617, "grad_norm": 1.525958803218305, "learning_rate": 1.2040052257802959e-05, "loss": 0.7794, "step": 4421 }, { "epoch": 0.451685393258427, "grad_norm": 1.470411451723171, "learning_rate": 1.2036813388501086e-05, "loss": 0.6812, "step": 4422 }, { "epoch": 0.4517875383043922, "grad_norm": 1.416199166670464, "learning_rate": 1.2033574296268669e-05, "loss": 0.6105, "step": 4423 }, { "epoch": 0.4518896833503575, "grad_norm": 1.540217586109598, "learning_rate": 1.2030334981460232e-05, "loss": 0.865, "step": 4424 }, { "epoch": 0.4519918283963228, "grad_norm": 1.3906921001109036, "learning_rate": 1.2027095444430322e-05, "loss": 0.69, "step": 4425 }, { "epoch": 0.452093973442288, "grad_norm": 1.3807130721367225, "learning_rate": 1.20238556855335e-05, "loss": 0.7883, "step": 4426 }, { "epoch": 0.4521961184882533, "grad_norm": 1.5510032943363248, "learning_rate": 1.202061570512437e-05, "loss": 0.6689, "step": 4427 }, { "epoch": 0.4522982635342186, "grad_norm": 1.363118110938814, "learning_rate": 1.2017375503557544e-05, "loss": 0.7374, "step": 4428 }, { "epoch": 0.4524004085801839, "grad_norm": 1.4357667445835383, "learning_rate": 1.201413508118767e-05, "loss": 0.733, "step": 4429 }, { "epoch": 0.4525025536261491, "grad_norm": 1.3108820213726866, "learning_rate": 1.2010894438369405e-05, "loss": 0.765, "step": 4430 }, { "epoch": 0.4526046986721144, "grad_norm": 1.535544868946225, "learning_rate": 1.2007653575457445e-05, "loss": 0.741, "step": 4431 }, { "epoch": 0.4527068437180797, "grad_norm": 1.5286731558569973, "learning_rate": 1.2004412492806507e-05, "loss": 0.8003, "step": 4432 }, { "epoch": 0.45280898876404496, "grad_norm": 1.4730489224638963, "learning_rate": 1.2001171190771325e-05, "loss": 0.6618, "step": 4433 }, { "epoch": 0.4529111338100102, "grad_norm": 1.453915623716457, "learning_rate": 1.1997929669706664e-05, "loss": 0.71, "step": 4434 }, { "epoch": 0.4530132788559755, "grad_norm": 1.7679577552373162, "learning_rate": 1.199468792996731e-05, "loss": 0.8842, "step": 4435 }, { "epoch": 0.45311542390194076, "grad_norm": 1.3727079808498286, "learning_rate": 1.1991445971908072e-05, "loss": 0.7056, "step": 4436 }, { "epoch": 0.45321756894790605, "grad_norm": 1.4074743546290096, "learning_rate": 1.1988203795883787e-05, "loss": 0.5967, "step": 4437 }, { "epoch": 0.4533197139938713, "grad_norm": 1.5073829520534248, "learning_rate": 1.198496140224931e-05, "loss": 0.6979, "step": 4438 }, { "epoch": 0.45342185903983656, "grad_norm": 1.429241194942722, "learning_rate": 1.1981718791359527e-05, "loss": 0.7593, "step": 4439 }, { "epoch": 0.45352400408580185, "grad_norm": 1.4546143175747257, "learning_rate": 1.1978475963569343e-05, "loss": 0.82, "step": 4440 }, { "epoch": 0.45362614913176713, "grad_norm": 1.7785285644971274, "learning_rate": 1.1975232919233684e-05, "loss": 0.6882, "step": 4441 }, { "epoch": 0.45372829417773236, "grad_norm": 1.4970466542597403, "learning_rate": 1.1971989658707507e-05, "loss": 0.7195, "step": 4442 }, { "epoch": 0.45383043922369765, "grad_norm": 1.4054963498339679, "learning_rate": 1.1968746182345785e-05, "loss": 0.7519, "step": 4443 }, { "epoch": 0.45393258426966293, "grad_norm": 1.5892193400696613, "learning_rate": 1.1965502490503525e-05, "loss": 0.7515, "step": 4444 }, { "epoch": 0.4540347293156282, "grad_norm": 1.4682562435517577, "learning_rate": 1.1962258583535747e-05, "loss": 0.6774, "step": 4445 }, { "epoch": 0.45413687436159345, "grad_norm": 1.5021529017127018, "learning_rate": 1.1959014461797498e-05, "loss": 0.692, "step": 4446 }, { "epoch": 0.45423901940755873, "grad_norm": 1.3661682369665527, "learning_rate": 1.195577012564385e-05, "loss": 0.763, "step": 4447 }, { "epoch": 0.454341164453524, "grad_norm": 1.4152040630401885, "learning_rate": 1.19525255754299e-05, "loss": 0.7232, "step": 4448 }, { "epoch": 0.4544433094994893, "grad_norm": 1.7021936132141924, "learning_rate": 1.1949280811510763e-05, "loss": 0.75, "step": 4449 }, { "epoch": 0.45454545454545453, "grad_norm": 1.3976685473117325, "learning_rate": 1.1946035834241582e-05, "loss": 0.7018, "step": 4450 }, { "epoch": 0.4546475995914198, "grad_norm": 1.3638514612073973, "learning_rate": 1.1942790643977523e-05, "loss": 0.7236, "step": 4451 }, { "epoch": 0.4547497446373851, "grad_norm": 1.5083968539835473, "learning_rate": 1.1939545241073774e-05, "loss": 0.793, "step": 4452 }, { "epoch": 0.45485188968335033, "grad_norm": 1.3083226578590414, "learning_rate": 1.1936299625885542e-05, "loss": 0.6711, "step": 4453 }, { "epoch": 0.4549540347293156, "grad_norm": 1.4819100042807536, "learning_rate": 1.1933053798768065e-05, "loss": 0.674, "step": 4454 }, { "epoch": 0.4550561797752809, "grad_norm": 1.494768994864976, "learning_rate": 1.1929807760076599e-05, "loss": 0.7685, "step": 4455 }, { "epoch": 0.4551583248212462, "grad_norm": 1.5428293099845931, "learning_rate": 1.1926561510166432e-05, "loss": 0.683, "step": 4456 }, { "epoch": 0.4552604698672114, "grad_norm": 1.5218773399898289, "learning_rate": 1.1923315049392859e-05, "loss": 0.8571, "step": 4457 }, { "epoch": 0.4553626149131767, "grad_norm": 1.3456395891463317, "learning_rate": 1.1920068378111214e-05, "loss": 0.6701, "step": 4458 }, { "epoch": 0.455464759959142, "grad_norm": 1.3364270175490052, "learning_rate": 1.1916821496676842e-05, "loss": 0.6399, "step": 4459 }, { "epoch": 0.45556690500510727, "grad_norm": 1.3936767358891256, "learning_rate": 1.191357440544512e-05, "loss": 0.7822, "step": 4460 }, { "epoch": 0.4556690500510725, "grad_norm": 1.4175869051409595, "learning_rate": 1.1910327104771444e-05, "loss": 0.7581, "step": 4461 }, { "epoch": 0.4557711950970378, "grad_norm": 1.5152997363576073, "learning_rate": 1.190707959501123e-05, "loss": 0.6552, "step": 4462 }, { "epoch": 0.45587334014300307, "grad_norm": 1.4616797198097955, "learning_rate": 1.1903831876519925e-05, "loss": 0.7458, "step": 4463 }, { "epoch": 0.45597548518896835, "grad_norm": 1.4448445647926547, "learning_rate": 1.190058394965299e-05, "loss": 0.6674, "step": 4464 }, { "epoch": 0.4560776302349336, "grad_norm": 1.4918605862769925, "learning_rate": 1.1897335814765913e-05, "loss": 0.7428, "step": 4465 }, { "epoch": 0.45617977528089887, "grad_norm": 1.408609158388085, "learning_rate": 1.1894087472214207e-05, "loss": 0.7152, "step": 4466 }, { "epoch": 0.45628192032686415, "grad_norm": 1.4557950591250353, "learning_rate": 1.1890838922353401e-05, "loss": 0.6463, "step": 4467 }, { "epoch": 0.45638406537282944, "grad_norm": 1.3917652304791865, "learning_rate": 1.1887590165539053e-05, "loss": 0.6889, "step": 4468 }, { "epoch": 0.45648621041879467, "grad_norm": 1.298569340387807, "learning_rate": 1.1884341202126745e-05, "loss": 0.704, "step": 4469 }, { "epoch": 0.45658835546475995, "grad_norm": 1.3493372771975554, "learning_rate": 1.1881092032472072e-05, "loss": 0.5985, "step": 4470 }, { "epoch": 0.45669050051072524, "grad_norm": 1.4769110478431835, "learning_rate": 1.1877842656930661e-05, "loss": 0.7733, "step": 4471 }, { "epoch": 0.4567926455566905, "grad_norm": 1.3819348357981414, "learning_rate": 1.1874593075858159e-05, "loss": 0.6765, "step": 4472 }, { "epoch": 0.45689479060265575, "grad_norm": 1.467529481900932, "learning_rate": 1.1871343289610233e-05, "loss": 0.7215, "step": 4473 }, { "epoch": 0.45699693564862104, "grad_norm": 1.3055325672947433, "learning_rate": 1.1868093298542576e-05, "loss": 0.7102, "step": 4474 }, { "epoch": 0.4570990806945863, "grad_norm": 1.5932109654979698, "learning_rate": 1.1864843103010898e-05, "loss": 0.7304, "step": 4475 }, { "epoch": 0.4572012257405516, "grad_norm": 1.5430787915665027, "learning_rate": 1.1861592703370942e-05, "loss": 0.7336, "step": 4476 }, { "epoch": 0.45730337078651684, "grad_norm": 1.5141796117121165, "learning_rate": 1.1858342099978458e-05, "loss": 0.7576, "step": 4477 }, { "epoch": 0.4574055158324821, "grad_norm": 1.4057089567082752, "learning_rate": 1.1855091293189233e-05, "loss": 0.7269, "step": 4478 }, { "epoch": 0.4575076608784474, "grad_norm": 1.4773961845811605, "learning_rate": 1.1851840283359067e-05, "loss": 0.7183, "step": 4479 }, { "epoch": 0.4576098059244127, "grad_norm": 1.465560342917277, "learning_rate": 1.1848589070843783e-05, "loss": 0.6968, "step": 4480 }, { "epoch": 0.4577119509703779, "grad_norm": 1.3990218642663586, "learning_rate": 1.1845337655999234e-05, "loss": 0.773, "step": 4481 }, { "epoch": 0.4578140960163432, "grad_norm": 1.4049164043782585, "learning_rate": 1.1842086039181284e-05, "loss": 0.732, "step": 4482 }, { "epoch": 0.4579162410623085, "grad_norm": 1.5324372564910247, "learning_rate": 1.1838834220745828e-05, "loss": 0.8042, "step": 4483 }, { "epoch": 0.4580183861082737, "grad_norm": 1.4241554532999965, "learning_rate": 1.1835582201048777e-05, "loss": 0.7165, "step": 4484 }, { "epoch": 0.458120531154239, "grad_norm": 1.4764995078289065, "learning_rate": 1.183232998044607e-05, "loss": 0.6513, "step": 4485 }, { "epoch": 0.4582226762002043, "grad_norm": 1.3793746714274664, "learning_rate": 1.1829077559293665e-05, "loss": 0.7562, "step": 4486 }, { "epoch": 0.4583248212461696, "grad_norm": 1.4055444718881402, "learning_rate": 1.1825824937947531e-05, "loss": 0.6216, "step": 4487 }, { "epoch": 0.4584269662921348, "grad_norm": 1.5710093242195102, "learning_rate": 1.1822572116763686e-05, "loss": 0.7807, "step": 4488 }, { "epoch": 0.4585291113381001, "grad_norm": 1.5918105120579595, "learning_rate": 1.1819319096098143e-05, "loss": 0.7835, "step": 4489 }, { "epoch": 0.4586312563840654, "grad_norm": 1.4985010192757853, "learning_rate": 1.1816065876306951e-05, "loss": 0.8001, "step": 4490 }, { "epoch": 0.45873340143003066, "grad_norm": 1.4315793498288834, "learning_rate": 1.1812812457746172e-05, "loss": 0.7355, "step": 4491 }, { "epoch": 0.4588355464759959, "grad_norm": 1.3918137340073968, "learning_rate": 1.18095588407719e-05, "loss": 0.745, "step": 4492 }, { "epoch": 0.4589376915219612, "grad_norm": 1.4100405651430146, "learning_rate": 1.1806305025740245e-05, "loss": 0.7515, "step": 4493 }, { "epoch": 0.45903983656792646, "grad_norm": 1.584976641672956, "learning_rate": 1.1803051013007336e-05, "loss": 0.8127, "step": 4494 }, { "epoch": 0.45914198161389175, "grad_norm": 1.344524049078553, "learning_rate": 1.1799796802929328e-05, "loss": 0.5779, "step": 4495 }, { "epoch": 0.459244126659857, "grad_norm": 1.5544231512859255, "learning_rate": 1.1796542395862401e-05, "loss": 0.7729, "step": 4496 }, { "epoch": 0.45934627170582226, "grad_norm": 1.44060791549229, "learning_rate": 1.1793287792162746e-05, "loss": 0.7558, "step": 4497 }, { "epoch": 0.45944841675178755, "grad_norm": 1.2906737439107772, "learning_rate": 1.1790032992186584e-05, "loss": 0.5977, "step": 4498 }, { "epoch": 0.45955056179775283, "grad_norm": 1.4899824954260785, "learning_rate": 1.178677799629015e-05, "loss": 0.7043, "step": 4499 }, { "epoch": 0.45965270684371806, "grad_norm": 1.5838750320172272, "learning_rate": 1.1783522804829714e-05, "loss": 0.7291, "step": 4500 }, { "epoch": 0.45975485188968335, "grad_norm": 1.421561941919404, "learning_rate": 1.1780267418161554e-05, "loss": 0.7337, "step": 4501 }, { "epoch": 0.45985699693564863, "grad_norm": 1.5156148111440408, "learning_rate": 1.1777011836641978e-05, "loss": 0.6222, "step": 4502 }, { "epoch": 0.4599591419816139, "grad_norm": 1.5774293563327493, "learning_rate": 1.1773756060627303e-05, "loss": 0.8313, "step": 4503 }, { "epoch": 0.46006128702757915, "grad_norm": 1.3139766439388563, "learning_rate": 1.1770500090473885e-05, "loss": 0.6694, "step": 4504 }, { "epoch": 0.46016343207354443, "grad_norm": 1.4742926552052202, "learning_rate": 1.1767243926538088e-05, "loss": 0.7518, "step": 4505 }, { "epoch": 0.4602655771195097, "grad_norm": 1.5176206140090431, "learning_rate": 1.17639875691763e-05, "loss": 0.681, "step": 4506 }, { "epoch": 0.460367722165475, "grad_norm": 1.422717750612694, "learning_rate": 1.1760731018744933e-05, "loss": 0.6845, "step": 4507 }, { "epoch": 0.46046986721144023, "grad_norm": 1.3533135422358182, "learning_rate": 1.175747427560042e-05, "loss": 0.7197, "step": 4508 }, { "epoch": 0.4605720122574055, "grad_norm": 1.415120636265506, "learning_rate": 1.175421734009921e-05, "loss": 0.67, "step": 4509 }, { "epoch": 0.4606741573033708, "grad_norm": 1.3429640427349085, "learning_rate": 1.1750960212597779e-05, "loss": 0.8117, "step": 4510 }, { "epoch": 0.46077630234933603, "grad_norm": 1.52927151973224, "learning_rate": 1.1747702893452621e-05, "loss": 0.7561, "step": 4511 }, { "epoch": 0.4608784473953013, "grad_norm": 1.4949545652597274, "learning_rate": 1.1744445383020254e-05, "loss": 0.6854, "step": 4512 }, { "epoch": 0.4609805924412666, "grad_norm": 1.3517988707324138, "learning_rate": 1.1741187681657213e-05, "loss": 0.8343, "step": 4513 }, { "epoch": 0.4610827374872319, "grad_norm": 1.5538074149780177, "learning_rate": 1.1737929789720055e-05, "loss": 0.8063, "step": 4514 }, { "epoch": 0.4611848825331971, "grad_norm": 1.4282948544756653, "learning_rate": 1.1734671707565358e-05, "loss": 0.8012, "step": 4515 }, { "epoch": 0.4612870275791624, "grad_norm": 1.452901576690905, "learning_rate": 1.1731413435549718e-05, "loss": 0.7044, "step": 4516 }, { "epoch": 0.4613891726251277, "grad_norm": 1.5156278544007429, "learning_rate": 1.1728154974029766e-05, "loss": 0.7288, "step": 4517 }, { "epoch": 0.46149131767109297, "grad_norm": 1.394450522609207, "learning_rate": 1.172489632336213e-05, "loss": 0.7078, "step": 4518 }, { "epoch": 0.4615934627170582, "grad_norm": 1.391370479248982, "learning_rate": 1.1721637483903478e-05, "loss": 0.7446, "step": 4519 }, { "epoch": 0.4616956077630235, "grad_norm": 1.4870436954173325, "learning_rate": 1.1718378456010495e-05, "loss": 0.6561, "step": 4520 }, { "epoch": 0.46179775280898877, "grad_norm": 1.3112354748647896, "learning_rate": 1.1715119240039877e-05, "loss": 0.6363, "step": 4521 }, { "epoch": 0.46189989785495406, "grad_norm": 1.4086864847352285, "learning_rate": 1.171185983634835e-05, "loss": 0.6854, "step": 4522 }, { "epoch": 0.4620020429009193, "grad_norm": 1.284031270230089, "learning_rate": 1.1708600245292656e-05, "loss": 0.6077, "step": 4523 }, { "epoch": 0.46210418794688457, "grad_norm": 1.637898424139519, "learning_rate": 1.1705340467229564e-05, "loss": 0.7875, "step": 4524 }, { "epoch": 0.46220633299284986, "grad_norm": 1.5078505965161635, "learning_rate": 1.1702080502515855e-05, "loss": 0.6618, "step": 4525 }, { "epoch": 0.46230847803881514, "grad_norm": 1.3516476396439767, "learning_rate": 1.1698820351508336e-05, "loss": 0.6934, "step": 4526 }, { "epoch": 0.46241062308478037, "grad_norm": 1.4097216494224978, "learning_rate": 1.1695560014563831e-05, "loss": 0.716, "step": 4527 }, { "epoch": 0.46251276813074566, "grad_norm": 1.541281499356308, "learning_rate": 1.1692299492039188e-05, "loss": 0.8164, "step": 4528 }, { "epoch": 0.46261491317671094, "grad_norm": 1.5537444997547776, "learning_rate": 1.168903878429127e-05, "loss": 0.7481, "step": 4529 }, { "epoch": 0.4627170582226762, "grad_norm": 1.4152725726478879, "learning_rate": 1.168577789167697e-05, "loss": 0.7678, "step": 4530 }, { "epoch": 0.46281920326864145, "grad_norm": 1.3947114030805299, "learning_rate": 1.1682516814553187e-05, "loss": 0.7577, "step": 4531 }, { "epoch": 0.46292134831460674, "grad_norm": 1.4001366349031497, "learning_rate": 1.1679255553276853e-05, "loss": 0.7626, "step": 4532 }, { "epoch": 0.463023493360572, "grad_norm": 1.8663926057054372, "learning_rate": 1.1675994108204913e-05, "loss": 0.7736, "step": 4533 }, { "epoch": 0.4631256384065373, "grad_norm": 1.5748421002904427, "learning_rate": 1.1672732479694338e-05, "loss": 0.8666, "step": 4534 }, { "epoch": 0.46322778345250254, "grad_norm": 1.602035825189538, "learning_rate": 1.1669470668102108e-05, "loss": 0.6936, "step": 4535 }, { "epoch": 0.4633299284984678, "grad_norm": 1.3340629493414446, "learning_rate": 1.1666208673785235e-05, "loss": 0.6729, "step": 4536 }, { "epoch": 0.4634320735444331, "grad_norm": 1.5561068922055945, "learning_rate": 1.1662946497100749e-05, "loss": 0.6787, "step": 4537 }, { "epoch": 0.46353421859039834, "grad_norm": 1.5118687406868672, "learning_rate": 1.1659684138405694e-05, "loss": 0.7221, "step": 4538 }, { "epoch": 0.4636363636363636, "grad_norm": 1.4519085162765155, "learning_rate": 1.1656421598057135e-05, "loss": 0.7615, "step": 4539 }, { "epoch": 0.4637385086823289, "grad_norm": 1.3996703743327186, "learning_rate": 1.1653158876412167e-05, "loss": 0.772, "step": 4540 }, { "epoch": 0.4638406537282942, "grad_norm": 1.424905734581487, "learning_rate": 1.1649895973827887e-05, "loss": 0.7325, "step": 4541 }, { "epoch": 0.4639427987742594, "grad_norm": 1.3153285306686104, "learning_rate": 1.1646632890661431e-05, "loss": 0.6948, "step": 4542 }, { "epoch": 0.4640449438202247, "grad_norm": 1.4792997454565426, "learning_rate": 1.1643369627269934e-05, "loss": 0.8275, "step": 4543 }, { "epoch": 0.46414708886619, "grad_norm": 1.4014362336860784, "learning_rate": 1.1640106184010578e-05, "loss": 0.7681, "step": 4544 }, { "epoch": 0.4642492339121553, "grad_norm": 1.3017855283276736, "learning_rate": 1.1636842561240536e-05, "loss": 0.7001, "step": 4545 }, { "epoch": 0.4643513789581205, "grad_norm": 1.3453803307719, "learning_rate": 1.1633578759317019e-05, "loss": 0.6569, "step": 4546 }, { "epoch": 0.4644535240040858, "grad_norm": 1.3712654405117655, "learning_rate": 1.1630314778597252e-05, "loss": 0.6077, "step": 4547 }, { "epoch": 0.4645556690500511, "grad_norm": 1.5268651178149601, "learning_rate": 1.1627050619438476e-05, "loss": 0.7634, "step": 4548 }, { "epoch": 0.46465781409601636, "grad_norm": 1.590761155513556, "learning_rate": 1.1623786282197961e-05, "loss": 0.7091, "step": 4549 }, { "epoch": 0.4647599591419816, "grad_norm": 1.3402695786937597, "learning_rate": 1.1620521767232988e-05, "loss": 0.7149, "step": 4550 }, { "epoch": 0.4648621041879469, "grad_norm": 1.4925413074760292, "learning_rate": 1.161725707490086e-05, "loss": 0.768, "step": 4551 }, { "epoch": 0.46496424923391216, "grad_norm": 1.3656067482389571, "learning_rate": 1.1613992205558903e-05, "loss": 0.6383, "step": 4552 }, { "epoch": 0.46506639427987745, "grad_norm": 1.4362685564863855, "learning_rate": 1.1610727159564454e-05, "loss": 0.7115, "step": 4553 }, { "epoch": 0.4651685393258427, "grad_norm": 1.5185036120952167, "learning_rate": 1.160746193727488e-05, "loss": 0.7549, "step": 4554 }, { "epoch": 0.46527068437180796, "grad_norm": 1.4029603369476822, "learning_rate": 1.1604196539047552e-05, "loss": 0.7091, "step": 4555 }, { "epoch": 0.46537282941777325, "grad_norm": 1.537030478497276, "learning_rate": 1.1600930965239883e-05, "loss": 0.7574, "step": 4556 }, { "epoch": 0.46547497446373853, "grad_norm": 1.5758924440498452, "learning_rate": 1.1597665216209281e-05, "loss": 0.7917, "step": 4557 }, { "epoch": 0.46557711950970376, "grad_norm": 1.5862278785151886, "learning_rate": 1.1594399292313192e-05, "loss": 0.8666, "step": 4558 }, { "epoch": 0.46567926455566905, "grad_norm": 1.3865680057483374, "learning_rate": 1.1591133193909067e-05, "loss": 0.8053, "step": 4559 }, { "epoch": 0.46578140960163433, "grad_norm": 1.382658973465373, "learning_rate": 1.1587866921354388e-05, "loss": 0.7276, "step": 4560 }, { "epoch": 0.4658835546475996, "grad_norm": 1.3851150416908025, "learning_rate": 1.1584600475006649e-05, "loss": 0.6589, "step": 4561 }, { "epoch": 0.46598569969356485, "grad_norm": 1.3623752690347581, "learning_rate": 1.1581333855223362e-05, "loss": 0.592, "step": 4562 }, { "epoch": 0.46608784473953013, "grad_norm": 1.3031010323044558, "learning_rate": 1.1578067062362064e-05, "loss": 0.6313, "step": 4563 }, { "epoch": 0.4661899897854954, "grad_norm": 1.5821242558541917, "learning_rate": 1.1574800096780307e-05, "loss": 0.7556, "step": 4564 }, { "epoch": 0.46629213483146065, "grad_norm": 1.3076552958323757, "learning_rate": 1.1571532958835664e-05, "loss": 0.7504, "step": 4565 }, { "epoch": 0.46639427987742593, "grad_norm": 1.495623075220918, "learning_rate": 1.1568265648885722e-05, "loss": 0.6968, "step": 4566 }, { "epoch": 0.4664964249233912, "grad_norm": 1.4119164980792056, "learning_rate": 1.1564998167288089e-05, "loss": 0.7125, "step": 4567 }, { "epoch": 0.4665985699693565, "grad_norm": 1.4671391997642753, "learning_rate": 1.1561730514400395e-05, "loss": 0.8384, "step": 4568 }, { "epoch": 0.46670071501532173, "grad_norm": 1.422650020680028, "learning_rate": 1.1558462690580292e-05, "loss": 0.7884, "step": 4569 }, { "epoch": 0.466802860061287, "grad_norm": 1.4682432490143777, "learning_rate": 1.1555194696185437e-05, "loss": 0.744, "step": 4570 }, { "epoch": 0.4669050051072523, "grad_norm": 1.34101269903494, "learning_rate": 1.1551926531573517e-05, "loss": 0.6566, "step": 4571 }, { "epoch": 0.4670071501532176, "grad_norm": 1.377863205523952, "learning_rate": 1.1548658197102236e-05, "loss": 0.7059, "step": 4572 }, { "epoch": 0.4671092951991828, "grad_norm": 1.4150997833577, "learning_rate": 1.1545389693129318e-05, "loss": 0.8529, "step": 4573 }, { "epoch": 0.4672114402451481, "grad_norm": 1.5751511378322125, "learning_rate": 1.1542121020012498e-05, "loss": 0.743, "step": 4574 }, { "epoch": 0.4673135852911134, "grad_norm": 1.4587180518501928, "learning_rate": 1.1538852178109532e-05, "loss": 0.6624, "step": 4575 }, { "epoch": 0.46741573033707867, "grad_norm": 1.513114376594062, "learning_rate": 1.1535583167778206e-05, "loss": 0.6995, "step": 4576 }, { "epoch": 0.4675178753830439, "grad_norm": 1.3959257434612828, "learning_rate": 1.1532313989376309e-05, "loss": 0.6748, "step": 4577 }, { "epoch": 0.4676200204290092, "grad_norm": 1.4624441010712579, "learning_rate": 1.1529044643261655e-05, "loss": 0.7235, "step": 4578 }, { "epoch": 0.46772216547497447, "grad_norm": 1.479573579565105, "learning_rate": 1.1525775129792079e-05, "loss": 0.6625, "step": 4579 }, { "epoch": 0.46782431052093976, "grad_norm": 1.468245640853636, "learning_rate": 1.1522505449325426e-05, "loss": 0.6372, "step": 4580 }, { "epoch": 0.467926455566905, "grad_norm": 1.4809584094673238, "learning_rate": 1.1519235602219569e-05, "loss": 0.6514, "step": 4581 }, { "epoch": 0.46802860061287027, "grad_norm": 1.3851591922369217, "learning_rate": 1.1515965588832394e-05, "loss": 0.8244, "step": 4582 }, { "epoch": 0.46813074565883556, "grad_norm": 1.449227194672402, "learning_rate": 1.1512695409521806e-05, "loss": 0.7521, "step": 4583 }, { "epoch": 0.46823289070480084, "grad_norm": 1.335402374116199, "learning_rate": 1.150942506464573e-05, "loss": 0.6609, "step": 4584 }, { "epoch": 0.46833503575076607, "grad_norm": 1.4657570501899861, "learning_rate": 1.15061545545621e-05, "loss": 0.6737, "step": 4585 }, { "epoch": 0.46843718079673136, "grad_norm": 1.3749899429581807, "learning_rate": 1.1502883879628887e-05, "loss": 0.6967, "step": 4586 }, { "epoch": 0.46853932584269664, "grad_norm": 1.496094852758198, "learning_rate": 1.1499613040204058e-05, "loss": 0.713, "step": 4587 }, { "epoch": 0.4686414708886619, "grad_norm": 1.4028827891639952, "learning_rate": 1.1496342036645615e-05, "loss": 0.6899, "step": 4588 }, { "epoch": 0.46874361593462716, "grad_norm": 1.6237822421921642, "learning_rate": 1.1493070869311569e-05, "loss": 0.7004, "step": 4589 }, { "epoch": 0.46884576098059244, "grad_norm": 1.5857557925370613, "learning_rate": 1.1489799538559953e-05, "loss": 0.705, "step": 4590 }, { "epoch": 0.4689479060265577, "grad_norm": 1.3767669386995813, "learning_rate": 1.1486528044748814e-05, "loss": 0.7151, "step": 4591 }, { "epoch": 0.46905005107252296, "grad_norm": 1.414581024929639, "learning_rate": 1.1483256388236218e-05, "loss": 0.7034, "step": 4592 }, { "epoch": 0.46915219611848824, "grad_norm": 1.5383110942917957, "learning_rate": 1.1479984569380256e-05, "loss": 0.782, "step": 4593 }, { "epoch": 0.4692543411644535, "grad_norm": 1.3752950914580033, "learning_rate": 1.1476712588539023e-05, "loss": 0.7254, "step": 4594 }, { "epoch": 0.4693564862104188, "grad_norm": 1.4533411182031981, "learning_rate": 1.1473440446070646e-05, "loss": 0.6597, "step": 4595 }, { "epoch": 0.46945863125638404, "grad_norm": 1.4501212256952631, "learning_rate": 1.147016814233326e-05, "loss": 0.6747, "step": 4596 }, { "epoch": 0.4695607763023493, "grad_norm": 1.3847479746946372, "learning_rate": 1.146689567768502e-05, "loss": 0.7018, "step": 4597 }, { "epoch": 0.4696629213483146, "grad_norm": 1.4891741056924936, "learning_rate": 1.14636230524841e-05, "loss": 0.8504, "step": 4598 }, { "epoch": 0.4697650663942799, "grad_norm": 1.500441230228755, "learning_rate": 1.1460350267088688e-05, "loss": 0.7318, "step": 4599 }, { "epoch": 0.4698672114402451, "grad_norm": 1.3831274770140913, "learning_rate": 1.1457077321857002e-05, "loss": 0.6675, "step": 4600 }, { "epoch": 0.4699693564862104, "grad_norm": 1.4479169600078448, "learning_rate": 1.145380421714726e-05, "loss": 0.7732, "step": 4601 }, { "epoch": 0.4700715015321757, "grad_norm": 1.3560605740731997, "learning_rate": 1.1450530953317705e-05, "loss": 0.7227, "step": 4602 }, { "epoch": 0.470173646578141, "grad_norm": 1.5640022945170355, "learning_rate": 1.1447257530726601e-05, "loss": 0.7384, "step": 4603 }, { "epoch": 0.4702757916241062, "grad_norm": 1.4872837492721065, "learning_rate": 1.1443983949732225e-05, "loss": 0.677, "step": 4604 }, { "epoch": 0.4703779366700715, "grad_norm": 1.3748553106857189, "learning_rate": 1.1440710210692874e-05, "loss": 0.6987, "step": 4605 }, { "epoch": 0.4704800817160368, "grad_norm": 1.4568476927107372, "learning_rate": 1.1437436313966857e-05, "loss": 0.6881, "step": 4606 }, { "epoch": 0.47058222676200206, "grad_norm": 1.5634136743519704, "learning_rate": 1.143416225991251e-05, "loss": 0.6742, "step": 4607 }, { "epoch": 0.4706843718079673, "grad_norm": 1.4234673549374743, "learning_rate": 1.1430888048888175e-05, "loss": 0.6917, "step": 4608 }, { "epoch": 0.4707865168539326, "grad_norm": 1.3922306134715365, "learning_rate": 1.1427613681252219e-05, "loss": 0.6853, "step": 4609 }, { "epoch": 0.47088866189989786, "grad_norm": 1.331213317427588, "learning_rate": 1.1424339157363024e-05, "loss": 0.6816, "step": 4610 }, { "epoch": 0.47099080694586315, "grad_norm": 1.458589449098392, "learning_rate": 1.1421064477578986e-05, "loss": 0.681, "step": 4611 }, { "epoch": 0.4710929519918284, "grad_norm": 1.4900377014875879, "learning_rate": 1.1417789642258523e-05, "loss": 0.7096, "step": 4612 }, { "epoch": 0.47119509703779366, "grad_norm": 1.380239925393662, "learning_rate": 1.1414514651760071e-05, "loss": 0.6124, "step": 4613 }, { "epoch": 0.47129724208375895, "grad_norm": 1.496306443905916, "learning_rate": 1.1411239506442073e-05, "loss": 0.7454, "step": 4614 }, { "epoch": 0.47139938712972423, "grad_norm": 1.6019461587359385, "learning_rate": 1.1407964206663e-05, "loss": 0.6258, "step": 4615 }, { "epoch": 0.47150153217568946, "grad_norm": 1.498099717141024, "learning_rate": 1.1404688752781335e-05, "loss": 0.7338, "step": 4616 }, { "epoch": 0.47160367722165475, "grad_norm": 1.3563410442073103, "learning_rate": 1.140141314515558e-05, "loss": 0.6424, "step": 4617 }, { "epoch": 0.47170582226762003, "grad_norm": 1.3693588691610963, "learning_rate": 1.1398137384144253e-05, "loss": 0.6434, "step": 4618 }, { "epoch": 0.47180796731358526, "grad_norm": 1.4005606658076981, "learning_rate": 1.1394861470105878e-05, "loss": 0.6191, "step": 4619 }, { "epoch": 0.47191011235955055, "grad_norm": 1.296358070109126, "learning_rate": 1.139158540339902e-05, "loss": 0.804, "step": 4620 }, { "epoch": 0.47201225740551583, "grad_norm": 1.49758334155772, "learning_rate": 1.1388309184382237e-05, "loss": 0.6821, "step": 4621 }, { "epoch": 0.4721144024514811, "grad_norm": 1.5757060269443182, "learning_rate": 1.1385032813414121e-05, "loss": 0.8261, "step": 4622 }, { "epoch": 0.47221654749744635, "grad_norm": 1.6149679206737317, "learning_rate": 1.1381756290853267e-05, "loss": 0.7312, "step": 4623 }, { "epoch": 0.47231869254341163, "grad_norm": 1.5887841295070801, "learning_rate": 1.1378479617058293e-05, "loss": 0.7529, "step": 4624 }, { "epoch": 0.4724208375893769, "grad_norm": 1.428688693816069, "learning_rate": 1.1375202792387836e-05, "loss": 0.7197, "step": 4625 }, { "epoch": 0.4725229826353422, "grad_norm": 1.412872508421381, "learning_rate": 1.1371925817200544e-05, "loss": 0.6769, "step": 4626 }, { "epoch": 0.47262512768130743, "grad_norm": 1.382736490786676, "learning_rate": 1.1368648691855084e-05, "loss": 0.7287, "step": 4627 }, { "epoch": 0.4727272727272727, "grad_norm": 1.4971349034599082, "learning_rate": 1.1365371416710142e-05, "loss": 0.7456, "step": 4628 }, { "epoch": 0.472829417773238, "grad_norm": 1.4842805375229045, "learning_rate": 1.1362093992124416e-05, "loss": 0.714, "step": 4629 }, { "epoch": 0.4729315628192033, "grad_norm": 1.4374293177193989, "learning_rate": 1.1358816418456625e-05, "loss": 0.6519, "step": 4630 }, { "epoch": 0.4730337078651685, "grad_norm": 1.3576375338439068, "learning_rate": 1.1355538696065491e-05, "loss": 0.6827, "step": 4631 }, { "epoch": 0.4731358529111338, "grad_norm": 1.3925611650479424, "learning_rate": 1.135226082530978e-05, "loss": 0.7307, "step": 4632 }, { "epoch": 0.4732379979570991, "grad_norm": 1.4609066009779765, "learning_rate": 1.1348982806548242e-05, "loss": 0.7623, "step": 4633 }, { "epoch": 0.4733401430030644, "grad_norm": 1.4176078667902363, "learning_rate": 1.1345704640139668e-05, "loss": 0.7382, "step": 4634 }, { "epoch": 0.4734422880490296, "grad_norm": 1.4160934666837424, "learning_rate": 1.134242632644285e-05, "loss": 0.7484, "step": 4635 }, { "epoch": 0.4735444330949949, "grad_norm": 1.4242787808837387, "learning_rate": 1.1339147865816602e-05, "loss": 0.7462, "step": 4636 }, { "epoch": 0.4736465781409602, "grad_norm": 1.3488103241888663, "learning_rate": 1.1335869258619758e-05, "loss": 0.5837, "step": 4637 }, { "epoch": 0.47374872318692546, "grad_norm": 1.478928467679866, "learning_rate": 1.1332590505211158e-05, "loss": 0.7891, "step": 4638 }, { "epoch": 0.4738508682328907, "grad_norm": 1.466070668525409, "learning_rate": 1.1329311605949666e-05, "loss": 0.7601, "step": 4639 }, { "epoch": 0.473953013278856, "grad_norm": 1.6708384094901065, "learning_rate": 1.1326032561194163e-05, "loss": 0.648, "step": 4640 }, { "epoch": 0.47405515832482126, "grad_norm": 1.4545883677528972, "learning_rate": 1.1322753371303536e-05, "loss": 0.6773, "step": 4641 }, { "epoch": 0.47415730337078654, "grad_norm": 1.4801451294868049, "learning_rate": 1.1319474036636702e-05, "loss": 0.6558, "step": 4642 }, { "epoch": 0.47425944841675177, "grad_norm": 1.4562705333935624, "learning_rate": 1.1316194557552575e-05, "loss": 0.7484, "step": 4643 }, { "epoch": 0.47436159346271706, "grad_norm": 1.4725518306596883, "learning_rate": 1.1312914934410111e-05, "loss": 0.7568, "step": 4644 }, { "epoch": 0.47446373850868234, "grad_norm": 1.471184976628405, "learning_rate": 1.1309635167568259e-05, "loss": 0.725, "step": 4645 }, { "epoch": 0.4745658835546476, "grad_norm": 1.3433768566020603, "learning_rate": 1.1306355257385987e-05, "loss": 0.6963, "step": 4646 }, { "epoch": 0.47466802860061286, "grad_norm": 1.575069507568577, "learning_rate": 1.1303075204222292e-05, "loss": 0.8487, "step": 4647 }, { "epoch": 0.47477017364657814, "grad_norm": 1.4967242299854873, "learning_rate": 1.1299795008436168e-05, "loss": 0.7634, "step": 4648 }, { "epoch": 0.4748723186925434, "grad_norm": 1.3849950573502745, "learning_rate": 1.1296514670386646e-05, "loss": 0.6624, "step": 4649 }, { "epoch": 0.47497446373850866, "grad_norm": 1.5094100383409006, "learning_rate": 1.1293234190432753e-05, "loss": 0.7114, "step": 4650 }, { "epoch": 0.47507660878447394, "grad_norm": 1.3847510912887924, "learning_rate": 1.1289953568933545e-05, "loss": 0.7251, "step": 4651 }, { "epoch": 0.4751787538304392, "grad_norm": 1.4199815336506274, "learning_rate": 1.1286672806248082e-05, "loss": 0.7147, "step": 4652 }, { "epoch": 0.4752808988764045, "grad_norm": 1.5358473275215399, "learning_rate": 1.1283391902735451e-05, "loss": 0.7356, "step": 4653 }, { "epoch": 0.47538304392236974, "grad_norm": 1.6028311372699997, "learning_rate": 1.128011085875475e-05, "loss": 0.7518, "step": 4654 }, { "epoch": 0.475485188968335, "grad_norm": 1.4757244276681047, "learning_rate": 1.1276829674665084e-05, "loss": 0.7508, "step": 4655 }, { "epoch": 0.4755873340143003, "grad_norm": 1.6398672407581447, "learning_rate": 1.1273548350825584e-05, "loss": 0.8111, "step": 4656 }, { "epoch": 0.4756894790602656, "grad_norm": 1.3283907493780107, "learning_rate": 1.12702668875954e-05, "loss": 0.6821, "step": 4657 }, { "epoch": 0.4757916241062308, "grad_norm": 1.4477970140519387, "learning_rate": 1.126698528533368e-05, "loss": 0.6846, "step": 4658 }, { "epoch": 0.4758937691521961, "grad_norm": 1.5767618769337386, "learning_rate": 1.1263703544399605e-05, "loss": 0.6887, "step": 4659 }, { "epoch": 0.4759959141981614, "grad_norm": 1.551097458156733, "learning_rate": 1.1260421665152357e-05, "loss": 0.7457, "step": 4660 }, { "epoch": 0.4760980592441267, "grad_norm": 1.5490172537607214, "learning_rate": 1.1257139647951146e-05, "loss": 0.7408, "step": 4661 }, { "epoch": 0.4762002042900919, "grad_norm": 1.488877484561567, "learning_rate": 1.1253857493155189e-05, "loss": 0.7016, "step": 4662 }, { "epoch": 0.4763023493360572, "grad_norm": 1.4903238794910556, "learning_rate": 1.1250575201123716e-05, "loss": 0.6751, "step": 4663 }, { "epoch": 0.4764044943820225, "grad_norm": 1.3887470103732797, "learning_rate": 1.124729277221598e-05, "loss": 0.6599, "step": 4664 }, { "epoch": 0.47650663942798777, "grad_norm": 1.3380593264598162, "learning_rate": 1.1244010206791244e-05, "loss": 0.7595, "step": 4665 }, { "epoch": 0.476608784473953, "grad_norm": 1.3504635421716253, "learning_rate": 1.1240727505208793e-05, "loss": 0.73, "step": 4666 }, { "epoch": 0.4767109295199183, "grad_norm": 1.3045087532533222, "learning_rate": 1.1237444667827908e-05, "loss": 0.7515, "step": 4667 }, { "epoch": 0.47681307456588357, "grad_norm": 1.4054401946995192, "learning_rate": 1.1234161695007906e-05, "loss": 0.7107, "step": 4668 }, { "epoch": 0.47691521961184885, "grad_norm": 1.3447237163534818, "learning_rate": 1.1230878587108112e-05, "loss": 0.708, "step": 4669 }, { "epoch": 0.4770173646578141, "grad_norm": 1.3755045437647198, "learning_rate": 1.1227595344487861e-05, "loss": 0.7086, "step": 4670 }, { "epoch": 0.47711950970377937, "grad_norm": 1.3517475051221437, "learning_rate": 1.1224311967506505e-05, "loss": 0.7163, "step": 4671 }, { "epoch": 0.47722165474974465, "grad_norm": 1.4105765737950693, "learning_rate": 1.1221028456523413e-05, "loss": 0.6783, "step": 4672 }, { "epoch": 0.47732379979570994, "grad_norm": 1.613342143842825, "learning_rate": 1.1217744811897966e-05, "loss": 0.7139, "step": 4673 }, { "epoch": 0.47742594484167516, "grad_norm": 1.5596618377009908, "learning_rate": 1.1214461033989566e-05, "loss": 0.7693, "step": 4674 }, { "epoch": 0.47752808988764045, "grad_norm": 1.3792060089492475, "learning_rate": 1.1211177123157619e-05, "loss": 0.7617, "step": 4675 }, { "epoch": 0.47763023493360574, "grad_norm": 1.4387718634765345, "learning_rate": 1.1207893079761551e-05, "loss": 0.8137, "step": 4676 }, { "epoch": 0.47773237997957096, "grad_norm": 1.231978076444747, "learning_rate": 1.1204608904160808e-05, "loss": 0.6823, "step": 4677 }, { "epoch": 0.47783452502553625, "grad_norm": 1.4556551950995793, "learning_rate": 1.1201324596714845e-05, "loss": 0.7209, "step": 4678 }, { "epoch": 0.47793667007150153, "grad_norm": 1.4782081118904289, "learning_rate": 1.1198040157783125e-05, "loss": 0.7531, "step": 4679 }, { "epoch": 0.4780388151174668, "grad_norm": 1.4649234984523267, "learning_rate": 1.1194755587725134e-05, "loss": 0.7274, "step": 4680 }, { "epoch": 0.47814096016343205, "grad_norm": 1.4106938227713668, "learning_rate": 1.1191470886900378e-05, "loss": 0.6623, "step": 4681 }, { "epoch": 0.47824310520939733, "grad_norm": 1.3474231419601228, "learning_rate": 1.1188186055668358e-05, "loss": 0.6413, "step": 4682 }, { "epoch": 0.4783452502553626, "grad_norm": 1.359182306055146, "learning_rate": 1.118490109438861e-05, "loss": 0.6482, "step": 4683 }, { "epoch": 0.4784473953013279, "grad_norm": 1.3975563749228346, "learning_rate": 1.118161600342067e-05, "loss": 0.6667, "step": 4684 }, { "epoch": 0.47854954034729313, "grad_norm": 1.4644867152855252, "learning_rate": 1.1178330783124094e-05, "loss": 0.7048, "step": 4685 }, { "epoch": 0.4786516853932584, "grad_norm": 1.5076206982144373, "learning_rate": 1.1175045433858457e-05, "loss": 0.7019, "step": 4686 }, { "epoch": 0.4787538304392237, "grad_norm": 1.486235354665212, "learning_rate": 1.1171759955983332e-05, "loss": 0.7228, "step": 4687 }, { "epoch": 0.478855975485189, "grad_norm": 1.4555598177593088, "learning_rate": 1.1168474349858325e-05, "loss": 0.7937, "step": 4688 }, { "epoch": 0.4789581205311542, "grad_norm": 1.6350161323704449, "learning_rate": 1.1165188615843048e-05, "loss": 0.8726, "step": 4689 }, { "epoch": 0.4790602655771195, "grad_norm": 1.564789595843301, "learning_rate": 1.116190275429712e-05, "loss": 0.8349, "step": 4690 }, { "epoch": 0.4791624106230848, "grad_norm": 1.4402449148628527, "learning_rate": 1.115861676558019e-05, "loss": 0.6312, "step": 4691 }, { "epoch": 0.4792645556690501, "grad_norm": 1.4385768883746815, "learning_rate": 1.1155330650051902e-05, "loss": 0.6438, "step": 4692 }, { "epoch": 0.4793667007150153, "grad_norm": 1.6355445470466625, "learning_rate": 1.1152044408071929e-05, "loss": 0.7258, "step": 4693 }, { "epoch": 0.4794688457609806, "grad_norm": 1.4723669392423124, "learning_rate": 1.1148758039999951e-05, "loss": 0.754, "step": 4694 }, { "epoch": 0.4795709908069459, "grad_norm": 1.3979130111637978, "learning_rate": 1.1145471546195665e-05, "loss": 0.6319, "step": 4695 }, { "epoch": 0.47967313585291116, "grad_norm": 1.346603391231298, "learning_rate": 1.1142184927018778e-05, "loss": 0.7508, "step": 4696 }, { "epoch": 0.4797752808988764, "grad_norm": 1.4390226762627387, "learning_rate": 1.113889818282901e-05, "loss": 0.8709, "step": 4697 }, { "epoch": 0.4798774259448417, "grad_norm": 1.338201967658271, "learning_rate": 1.1135611313986103e-05, "loss": 0.6627, "step": 4698 }, { "epoch": 0.47997957099080696, "grad_norm": 1.337653789721875, "learning_rate": 1.1132324320849803e-05, "loss": 0.6441, "step": 4699 }, { "epoch": 0.48008171603677224, "grad_norm": 1.4446213137373256, "learning_rate": 1.1129037203779873e-05, "loss": 0.6464, "step": 4700 }, { "epoch": 0.4801838610827375, "grad_norm": 1.4565199164257723, "learning_rate": 1.1125749963136097e-05, "loss": 0.7344, "step": 4701 }, { "epoch": 0.48028600612870276, "grad_norm": 1.1738935492007405, "learning_rate": 1.1122462599278255e-05, "loss": 0.747, "step": 4702 }, { "epoch": 0.48038815117466804, "grad_norm": 1.4301540878668242, "learning_rate": 1.1119175112566159e-05, "loss": 0.734, "step": 4703 }, { "epoch": 0.4804902962206333, "grad_norm": 1.3730090736621727, "learning_rate": 1.111588750335962e-05, "loss": 0.7, "step": 4704 }, { "epoch": 0.48059244126659856, "grad_norm": 1.3630316007743029, "learning_rate": 1.1112599772018476e-05, "loss": 0.5666, "step": 4705 }, { "epoch": 0.48069458631256384, "grad_norm": 1.4332445928303563, "learning_rate": 1.110931191890257e-05, "loss": 0.6576, "step": 4706 }, { "epoch": 0.48079673135852913, "grad_norm": 1.4427638806608467, "learning_rate": 1.1106023944371754e-05, "loss": 0.6496, "step": 4707 }, { "epoch": 0.48089887640449436, "grad_norm": 1.441314930530969, "learning_rate": 1.1102735848785901e-05, "loss": 0.7646, "step": 4708 }, { "epoch": 0.48100102145045964, "grad_norm": 1.4011243402104627, "learning_rate": 1.10994476325049e-05, "loss": 0.6796, "step": 4709 }, { "epoch": 0.4811031664964249, "grad_norm": 1.5332078680310863, "learning_rate": 1.1096159295888646e-05, "loss": 0.6752, "step": 4710 }, { "epoch": 0.4812053115423902, "grad_norm": 1.339275016041277, "learning_rate": 1.1092870839297048e-05, "loss": 0.7027, "step": 4711 }, { "epoch": 0.48130745658835544, "grad_norm": 1.5286070977781736, "learning_rate": 1.1089582263090031e-05, "loss": 0.6976, "step": 4712 }, { "epoch": 0.4814096016343207, "grad_norm": 1.3916486434080306, "learning_rate": 1.108629356762753e-05, "loss": 0.7242, "step": 4713 }, { "epoch": 0.481511746680286, "grad_norm": 1.4547589553747535, "learning_rate": 1.1083004753269498e-05, "loss": 0.6791, "step": 4714 }, { "epoch": 0.4816138917262513, "grad_norm": 1.3247269001281308, "learning_rate": 1.1079715820375896e-05, "loss": 0.6979, "step": 4715 }, { "epoch": 0.4817160367722165, "grad_norm": 1.5189073367249037, "learning_rate": 1.10764267693067e-05, "loss": 0.7847, "step": 4716 }, { "epoch": 0.4818181818181818, "grad_norm": 1.4536075292173836, "learning_rate": 1.1073137600421895e-05, "loss": 0.7313, "step": 4717 }, { "epoch": 0.4819203268641471, "grad_norm": 1.5215812614239415, "learning_rate": 1.106984831408149e-05, "loss": 0.7802, "step": 4718 }, { "epoch": 0.4820224719101124, "grad_norm": 1.45199619357555, "learning_rate": 1.1066558910645494e-05, "loss": 0.688, "step": 4719 }, { "epoch": 0.4821246169560776, "grad_norm": 1.449925752627741, "learning_rate": 1.1063269390473937e-05, "loss": 0.7402, "step": 4720 }, { "epoch": 0.4822267620020429, "grad_norm": 1.3700791134833505, "learning_rate": 1.1059979753926857e-05, "loss": 0.6968, "step": 4721 }, { "epoch": 0.4823289070480082, "grad_norm": 1.6700825120330365, "learning_rate": 1.1056690001364312e-05, "loss": 0.7011, "step": 4722 }, { "epoch": 0.48243105209397347, "grad_norm": 1.452173014140861, "learning_rate": 1.105340013314636e-05, "loss": 0.7881, "step": 4723 }, { "epoch": 0.4825331971399387, "grad_norm": 1.7102688780087025, "learning_rate": 1.1050110149633081e-05, "loss": 0.8457, "step": 4724 }, { "epoch": 0.482635342185904, "grad_norm": 1.4183807764377703, "learning_rate": 1.1046820051184572e-05, "loss": 0.5645, "step": 4725 }, { "epoch": 0.48273748723186927, "grad_norm": 1.5433763726676624, "learning_rate": 1.104352983816093e-05, "loss": 0.7098, "step": 4726 }, { "epoch": 0.48283963227783455, "grad_norm": 1.530437238645394, "learning_rate": 1.1040239510922274e-05, "loss": 0.6483, "step": 4727 }, { "epoch": 0.4829417773237998, "grad_norm": 1.281583461424658, "learning_rate": 1.1036949069828732e-05, "loss": 0.6891, "step": 4728 }, { "epoch": 0.48304392236976507, "grad_norm": 1.581099491532157, "learning_rate": 1.1033658515240442e-05, "loss": 0.7577, "step": 4729 }, { "epoch": 0.48314606741573035, "grad_norm": 1.3404123409199826, "learning_rate": 1.1030367847517562e-05, "loss": 0.6993, "step": 4730 }, { "epoch": 0.4832482124616956, "grad_norm": 1.447097435906328, "learning_rate": 1.1027077067020256e-05, "loss": 0.7, "step": 4731 }, { "epoch": 0.48335035750766087, "grad_norm": 1.522689313433083, "learning_rate": 1.1023786174108697e-05, "loss": 0.7008, "step": 4732 }, { "epoch": 0.48345250255362615, "grad_norm": 1.2472719613848893, "learning_rate": 1.1020495169143085e-05, "loss": 0.5715, "step": 4733 }, { "epoch": 0.48355464759959144, "grad_norm": 1.4343415549086316, "learning_rate": 1.1017204052483614e-05, "loss": 0.6811, "step": 4734 }, { "epoch": 0.48365679264555667, "grad_norm": 1.4160243811006035, "learning_rate": 1.1013912824490505e-05, "loss": 0.6955, "step": 4735 }, { "epoch": 0.48375893769152195, "grad_norm": 1.4087088736368545, "learning_rate": 1.1010621485523976e-05, "loss": 0.7517, "step": 4736 }, { "epoch": 0.48386108273748724, "grad_norm": 1.5792861609406652, "learning_rate": 1.100733003594428e-05, "loss": 0.8144, "step": 4737 }, { "epoch": 0.4839632277834525, "grad_norm": 1.4804956668244074, "learning_rate": 1.1004038476111655e-05, "loss": 0.7572, "step": 4738 }, { "epoch": 0.48406537282941775, "grad_norm": 1.497378797266265, "learning_rate": 1.1000746806386376e-05, "loss": 0.6738, "step": 4739 }, { "epoch": 0.48416751787538304, "grad_norm": 1.664610861877691, "learning_rate": 1.0997455027128708e-05, "loss": 0.7767, "step": 4740 }, { "epoch": 0.4842696629213483, "grad_norm": 1.5594397513256653, "learning_rate": 1.0994163138698944e-05, "loss": 0.6548, "step": 4741 }, { "epoch": 0.4843718079673136, "grad_norm": 1.3932185142744336, "learning_rate": 1.0990871141457383e-05, "loss": 0.6795, "step": 4742 }, { "epoch": 0.48447395301327884, "grad_norm": 1.5340790775083974, "learning_rate": 1.0987579035764334e-05, "loss": 0.745, "step": 4743 }, { "epoch": 0.4845760980592441, "grad_norm": 1.5184643039088583, "learning_rate": 1.098428682198012e-05, "loss": 0.6914, "step": 4744 }, { "epoch": 0.4846782431052094, "grad_norm": 1.534369240790998, "learning_rate": 1.0980994500465082e-05, "loss": 0.7083, "step": 4745 }, { "epoch": 0.4847803881511747, "grad_norm": 1.375709347008455, "learning_rate": 1.097770207157956e-05, "loss": 0.615, "step": 4746 }, { "epoch": 0.4848825331971399, "grad_norm": 1.5124341747214853, "learning_rate": 1.0974409535683915e-05, "loss": 0.7781, "step": 4747 }, { "epoch": 0.4849846782431052, "grad_norm": 1.4739504895528295, "learning_rate": 1.0971116893138514e-05, "loss": 0.7429, "step": 4748 }, { "epoch": 0.4850868232890705, "grad_norm": 1.469761192613227, "learning_rate": 1.0967824144303744e-05, "loss": 0.7064, "step": 4749 }, { "epoch": 0.4851889683350358, "grad_norm": 1.3837198078945248, "learning_rate": 1.0964531289539996e-05, "loss": 0.8201, "step": 4750 }, { "epoch": 0.485291113381001, "grad_norm": 1.6432937371759888, "learning_rate": 1.0961238329207674e-05, "loss": 0.6821, "step": 4751 }, { "epoch": 0.4853932584269663, "grad_norm": 1.4232015699449252, "learning_rate": 1.0957945263667198e-05, "loss": 0.7385, "step": 4752 }, { "epoch": 0.4854954034729316, "grad_norm": 1.5591136740028924, "learning_rate": 1.0954652093278992e-05, "loss": 0.6357, "step": 4753 }, { "epoch": 0.48559754851889686, "grad_norm": 1.6090627757684384, "learning_rate": 1.09513588184035e-05, "loss": 0.8706, "step": 4754 }, { "epoch": 0.4856996935648621, "grad_norm": 1.2824965117402605, "learning_rate": 1.0948065439401167e-05, "loss": 0.7944, "step": 4755 }, { "epoch": 0.4858018386108274, "grad_norm": 1.5222870121338883, "learning_rate": 1.0944771956632461e-05, "loss": 0.6711, "step": 4756 }, { "epoch": 0.48590398365679266, "grad_norm": 1.4931651566811324, "learning_rate": 1.0941478370457857e-05, "loss": 0.7286, "step": 4757 }, { "epoch": 0.4860061287027579, "grad_norm": 1.41385675466694, "learning_rate": 1.0938184681237833e-05, "loss": 0.6811, "step": 4758 }, { "epoch": 0.4861082737487232, "grad_norm": 1.4487110011342361, "learning_rate": 1.0934890889332892e-05, "loss": 0.7063, "step": 4759 }, { "epoch": 0.48621041879468846, "grad_norm": 1.560260887203428, "learning_rate": 1.0931596995103537e-05, "loss": 0.7352, "step": 4760 }, { "epoch": 0.48631256384065374, "grad_norm": 1.4236950726441457, "learning_rate": 1.092830299891029e-05, "loss": 0.7215, "step": 4761 }, { "epoch": 0.486414708886619, "grad_norm": 1.43817469110565, "learning_rate": 1.092500890111368e-05, "loss": 0.7674, "step": 4762 }, { "epoch": 0.48651685393258426, "grad_norm": 1.4901566016402077, "learning_rate": 1.0921714702074247e-05, "loss": 0.7956, "step": 4763 }, { "epoch": 0.48661899897854954, "grad_norm": 1.5059899075064886, "learning_rate": 1.0918420402152546e-05, "loss": 0.7749, "step": 4764 }, { "epoch": 0.48672114402451483, "grad_norm": 1.3965853583805192, "learning_rate": 1.0915126001709136e-05, "loss": 0.6573, "step": 4765 }, { "epoch": 0.48682328907048006, "grad_norm": 1.5938716200642589, "learning_rate": 1.0911831501104598e-05, "loss": 0.8367, "step": 4766 }, { "epoch": 0.48692543411644534, "grad_norm": 1.3263109440622418, "learning_rate": 1.090853690069951e-05, "loss": 0.6874, "step": 4767 }, { "epoch": 0.48702757916241063, "grad_norm": 1.478747216684893, "learning_rate": 1.0905242200854472e-05, "loss": 0.6745, "step": 4768 }, { "epoch": 0.4871297242083759, "grad_norm": 1.377958581345215, "learning_rate": 1.0901947401930091e-05, "loss": 0.754, "step": 4769 }, { "epoch": 0.48723186925434114, "grad_norm": 1.3274494273208592, "learning_rate": 1.0898652504286982e-05, "loss": 0.6681, "step": 4770 }, { "epoch": 0.48733401430030643, "grad_norm": 1.588452944987468, "learning_rate": 1.0895357508285779e-05, "loss": 0.6956, "step": 4771 }, { "epoch": 0.4874361593462717, "grad_norm": 1.4176444713663592, "learning_rate": 1.0892062414287118e-05, "loss": 0.6232, "step": 4772 }, { "epoch": 0.487538304392237, "grad_norm": 1.3562293899930955, "learning_rate": 1.0888767222651646e-05, "loss": 0.7467, "step": 4773 }, { "epoch": 0.48764044943820223, "grad_norm": 1.4366110614247176, "learning_rate": 1.088547193374003e-05, "loss": 0.6906, "step": 4774 }, { "epoch": 0.4877425944841675, "grad_norm": 1.5247542758365578, "learning_rate": 1.0882176547912937e-05, "loss": 0.7729, "step": 4775 }, { "epoch": 0.4878447395301328, "grad_norm": 1.415319457842646, "learning_rate": 1.0878881065531051e-05, "loss": 0.7112, "step": 4776 }, { "epoch": 0.4879468845760981, "grad_norm": 1.6514315506061399, "learning_rate": 1.0875585486955068e-05, "loss": 0.7946, "step": 4777 }, { "epoch": 0.4880490296220633, "grad_norm": 1.4956595068805882, "learning_rate": 1.0872289812545685e-05, "loss": 0.738, "step": 4778 }, { "epoch": 0.4881511746680286, "grad_norm": 1.468818331164324, "learning_rate": 1.0868994042663619e-05, "loss": 0.8096, "step": 4779 }, { "epoch": 0.4882533197139939, "grad_norm": 1.4975732916658322, "learning_rate": 1.086569817766959e-05, "loss": 0.6452, "step": 4780 }, { "epoch": 0.48835546475995917, "grad_norm": 1.4818801455518247, "learning_rate": 1.0862402217924342e-05, "loss": 0.774, "step": 4781 }, { "epoch": 0.4884576098059244, "grad_norm": 1.4038656981792568, "learning_rate": 1.0859106163788608e-05, "loss": 0.6904, "step": 4782 }, { "epoch": 0.4885597548518897, "grad_norm": 1.439198575987848, "learning_rate": 1.0855810015623156e-05, "loss": 0.7551, "step": 4783 }, { "epoch": 0.48866189989785497, "grad_norm": 1.5777882423507272, "learning_rate": 1.085251377378874e-05, "loss": 0.6802, "step": 4784 }, { "epoch": 0.4887640449438202, "grad_norm": 1.4210039461625712, "learning_rate": 1.0849217438646143e-05, "loss": 0.7983, "step": 4785 }, { "epoch": 0.4888661899897855, "grad_norm": 1.4216855239283253, "learning_rate": 1.0845921010556148e-05, "loss": 0.8124, "step": 4786 }, { "epoch": 0.48896833503575077, "grad_norm": 1.5058742549350697, "learning_rate": 1.0842624489879553e-05, "loss": 0.8104, "step": 4787 }, { "epoch": 0.48907048008171605, "grad_norm": 1.5358103262303238, "learning_rate": 1.083932787697716e-05, "loss": 0.7297, "step": 4788 }, { "epoch": 0.4891726251276813, "grad_norm": 1.4314046155395013, "learning_rate": 1.0836031172209792e-05, "loss": 0.7326, "step": 4789 }, { "epoch": 0.48927477017364657, "grad_norm": 1.7659487389672255, "learning_rate": 1.0832734375938269e-05, "loss": 0.6802, "step": 4790 }, { "epoch": 0.48937691521961185, "grad_norm": 1.5208691604189406, "learning_rate": 1.0829437488523433e-05, "loss": 0.7457, "step": 4791 }, { "epoch": 0.48947906026557714, "grad_norm": 1.3267972839509876, "learning_rate": 1.0826140510326127e-05, "loss": 0.6537, "step": 4792 }, { "epoch": 0.48958120531154237, "grad_norm": 1.5645050670611527, "learning_rate": 1.082284344170721e-05, "loss": 0.7847, "step": 4793 }, { "epoch": 0.48968335035750765, "grad_norm": 1.4751413112048315, "learning_rate": 1.0819546283027544e-05, "loss": 0.6565, "step": 4794 }, { "epoch": 0.48978549540347294, "grad_norm": 1.5107509457948471, "learning_rate": 1.081624903464801e-05, "loss": 0.7759, "step": 4795 }, { "epoch": 0.4898876404494382, "grad_norm": 1.4456766290323204, "learning_rate": 1.081295169692949e-05, "loss": 0.8139, "step": 4796 }, { "epoch": 0.48998978549540345, "grad_norm": 1.5104576654915762, "learning_rate": 1.080965427023288e-05, "loss": 0.6609, "step": 4797 }, { "epoch": 0.49009193054136874, "grad_norm": 1.3609813645018798, "learning_rate": 1.0806356754919092e-05, "loss": 0.6238, "step": 4798 }, { "epoch": 0.490194075587334, "grad_norm": 1.3744872530313812, "learning_rate": 1.0803059151349034e-05, "loss": 0.6872, "step": 4799 }, { "epoch": 0.4902962206332993, "grad_norm": 1.445958951773182, "learning_rate": 1.0799761459883631e-05, "loss": 0.7646, "step": 4800 }, { "epoch": 0.49039836567926454, "grad_norm": 1.4104058097550571, "learning_rate": 1.0796463680883822e-05, "loss": 0.7003, "step": 4801 }, { "epoch": 0.4905005107252298, "grad_norm": 1.4669422083785768, "learning_rate": 1.0793165814710547e-05, "loss": 0.654, "step": 4802 }, { "epoch": 0.4906026557711951, "grad_norm": 1.6136921348822473, "learning_rate": 1.0789867861724764e-05, "loss": 0.8445, "step": 4803 }, { "epoch": 0.4907048008171604, "grad_norm": 1.4706167800385206, "learning_rate": 1.078656982228743e-05, "loss": 0.7202, "step": 4804 }, { "epoch": 0.4908069458631256, "grad_norm": 1.436602342849144, "learning_rate": 1.078327169675952e-05, "loss": 0.6466, "step": 4805 }, { "epoch": 0.4909090909090909, "grad_norm": 1.347962570721208, "learning_rate": 1.077997348550202e-05, "loss": 0.5594, "step": 4806 }, { "epoch": 0.4910112359550562, "grad_norm": 1.5684552429194407, "learning_rate": 1.0776675188875916e-05, "loss": 0.8123, "step": 4807 }, { "epoch": 0.4911133810010215, "grad_norm": 1.474407940713154, "learning_rate": 1.0773376807242211e-05, "loss": 0.6448, "step": 4808 }, { "epoch": 0.4912155260469867, "grad_norm": 1.4279886806824929, "learning_rate": 1.0770078340961915e-05, "loss": 0.6793, "step": 4809 }, { "epoch": 0.491317671092952, "grad_norm": 1.5491650376005763, "learning_rate": 1.076677979039605e-05, "loss": 0.6706, "step": 4810 }, { "epoch": 0.4914198161389173, "grad_norm": 1.475385142587922, "learning_rate": 1.0763481155905637e-05, "loss": 0.7756, "step": 4811 }, { "epoch": 0.49152196118488256, "grad_norm": 1.37668490961059, "learning_rate": 1.0760182437851718e-05, "loss": 0.6976, "step": 4812 }, { "epoch": 0.4916241062308478, "grad_norm": 1.292421018288612, "learning_rate": 1.0756883636595344e-05, "loss": 0.675, "step": 4813 }, { "epoch": 0.4917262512768131, "grad_norm": 1.7145517074301948, "learning_rate": 1.0753584752497566e-05, "loss": 0.7903, "step": 4814 }, { "epoch": 0.49182839632277836, "grad_norm": 1.3046012698934726, "learning_rate": 1.0750285785919449e-05, "loss": 0.5661, "step": 4815 }, { "epoch": 0.4919305413687436, "grad_norm": 1.3200863451132492, "learning_rate": 1.0746986737222067e-05, "loss": 0.6726, "step": 4816 }, { "epoch": 0.4920326864147089, "grad_norm": 1.4486284752239376, "learning_rate": 1.0743687606766505e-05, "loss": 0.7016, "step": 4817 }, { "epoch": 0.49213483146067416, "grad_norm": 1.513097406772954, "learning_rate": 1.0740388394913855e-05, "loss": 0.7805, "step": 4818 }, { "epoch": 0.49223697650663945, "grad_norm": 1.3646155037894567, "learning_rate": 1.0737089102025216e-05, "loss": 0.6375, "step": 4819 }, { "epoch": 0.4923391215526047, "grad_norm": 1.3405871268165823, "learning_rate": 1.0733789728461696e-05, "loss": 0.6672, "step": 4820 }, { "epoch": 0.49244126659856996, "grad_norm": 1.509606979433828, "learning_rate": 1.073049027458442e-05, "loss": 0.8514, "step": 4821 }, { "epoch": 0.49254341164453525, "grad_norm": 1.4197579308679416, "learning_rate": 1.072719074075451e-05, "loss": 0.723, "step": 4822 }, { "epoch": 0.49264555669050053, "grad_norm": 1.4061805051641842, "learning_rate": 1.0723891127333104e-05, "loss": 0.7146, "step": 4823 }, { "epoch": 0.49274770173646576, "grad_norm": 1.414937495617106, "learning_rate": 1.0720591434681343e-05, "loss": 0.6488, "step": 4824 }, { "epoch": 0.49284984678243104, "grad_norm": 1.5583678747805858, "learning_rate": 1.0717291663160387e-05, "loss": 0.8501, "step": 4825 }, { "epoch": 0.49295199182839633, "grad_norm": 1.50701318068142, "learning_rate": 1.0713991813131395e-05, "loss": 0.7165, "step": 4826 }, { "epoch": 0.4930541368743616, "grad_norm": 1.4606468327034636, "learning_rate": 1.071069188495554e-05, "loss": 0.6797, "step": 4827 }, { "epoch": 0.49315628192032684, "grad_norm": 1.592801392103572, "learning_rate": 1.0707391878993996e-05, "loss": 0.8635, "step": 4828 }, { "epoch": 0.49325842696629213, "grad_norm": 1.4870351175924856, "learning_rate": 1.0704091795607954e-05, "loss": 0.7054, "step": 4829 }, { "epoch": 0.4933605720122574, "grad_norm": 1.4574312302557222, "learning_rate": 1.0700791635158612e-05, "loss": 0.6895, "step": 4830 }, { "epoch": 0.4934627170582227, "grad_norm": 1.4522327184082269, "learning_rate": 1.069749139800717e-05, "loss": 0.8333, "step": 4831 }, { "epoch": 0.49356486210418793, "grad_norm": 1.4829924388969178, "learning_rate": 1.0694191084514844e-05, "loss": 0.6646, "step": 4832 }, { "epoch": 0.4936670071501532, "grad_norm": 1.4419217783419047, "learning_rate": 1.0690890695042857e-05, "loss": 0.6488, "step": 4833 }, { "epoch": 0.4937691521961185, "grad_norm": 1.512176548555915, "learning_rate": 1.0687590229952435e-05, "loss": 0.7513, "step": 4834 }, { "epoch": 0.4938712972420838, "grad_norm": 1.1797720064519392, "learning_rate": 1.0684289689604824e-05, "loss": 0.6561, "step": 4835 }, { "epoch": 0.493973442288049, "grad_norm": 1.4523244156872483, "learning_rate": 1.0680989074361254e-05, "loss": 0.6626, "step": 4836 }, { "epoch": 0.4940755873340143, "grad_norm": 1.45766887313778, "learning_rate": 1.0677688384582999e-05, "loss": 0.7549, "step": 4837 }, { "epoch": 0.4941777323799796, "grad_norm": 1.2708216693153007, "learning_rate": 1.0674387620631308e-05, "loss": 0.6763, "step": 4838 }, { "epoch": 0.49427987742594487, "grad_norm": 1.5256261761398195, "learning_rate": 1.0671086782867459e-05, "loss": 0.7168, "step": 4839 }, { "epoch": 0.4943820224719101, "grad_norm": 1.3667434140510348, "learning_rate": 1.0667785871652724e-05, "loss": 0.6798, "step": 4840 }, { "epoch": 0.4944841675178754, "grad_norm": 1.4370871043503535, "learning_rate": 1.0664484887348396e-05, "loss": 0.7291, "step": 4841 }, { "epoch": 0.49458631256384067, "grad_norm": 1.4938763119935077, "learning_rate": 1.0661183830315772e-05, "loss": 0.7267, "step": 4842 }, { "epoch": 0.4946884576098059, "grad_norm": 1.4041539157504446, "learning_rate": 1.0657882700916144e-05, "loss": 0.7428, "step": 4843 }, { "epoch": 0.4947906026557712, "grad_norm": 1.562294603340939, "learning_rate": 1.0654581499510832e-05, "loss": 0.8075, "step": 4844 }, { "epoch": 0.49489274770173647, "grad_norm": 1.3141668379802414, "learning_rate": 1.0651280226461154e-05, "loss": 0.6392, "step": 4845 }, { "epoch": 0.49499489274770175, "grad_norm": 1.6195687250195019, "learning_rate": 1.0647978882128431e-05, "loss": 0.7879, "step": 4846 }, { "epoch": 0.495097037793667, "grad_norm": 1.4723794365890113, "learning_rate": 1.0644677466874005e-05, "loss": 0.6849, "step": 4847 }, { "epoch": 0.49519918283963227, "grad_norm": 1.5091505865627393, "learning_rate": 1.064137598105921e-05, "loss": 0.7203, "step": 4848 }, { "epoch": 0.49530132788559755, "grad_norm": 1.3971572198802125, "learning_rate": 1.06380744250454e-05, "loss": 0.6774, "step": 4849 }, { "epoch": 0.49540347293156284, "grad_norm": 1.3490066411686468, "learning_rate": 1.0634772799193933e-05, "loss": 0.6426, "step": 4850 }, { "epoch": 0.49550561797752807, "grad_norm": 1.6673520078428556, "learning_rate": 1.063147110386617e-05, "loss": 0.7834, "step": 4851 }, { "epoch": 0.49560776302349335, "grad_norm": 1.56732445270385, "learning_rate": 1.0628169339423491e-05, "loss": 0.8905, "step": 4852 }, { "epoch": 0.49570990806945864, "grad_norm": 1.4549935971710761, "learning_rate": 1.0624867506227268e-05, "loss": 0.5801, "step": 4853 }, { "epoch": 0.4958120531154239, "grad_norm": 1.388283850227532, "learning_rate": 1.0621565604638897e-05, "loss": 0.6584, "step": 4854 }, { "epoch": 0.49591419816138915, "grad_norm": 1.5954216514003943, "learning_rate": 1.0618263635019765e-05, "loss": 0.7201, "step": 4855 }, { "epoch": 0.49601634320735444, "grad_norm": 1.6140987739629562, "learning_rate": 1.0614961597731279e-05, "loss": 0.6137, "step": 4856 }, { "epoch": 0.4961184882533197, "grad_norm": 1.6153877511894505, "learning_rate": 1.0611659493134852e-05, "loss": 0.688, "step": 4857 }, { "epoch": 0.496220633299285, "grad_norm": 1.4229766413049774, "learning_rate": 1.0608357321591895e-05, "loss": 0.7657, "step": 4858 }, { "epoch": 0.49632277834525024, "grad_norm": 1.4686393847255665, "learning_rate": 1.060505508346384e-05, "loss": 0.6999, "step": 4859 }, { "epoch": 0.4964249233912155, "grad_norm": 1.4235236145380288, "learning_rate": 1.0601752779112114e-05, "loss": 0.7045, "step": 4860 }, { "epoch": 0.4965270684371808, "grad_norm": 1.6074636774244668, "learning_rate": 1.0598450408898154e-05, "loss": 0.8177, "step": 4861 }, { "epoch": 0.4966292134831461, "grad_norm": 1.5537396838109478, "learning_rate": 1.0595147973183416e-05, "loss": 0.6558, "step": 4862 }, { "epoch": 0.4967313585291113, "grad_norm": 1.58141447185381, "learning_rate": 1.0591845472329341e-05, "loss": 0.7581, "step": 4863 }, { "epoch": 0.4968335035750766, "grad_norm": 1.5924444968259097, "learning_rate": 1.0588542906697401e-05, "loss": 0.7916, "step": 4864 }, { "epoch": 0.4969356486210419, "grad_norm": 1.4028515150428233, "learning_rate": 1.0585240276649056e-05, "loss": 0.634, "step": 4865 }, { "epoch": 0.4970377936670072, "grad_norm": 1.2881391287383237, "learning_rate": 1.0581937582545789e-05, "loss": 0.6299, "step": 4866 }, { "epoch": 0.4971399387129724, "grad_norm": 1.5596498619421684, "learning_rate": 1.0578634824749076e-05, "loss": 0.8019, "step": 4867 }, { "epoch": 0.4972420837589377, "grad_norm": 1.52784809772862, "learning_rate": 1.0575332003620406e-05, "loss": 0.7828, "step": 4868 }, { "epoch": 0.497344228804903, "grad_norm": 1.4048209191776697, "learning_rate": 1.057202911952128e-05, "loss": 0.661, "step": 4869 }, { "epoch": 0.4974463738508682, "grad_norm": 1.3789684385809347, "learning_rate": 1.0568726172813192e-05, "loss": 0.595, "step": 4870 }, { "epoch": 0.4975485188968335, "grad_norm": 1.7904796960301383, "learning_rate": 1.0565423163857665e-05, "loss": 0.6617, "step": 4871 }, { "epoch": 0.4976506639427988, "grad_norm": 1.4610251813782338, "learning_rate": 1.05621200930162e-05, "loss": 0.6548, "step": 4872 }, { "epoch": 0.49775280898876406, "grad_norm": 1.5264166869604894, "learning_rate": 1.0558816960650328e-05, "loss": 0.7879, "step": 4873 }, { "epoch": 0.4978549540347293, "grad_norm": 1.6143699378092482, "learning_rate": 1.0555513767121584e-05, "loss": 0.5551, "step": 4874 }, { "epoch": 0.4979570990806946, "grad_norm": 1.4184933991594126, "learning_rate": 1.0552210512791494e-05, "loss": 0.707, "step": 4875 }, { "epoch": 0.49805924412665986, "grad_norm": 1.5794379866408286, "learning_rate": 1.054890719802161e-05, "loss": 0.7828, "step": 4876 }, { "epoch": 0.49816138917262515, "grad_norm": 1.4793178794299249, "learning_rate": 1.0545603823173479e-05, "loss": 0.6561, "step": 4877 }, { "epoch": 0.4982635342185904, "grad_norm": 1.505283604688199, "learning_rate": 1.0542300388608652e-05, "loss": 0.688, "step": 4878 }, { "epoch": 0.49836567926455566, "grad_norm": 1.426099532389924, "learning_rate": 1.0538996894688702e-05, "loss": 0.7356, "step": 4879 }, { "epoch": 0.49846782431052095, "grad_norm": 1.5312345109031216, "learning_rate": 1.0535693341775191e-05, "loss": 0.6673, "step": 4880 }, { "epoch": 0.49856996935648623, "grad_norm": 1.4687782552468387, "learning_rate": 1.05323897302297e-05, "loss": 0.6117, "step": 4881 }, { "epoch": 0.49867211440245146, "grad_norm": 1.474759458799634, "learning_rate": 1.0529086060413807e-05, "loss": 0.6331, "step": 4882 }, { "epoch": 0.49877425944841675, "grad_norm": 1.4482382064876158, "learning_rate": 1.0525782332689103e-05, "loss": 0.8164, "step": 4883 }, { "epoch": 0.49887640449438203, "grad_norm": 1.3576308022243069, "learning_rate": 1.0522478547417183e-05, "loss": 0.654, "step": 4884 }, { "epoch": 0.4989785495403473, "grad_norm": 1.5239224402985214, "learning_rate": 1.051917470495965e-05, "loss": 0.7367, "step": 4885 }, { "epoch": 0.49908069458631255, "grad_norm": 1.432211060367834, "learning_rate": 1.051587080567811e-05, "loss": 0.6996, "step": 4886 }, { "epoch": 0.49918283963227783, "grad_norm": 1.3262176980758462, "learning_rate": 1.0512566849934174e-05, "loss": 0.7014, "step": 4887 }, { "epoch": 0.4992849846782431, "grad_norm": 1.4379308482825146, "learning_rate": 1.0509262838089467e-05, "loss": 0.6592, "step": 4888 }, { "epoch": 0.4993871297242084, "grad_norm": 1.3989492676618582, "learning_rate": 1.0505958770505615e-05, "loss": 0.7151, "step": 4889 }, { "epoch": 0.49948927477017363, "grad_norm": 1.4860482507383963, "learning_rate": 1.0502654647544246e-05, "loss": 0.8067, "step": 4890 }, { "epoch": 0.4995914198161389, "grad_norm": 1.4374733133306616, "learning_rate": 1.0499350469567005e-05, "loss": 0.7365, "step": 4891 }, { "epoch": 0.4996935648621042, "grad_norm": 1.4646893849155718, "learning_rate": 1.0496046236935529e-05, "loss": 0.7654, "step": 4892 }, { "epoch": 0.4997957099080695, "grad_norm": 1.5992299689918235, "learning_rate": 1.0492741950011472e-05, "loss": 0.641, "step": 4893 }, { "epoch": 0.4998978549540347, "grad_norm": 1.5042865341850593, "learning_rate": 1.0489437609156491e-05, "loss": 0.6639, "step": 4894 }, { "epoch": 0.5, "grad_norm": 1.4546188010857557, "learning_rate": 1.0486133214732249e-05, "loss": 0.7501, "step": 4895 }, { "epoch": 0.5001021450459653, "grad_norm": 1.5152444667622122, "learning_rate": 1.0482828767100409e-05, "loss": 0.6712, "step": 4896 }, { "epoch": 0.5002042900919306, "grad_norm": 1.544586856066606, "learning_rate": 1.0479524266622649e-05, "loss": 0.7819, "step": 4897 }, { "epoch": 0.5003064351378959, "grad_norm": 1.3279587256301781, "learning_rate": 1.047621971366065e-05, "loss": 0.6556, "step": 4898 }, { "epoch": 0.500408580183861, "grad_norm": 1.5120638638889898, "learning_rate": 1.0472915108576095e-05, "loss": 0.7279, "step": 4899 }, { "epoch": 0.5005107252298263, "grad_norm": 1.4673202849906533, "learning_rate": 1.0469610451730676e-05, "loss": 0.6355, "step": 4900 }, { "epoch": 0.5006128702757916, "grad_norm": 1.3106722606890313, "learning_rate": 1.0466305743486092e-05, "loss": 0.7234, "step": 4901 }, { "epoch": 0.5007150153217569, "grad_norm": 1.379474125411674, "learning_rate": 1.0463000984204039e-05, "loss": 0.6966, "step": 4902 }, { "epoch": 0.5008171603677222, "grad_norm": 1.451572504783514, "learning_rate": 1.0459696174246232e-05, "loss": 0.7035, "step": 4903 }, { "epoch": 0.5009193054136875, "grad_norm": 1.4451819948964828, "learning_rate": 1.045639131397438e-05, "loss": 0.6769, "step": 4904 }, { "epoch": 0.5010214504596527, "grad_norm": 1.3123760456294886, "learning_rate": 1.0453086403750203e-05, "loss": 0.6675, "step": 4905 }, { "epoch": 0.501123595505618, "grad_norm": 1.5329469025217979, "learning_rate": 1.044978144393543e-05, "loss": 0.7072, "step": 4906 }, { "epoch": 0.5012257405515832, "grad_norm": 1.3762813283503292, "learning_rate": 1.0446476434891786e-05, "loss": 0.7396, "step": 4907 }, { "epoch": 0.5013278855975485, "grad_norm": 1.468183756462138, "learning_rate": 1.0443171376981004e-05, "loss": 0.6932, "step": 4908 }, { "epoch": 0.5014300306435138, "grad_norm": 1.490695622732649, "learning_rate": 1.043986627056483e-05, "loss": 0.866, "step": 4909 }, { "epoch": 0.501532175689479, "grad_norm": 1.3494003225416509, "learning_rate": 1.0436561116005012e-05, "loss": 0.7159, "step": 4910 }, { "epoch": 0.5016343207354443, "grad_norm": 1.5782833684893363, "learning_rate": 1.0433255913663299e-05, "loss": 0.7814, "step": 4911 }, { "epoch": 0.5017364657814096, "grad_norm": 1.4559434365130153, "learning_rate": 1.042995066390144e-05, "loss": 0.7889, "step": 4912 }, { "epoch": 0.5018386108273749, "grad_norm": 1.3447130881204652, "learning_rate": 1.0426645367081207e-05, "loss": 0.6462, "step": 4913 }, { "epoch": 0.5019407558733402, "grad_norm": 1.4867002540751066, "learning_rate": 1.0423340023564362e-05, "loss": 0.7794, "step": 4914 }, { "epoch": 0.5020429009193054, "grad_norm": 1.3777104723035158, "learning_rate": 1.0420034633712678e-05, "loss": 0.5601, "step": 4915 }, { "epoch": 0.5021450459652707, "grad_norm": 1.6107935028423546, "learning_rate": 1.041672919788793e-05, "loss": 0.7419, "step": 4916 }, { "epoch": 0.5022471910112359, "grad_norm": 1.3720817188994545, "learning_rate": 1.0413423716451904e-05, "loss": 0.84, "step": 4917 }, { "epoch": 0.5023493360572012, "grad_norm": 1.3850543537431321, "learning_rate": 1.0410118189766386e-05, "loss": 0.7248, "step": 4918 }, { "epoch": 0.5024514811031665, "grad_norm": 1.4342716840740544, "learning_rate": 1.0406812618193167e-05, "loss": 0.772, "step": 4919 }, { "epoch": 0.5025536261491318, "grad_norm": 1.4507620267116055, "learning_rate": 1.0403507002094042e-05, "loss": 0.7656, "step": 4920 }, { "epoch": 0.5026557711950971, "grad_norm": 1.6435212028974366, "learning_rate": 1.0400201341830819e-05, "loss": 0.7457, "step": 4921 }, { "epoch": 0.5027579162410624, "grad_norm": 1.3675105690159028, "learning_rate": 1.0396895637765296e-05, "loss": 0.6451, "step": 4922 }, { "epoch": 0.5028600612870275, "grad_norm": 1.433988583491808, "learning_rate": 1.0393589890259293e-05, "loss": 0.7845, "step": 4923 }, { "epoch": 0.5029622063329928, "grad_norm": 1.5062193639595813, "learning_rate": 1.0390284099674616e-05, "loss": 0.7176, "step": 4924 }, { "epoch": 0.5030643513789581, "grad_norm": 1.3570578777574203, "learning_rate": 1.03869782663731e-05, "loss": 0.692, "step": 4925 }, { "epoch": 0.5031664964249234, "grad_norm": 1.4613805145147096, "learning_rate": 1.0383672390716558e-05, "loss": 0.6892, "step": 4926 }, { "epoch": 0.5032686414708887, "grad_norm": 1.5143175154008095, "learning_rate": 1.0380366473066827e-05, "loss": 0.6788, "step": 4927 }, { "epoch": 0.503370786516854, "grad_norm": 1.675952757098426, "learning_rate": 1.0377060513785737e-05, "loss": 0.809, "step": 4928 }, { "epoch": 0.5034729315628192, "grad_norm": 1.4993703810925254, "learning_rate": 1.0373754513235133e-05, "loss": 0.6875, "step": 4929 }, { "epoch": 0.5035750766087844, "grad_norm": 1.5802161628037208, "learning_rate": 1.0370448471776855e-05, "loss": 0.7455, "step": 4930 }, { "epoch": 0.5036772216547497, "grad_norm": 1.603586527790416, "learning_rate": 1.036714238977275e-05, "loss": 0.7098, "step": 4931 }, { "epoch": 0.503779366700715, "grad_norm": 1.2619364110867464, "learning_rate": 1.0363836267584676e-05, "loss": 0.6646, "step": 4932 }, { "epoch": 0.5038815117466803, "grad_norm": 1.5173364633899913, "learning_rate": 1.0360530105574489e-05, "loss": 0.7818, "step": 4933 }, { "epoch": 0.5039836567926456, "grad_norm": 1.4656151646868276, "learning_rate": 1.0357223904104046e-05, "loss": 0.7426, "step": 4934 }, { "epoch": 0.5040858018386108, "grad_norm": 1.4756222970278228, "learning_rate": 1.0353917663535218e-05, "loss": 0.7553, "step": 4935 }, { "epoch": 0.5041879468845761, "grad_norm": 1.5365094330757276, "learning_rate": 1.035061138422987e-05, "loss": 0.7008, "step": 4936 }, { "epoch": 0.5042900919305414, "grad_norm": 1.5305251066473442, "learning_rate": 1.0347305066549881e-05, "loss": 0.6681, "step": 4937 }, { "epoch": 0.5043922369765066, "grad_norm": 1.5394322978267003, "learning_rate": 1.0343998710857133e-05, "loss": 0.729, "step": 4938 }, { "epoch": 0.5044943820224719, "grad_norm": 1.3518139424256863, "learning_rate": 1.0340692317513496e-05, "loss": 0.6911, "step": 4939 }, { "epoch": 0.5045965270684372, "grad_norm": 1.4602378033555292, "learning_rate": 1.0337385886880868e-05, "loss": 0.7311, "step": 4940 }, { "epoch": 0.5046986721144024, "grad_norm": 1.3388402677704312, "learning_rate": 1.0334079419321137e-05, "loss": 0.6621, "step": 4941 }, { "epoch": 0.5048008171603677, "grad_norm": 1.4917349926698051, "learning_rate": 1.0330772915196199e-05, "loss": 0.7844, "step": 4942 }, { "epoch": 0.504902962206333, "grad_norm": 1.375921210815757, "learning_rate": 1.0327466374867949e-05, "loss": 0.6364, "step": 4943 }, { "epoch": 0.5050051072522983, "grad_norm": 1.4588566545498547, "learning_rate": 1.0324159798698294e-05, "loss": 0.7213, "step": 4944 }, { "epoch": 0.5051072522982636, "grad_norm": 1.5953367300601635, "learning_rate": 1.032085318704914e-05, "loss": 0.6758, "step": 4945 }, { "epoch": 0.5052093973442288, "grad_norm": 1.5323012582712892, "learning_rate": 1.0317546540282396e-05, "loss": 0.8859, "step": 4946 }, { "epoch": 0.505311542390194, "grad_norm": 1.4655779901171861, "learning_rate": 1.031423985875998e-05, "loss": 0.7718, "step": 4947 }, { "epoch": 0.5054136874361593, "grad_norm": 1.5301276525679626, "learning_rate": 1.0310933142843809e-05, "loss": 0.7237, "step": 4948 }, { "epoch": 0.5055158324821246, "grad_norm": 1.444305828810855, "learning_rate": 1.0307626392895803e-05, "loss": 0.8306, "step": 4949 }, { "epoch": 0.5056179775280899, "grad_norm": 1.4782140246571118, "learning_rate": 1.0304319609277888e-05, "loss": 0.7849, "step": 4950 }, { "epoch": 0.5057201225740552, "grad_norm": 1.3147184150489735, "learning_rate": 1.0301012792351996e-05, "loss": 0.5421, "step": 4951 }, { "epoch": 0.5058222676200205, "grad_norm": 1.4724871708066771, "learning_rate": 1.029770594248006e-05, "loss": 0.8173, "step": 4952 }, { "epoch": 0.5059244126659856, "grad_norm": 1.512684641290524, "learning_rate": 1.0294399060024016e-05, "loss": 0.7175, "step": 4953 }, { "epoch": 0.5060265577119509, "grad_norm": 1.2904526686948803, "learning_rate": 1.0291092145345807e-05, "loss": 0.6908, "step": 4954 }, { "epoch": 0.5061287027579162, "grad_norm": 1.4854167124457194, "learning_rate": 1.0287785198807375e-05, "loss": 0.715, "step": 4955 }, { "epoch": 0.5062308478038815, "grad_norm": 1.4629714050175426, "learning_rate": 1.028447822077066e-05, "loss": 0.7592, "step": 4956 }, { "epoch": 0.5063329928498468, "grad_norm": 1.5046595661930975, "learning_rate": 1.0281171211597627e-05, "loss": 0.7038, "step": 4957 }, { "epoch": 0.5064351378958121, "grad_norm": 1.4931251335015692, "learning_rate": 1.027786417165022e-05, "loss": 0.7657, "step": 4958 }, { "epoch": 0.5065372829417774, "grad_norm": 1.663608267459015, "learning_rate": 1.0274557101290401e-05, "loss": 0.7124, "step": 4959 }, { "epoch": 0.5066394279877426, "grad_norm": 1.4416808762325581, "learning_rate": 1.027125000088013e-05, "loss": 0.7471, "step": 4960 }, { "epoch": 0.5067415730337078, "grad_norm": 1.4715377292282, "learning_rate": 1.026794287078137e-05, "loss": 0.6257, "step": 4961 }, { "epoch": 0.5068437180796731, "grad_norm": 1.4012413112055653, "learning_rate": 1.0264635711356093e-05, "loss": 0.6328, "step": 4962 }, { "epoch": 0.5069458631256384, "grad_norm": 1.5136684014549189, "learning_rate": 1.026132852296626e-05, "loss": 0.7595, "step": 4963 }, { "epoch": 0.5070480081716037, "grad_norm": 1.5072325247604184, "learning_rate": 1.0258021305973855e-05, "loss": 0.6832, "step": 4964 }, { "epoch": 0.507150153217569, "grad_norm": 1.5772436889389359, "learning_rate": 1.0254714060740853e-05, "loss": 0.8105, "step": 4965 }, { "epoch": 0.5072522982635342, "grad_norm": 1.541917004939391, "learning_rate": 1.0251406787629232e-05, "loss": 0.7555, "step": 4966 }, { "epoch": 0.5073544433094995, "grad_norm": 1.5016372865708516, "learning_rate": 1.0248099487000975e-05, "loss": 0.6747, "step": 4967 }, { "epoch": 0.5074565883554648, "grad_norm": 1.5279678030568928, "learning_rate": 1.0244792159218066e-05, "loss": 0.7458, "step": 4968 }, { "epoch": 0.50755873340143, "grad_norm": 1.4293467953367496, "learning_rate": 1.02414848046425e-05, "loss": 0.7063, "step": 4969 }, { "epoch": 0.5076608784473953, "grad_norm": 1.5631387792664038, "learning_rate": 1.0238177423636266e-05, "loss": 0.6517, "step": 4970 }, { "epoch": 0.5077630234933606, "grad_norm": 1.4832788572734437, "learning_rate": 1.023487001656136e-05, "loss": 0.7222, "step": 4971 }, { "epoch": 0.5078651685393258, "grad_norm": 1.4881679059526838, "learning_rate": 1.0231562583779778e-05, "loss": 0.725, "step": 4972 }, { "epoch": 0.5079673135852911, "grad_norm": 1.3904776509961085, "learning_rate": 1.022825512565352e-05, "loss": 0.6542, "step": 4973 }, { "epoch": 0.5080694586312564, "grad_norm": 1.4455967349824477, "learning_rate": 1.0224947642544594e-05, "loss": 0.6871, "step": 4974 }, { "epoch": 0.5081716036772217, "grad_norm": 1.355783401547785, "learning_rate": 1.0221640134815e-05, "loss": 0.7243, "step": 4975 }, { "epoch": 0.508273748723187, "grad_norm": 1.4043945277991996, "learning_rate": 1.0218332602826751e-05, "loss": 0.7347, "step": 4976 }, { "epoch": 0.5083758937691522, "grad_norm": 1.4324937209978437, "learning_rate": 1.021502504694186e-05, "loss": 0.6995, "step": 4977 }, { "epoch": 0.5084780388151174, "grad_norm": 1.4246427234437187, "learning_rate": 1.0211717467522335e-05, "loss": 0.7015, "step": 4978 }, { "epoch": 0.5085801838610827, "grad_norm": 1.552543933066069, "learning_rate": 1.02084098649302e-05, "loss": 0.7515, "step": 4979 }, { "epoch": 0.508682328907048, "grad_norm": 1.563332453309511, "learning_rate": 1.0205102239527467e-05, "loss": 0.7735, "step": 4980 }, { "epoch": 0.5087844739530133, "grad_norm": 1.6258730233149554, "learning_rate": 1.0201794591676164e-05, "loss": 0.7516, "step": 4981 }, { "epoch": 0.5088866189989786, "grad_norm": 1.3079701718078196, "learning_rate": 1.0198486921738313e-05, "loss": 0.5923, "step": 4982 }, { "epoch": 0.5089887640449439, "grad_norm": 1.3943874781658374, "learning_rate": 1.0195179230075937e-05, "loss": 0.687, "step": 4983 }, { "epoch": 0.509090909090909, "grad_norm": 1.33725374530559, "learning_rate": 1.0191871517051072e-05, "loss": 0.7005, "step": 4984 }, { "epoch": 0.5091930541368743, "grad_norm": 1.4763386045288456, "learning_rate": 1.0188563783025742e-05, "loss": 0.7885, "step": 4985 }, { "epoch": 0.5092951991828396, "grad_norm": 1.298126020394323, "learning_rate": 1.0185256028361987e-05, "loss": 0.6568, "step": 4986 }, { "epoch": 0.5093973442288049, "grad_norm": 1.4426594538511717, "learning_rate": 1.0181948253421839e-05, "loss": 0.6926, "step": 4987 }, { "epoch": 0.5094994892747702, "grad_norm": 1.3713714818637979, "learning_rate": 1.0178640458567334e-05, "loss": 0.7149, "step": 4988 }, { "epoch": 0.5096016343207355, "grad_norm": 1.593644299862376, "learning_rate": 1.0175332644160521e-05, "loss": 0.7229, "step": 4989 }, { "epoch": 0.5097037793667007, "grad_norm": 1.4474671841797542, "learning_rate": 1.0172024810563435e-05, "loss": 0.7814, "step": 4990 }, { "epoch": 0.509805924412666, "grad_norm": 1.5879352394758206, "learning_rate": 1.016871695813812e-05, "loss": 0.809, "step": 4991 }, { "epoch": 0.5099080694586312, "grad_norm": 1.5684126035661095, "learning_rate": 1.0165409087246627e-05, "loss": 0.6729, "step": 4992 }, { "epoch": 0.5100102145045965, "grad_norm": 1.4508443582389976, "learning_rate": 1.0162101198251002e-05, "loss": 0.7103, "step": 4993 }, { "epoch": 0.5101123595505618, "grad_norm": 1.5391466259624145, "learning_rate": 1.0158793291513296e-05, "loss": 0.6897, "step": 4994 }, { "epoch": 0.5102145045965271, "grad_norm": 1.5408442295054745, "learning_rate": 1.015548536739556e-05, "loss": 0.645, "step": 4995 }, { "epoch": 0.5103166496424923, "grad_norm": 1.4203063400317217, "learning_rate": 1.0152177426259852e-05, "loss": 0.7228, "step": 4996 }, { "epoch": 0.5104187946884576, "grad_norm": 1.5422079458619808, "learning_rate": 1.0148869468468225e-05, "loss": 0.7558, "step": 4997 }, { "epoch": 0.5105209397344229, "grad_norm": 1.5420226479208468, "learning_rate": 1.0145561494382743e-05, "loss": 0.7381, "step": 4998 }, { "epoch": 0.5106230847803882, "grad_norm": 1.4938624075509517, "learning_rate": 1.0142253504365458e-05, "loss": 0.6824, "step": 4999 }, { "epoch": 0.5107252298263534, "grad_norm": 1.661207342991121, "learning_rate": 1.0138945498778433e-05, "loss": 0.7235, "step": 5000 }, { "epoch": 0.5108273748723187, "grad_norm": 1.3066948050772274, "learning_rate": 1.0135637477983738e-05, "loss": 0.6311, "step": 5001 }, { "epoch": 0.510929519918284, "grad_norm": 1.471095721592753, "learning_rate": 1.013232944234343e-05, "loss": 0.7429, "step": 5002 }, { "epoch": 0.5110316649642492, "grad_norm": 1.414695509550472, "learning_rate": 1.012902139221958e-05, "loss": 0.6409, "step": 5003 }, { "epoch": 0.5111338100102145, "grad_norm": 1.4341900720333762, "learning_rate": 1.0125713327974253e-05, "loss": 0.7215, "step": 5004 }, { "epoch": 0.5112359550561798, "grad_norm": 1.5705947198090602, "learning_rate": 1.0122405249969525e-05, "loss": 0.7216, "step": 5005 }, { "epoch": 0.5113381001021451, "grad_norm": 1.343164978296261, "learning_rate": 1.011909715856746e-05, "loss": 0.7009, "step": 5006 }, { "epoch": 0.5114402451481103, "grad_norm": 1.487391374263533, "learning_rate": 1.0115789054130136e-05, "loss": 0.6737, "step": 5007 }, { "epoch": 0.5115423901940755, "grad_norm": 1.6439198424027581, "learning_rate": 1.0112480937019624e-05, "loss": 0.7974, "step": 5008 }, { "epoch": 0.5116445352400408, "grad_norm": 1.378566787593172, "learning_rate": 1.0109172807598005e-05, "loss": 0.6539, "step": 5009 }, { "epoch": 0.5117466802860061, "grad_norm": 1.5869236655144923, "learning_rate": 1.0105864666227345e-05, "loss": 0.6963, "step": 5010 }, { "epoch": 0.5118488253319714, "grad_norm": 1.5379285055702219, "learning_rate": 1.0102556513269735e-05, "loss": 0.8451, "step": 5011 }, { "epoch": 0.5119509703779367, "grad_norm": 1.3723288999572696, "learning_rate": 1.0099248349087242e-05, "loss": 0.6843, "step": 5012 }, { "epoch": 0.512053115423902, "grad_norm": 1.3878120551259336, "learning_rate": 1.0095940174041959e-05, "loss": 0.6514, "step": 5013 }, { "epoch": 0.5121552604698673, "grad_norm": 1.4770768181131304, "learning_rate": 1.0092631988495957e-05, "loss": 0.7153, "step": 5014 }, { "epoch": 0.5122574055158324, "grad_norm": 1.5242024814533823, "learning_rate": 1.0089323792811329e-05, "loss": 0.778, "step": 5015 }, { "epoch": 0.5123595505617977, "grad_norm": 1.5194101112736569, "learning_rate": 1.0086015587350151e-05, "loss": 0.6832, "step": 5016 }, { "epoch": 0.512461695607763, "grad_norm": 1.4404030368731784, "learning_rate": 1.0082707372474512e-05, "loss": 0.7597, "step": 5017 }, { "epoch": 0.5125638406537283, "grad_norm": 1.5012777644700137, "learning_rate": 1.0079399148546498e-05, "loss": 0.7031, "step": 5018 }, { "epoch": 0.5126659856996936, "grad_norm": 1.5597456050725038, "learning_rate": 1.0076090915928194e-05, "loss": 0.7478, "step": 5019 }, { "epoch": 0.5127681307456589, "grad_norm": 1.355423371053339, "learning_rate": 1.007278267498169e-05, "loss": 0.6733, "step": 5020 }, { "epoch": 0.5128702757916241, "grad_norm": 1.6261570273539918, "learning_rate": 1.006947442606908e-05, "loss": 0.7526, "step": 5021 }, { "epoch": 0.5129724208375894, "grad_norm": 1.5866921582001485, "learning_rate": 1.0066166169552444e-05, "loss": 0.7462, "step": 5022 }, { "epoch": 0.5130745658835546, "grad_norm": 1.411489396812108, "learning_rate": 1.0062857905793883e-05, "loss": 0.6992, "step": 5023 }, { "epoch": 0.5131767109295199, "grad_norm": 1.526969075607703, "learning_rate": 1.0059549635155477e-05, "loss": 0.6917, "step": 5024 }, { "epoch": 0.5132788559754852, "grad_norm": 1.5264803022624562, "learning_rate": 1.005624135799933e-05, "loss": 0.6993, "step": 5025 }, { "epoch": 0.5133810010214505, "grad_norm": 1.5105370698861917, "learning_rate": 1.005293307468753e-05, "loss": 0.7261, "step": 5026 }, { "epoch": 0.5134831460674157, "grad_norm": 1.654678278100486, "learning_rate": 1.0049624785582169e-05, "loss": 0.7391, "step": 5027 }, { "epoch": 0.513585291113381, "grad_norm": 1.5998962492844213, "learning_rate": 1.0046316491045343e-05, "loss": 0.829, "step": 5028 }, { "epoch": 0.5136874361593463, "grad_norm": 1.4427022931432851, "learning_rate": 1.0043008191439147e-05, "loss": 0.6785, "step": 5029 }, { "epoch": 0.5137895812053116, "grad_norm": 1.4729968855420275, "learning_rate": 1.0039699887125678e-05, "loss": 0.758, "step": 5030 }, { "epoch": 0.5138917262512768, "grad_norm": 1.3596378548166397, "learning_rate": 1.0036391578467031e-05, "loss": 0.7359, "step": 5031 }, { "epoch": 0.513993871297242, "grad_norm": 1.6382597227279492, "learning_rate": 1.0033083265825301e-05, "loss": 0.7313, "step": 5032 }, { "epoch": 0.5140960163432073, "grad_norm": 1.5056283480443384, "learning_rate": 1.0029774949562588e-05, "loss": 0.7083, "step": 5033 }, { "epoch": 0.5141981613891726, "grad_norm": 1.516225681957321, "learning_rate": 1.0026466630040984e-05, "loss": 0.7495, "step": 5034 }, { "epoch": 0.5143003064351379, "grad_norm": 1.5442895582633052, "learning_rate": 1.0023158307622594e-05, "loss": 0.7819, "step": 5035 }, { "epoch": 0.5144024514811032, "grad_norm": 1.3092253371373535, "learning_rate": 1.001984998266951e-05, "loss": 0.736, "step": 5036 }, { "epoch": 0.5145045965270685, "grad_norm": 1.5615601268186117, "learning_rate": 1.0016541655543833e-05, "loss": 0.6946, "step": 5037 }, { "epoch": 0.5146067415730337, "grad_norm": 1.4313658374837899, "learning_rate": 1.001323332660766e-05, "loss": 0.6383, "step": 5038 }, { "epoch": 0.5147088866189989, "grad_norm": 1.466079894321012, "learning_rate": 1.0009924996223093e-05, "loss": 0.6764, "step": 5039 }, { "epoch": 0.5148110316649642, "grad_norm": 1.5032568986496178, "learning_rate": 1.0006616664752227e-05, "loss": 0.737, "step": 5040 }, { "epoch": 0.5149131767109295, "grad_norm": 1.4381561145015749, "learning_rate": 1.0003308332557163e-05, "loss": 0.6853, "step": 5041 }, { "epoch": 0.5150153217568948, "grad_norm": 1.514054188961378, "learning_rate": 1e-05, "loss": 0.7041, "step": 5042 }, { "epoch": 0.5151174668028601, "grad_norm": 1.5407023890316274, "learning_rate": 9.99669166744284e-06, "loss": 0.7084, "step": 5043 }, { "epoch": 0.5152196118488254, "grad_norm": 1.4027229541091284, "learning_rate": 9.993383335247777e-06, "loss": 0.6677, "step": 5044 }, { "epoch": 0.5153217568947907, "grad_norm": 1.5254464612568175, "learning_rate": 9.990075003776913e-06, "loss": 0.683, "step": 5045 }, { "epoch": 0.5154239019407558, "grad_norm": 1.6714994435999713, "learning_rate": 9.986766673392344e-06, "loss": 0.6915, "step": 5046 }, { "epoch": 0.5155260469867211, "grad_norm": 1.5161590193303969, "learning_rate": 9.983458344456169e-06, "loss": 0.7425, "step": 5047 }, { "epoch": 0.5156281920326864, "grad_norm": 1.5545020095786672, "learning_rate": 9.980150017330494e-06, "loss": 0.7354, "step": 5048 }, { "epoch": 0.5157303370786517, "grad_norm": 1.4609707613585636, "learning_rate": 9.976841692377409e-06, "loss": 0.6889, "step": 5049 }, { "epoch": 0.515832482124617, "grad_norm": 1.4382173263031521, "learning_rate": 9.973533369959018e-06, "loss": 0.703, "step": 5050 }, { "epoch": 0.5159346271705822, "grad_norm": 1.5368490777179438, "learning_rate": 9.970225050437417e-06, "loss": 0.7613, "step": 5051 }, { "epoch": 0.5160367722165475, "grad_norm": 1.33728049232719, "learning_rate": 9.966916734174702e-06, "loss": 0.7442, "step": 5052 }, { "epoch": 0.5161389172625128, "grad_norm": 1.5343228406577771, "learning_rate": 9.963608421532972e-06, "loss": 0.6912, "step": 5053 }, { "epoch": 0.516241062308478, "grad_norm": 1.4562095962932375, "learning_rate": 9.960300112874327e-06, "loss": 0.819, "step": 5054 }, { "epoch": 0.5163432073544433, "grad_norm": 1.3758092454252902, "learning_rate": 9.956991808560855e-06, "loss": 0.6927, "step": 5055 }, { "epoch": 0.5164453524004086, "grad_norm": 1.461762282121934, "learning_rate": 9.953683508954659e-06, "loss": 0.7272, "step": 5056 }, { "epoch": 0.5165474974463738, "grad_norm": 1.4588567569296538, "learning_rate": 9.950375214417833e-06, "loss": 0.6181, "step": 5057 }, { "epoch": 0.5166496424923391, "grad_norm": 1.509143891973364, "learning_rate": 9.947066925312472e-06, "loss": 0.8049, "step": 5058 }, { "epoch": 0.5167517875383044, "grad_norm": 1.472274964002463, "learning_rate": 9.943758642000673e-06, "loss": 0.7034, "step": 5059 }, { "epoch": 0.5168539325842697, "grad_norm": 1.5356466968300069, "learning_rate": 9.940450364844525e-06, "loss": 0.7741, "step": 5060 }, { "epoch": 0.5169560776302349, "grad_norm": 1.6163531903171955, "learning_rate": 9.937142094206122e-06, "loss": 0.8499, "step": 5061 }, { "epoch": 0.5170582226762002, "grad_norm": 1.3900653036177841, "learning_rate": 9.93383383044756e-06, "loss": 0.6009, "step": 5062 }, { "epoch": 0.5171603677221654, "grad_norm": 1.4816395478688142, "learning_rate": 9.930525573930922e-06, "loss": 0.7708, "step": 5063 }, { "epoch": 0.5172625127681307, "grad_norm": 1.3870062707823916, "learning_rate": 9.927217325018309e-06, "loss": 0.6804, "step": 5064 }, { "epoch": 0.517364657814096, "grad_norm": 1.4878997443143618, "learning_rate": 9.923909084071808e-06, "loss": 0.6627, "step": 5065 }, { "epoch": 0.5174668028600613, "grad_norm": 1.3972291301998636, "learning_rate": 9.920600851453505e-06, "loss": 0.7243, "step": 5066 }, { "epoch": 0.5175689479060266, "grad_norm": 1.5377944516763358, "learning_rate": 9.917292627525493e-06, "loss": 0.6708, "step": 5067 }, { "epoch": 0.5176710929519919, "grad_norm": 1.4612159782052174, "learning_rate": 9.913984412649852e-06, "loss": 0.642, "step": 5068 }, { "epoch": 0.517773237997957, "grad_norm": 1.4930830348889437, "learning_rate": 9.910676207188676e-06, "loss": 0.6929, "step": 5069 }, { "epoch": 0.5178753830439223, "grad_norm": 1.6392874611336676, "learning_rate": 9.907368011504044e-06, "loss": 0.7594, "step": 5070 }, { "epoch": 0.5179775280898876, "grad_norm": 1.3046372773311206, "learning_rate": 9.904059825958043e-06, "loss": 0.7213, "step": 5071 }, { "epoch": 0.5180796731358529, "grad_norm": 1.5750731969422274, "learning_rate": 9.90075165091276e-06, "loss": 0.7149, "step": 5072 }, { "epoch": 0.5181818181818182, "grad_norm": 1.4760942067561031, "learning_rate": 9.897443486730268e-06, "loss": 0.7305, "step": 5073 }, { "epoch": 0.5182839632277835, "grad_norm": 1.399452614218617, "learning_rate": 9.894135333772657e-06, "loss": 0.6304, "step": 5074 }, { "epoch": 0.5183861082737488, "grad_norm": 1.606809156412251, "learning_rate": 9.890827192402e-06, "loss": 0.7522, "step": 5075 }, { "epoch": 0.518488253319714, "grad_norm": 1.368347899673904, "learning_rate": 9.88751906298038e-06, "loss": 0.7237, "step": 5076 }, { "epoch": 0.5185903983656792, "grad_norm": 1.4199384682758185, "learning_rate": 9.88421094586987e-06, "loss": 0.8127, "step": 5077 }, { "epoch": 0.5186925434116445, "grad_norm": 1.4552173781406839, "learning_rate": 9.880902841432544e-06, "loss": 0.6946, "step": 5078 }, { "epoch": 0.5187946884576098, "grad_norm": 1.4544203174762038, "learning_rate": 9.877594750030477e-06, "loss": 0.6478, "step": 5079 }, { "epoch": 0.5188968335035751, "grad_norm": 1.5768149025467835, "learning_rate": 9.874286672025749e-06, "loss": 0.7469, "step": 5080 }, { "epoch": 0.5189989785495404, "grad_norm": 1.44510525526849, "learning_rate": 9.870978607780423e-06, "loss": 0.7796, "step": 5081 }, { "epoch": 0.5191011235955056, "grad_norm": 1.5508701880248372, "learning_rate": 9.867670557656575e-06, "loss": 0.6764, "step": 5082 }, { "epoch": 0.5192032686414709, "grad_norm": 1.1860944094652088, "learning_rate": 9.864362522016266e-06, "loss": 0.6131, "step": 5083 }, { "epoch": 0.5193054136874362, "grad_norm": 1.3638236151727117, "learning_rate": 9.861054501221569e-06, "loss": 0.6832, "step": 5084 }, { "epoch": 0.5194075587334014, "grad_norm": 1.4411377216565255, "learning_rate": 9.857746495634547e-06, "loss": 0.7492, "step": 5085 }, { "epoch": 0.5195097037793667, "grad_norm": 1.6104760989757423, "learning_rate": 9.854438505617264e-06, "loss": 0.6986, "step": 5086 }, { "epoch": 0.519611848825332, "grad_norm": 1.538034773615667, "learning_rate": 9.851130531531775e-06, "loss": 0.7912, "step": 5087 }, { "epoch": 0.5197139938712972, "grad_norm": 1.4058910173464245, "learning_rate": 9.847822573740148e-06, "loss": 0.7131, "step": 5088 }, { "epoch": 0.5198161389172625, "grad_norm": 1.4674621166643893, "learning_rate": 9.844514632604441e-06, "loss": 0.6725, "step": 5089 }, { "epoch": 0.5199182839632278, "grad_norm": 1.4618057673559015, "learning_rate": 9.841206708486705e-06, "loss": 0.5635, "step": 5090 }, { "epoch": 0.5200204290091931, "grad_norm": 1.5492083622202624, "learning_rate": 9.837898801749001e-06, "loss": 0.6652, "step": 5091 }, { "epoch": 0.5201225740551583, "grad_norm": 1.46502543553936, "learning_rate": 9.834590912753376e-06, "loss": 0.7195, "step": 5092 }, { "epoch": 0.5202247191011236, "grad_norm": 1.489313570097098, "learning_rate": 9.831283041861883e-06, "loss": 0.7057, "step": 5093 }, { "epoch": 0.5203268641470888, "grad_norm": 1.440211829884361, "learning_rate": 9.827975189436572e-06, "loss": 0.7159, "step": 5094 }, { "epoch": 0.5204290091930541, "grad_norm": 1.6593988218012934, "learning_rate": 9.824667355839479e-06, "loss": 0.7569, "step": 5095 }, { "epoch": 0.5205311542390194, "grad_norm": 1.4306533723228798, "learning_rate": 9.821359541432664e-06, "loss": 0.7001, "step": 5096 }, { "epoch": 0.5206332992849847, "grad_norm": 1.598567843413237, "learning_rate": 9.818051746578165e-06, "loss": 0.7744, "step": 5097 }, { "epoch": 0.52073544433095, "grad_norm": 1.4634821203353316, "learning_rate": 9.814743971638016e-06, "loss": 0.7436, "step": 5098 }, { "epoch": 0.5208375893769153, "grad_norm": 1.4082152457858663, "learning_rate": 9.81143621697426e-06, "loss": 0.7359, "step": 5099 }, { "epoch": 0.5209397344228804, "grad_norm": 1.6112140055927395, "learning_rate": 9.808128482948932e-06, "loss": 0.7314, "step": 5100 }, { "epoch": 0.5210418794688457, "grad_norm": 1.4608811343047152, "learning_rate": 9.804820769924066e-06, "loss": 0.7638, "step": 5101 }, { "epoch": 0.521144024514811, "grad_norm": 1.4543920651397524, "learning_rate": 9.801513078261692e-06, "loss": 0.7281, "step": 5102 }, { "epoch": 0.5212461695607763, "grad_norm": 1.4715674723462617, "learning_rate": 9.798205408323836e-06, "loss": 0.7847, "step": 5103 }, { "epoch": 0.5213483146067416, "grad_norm": 1.3786978048574219, "learning_rate": 9.794897760472533e-06, "loss": 0.6709, "step": 5104 }, { "epoch": 0.5214504596527069, "grad_norm": 1.5521306073102712, "learning_rate": 9.791590135069802e-06, "loss": 0.7853, "step": 5105 }, { "epoch": 0.5215526046986722, "grad_norm": 1.5015215760675327, "learning_rate": 9.788282532477667e-06, "loss": 0.781, "step": 5106 }, { "epoch": 0.5216547497446374, "grad_norm": 1.395912833914741, "learning_rate": 9.784974953058142e-06, "loss": 0.7016, "step": 5107 }, { "epoch": 0.5217568947906026, "grad_norm": 1.7594742889275699, "learning_rate": 9.78166739717325e-06, "loss": 0.7252, "step": 5108 }, { "epoch": 0.5218590398365679, "grad_norm": 1.6559845561480964, "learning_rate": 9.778359865185003e-06, "loss": 0.7224, "step": 5109 }, { "epoch": 0.5219611848825332, "grad_norm": 1.4170131301667364, "learning_rate": 9.775052357455411e-06, "loss": 0.7269, "step": 5110 }, { "epoch": 0.5220633299284985, "grad_norm": 1.4962461688736937, "learning_rate": 9.77174487434648e-06, "loss": 0.6251, "step": 5111 }, { "epoch": 0.5221654749744638, "grad_norm": 1.4369182092448913, "learning_rate": 9.768437416220224e-06, "loss": 0.7718, "step": 5112 }, { "epoch": 0.522267620020429, "grad_norm": 1.3128302729015586, "learning_rate": 9.765129983438642e-06, "loss": 0.657, "step": 5113 }, { "epoch": 0.5223697650663943, "grad_norm": 1.3797081584161954, "learning_rate": 9.761822576363737e-06, "loss": 0.6988, "step": 5114 }, { "epoch": 0.5224719101123596, "grad_norm": 1.458159366944201, "learning_rate": 9.758515195357501e-06, "loss": 0.6998, "step": 5115 }, { "epoch": 0.5225740551583248, "grad_norm": 1.5475287283222443, "learning_rate": 9.755207840781937e-06, "loss": 0.798, "step": 5116 }, { "epoch": 0.5226762002042901, "grad_norm": 1.3773093236034852, "learning_rate": 9.75190051299903e-06, "loss": 0.7541, "step": 5117 }, { "epoch": 0.5227783452502553, "grad_norm": 1.4254443804497317, "learning_rate": 9.748593212370773e-06, "loss": 0.6485, "step": 5118 }, { "epoch": 0.5228804902962206, "grad_norm": 1.4301000627484601, "learning_rate": 9.745285939259148e-06, "loss": 0.7523, "step": 5119 }, { "epoch": 0.5229826353421859, "grad_norm": 1.4920997205350683, "learning_rate": 9.741978694026145e-06, "loss": 0.7864, "step": 5120 }, { "epoch": 0.5230847803881512, "grad_norm": 1.5106081143517995, "learning_rate": 9.738671477033741e-06, "loss": 0.8059, "step": 5121 }, { "epoch": 0.5231869254341165, "grad_norm": 1.4139685284321442, "learning_rate": 9.735364288643912e-06, "loss": 0.7168, "step": 5122 }, { "epoch": 0.5232890704800817, "grad_norm": 1.426102819257782, "learning_rate": 9.732057129218634e-06, "loss": 0.655, "step": 5123 }, { "epoch": 0.523391215526047, "grad_norm": 1.4165945918142098, "learning_rate": 9.728749999119872e-06, "loss": 0.7276, "step": 5124 }, { "epoch": 0.5234933605720122, "grad_norm": 1.5830020601416201, "learning_rate": 9.725442898709604e-06, "loss": 0.6914, "step": 5125 }, { "epoch": 0.5235955056179775, "grad_norm": 1.5743064718683806, "learning_rate": 9.722135828349784e-06, "loss": 0.7779, "step": 5126 }, { "epoch": 0.5236976506639428, "grad_norm": 1.5482270638775972, "learning_rate": 9.718828788402374e-06, "loss": 0.7639, "step": 5127 }, { "epoch": 0.5237997957099081, "grad_norm": 1.453334098293946, "learning_rate": 9.715521779229342e-06, "loss": 0.6977, "step": 5128 }, { "epoch": 0.5239019407558734, "grad_norm": 1.5625917186930427, "learning_rate": 9.71221480119263e-06, "loss": 0.7718, "step": 5129 }, { "epoch": 0.5240040858018387, "grad_norm": 1.4521412400899234, "learning_rate": 9.708907854654198e-06, "loss": 0.6889, "step": 5130 }, { "epoch": 0.5241062308478038, "grad_norm": 1.398189493511134, "learning_rate": 9.705600939975988e-06, "loss": 0.7046, "step": 5131 }, { "epoch": 0.5242083758937691, "grad_norm": 1.506521122546969, "learning_rate": 9.702294057519943e-06, "loss": 0.7626, "step": 5132 }, { "epoch": 0.5243105209397344, "grad_norm": 1.6063259208396754, "learning_rate": 9.698987207648009e-06, "loss": 0.715, "step": 5133 }, { "epoch": 0.5244126659856997, "grad_norm": 1.4726721475135285, "learning_rate": 9.695680390722117e-06, "loss": 0.6684, "step": 5134 }, { "epoch": 0.524514811031665, "grad_norm": 1.3268591401007739, "learning_rate": 9.692373607104199e-06, "loss": 0.7215, "step": 5135 }, { "epoch": 0.5246169560776303, "grad_norm": 1.472933904520735, "learning_rate": 9.689066857156196e-06, "loss": 0.6952, "step": 5136 }, { "epoch": 0.5247191011235955, "grad_norm": 1.4285027928044267, "learning_rate": 9.685760141240022e-06, "loss": 0.6048, "step": 5137 }, { "epoch": 0.5248212461695608, "grad_norm": 1.506939851132931, "learning_rate": 9.682453459717607e-06, "loss": 0.765, "step": 5138 }, { "epoch": 0.524923391215526, "grad_norm": 1.6195691401487944, "learning_rate": 9.679146812950863e-06, "loss": 0.7709, "step": 5139 }, { "epoch": 0.5250255362614913, "grad_norm": 1.407943020312896, "learning_rate": 9.675840201301709e-06, "loss": 0.7129, "step": 5140 }, { "epoch": 0.5251276813074566, "grad_norm": 1.4612211684616374, "learning_rate": 9.672533625132053e-06, "loss": 0.7242, "step": 5141 }, { "epoch": 0.5252298263534219, "grad_norm": 1.5344220960942356, "learning_rate": 9.669227084803806e-06, "loss": 0.7248, "step": 5142 }, { "epoch": 0.5253319713993871, "grad_norm": 1.491028807803594, "learning_rate": 9.665920580678863e-06, "loss": 0.6781, "step": 5143 }, { "epoch": 0.5254341164453524, "grad_norm": 1.652221921116902, "learning_rate": 9.662614113119132e-06, "loss": 0.8428, "step": 5144 }, { "epoch": 0.5255362614913177, "grad_norm": 1.3219219861732163, "learning_rate": 9.659307682486506e-06, "loss": 0.6062, "step": 5145 }, { "epoch": 0.5256384065372829, "grad_norm": 1.5840142994394426, "learning_rate": 9.656001289142872e-06, "loss": 0.6878, "step": 5146 }, { "epoch": 0.5257405515832482, "grad_norm": 1.3400159775019036, "learning_rate": 9.65269493345012e-06, "loss": 0.6692, "step": 5147 }, { "epoch": 0.5258426966292135, "grad_norm": 1.6175365239293367, "learning_rate": 9.649388615770134e-06, "loss": 0.7748, "step": 5148 }, { "epoch": 0.5259448416751787, "grad_norm": 1.4617545388348845, "learning_rate": 9.646082336464787e-06, "loss": 0.6759, "step": 5149 }, { "epoch": 0.526046986721144, "grad_norm": 1.3685413776591426, "learning_rate": 9.642776095895959e-06, "loss": 0.6313, "step": 5150 }, { "epoch": 0.5261491317671093, "grad_norm": 1.5030004027073611, "learning_rate": 9.639469894425515e-06, "loss": 0.6903, "step": 5151 }, { "epoch": 0.5262512768130746, "grad_norm": 1.5636079019727214, "learning_rate": 9.636163732415325e-06, "loss": 0.708, "step": 5152 }, { "epoch": 0.5263534218590399, "grad_norm": 1.5003921346173024, "learning_rate": 9.632857610227251e-06, "loss": 0.6825, "step": 5153 }, { "epoch": 0.5264555669050051, "grad_norm": 1.482840292955541, "learning_rate": 9.629551528223147e-06, "loss": 0.7052, "step": 5154 }, { "epoch": 0.5265577119509703, "grad_norm": 1.480367382004129, "learning_rate": 9.626245486764872e-06, "loss": 0.7671, "step": 5155 }, { "epoch": 0.5266598569969356, "grad_norm": 1.4598999597691995, "learning_rate": 9.622939486214265e-06, "loss": 0.7796, "step": 5156 }, { "epoch": 0.5267620020429009, "grad_norm": 1.4526246243513843, "learning_rate": 9.619633526933178e-06, "loss": 0.7693, "step": 5157 }, { "epoch": 0.5268641470888662, "grad_norm": 1.483152338222519, "learning_rate": 9.616327609283445e-06, "loss": 0.7343, "step": 5158 }, { "epoch": 0.5269662921348315, "grad_norm": 1.4935644627300313, "learning_rate": 9.613021733626901e-06, "loss": 0.7264, "step": 5159 }, { "epoch": 0.5270684371807968, "grad_norm": 1.4892375072301431, "learning_rate": 9.609715900325382e-06, "loss": 0.7474, "step": 5160 }, { "epoch": 0.527170582226762, "grad_norm": 1.700501327049568, "learning_rate": 9.606410109740708e-06, "loss": 0.7129, "step": 5161 }, { "epoch": 0.5272727272727272, "grad_norm": 1.4575998933806678, "learning_rate": 9.603104362234706e-06, "loss": 0.7091, "step": 5162 }, { "epoch": 0.5273748723186925, "grad_norm": 1.5009987290615843, "learning_rate": 9.599798658169183e-06, "loss": 0.6751, "step": 5163 }, { "epoch": 0.5274770173646578, "grad_norm": 1.5089684752005672, "learning_rate": 9.59649299790596e-06, "loss": 0.636, "step": 5164 }, { "epoch": 0.5275791624106231, "grad_norm": 1.4323652624834637, "learning_rate": 9.593187381806836e-06, "loss": 0.6815, "step": 5165 }, { "epoch": 0.5276813074565884, "grad_norm": 1.5379467146294474, "learning_rate": 9.589881810233617e-06, "loss": 0.6455, "step": 5166 }, { "epoch": 0.5277834525025537, "grad_norm": 1.4597742437757253, "learning_rate": 9.586576283548094e-06, "loss": 0.6074, "step": 5167 }, { "epoch": 0.5278855975485189, "grad_norm": 1.359526809507787, "learning_rate": 9.583270802112071e-06, "loss": 0.7651, "step": 5168 }, { "epoch": 0.5279877425944842, "grad_norm": 1.4085023448290075, "learning_rate": 9.579965366287323e-06, "loss": 0.653, "step": 5169 }, { "epoch": 0.5280898876404494, "grad_norm": 1.3708279307672784, "learning_rate": 9.576659976435642e-06, "loss": 0.702, "step": 5170 }, { "epoch": 0.5281920326864147, "grad_norm": 1.4952205593909023, "learning_rate": 9.573354632918795e-06, "loss": 0.6497, "step": 5171 }, { "epoch": 0.52829417773238, "grad_norm": 1.3282727066725983, "learning_rate": 9.570049336098564e-06, "loss": 0.6484, "step": 5172 }, { "epoch": 0.5283963227783453, "grad_norm": 1.5235616748837848, "learning_rate": 9.566744086336706e-06, "loss": 0.685, "step": 5173 }, { "epoch": 0.5284984678243105, "grad_norm": 1.3512743226983603, "learning_rate": 9.563438883994991e-06, "loss": 0.6675, "step": 5174 }, { "epoch": 0.5286006128702758, "grad_norm": 1.4272422143408126, "learning_rate": 9.560133729435169e-06, "loss": 0.7023, "step": 5175 }, { "epoch": 0.5287027579162411, "grad_norm": 1.6387202021569567, "learning_rate": 9.556828623018995e-06, "loss": 0.8347, "step": 5176 }, { "epoch": 0.5288049029622063, "grad_norm": 1.4893469388334897, "learning_rate": 9.553523565108218e-06, "loss": 0.7538, "step": 5177 }, { "epoch": 0.5289070480081716, "grad_norm": 1.4249779120486283, "learning_rate": 9.550218556064571e-06, "loss": 0.7235, "step": 5178 }, { "epoch": 0.5290091930541369, "grad_norm": 1.4211583157086818, "learning_rate": 9.546913596249799e-06, "loss": 0.7038, "step": 5179 }, { "epoch": 0.5291113381001021, "grad_norm": 1.5960536678655683, "learning_rate": 9.543608686025623e-06, "loss": 0.7541, "step": 5180 }, { "epoch": 0.5292134831460674, "grad_norm": 1.2827435194768086, "learning_rate": 9.540303825753773e-06, "loss": 0.6395, "step": 5181 }, { "epoch": 0.5293156281920327, "grad_norm": 1.5285437181298687, "learning_rate": 9.536999015795964e-06, "loss": 0.6846, "step": 5182 }, { "epoch": 0.529417773237998, "grad_norm": 1.6057062622426963, "learning_rate": 9.53369425651391e-06, "loss": 0.7851, "step": 5183 }, { "epoch": 0.5295199182839633, "grad_norm": 1.423785849295611, "learning_rate": 9.530389548269325e-06, "loss": 0.7498, "step": 5184 }, { "epoch": 0.5296220633299284, "grad_norm": 1.3436873685320918, "learning_rate": 9.527084891423908e-06, "loss": 0.6765, "step": 5185 }, { "epoch": 0.5297242083758937, "grad_norm": 1.4496191456932257, "learning_rate": 9.523780286339353e-06, "loss": 0.7391, "step": 5186 }, { "epoch": 0.529826353421859, "grad_norm": 1.6439732869842854, "learning_rate": 9.520475733377355e-06, "loss": 0.7116, "step": 5187 }, { "epoch": 0.5299284984678243, "grad_norm": 1.545366081554161, "learning_rate": 9.517171232899595e-06, "loss": 0.739, "step": 5188 }, { "epoch": 0.5300306435137896, "grad_norm": 1.4986960578817448, "learning_rate": 9.513866785267758e-06, "loss": 0.7468, "step": 5189 }, { "epoch": 0.5301327885597549, "grad_norm": 1.7446155275946975, "learning_rate": 9.510562390843514e-06, "loss": 0.7508, "step": 5190 }, { "epoch": 0.5302349336057202, "grad_norm": 1.4896490666947961, "learning_rate": 9.50725804998853e-06, "loss": 0.7514, "step": 5191 }, { "epoch": 0.5303370786516854, "grad_norm": 1.525841631588098, "learning_rate": 9.503953763064475e-06, "loss": 0.6952, "step": 5192 }, { "epoch": 0.5304392236976506, "grad_norm": 1.5430319354369864, "learning_rate": 9.500649530432998e-06, "loss": 0.6683, "step": 5193 }, { "epoch": 0.5305413687436159, "grad_norm": 1.4622079798359395, "learning_rate": 9.497345352455757e-06, "loss": 0.6864, "step": 5194 }, { "epoch": 0.5306435137895812, "grad_norm": 1.410600549756591, "learning_rate": 9.494041229494388e-06, "loss": 0.7752, "step": 5195 }, { "epoch": 0.5307456588355465, "grad_norm": 1.5123284249259115, "learning_rate": 9.490737161910536e-06, "loss": 0.6101, "step": 5196 }, { "epoch": 0.5308478038815118, "grad_norm": 1.5394335469023082, "learning_rate": 9.487433150065827e-06, "loss": 0.7096, "step": 5197 }, { "epoch": 0.530949948927477, "grad_norm": 1.4027111356414252, "learning_rate": 9.484129194321896e-06, "loss": 0.7492, "step": 5198 }, { "epoch": 0.5310520939734423, "grad_norm": 1.5076294639375596, "learning_rate": 9.480825295040352e-06, "loss": 0.749, "step": 5199 }, { "epoch": 0.5311542390194075, "grad_norm": 1.5401512574061447, "learning_rate": 9.477521452582817e-06, "loss": 0.8157, "step": 5200 }, { "epoch": 0.5312563840653728, "grad_norm": 1.7117409286432816, "learning_rate": 9.474217667310899e-06, "loss": 0.7423, "step": 5201 }, { "epoch": 0.5313585291113381, "grad_norm": 1.4661050105078965, "learning_rate": 9.470913939586198e-06, "loss": 0.6627, "step": 5202 }, { "epoch": 0.5314606741573034, "grad_norm": 1.401855504032334, "learning_rate": 9.467610269770305e-06, "loss": 0.6642, "step": 5203 }, { "epoch": 0.5315628192032686, "grad_norm": 1.2906304813753924, "learning_rate": 9.464306658224814e-06, "loss": 0.67, "step": 5204 }, { "epoch": 0.5316649642492339, "grad_norm": 1.4643820762637774, "learning_rate": 9.461003105311301e-06, "loss": 0.664, "step": 5205 }, { "epoch": 0.5317671092951992, "grad_norm": 1.3580602625602194, "learning_rate": 9.457699611391353e-06, "loss": 0.6015, "step": 5206 }, { "epoch": 0.5318692543411645, "grad_norm": 1.4537758891906634, "learning_rate": 9.454396176826526e-06, "loss": 0.7009, "step": 5207 }, { "epoch": 0.5319713993871297, "grad_norm": 1.5132973527389146, "learning_rate": 9.451092801978392e-06, "loss": 0.7353, "step": 5208 }, { "epoch": 0.532073544433095, "grad_norm": 1.5687592504040881, "learning_rate": 9.447789487208507e-06, "loss": 0.7545, "step": 5209 }, { "epoch": 0.5321756894790602, "grad_norm": 1.5998407060957576, "learning_rate": 9.44448623287842e-06, "loss": 0.7174, "step": 5210 }, { "epoch": 0.5322778345250255, "grad_norm": 1.3080205329773444, "learning_rate": 9.441183039349673e-06, "loss": 0.6654, "step": 5211 }, { "epoch": 0.5323799795709908, "grad_norm": 1.5639283786139195, "learning_rate": 9.437879906983802e-06, "loss": 0.7279, "step": 5212 }, { "epoch": 0.5324821246169561, "grad_norm": 1.357514135423471, "learning_rate": 9.434576836142342e-06, "loss": 0.6818, "step": 5213 }, { "epoch": 0.5325842696629214, "grad_norm": 1.4100995588972292, "learning_rate": 9.43127382718681e-06, "loss": 0.7335, "step": 5214 }, { "epoch": 0.5326864147088867, "grad_norm": 1.568960220925224, "learning_rate": 9.427970880478722e-06, "loss": 0.7584, "step": 5215 }, { "epoch": 0.5327885597548518, "grad_norm": 1.5011030885054004, "learning_rate": 9.424667996379595e-06, "loss": 0.7565, "step": 5216 }, { "epoch": 0.5328907048008171, "grad_norm": 1.5894263004873823, "learning_rate": 9.421365175250926e-06, "loss": 0.7215, "step": 5217 }, { "epoch": 0.5329928498467824, "grad_norm": 1.7578176063141788, "learning_rate": 9.418062417454213e-06, "loss": 0.7107, "step": 5218 }, { "epoch": 0.5330949948927477, "grad_norm": 1.440778242320715, "learning_rate": 9.414759723350946e-06, "loss": 0.7162, "step": 5219 }, { "epoch": 0.533197139938713, "grad_norm": 1.5198544947260326, "learning_rate": 9.411457093302602e-06, "loss": 0.6941, "step": 5220 }, { "epoch": 0.5332992849846783, "grad_norm": 1.5739514159496326, "learning_rate": 9.408154527670664e-06, "loss": 0.7206, "step": 5221 }, { "epoch": 0.5334014300306436, "grad_norm": 1.5434643563777037, "learning_rate": 9.404852026816591e-06, "loss": 0.7277, "step": 5222 }, { "epoch": 0.5335035750766088, "grad_norm": 1.3102973594194431, "learning_rate": 9.401549591101846e-06, "loss": 0.6163, "step": 5223 }, { "epoch": 0.533605720122574, "grad_norm": 1.582047805245025, "learning_rate": 9.398247220887891e-06, "loss": 0.714, "step": 5224 }, { "epoch": 0.5337078651685393, "grad_norm": 1.8185450745973895, "learning_rate": 9.394944916536162e-06, "loss": 0.7698, "step": 5225 }, { "epoch": 0.5338100102145046, "grad_norm": 1.553943699560829, "learning_rate": 9.391642678408106e-06, "loss": 0.7408, "step": 5226 }, { "epoch": 0.5339121552604699, "grad_norm": 1.4702776391165397, "learning_rate": 9.38834050686515e-06, "loss": 0.7308, "step": 5227 }, { "epoch": 0.5340143003064352, "grad_norm": 1.5622365292883602, "learning_rate": 9.385038402268723e-06, "loss": 0.7105, "step": 5228 }, { "epoch": 0.5341164453524004, "grad_norm": 1.332354262114744, "learning_rate": 9.381736364980238e-06, "loss": 0.603, "step": 5229 }, { "epoch": 0.5342185903983657, "grad_norm": 1.5320921500301965, "learning_rate": 9.37843439536111e-06, "loss": 0.7429, "step": 5230 }, { "epoch": 0.5343207354443309, "grad_norm": 1.3837328853944448, "learning_rate": 9.375132493772732e-06, "loss": 0.733, "step": 5231 }, { "epoch": 0.5344228804902962, "grad_norm": 1.5242104816405373, "learning_rate": 9.37183066057651e-06, "loss": 0.6769, "step": 5232 }, { "epoch": 0.5345250255362615, "grad_norm": 1.5727672737205118, "learning_rate": 9.368528896133831e-06, "loss": 0.5786, "step": 5233 }, { "epoch": 0.5346271705822268, "grad_norm": 1.4921052624260152, "learning_rate": 9.365227200806069e-06, "loss": 0.7184, "step": 5234 }, { "epoch": 0.534729315628192, "grad_norm": 1.4130026584586142, "learning_rate": 9.361925574954603e-06, "loss": 0.6841, "step": 5235 }, { "epoch": 0.5348314606741573, "grad_norm": 1.365519194615162, "learning_rate": 9.358624018940795e-06, "loss": 0.6369, "step": 5236 }, { "epoch": 0.5349336057201226, "grad_norm": 1.5666966599637793, "learning_rate": 9.355322533126002e-06, "loss": 0.7645, "step": 5237 }, { "epoch": 0.5350357507660879, "grad_norm": 1.3232513646076036, "learning_rate": 9.352021117871574e-06, "loss": 0.7439, "step": 5238 }, { "epoch": 0.5351378958120531, "grad_norm": 1.3245689427114669, "learning_rate": 9.348719773538849e-06, "loss": 0.6858, "step": 5239 }, { "epoch": 0.5352400408580184, "grad_norm": 1.4605304969591744, "learning_rate": 9.34541850048917e-06, "loss": 0.6459, "step": 5240 }, { "epoch": 0.5353421859039836, "grad_norm": 1.4476566910186557, "learning_rate": 9.342117299083859e-06, "loss": 0.6964, "step": 5241 }, { "epoch": 0.5354443309499489, "grad_norm": 1.4907526958267718, "learning_rate": 9.338816169684231e-06, "loss": 0.742, "step": 5242 }, { "epoch": 0.5355464759959142, "grad_norm": 1.4266270314557523, "learning_rate": 9.335515112651606e-06, "loss": 0.5986, "step": 5243 }, { "epoch": 0.5356486210418795, "grad_norm": 1.5307428758134793, "learning_rate": 9.332214128347277e-06, "loss": 0.7907, "step": 5244 }, { "epoch": 0.5357507660878448, "grad_norm": 1.4544659809004, "learning_rate": 9.328913217132546e-06, "loss": 0.7315, "step": 5245 }, { "epoch": 0.5358529111338101, "grad_norm": 1.5311439433339236, "learning_rate": 9.325612379368695e-06, "loss": 0.704, "step": 5246 }, { "epoch": 0.5359550561797752, "grad_norm": 1.46492126109438, "learning_rate": 9.322311615417003e-06, "loss": 0.6895, "step": 5247 }, { "epoch": 0.5360572012257405, "grad_norm": 1.2532498667894556, "learning_rate": 9.319010925638746e-06, "loss": 0.6569, "step": 5248 }, { "epoch": 0.5361593462717058, "grad_norm": 1.5057032932249226, "learning_rate": 9.315710310395181e-06, "loss": 0.689, "step": 5249 }, { "epoch": 0.5362614913176711, "grad_norm": 1.43922695177929, "learning_rate": 9.312409770047566e-06, "loss": 0.7244, "step": 5250 }, { "epoch": 0.5363636363636364, "grad_norm": 1.4659410388318872, "learning_rate": 9.309109304957145e-06, "loss": 0.7128, "step": 5251 }, { "epoch": 0.5364657814096017, "grad_norm": 1.336241602958345, "learning_rate": 9.30580891548516e-06, "loss": 0.6475, "step": 5252 }, { "epoch": 0.536567926455567, "grad_norm": 1.3401891670552928, "learning_rate": 9.302508601992834e-06, "loss": 0.7022, "step": 5253 }, { "epoch": 0.5366700715015322, "grad_norm": 1.50850922143205, "learning_rate": 9.299208364841394e-06, "loss": 0.7908, "step": 5254 }, { "epoch": 0.5367722165474974, "grad_norm": 1.5296596183193492, "learning_rate": 9.295908204392047e-06, "loss": 0.6882, "step": 5255 }, { "epoch": 0.5368743615934627, "grad_norm": 1.6002141505999796, "learning_rate": 9.292608121006009e-06, "loss": 0.67, "step": 5256 }, { "epoch": 0.536976506639428, "grad_norm": 1.3735472611573007, "learning_rate": 9.289308115044463e-06, "loss": 0.6693, "step": 5257 }, { "epoch": 0.5370786516853933, "grad_norm": 1.3894506834816651, "learning_rate": 9.286008186868606e-06, "loss": 0.6783, "step": 5258 }, { "epoch": 0.5371807967313585, "grad_norm": 1.306144968789425, "learning_rate": 9.282708336839614e-06, "loss": 0.5567, "step": 5259 }, { "epoch": 0.5372829417773238, "grad_norm": 1.3103043955413112, "learning_rate": 9.279408565318659e-06, "loss": 0.6806, "step": 5260 }, { "epoch": 0.5373850868232891, "grad_norm": 1.6535123776007974, "learning_rate": 9.276108872666901e-06, "loss": 0.7957, "step": 5261 }, { "epoch": 0.5374872318692543, "grad_norm": 1.5252777916727294, "learning_rate": 9.272809259245496e-06, "loss": 0.8554, "step": 5262 }, { "epoch": 0.5375893769152196, "grad_norm": 1.4153971798648703, "learning_rate": 9.269509725415584e-06, "loss": 0.7413, "step": 5263 }, { "epoch": 0.5376915219611849, "grad_norm": 1.4794298137426707, "learning_rate": 9.266210271538306e-06, "loss": 0.7684, "step": 5264 }, { "epoch": 0.5377936670071501, "grad_norm": 1.519620646630575, "learning_rate": 9.262910897974789e-06, "loss": 0.7292, "step": 5265 }, { "epoch": 0.5378958120531154, "grad_norm": 1.6542631258073068, "learning_rate": 9.259611605086148e-06, "loss": 0.6751, "step": 5266 }, { "epoch": 0.5379979570990807, "grad_norm": 1.3773974798268631, "learning_rate": 9.256312393233498e-06, "loss": 0.7119, "step": 5267 }, { "epoch": 0.538100102145046, "grad_norm": 1.4379207729597026, "learning_rate": 9.253013262777936e-06, "loss": 0.69, "step": 5268 }, { "epoch": 0.5382022471910113, "grad_norm": 1.4148171001847447, "learning_rate": 9.249714214080556e-06, "loss": 0.7604, "step": 5269 }, { "epoch": 0.5383043922369765, "grad_norm": 1.325100154473844, "learning_rate": 9.246415247502439e-06, "loss": 0.7376, "step": 5270 }, { "epoch": 0.5384065372829417, "grad_norm": 1.5177571170075688, "learning_rate": 9.243116363404656e-06, "loss": 0.696, "step": 5271 }, { "epoch": 0.538508682328907, "grad_norm": 1.3555489570734756, "learning_rate": 9.239817562148282e-06, "loss": 0.6245, "step": 5272 }, { "epoch": 0.5386108273748723, "grad_norm": 1.5472753084386863, "learning_rate": 9.236518844094366e-06, "loss": 0.828, "step": 5273 }, { "epoch": 0.5387129724208376, "grad_norm": 1.4827026082201882, "learning_rate": 9.233220209603954e-06, "loss": 0.7435, "step": 5274 }, { "epoch": 0.5388151174668029, "grad_norm": 1.4401966727546343, "learning_rate": 9.229921659038088e-06, "loss": 0.7402, "step": 5275 }, { "epoch": 0.5389172625127682, "grad_norm": 1.508204883055981, "learning_rate": 9.22662319275779e-06, "loss": 0.6799, "step": 5276 }, { "epoch": 0.5390194075587335, "grad_norm": 1.5978317798229795, "learning_rate": 9.223324811124088e-06, "loss": 0.7384, "step": 5277 }, { "epoch": 0.5391215526046986, "grad_norm": 1.402087605817577, "learning_rate": 9.220026514497984e-06, "loss": 0.6534, "step": 5278 }, { "epoch": 0.5392236976506639, "grad_norm": 1.3809946173019199, "learning_rate": 9.21672830324048e-06, "loss": 0.5954, "step": 5279 }, { "epoch": 0.5393258426966292, "grad_norm": 1.6411917445001991, "learning_rate": 9.213430177712574e-06, "loss": 0.6867, "step": 5280 }, { "epoch": 0.5394279877425945, "grad_norm": 1.520897593261886, "learning_rate": 9.21013213827524e-06, "loss": 0.7898, "step": 5281 }, { "epoch": 0.5395301327885598, "grad_norm": 1.4964203013563593, "learning_rate": 9.206834185289454e-06, "loss": 0.7174, "step": 5282 }, { "epoch": 0.539632277834525, "grad_norm": 1.531332446511657, "learning_rate": 9.203536319116181e-06, "loss": 0.7402, "step": 5283 }, { "epoch": 0.5397344228804903, "grad_norm": 1.6615418227398808, "learning_rate": 9.200238540116372e-06, "loss": 0.7164, "step": 5284 }, { "epoch": 0.5398365679264555, "grad_norm": 1.525332902436478, "learning_rate": 9.196940848650971e-06, "loss": 0.6072, "step": 5285 }, { "epoch": 0.5399387129724208, "grad_norm": 1.6139711229075728, "learning_rate": 9.193643245080913e-06, "loss": 0.7862, "step": 5286 }, { "epoch": 0.5400408580183861, "grad_norm": 1.451061318556362, "learning_rate": 9.19034572976712e-06, "loss": 0.6612, "step": 5287 }, { "epoch": 0.5401430030643514, "grad_norm": 1.5857671251795262, "learning_rate": 9.187048303070511e-06, "loss": 0.7574, "step": 5288 }, { "epoch": 0.5402451481103167, "grad_norm": 1.561709585188985, "learning_rate": 9.183750965351993e-06, "loss": 0.8005, "step": 5289 }, { "epoch": 0.5403472931562819, "grad_norm": 1.3759356342814062, "learning_rate": 9.180453716972457e-06, "loss": 0.6906, "step": 5290 }, { "epoch": 0.5404494382022472, "grad_norm": 1.3853215319387142, "learning_rate": 9.177156558292794e-06, "loss": 0.69, "step": 5291 }, { "epoch": 0.5405515832482125, "grad_norm": 1.3870361073436825, "learning_rate": 9.173859489673877e-06, "loss": 0.7393, "step": 5292 }, { "epoch": 0.5406537282941777, "grad_norm": 1.4140309681807364, "learning_rate": 9.17056251147657e-06, "loss": 0.7551, "step": 5293 }, { "epoch": 0.540755873340143, "grad_norm": 1.4674334304973493, "learning_rate": 9.167265624061734e-06, "loss": 0.742, "step": 5294 }, { "epoch": 0.5408580183861083, "grad_norm": 1.3562881998876646, "learning_rate": 9.16396882779021e-06, "loss": 0.6339, "step": 5295 }, { "epoch": 0.5409601634320735, "grad_norm": 1.4859463329125158, "learning_rate": 9.160672123022842e-06, "loss": 0.751, "step": 5296 }, { "epoch": 0.5410623084780388, "grad_norm": 1.466658939442774, "learning_rate": 9.15737551012045e-06, "loss": 0.7923, "step": 5297 }, { "epoch": 0.5411644535240041, "grad_norm": 1.5990995821455012, "learning_rate": 9.154078989443854e-06, "loss": 0.6984, "step": 5298 }, { "epoch": 0.5412665985699694, "grad_norm": 1.5890066752397045, "learning_rate": 9.15078256135386e-06, "loss": 0.6756, "step": 5299 }, { "epoch": 0.5413687436159347, "grad_norm": 1.4800093375485848, "learning_rate": 9.147486226211262e-06, "loss": 0.7975, "step": 5300 }, { "epoch": 0.5414708886618999, "grad_norm": 1.4305514607377332, "learning_rate": 9.144189984376849e-06, "loss": 0.7698, "step": 5301 }, { "epoch": 0.5415730337078651, "grad_norm": 1.5629233271080127, "learning_rate": 9.140893836211393e-06, "loss": 0.7377, "step": 5302 }, { "epoch": 0.5416751787538304, "grad_norm": 1.5762054273293127, "learning_rate": 9.13759778207566e-06, "loss": 0.8224, "step": 5303 }, { "epoch": 0.5417773237997957, "grad_norm": 1.436590857879403, "learning_rate": 9.13430182233041e-06, "loss": 0.7729, "step": 5304 }, { "epoch": 0.541879468845761, "grad_norm": 1.3800295467987769, "learning_rate": 9.131005957336385e-06, "loss": 0.7499, "step": 5305 }, { "epoch": 0.5419816138917263, "grad_norm": 1.4531538739947707, "learning_rate": 9.12771018745432e-06, "loss": 0.7126, "step": 5306 }, { "epoch": 0.5420837589376916, "grad_norm": 1.2574470937337408, "learning_rate": 9.124414513044936e-06, "loss": 0.5587, "step": 5307 }, { "epoch": 0.5421859039836568, "grad_norm": 1.4843886595211573, "learning_rate": 9.12111893446895e-06, "loss": 0.7022, "step": 5308 }, { "epoch": 0.542288049029622, "grad_norm": 1.4507226920181049, "learning_rate": 9.117823452087068e-06, "loss": 0.8284, "step": 5309 }, { "epoch": 0.5423901940755873, "grad_norm": 1.4267470637068462, "learning_rate": 9.114528066259976e-06, "loss": 0.6566, "step": 5310 }, { "epoch": 0.5424923391215526, "grad_norm": 1.4797203656055982, "learning_rate": 9.111232777348356e-06, "loss": 0.7206, "step": 5311 }, { "epoch": 0.5425944841675179, "grad_norm": 1.5215950085698822, "learning_rate": 9.107937585712887e-06, "loss": 0.786, "step": 5312 }, { "epoch": 0.5426966292134832, "grad_norm": 1.5107304338104994, "learning_rate": 9.104642491714224e-06, "loss": 0.6554, "step": 5313 }, { "epoch": 0.5427987742594484, "grad_norm": 1.520630747917681, "learning_rate": 9.10134749571302e-06, "loss": 0.6564, "step": 5314 }, { "epoch": 0.5429009193054137, "grad_norm": 1.445699678559169, "learning_rate": 9.098052598069912e-06, "loss": 0.6431, "step": 5315 }, { "epoch": 0.5430030643513789, "grad_norm": 1.501999467823678, "learning_rate": 9.094757799145532e-06, "loss": 0.7194, "step": 5316 }, { "epoch": 0.5431052093973442, "grad_norm": 1.4451666524407356, "learning_rate": 9.091463099300494e-06, "loss": 0.7128, "step": 5317 }, { "epoch": 0.5432073544433095, "grad_norm": 1.5725513348734697, "learning_rate": 9.088168498895407e-06, "loss": 0.7077, "step": 5318 }, { "epoch": 0.5433094994892748, "grad_norm": 1.451321436949179, "learning_rate": 9.084873998290864e-06, "loss": 0.7615, "step": 5319 }, { "epoch": 0.54341164453524, "grad_norm": 1.5582671863331368, "learning_rate": 9.081579597847456e-06, "loss": 0.7957, "step": 5320 }, { "epoch": 0.5435137895812053, "grad_norm": 1.4121393335382328, "learning_rate": 9.078285297925755e-06, "loss": 0.6675, "step": 5321 }, { "epoch": 0.5436159346271706, "grad_norm": 1.449586993042086, "learning_rate": 9.074991098886323e-06, "loss": 0.7449, "step": 5322 }, { "epoch": 0.5437180796731359, "grad_norm": 1.4462893785550492, "learning_rate": 9.071697001089715e-06, "loss": 0.6644, "step": 5323 }, { "epoch": 0.5438202247191011, "grad_norm": 1.5100590038502006, "learning_rate": 9.068403004896466e-06, "loss": 0.7042, "step": 5324 }, { "epoch": 0.5439223697650664, "grad_norm": 1.4309243720156277, "learning_rate": 9.065109110667115e-06, "loss": 0.5981, "step": 5325 }, { "epoch": 0.5440245148110316, "grad_norm": 1.6063071330138892, "learning_rate": 9.061815318762172e-06, "loss": 0.8174, "step": 5326 }, { "epoch": 0.5441266598569969, "grad_norm": 1.5566826578911757, "learning_rate": 9.058521629542148e-06, "loss": 0.678, "step": 5327 }, { "epoch": 0.5442288049029622, "grad_norm": 1.4681254846370988, "learning_rate": 9.055228043367539e-06, "loss": 0.6948, "step": 5328 }, { "epoch": 0.5443309499489275, "grad_norm": 1.4871040856778386, "learning_rate": 9.051934560598836e-06, "loss": 0.7453, "step": 5329 }, { "epoch": 0.5444330949948928, "grad_norm": 1.5811952845783739, "learning_rate": 9.048641181596504e-06, "loss": 0.7559, "step": 5330 }, { "epoch": 0.5445352400408581, "grad_norm": 1.453350051115602, "learning_rate": 9.045347906721011e-06, "loss": 0.817, "step": 5331 }, { "epoch": 0.5446373850868232, "grad_norm": 1.4795550383718767, "learning_rate": 9.042054736332806e-06, "loss": 0.7452, "step": 5332 }, { "epoch": 0.5447395301327885, "grad_norm": 1.4157521258886236, "learning_rate": 9.03876167079233e-06, "loss": 0.751, "step": 5333 }, { "epoch": 0.5448416751787538, "grad_norm": 1.4382263368016583, "learning_rate": 9.035468710460007e-06, "loss": 0.7434, "step": 5334 }, { "epoch": 0.5449438202247191, "grad_norm": 1.4580716073026145, "learning_rate": 9.032175855696258e-06, "loss": 0.6604, "step": 5335 }, { "epoch": 0.5450459652706844, "grad_norm": 1.3815795263848616, "learning_rate": 9.028883106861488e-06, "loss": 0.7993, "step": 5336 }, { "epoch": 0.5451481103166497, "grad_norm": 1.5790148880070334, "learning_rate": 9.025590464316087e-06, "loss": 0.7055, "step": 5337 }, { "epoch": 0.545250255362615, "grad_norm": 1.2921656316065682, "learning_rate": 9.022297928420444e-06, "loss": 0.7469, "step": 5338 }, { "epoch": 0.5453524004085801, "grad_norm": 1.5872318292370122, "learning_rate": 9.019005499534921e-06, "loss": 0.6628, "step": 5339 }, { "epoch": 0.5454545454545454, "grad_norm": 1.4682122446890498, "learning_rate": 9.01571317801988e-06, "loss": 0.7187, "step": 5340 }, { "epoch": 0.5455566905005107, "grad_norm": 1.4298051684777104, "learning_rate": 9.012420964235668e-06, "loss": 0.7633, "step": 5341 }, { "epoch": 0.545658835546476, "grad_norm": 1.50302296413129, "learning_rate": 9.009128858542622e-06, "loss": 0.7363, "step": 5342 }, { "epoch": 0.5457609805924413, "grad_norm": 1.3826266483155756, "learning_rate": 9.005836861301058e-06, "loss": 0.7217, "step": 5343 }, { "epoch": 0.5458631256384066, "grad_norm": 1.4075502297413032, "learning_rate": 9.002544972871292e-06, "loss": 0.7515, "step": 5344 }, { "epoch": 0.5459652706843718, "grad_norm": 1.4235299753942443, "learning_rate": 8.999253193613627e-06, "loss": 0.6235, "step": 5345 }, { "epoch": 0.5460674157303371, "grad_norm": 1.4251626010458078, "learning_rate": 8.995961523888346e-06, "loss": 0.7342, "step": 5346 }, { "epoch": 0.5461695607763023, "grad_norm": 1.4968845427203312, "learning_rate": 8.992669964055724e-06, "loss": 0.7633, "step": 5347 }, { "epoch": 0.5462717058222676, "grad_norm": 1.4716207391979643, "learning_rate": 8.989378514476025e-06, "loss": 0.673, "step": 5348 }, { "epoch": 0.5463738508682329, "grad_norm": 1.3852885529936758, "learning_rate": 8.986087175509502e-06, "loss": 0.6826, "step": 5349 }, { "epoch": 0.5464759959141982, "grad_norm": 1.5653429152103706, "learning_rate": 8.982795947516392e-06, "loss": 0.789, "step": 5350 }, { "epoch": 0.5465781409601634, "grad_norm": 1.4859779881279889, "learning_rate": 8.979504830856919e-06, "loss": 0.7388, "step": 5351 }, { "epoch": 0.5466802860061287, "grad_norm": 1.5773763624722752, "learning_rate": 8.976213825891304e-06, "loss": 0.6827, "step": 5352 }, { "epoch": 0.546782431052094, "grad_norm": 1.603407303018258, "learning_rate": 8.97292293297975e-06, "loss": 0.8171, "step": 5353 }, { "epoch": 0.5468845760980593, "grad_norm": 1.4925773215730558, "learning_rate": 8.96963215248244e-06, "loss": 0.708, "step": 5354 }, { "epoch": 0.5469867211440245, "grad_norm": 1.492018056543209, "learning_rate": 8.966341484759561e-06, "loss": 0.6226, "step": 5355 }, { "epoch": 0.5470888661899898, "grad_norm": 1.6431137419227642, "learning_rate": 8.963050930171271e-06, "loss": 0.7496, "step": 5356 }, { "epoch": 0.547191011235955, "grad_norm": 1.4791372078429992, "learning_rate": 8.959760489077729e-06, "loss": 0.6192, "step": 5357 }, { "epoch": 0.5472931562819203, "grad_norm": 1.4890416899484047, "learning_rate": 8.956470161839073e-06, "loss": 0.7928, "step": 5358 }, { "epoch": 0.5473953013278856, "grad_norm": 1.4434292697286835, "learning_rate": 8.953179948815428e-06, "loss": 0.6171, "step": 5359 }, { "epoch": 0.5474974463738509, "grad_norm": 1.415645205441737, "learning_rate": 8.949889850366918e-06, "loss": 0.6723, "step": 5360 }, { "epoch": 0.5475995914198162, "grad_norm": 1.448894353315161, "learning_rate": 8.94659986685364e-06, "loss": 0.6791, "step": 5361 }, { "epoch": 0.5477017364657815, "grad_norm": 1.3656937207936726, "learning_rate": 8.943309998635693e-06, "loss": 0.7261, "step": 5362 }, { "epoch": 0.5478038815117466, "grad_norm": 1.5996651055356172, "learning_rate": 8.940020246073146e-06, "loss": 0.7881, "step": 5363 }, { "epoch": 0.5479060265577119, "grad_norm": 1.359052456741216, "learning_rate": 8.936730609526065e-06, "loss": 0.7476, "step": 5364 }, { "epoch": 0.5480081716036772, "grad_norm": 1.4059140357592597, "learning_rate": 8.93344108935451e-06, "loss": 0.7359, "step": 5365 }, { "epoch": 0.5481103166496425, "grad_norm": 1.556804157242638, "learning_rate": 8.930151685918515e-06, "loss": 0.8106, "step": 5366 }, { "epoch": 0.5482124616956078, "grad_norm": 1.4906055103333997, "learning_rate": 8.926862399578105e-06, "loss": 0.682, "step": 5367 }, { "epoch": 0.5483146067415731, "grad_norm": 1.4132452878515613, "learning_rate": 8.923573230693304e-06, "loss": 0.7147, "step": 5368 }, { "epoch": 0.5484167517875383, "grad_norm": 1.4030899147833644, "learning_rate": 8.920284179624107e-06, "loss": 0.6427, "step": 5369 }, { "epoch": 0.5485188968335035, "grad_norm": 1.512310965118413, "learning_rate": 8.916995246730505e-06, "loss": 0.8136, "step": 5370 }, { "epoch": 0.5486210418794688, "grad_norm": 1.5233682828546162, "learning_rate": 8.913706432372471e-06, "loss": 0.6877, "step": 5371 }, { "epoch": 0.5487231869254341, "grad_norm": 1.578424541584731, "learning_rate": 8.910417736909974e-06, "loss": 0.7004, "step": 5372 }, { "epoch": 0.5488253319713994, "grad_norm": 1.3677503555378996, "learning_rate": 8.907129160702954e-06, "loss": 0.7669, "step": 5373 }, { "epoch": 0.5489274770173647, "grad_norm": 1.4370148136214547, "learning_rate": 8.903840704111357e-06, "loss": 0.6979, "step": 5374 }, { "epoch": 0.54902962206333, "grad_norm": 1.5042638915499635, "learning_rate": 8.9005523674951e-06, "loss": 0.7847, "step": 5375 }, { "epoch": 0.5491317671092952, "grad_norm": 1.544319530070016, "learning_rate": 8.897264151214097e-06, "loss": 0.7424, "step": 5376 }, { "epoch": 0.5492339121552605, "grad_norm": 1.549438778384867, "learning_rate": 8.89397605562825e-06, "loss": 0.6311, "step": 5377 }, { "epoch": 0.5493360572012257, "grad_norm": 1.4177978825614934, "learning_rate": 8.890688081097433e-06, "loss": 0.67, "step": 5378 }, { "epoch": 0.549438202247191, "grad_norm": 1.4395209406122906, "learning_rate": 8.887400227981526e-06, "loss": 0.7183, "step": 5379 }, { "epoch": 0.5495403472931563, "grad_norm": 1.4616314817419782, "learning_rate": 8.884112496640384e-06, "loss": 0.7311, "step": 5380 }, { "epoch": 0.5496424923391215, "grad_norm": 1.4454530093057807, "learning_rate": 8.880824887433846e-06, "loss": 0.6944, "step": 5381 }, { "epoch": 0.5497446373850868, "grad_norm": 1.5396149169460862, "learning_rate": 8.87753740072175e-06, "loss": 0.6707, "step": 5382 }, { "epoch": 0.5498467824310521, "grad_norm": 1.5909855895748617, "learning_rate": 8.874250036863908e-06, "loss": 0.6597, "step": 5383 }, { "epoch": 0.5499489274770174, "grad_norm": 1.3443167615110867, "learning_rate": 8.870962796220126e-06, "loss": 0.6405, "step": 5384 }, { "epoch": 0.5500510725229827, "grad_norm": 1.5852993308897236, "learning_rate": 8.867675679150199e-06, "loss": 0.7639, "step": 5385 }, { "epoch": 0.5501532175689479, "grad_norm": 1.3942173580257833, "learning_rate": 8.864388686013898e-06, "loss": 0.6952, "step": 5386 }, { "epoch": 0.5502553626149131, "grad_norm": 1.233334244837427, "learning_rate": 8.861101817170992e-06, "loss": 0.5944, "step": 5387 }, { "epoch": 0.5503575076608784, "grad_norm": 1.360716004687835, "learning_rate": 8.857815072981226e-06, "loss": 0.6664, "step": 5388 }, { "epoch": 0.5504596527068437, "grad_norm": 1.5082396249771426, "learning_rate": 8.85452845380434e-06, "loss": 0.7253, "step": 5389 }, { "epoch": 0.550561797752809, "grad_norm": 1.4087227583963116, "learning_rate": 8.851241960000052e-06, "loss": 0.5674, "step": 5390 }, { "epoch": 0.5506639427987743, "grad_norm": 1.57716737928456, "learning_rate": 8.847955591928071e-06, "loss": 0.8226, "step": 5391 }, { "epoch": 0.5507660878447396, "grad_norm": 1.4750059229405859, "learning_rate": 8.8446693499481e-06, "loss": 0.6505, "step": 5392 }, { "epoch": 0.5508682328907047, "grad_norm": 1.497572334655076, "learning_rate": 8.841383234419814e-06, "loss": 0.7789, "step": 5393 }, { "epoch": 0.55097037793667, "grad_norm": 1.4985414622726312, "learning_rate": 8.838097245702882e-06, "loss": 0.7381, "step": 5394 }, { "epoch": 0.5510725229826353, "grad_norm": 1.4572795778242322, "learning_rate": 8.834811384156956e-06, "loss": 0.6933, "step": 5395 }, { "epoch": 0.5511746680286006, "grad_norm": 1.5379611256922758, "learning_rate": 8.831525650141679e-06, "loss": 0.7658, "step": 5396 }, { "epoch": 0.5512768130745659, "grad_norm": 1.4225258726730035, "learning_rate": 8.828240044016673e-06, "loss": 0.6638, "step": 5397 }, { "epoch": 0.5513789581205312, "grad_norm": 1.408941937092425, "learning_rate": 8.82495456614155e-06, "loss": 0.7282, "step": 5398 }, { "epoch": 0.5514811031664965, "grad_norm": 1.5617833393287068, "learning_rate": 8.821669216875907e-06, "loss": 0.8289, "step": 5399 }, { "epoch": 0.5515832482124617, "grad_norm": 1.4591657624697218, "learning_rate": 8.818383996579333e-06, "loss": 0.7902, "step": 5400 }, { "epoch": 0.5516853932584269, "grad_norm": 1.3360872473629597, "learning_rate": 8.815098905611394e-06, "loss": 0.7384, "step": 5401 }, { "epoch": 0.5517875383043922, "grad_norm": 1.6297580651485546, "learning_rate": 8.811813944331645e-06, "loss": 0.6578, "step": 5402 }, { "epoch": 0.5518896833503575, "grad_norm": 1.322814968956624, "learning_rate": 8.808529113099627e-06, "loss": 0.6966, "step": 5403 }, { "epoch": 0.5519918283963228, "grad_norm": 1.6136283568268253, "learning_rate": 8.805244412274868e-06, "loss": 0.6758, "step": 5404 }, { "epoch": 0.552093973442288, "grad_norm": 1.3573620005413962, "learning_rate": 8.801959842216878e-06, "loss": 0.7232, "step": 5405 }, { "epoch": 0.5521961184882533, "grad_norm": 1.3281703332363635, "learning_rate": 8.79867540328516e-06, "loss": 0.6728, "step": 5406 }, { "epoch": 0.5522982635342186, "grad_norm": 1.3938561424786702, "learning_rate": 8.795391095839192e-06, "loss": 0.7659, "step": 5407 }, { "epoch": 0.5524004085801839, "grad_norm": 1.3182967879684218, "learning_rate": 8.792106920238447e-06, "loss": 0.6943, "step": 5408 }, { "epoch": 0.5525025536261491, "grad_norm": 1.5843658315144558, "learning_rate": 8.788822876842384e-06, "loss": 0.7033, "step": 5409 }, { "epoch": 0.5526046986721144, "grad_norm": 1.3822059379086615, "learning_rate": 8.785538966010437e-06, "loss": 0.6363, "step": 5410 }, { "epoch": 0.5527068437180797, "grad_norm": 1.3541830578460525, "learning_rate": 8.782255188102037e-06, "loss": 0.6685, "step": 5411 }, { "epoch": 0.5528089887640449, "grad_norm": 1.527147474143802, "learning_rate": 8.77897154347659e-06, "loss": 0.7371, "step": 5412 }, { "epoch": 0.5529111338100102, "grad_norm": 1.5212382176550752, "learning_rate": 8.7756880324935e-06, "loss": 0.7032, "step": 5413 }, { "epoch": 0.5530132788559755, "grad_norm": 1.4912726537488599, "learning_rate": 8.772404655512145e-06, "loss": 0.7179, "step": 5414 }, { "epoch": 0.5531154239019408, "grad_norm": 1.3226093351288812, "learning_rate": 8.769121412891888e-06, "loss": 0.7762, "step": 5415 }, { "epoch": 0.5532175689479061, "grad_norm": 1.402450990480852, "learning_rate": 8.765838304992094e-06, "loss": 0.7697, "step": 5416 }, { "epoch": 0.5533197139938713, "grad_norm": 1.5064101591621921, "learning_rate": 8.762555332172095e-06, "loss": 0.7789, "step": 5417 }, { "epoch": 0.5534218590398365, "grad_norm": 1.3848635611319071, "learning_rate": 8.75927249479121e-06, "loss": 0.6135, "step": 5418 }, { "epoch": 0.5535240040858018, "grad_norm": 1.3957637655466684, "learning_rate": 8.755989793208757e-06, "loss": 0.6932, "step": 5419 }, { "epoch": 0.5536261491317671, "grad_norm": 1.3984161187758415, "learning_rate": 8.752707227784021e-06, "loss": 0.6831, "step": 5420 }, { "epoch": 0.5537282941777324, "grad_norm": 1.3712990644502543, "learning_rate": 8.749424798876289e-06, "loss": 0.6204, "step": 5421 }, { "epoch": 0.5538304392236977, "grad_norm": 1.3868031620723829, "learning_rate": 8.746142506844816e-06, "loss": 0.7131, "step": 5422 }, { "epoch": 0.553932584269663, "grad_norm": 1.3183002560168648, "learning_rate": 8.742860352048854e-06, "loss": 0.7191, "step": 5423 }, { "epoch": 0.5540347293156281, "grad_norm": 1.5646449904463107, "learning_rate": 8.739578334847645e-06, "loss": 0.6902, "step": 5424 }, { "epoch": 0.5541368743615934, "grad_norm": 1.45429524617401, "learning_rate": 8.736296455600396e-06, "loss": 0.7357, "step": 5425 }, { "epoch": 0.5542390194075587, "grad_norm": 1.4197719026293592, "learning_rate": 8.733014714666322e-06, "loss": 0.669, "step": 5426 }, { "epoch": 0.554341164453524, "grad_norm": 1.421260438089345, "learning_rate": 8.729733112404603e-06, "loss": 0.6578, "step": 5427 }, { "epoch": 0.5544433094994893, "grad_norm": 1.5623988841892025, "learning_rate": 8.726451649174417e-06, "loss": 0.7682, "step": 5428 }, { "epoch": 0.5545454545454546, "grad_norm": 1.5197618045307155, "learning_rate": 8.72317032533492e-06, "loss": 0.72, "step": 5429 }, { "epoch": 0.5546475995914198, "grad_norm": 1.528543427900039, "learning_rate": 8.719889141245255e-06, "loss": 0.7179, "step": 5430 }, { "epoch": 0.5547497446373851, "grad_norm": 1.5208838908753068, "learning_rate": 8.71660809726455e-06, "loss": 0.7673, "step": 5431 }, { "epoch": 0.5548518896833503, "grad_norm": 1.4274537855602016, "learning_rate": 8.713327193751918e-06, "loss": 0.6853, "step": 5432 }, { "epoch": 0.5549540347293156, "grad_norm": 1.685769011154593, "learning_rate": 8.710046431066458e-06, "loss": 0.7788, "step": 5433 }, { "epoch": 0.5550561797752809, "grad_norm": 1.405275362485287, "learning_rate": 8.70676580956725e-06, "loss": 0.8027, "step": 5434 }, { "epoch": 0.5551583248212462, "grad_norm": 1.5163393188140282, "learning_rate": 8.703485329613357e-06, "loss": 0.7, "step": 5435 }, { "epoch": 0.5552604698672114, "grad_norm": 1.3829420905283298, "learning_rate": 8.700204991563835e-06, "loss": 0.6525, "step": 5436 }, { "epoch": 0.5553626149131767, "grad_norm": 1.4390024847657423, "learning_rate": 8.696924795777715e-06, "loss": 0.7738, "step": 5437 }, { "epoch": 0.555464759959142, "grad_norm": 1.3109865126460842, "learning_rate": 8.693644742614018e-06, "loss": 0.5716, "step": 5438 }, { "epoch": 0.5555669050051073, "grad_norm": 1.3243140838657725, "learning_rate": 8.690364832431748e-06, "loss": 0.6979, "step": 5439 }, { "epoch": 0.5556690500510725, "grad_norm": 1.3421238460418488, "learning_rate": 8.68708506558989e-06, "loss": 0.6797, "step": 5440 }, { "epoch": 0.5557711950970378, "grad_norm": 1.3511069491946468, "learning_rate": 8.683805442447425e-06, "loss": 0.6522, "step": 5441 }, { "epoch": 0.555873340143003, "grad_norm": 1.243650514447468, "learning_rate": 8.680525963363301e-06, "loss": 0.6051, "step": 5442 }, { "epoch": 0.5559754851889683, "grad_norm": 1.4258369976754803, "learning_rate": 8.677246628696466e-06, "loss": 0.6891, "step": 5443 }, { "epoch": 0.5560776302349336, "grad_norm": 1.4255802384811038, "learning_rate": 8.673967438805838e-06, "loss": 0.5942, "step": 5444 }, { "epoch": 0.5561797752808989, "grad_norm": 1.4796087527682718, "learning_rate": 8.670688394050336e-06, "loss": 0.6625, "step": 5445 }, { "epoch": 0.5562819203268642, "grad_norm": 1.3559543353005912, "learning_rate": 8.667409494788844e-06, "loss": 0.7788, "step": 5446 }, { "epoch": 0.5563840653728295, "grad_norm": 1.4652009230750216, "learning_rate": 8.664130741380247e-06, "loss": 0.681, "step": 5447 }, { "epoch": 0.5564862104187946, "grad_norm": 1.3769266874580246, "learning_rate": 8.660852134183398e-06, "loss": 0.6092, "step": 5448 }, { "epoch": 0.5565883554647599, "grad_norm": 1.5053238975994778, "learning_rate": 8.657573673557152e-06, "loss": 0.7319, "step": 5449 }, { "epoch": 0.5566905005107252, "grad_norm": 1.6280561634994386, "learning_rate": 8.654295359860334e-06, "loss": 0.7001, "step": 5450 }, { "epoch": 0.5567926455566905, "grad_norm": 1.365606013798107, "learning_rate": 8.65101719345176e-06, "loss": 0.7117, "step": 5451 }, { "epoch": 0.5568947906026558, "grad_norm": 1.528620140340669, "learning_rate": 8.647739174690224e-06, "loss": 0.7467, "step": 5452 }, { "epoch": 0.5569969356486211, "grad_norm": 1.3581623356761632, "learning_rate": 8.64446130393451e-06, "loss": 0.6924, "step": 5453 }, { "epoch": 0.5570990806945864, "grad_norm": 1.4831497456138345, "learning_rate": 8.641183581543382e-06, "loss": 0.6535, "step": 5454 }, { "epoch": 0.5572012257405515, "grad_norm": 1.6280099037221463, "learning_rate": 8.63790600787559e-06, "loss": 0.7211, "step": 5455 }, { "epoch": 0.5573033707865168, "grad_norm": 1.3588971234407978, "learning_rate": 8.634628583289861e-06, "loss": 0.6252, "step": 5456 }, { "epoch": 0.5574055158324821, "grad_norm": 1.6751376987740338, "learning_rate": 8.631351308144916e-06, "loss": 0.693, "step": 5457 }, { "epoch": 0.5575076608784474, "grad_norm": 1.464127105438117, "learning_rate": 8.628074182799458e-06, "loss": 0.6311, "step": 5458 }, { "epoch": 0.5576098059244127, "grad_norm": 1.573566640975009, "learning_rate": 8.624797207612166e-06, "loss": 0.7222, "step": 5459 }, { "epoch": 0.557711950970378, "grad_norm": 1.6102868761297027, "learning_rate": 8.62152038294171e-06, "loss": 0.727, "step": 5460 }, { "epoch": 0.5578140960163432, "grad_norm": 1.345739520910247, "learning_rate": 8.618243709146737e-06, "loss": 0.6615, "step": 5461 }, { "epoch": 0.5579162410623085, "grad_norm": 1.464769936734692, "learning_rate": 8.614967186585882e-06, "loss": 0.7667, "step": 5462 }, { "epoch": 0.5580183861082737, "grad_norm": 1.4420039795719477, "learning_rate": 8.611690815617764e-06, "loss": 0.7675, "step": 5463 }, { "epoch": 0.558120531154239, "grad_norm": 1.9194002966031567, "learning_rate": 8.60841459660098e-06, "loss": 0.6199, "step": 5464 }, { "epoch": 0.5582226762002043, "grad_norm": 1.5541804811824884, "learning_rate": 8.605138529894122e-06, "loss": 0.7915, "step": 5465 }, { "epoch": 0.5583248212461696, "grad_norm": 1.4434386313334997, "learning_rate": 8.601862615855752e-06, "loss": 0.6898, "step": 5466 }, { "epoch": 0.5584269662921348, "grad_norm": 1.4632940856441208, "learning_rate": 8.598586854844422e-06, "loss": 0.7327, "step": 5467 }, { "epoch": 0.5585291113381001, "grad_norm": 1.4861320118181407, "learning_rate": 8.595311247218667e-06, "loss": 0.6306, "step": 5468 }, { "epoch": 0.5586312563840654, "grad_norm": 1.3922441489611643, "learning_rate": 8.592035793337002e-06, "loss": 0.7341, "step": 5469 }, { "epoch": 0.5587334014300307, "grad_norm": 1.5279809241627287, "learning_rate": 8.588760493557932e-06, "loss": 0.7231, "step": 5470 }, { "epoch": 0.5588355464759959, "grad_norm": 1.5437193032512637, "learning_rate": 8.585485348239934e-06, "loss": 0.6459, "step": 5471 }, { "epoch": 0.5589376915219612, "grad_norm": 1.4370671776973034, "learning_rate": 8.582210357741476e-06, "loss": 0.8095, "step": 5472 }, { "epoch": 0.5590398365679264, "grad_norm": 1.421953655487678, "learning_rate": 8.578935522421015e-06, "loss": 0.6978, "step": 5473 }, { "epoch": 0.5591419816138917, "grad_norm": 1.4793207270143711, "learning_rate": 8.575660842636979e-06, "loss": 0.7619, "step": 5474 }, { "epoch": 0.559244126659857, "grad_norm": 1.406622688147467, "learning_rate": 8.572386318747784e-06, "loss": 0.6276, "step": 5475 }, { "epoch": 0.5593462717058223, "grad_norm": 1.4989869654068735, "learning_rate": 8.569111951111828e-06, "loss": 0.7373, "step": 5476 }, { "epoch": 0.5594484167517876, "grad_norm": 1.4979384028884712, "learning_rate": 8.565837740087495e-06, "loss": 0.6497, "step": 5477 }, { "epoch": 0.5595505617977528, "grad_norm": 1.3751450191665806, "learning_rate": 8.562563686033145e-06, "loss": 0.6791, "step": 5478 }, { "epoch": 0.559652706843718, "grad_norm": 1.4237708214715832, "learning_rate": 8.559289789307131e-06, "loss": 0.7264, "step": 5479 }, { "epoch": 0.5597548518896833, "grad_norm": 1.415150732266702, "learning_rate": 8.556016050267776e-06, "loss": 0.7407, "step": 5480 }, { "epoch": 0.5598569969356486, "grad_norm": 1.5448474830362244, "learning_rate": 8.5527424692734e-06, "loss": 0.7058, "step": 5481 }, { "epoch": 0.5599591419816139, "grad_norm": 1.5640862993742257, "learning_rate": 8.549469046682297e-06, "loss": 0.7053, "step": 5482 }, { "epoch": 0.5600612870275792, "grad_norm": 1.3953667688424205, "learning_rate": 8.546195782852743e-06, "loss": 0.739, "step": 5483 }, { "epoch": 0.5601634320735445, "grad_norm": 1.4822673817737033, "learning_rate": 8.542922678143001e-06, "loss": 0.72, "step": 5484 }, { "epoch": 0.5602655771195098, "grad_norm": 1.546441801790899, "learning_rate": 8.539649732911315e-06, "loss": 0.7618, "step": 5485 }, { "epoch": 0.5603677221654749, "grad_norm": 1.4847525772552743, "learning_rate": 8.536376947515905e-06, "loss": 0.663, "step": 5486 }, { "epoch": 0.5604698672114402, "grad_norm": 1.4454512965490551, "learning_rate": 8.533104322314987e-06, "loss": 0.7705, "step": 5487 }, { "epoch": 0.5605720122574055, "grad_norm": 1.3872770038146887, "learning_rate": 8.529831857666744e-06, "loss": 0.6615, "step": 5488 }, { "epoch": 0.5606741573033708, "grad_norm": 1.4999023399938949, "learning_rate": 8.526559553929356e-06, "loss": 0.7331, "step": 5489 }, { "epoch": 0.5607763023493361, "grad_norm": 1.5416529430314083, "learning_rate": 8.523287411460979e-06, "loss": 0.6665, "step": 5490 }, { "epoch": 0.5608784473953013, "grad_norm": 1.2950520231324016, "learning_rate": 8.520015430619747e-06, "loss": 0.6482, "step": 5491 }, { "epoch": 0.5609805924412666, "grad_norm": 1.3550668051513932, "learning_rate": 8.516743611763783e-06, "loss": 0.6924, "step": 5492 }, { "epoch": 0.5610827374872319, "grad_norm": 1.5292348681204713, "learning_rate": 8.51347195525119e-06, "loss": 0.7744, "step": 5493 }, { "epoch": 0.5611848825331971, "grad_norm": 1.353489747020099, "learning_rate": 8.510200461440052e-06, "loss": 0.6871, "step": 5494 }, { "epoch": 0.5612870275791624, "grad_norm": 1.4846507082997045, "learning_rate": 8.506929130688433e-06, "loss": 0.7412, "step": 5495 }, { "epoch": 0.5613891726251277, "grad_norm": 1.333680929176951, "learning_rate": 8.503657963354385e-06, "loss": 0.8209, "step": 5496 }, { "epoch": 0.561491317671093, "grad_norm": 1.5625060145019278, "learning_rate": 8.500386959795944e-06, "loss": 0.7314, "step": 5497 }, { "epoch": 0.5615934627170582, "grad_norm": 1.5551643447656638, "learning_rate": 8.497116120371114e-06, "loss": 0.7978, "step": 5498 }, { "epoch": 0.5616956077630235, "grad_norm": 1.5075140229202546, "learning_rate": 8.493845445437901e-06, "loss": 0.6843, "step": 5499 }, { "epoch": 0.5617977528089888, "grad_norm": 1.539760279268995, "learning_rate": 8.490574935354274e-06, "loss": 0.7533, "step": 5500 }, { "epoch": 0.5618998978549541, "grad_norm": 1.3792139374035617, "learning_rate": 8.487304590478197e-06, "loss": 0.5998, "step": 5501 }, { "epoch": 0.5620020429009193, "grad_norm": 1.5015004024018868, "learning_rate": 8.484034411167611e-06, "loss": 0.7739, "step": 5502 }, { "epoch": 0.5621041879468845, "grad_norm": 1.4648735723152027, "learning_rate": 8.480764397780435e-06, "loss": 0.7227, "step": 5503 }, { "epoch": 0.5622063329928498, "grad_norm": 1.4295299318741348, "learning_rate": 8.477494550674576e-06, "loss": 0.6866, "step": 5504 }, { "epoch": 0.5623084780388151, "grad_norm": 1.449077699015867, "learning_rate": 8.474224870207926e-06, "loss": 0.742, "step": 5505 }, { "epoch": 0.5624106230847804, "grad_norm": 1.4739607128657262, "learning_rate": 8.470955356738347e-06, "loss": 0.7125, "step": 5506 }, { "epoch": 0.5625127681307457, "grad_norm": 1.4406486107354575, "learning_rate": 8.467686010623694e-06, "loss": 0.6818, "step": 5507 }, { "epoch": 0.562614913176711, "grad_norm": 1.4616967279507533, "learning_rate": 8.464416832221797e-06, "loss": 0.7278, "step": 5508 }, { "epoch": 0.5627170582226761, "grad_norm": 1.3173178707164772, "learning_rate": 8.46114782189047e-06, "loss": 0.7004, "step": 5509 }, { "epoch": 0.5628192032686414, "grad_norm": 1.545110222622579, "learning_rate": 8.457878979987507e-06, "loss": 0.7876, "step": 5510 }, { "epoch": 0.5629213483146067, "grad_norm": 1.450836591062816, "learning_rate": 8.454610306870688e-06, "loss": 0.6435, "step": 5511 }, { "epoch": 0.563023493360572, "grad_norm": 1.4712818830344108, "learning_rate": 8.451341802897764e-06, "loss": 0.8102, "step": 5512 }, { "epoch": 0.5631256384065373, "grad_norm": 1.5437649731579361, "learning_rate": 8.448073468426483e-06, "loss": 0.6643, "step": 5513 }, { "epoch": 0.5632277834525026, "grad_norm": 1.681499234085543, "learning_rate": 8.444805303814566e-06, "loss": 0.6865, "step": 5514 }, { "epoch": 0.5633299284984679, "grad_norm": 1.4579460375539743, "learning_rate": 8.441537309419713e-06, "loss": 0.7953, "step": 5515 }, { "epoch": 0.5634320735444331, "grad_norm": 1.602627341735896, "learning_rate": 8.438269485599606e-06, "loss": 0.7554, "step": 5516 }, { "epoch": 0.5635342185903983, "grad_norm": 1.5134446688913048, "learning_rate": 8.435001832711915e-06, "loss": 0.602, "step": 5517 }, { "epoch": 0.5636363636363636, "grad_norm": 1.6286407042271358, "learning_rate": 8.431734351114285e-06, "loss": 0.7646, "step": 5518 }, { "epoch": 0.5637385086823289, "grad_norm": 1.4950181869379995, "learning_rate": 8.428467041164341e-06, "loss": 0.6759, "step": 5519 }, { "epoch": 0.5638406537282942, "grad_norm": 1.3760183421170613, "learning_rate": 8.425199903219693e-06, "loss": 0.7645, "step": 5520 }, { "epoch": 0.5639427987742595, "grad_norm": 1.4446162890346474, "learning_rate": 8.421932937637936e-06, "loss": 0.7053, "step": 5521 }, { "epoch": 0.5640449438202247, "grad_norm": 1.4756902464276427, "learning_rate": 8.41866614477664e-06, "loss": 0.6366, "step": 5522 }, { "epoch": 0.56414708886619, "grad_norm": 1.3881349447001143, "learning_rate": 8.415399524993355e-06, "loss": 0.6912, "step": 5523 }, { "epoch": 0.5642492339121553, "grad_norm": 1.5122236258105692, "learning_rate": 8.412133078645616e-06, "loss": 0.7325, "step": 5524 }, { "epoch": 0.5643513789581205, "grad_norm": 1.452961049797863, "learning_rate": 8.408866806090936e-06, "loss": 0.7303, "step": 5525 }, { "epoch": 0.5644535240040858, "grad_norm": 1.263921346814425, "learning_rate": 8.405600707686815e-06, "loss": 0.6591, "step": 5526 }, { "epoch": 0.5645556690500511, "grad_norm": 1.6093603868663224, "learning_rate": 8.402334783790722e-06, "loss": 0.7768, "step": 5527 }, { "epoch": 0.5646578140960163, "grad_norm": 1.4359749128475645, "learning_rate": 8.399069034760119e-06, "loss": 0.7257, "step": 5528 }, { "epoch": 0.5647599591419816, "grad_norm": 1.4561805674047275, "learning_rate": 8.395803460952448e-06, "loss": 0.7267, "step": 5529 }, { "epoch": 0.5648621041879469, "grad_norm": 1.3748865796071352, "learning_rate": 8.392538062725123e-06, "loss": 0.6886, "step": 5530 }, { "epoch": 0.5649642492339122, "grad_norm": 1.6115678313747757, "learning_rate": 8.389272840435548e-06, "loss": 0.7802, "step": 5531 }, { "epoch": 0.5650663942798774, "grad_norm": 1.5146207628384138, "learning_rate": 8.386007794441098e-06, "loss": 0.7429, "step": 5532 }, { "epoch": 0.5651685393258427, "grad_norm": 1.5020452005872036, "learning_rate": 8.382742925099141e-06, "loss": 0.7101, "step": 5533 }, { "epoch": 0.5652706843718079, "grad_norm": 1.4195709083650634, "learning_rate": 8.379478232767014e-06, "loss": 0.5951, "step": 5534 }, { "epoch": 0.5653728294177732, "grad_norm": 1.4541113404505992, "learning_rate": 8.376213717802042e-06, "loss": 0.709, "step": 5535 }, { "epoch": 0.5654749744637385, "grad_norm": 1.2191889759574968, "learning_rate": 8.372949380561523e-06, "loss": 0.8121, "step": 5536 }, { "epoch": 0.5655771195097038, "grad_norm": 1.3374943295366444, "learning_rate": 8.36968522140275e-06, "loss": 0.7005, "step": 5537 }, { "epoch": 0.5656792645556691, "grad_norm": 1.339263273006483, "learning_rate": 8.366421240682983e-06, "loss": 0.6471, "step": 5538 }, { "epoch": 0.5657814096016344, "grad_norm": 1.5284134081229392, "learning_rate": 8.363157438759469e-06, "loss": 0.7397, "step": 5539 }, { "epoch": 0.5658835546475995, "grad_norm": 1.4117463498978489, "learning_rate": 8.359893815989425e-06, "loss": 0.6237, "step": 5540 }, { "epoch": 0.5659856996935648, "grad_norm": 1.4771856313297753, "learning_rate": 8.356630372730068e-06, "loss": 0.7695, "step": 5541 }, { "epoch": 0.5660878447395301, "grad_norm": 1.4193530252703073, "learning_rate": 8.353367109338576e-06, "loss": 0.753, "step": 5542 }, { "epoch": 0.5661899897854954, "grad_norm": 1.5988505931947978, "learning_rate": 8.350104026172118e-06, "loss": 0.6664, "step": 5543 }, { "epoch": 0.5662921348314607, "grad_norm": 1.2970332457595837, "learning_rate": 8.346841123587836e-06, "loss": 0.5533, "step": 5544 }, { "epoch": 0.566394279877426, "grad_norm": 1.3558863327370871, "learning_rate": 8.343578401942865e-06, "loss": 0.7161, "step": 5545 }, { "epoch": 0.5664964249233913, "grad_norm": 1.2766845592774723, "learning_rate": 8.340315861594309e-06, "loss": 0.6098, "step": 5546 }, { "epoch": 0.5665985699693565, "grad_norm": 1.494831160185546, "learning_rate": 8.337053502899253e-06, "loss": 0.7157, "step": 5547 }, { "epoch": 0.5667007150153217, "grad_norm": 1.3988168347511352, "learning_rate": 8.333791326214767e-06, "loss": 0.6124, "step": 5548 }, { "epoch": 0.566802860061287, "grad_norm": 1.4021994741444659, "learning_rate": 8.330529331897895e-06, "loss": 0.6938, "step": 5549 }, { "epoch": 0.5669050051072523, "grad_norm": 1.4701462153986604, "learning_rate": 8.327267520305669e-06, "loss": 0.7549, "step": 5550 }, { "epoch": 0.5670071501532176, "grad_norm": 1.301337063550517, "learning_rate": 8.32400589179509e-06, "loss": 0.601, "step": 5551 }, { "epoch": 0.5671092951991829, "grad_norm": 1.5803205715035384, "learning_rate": 8.320744446723149e-06, "loss": 0.7899, "step": 5552 }, { "epoch": 0.5672114402451481, "grad_norm": 1.332096685267543, "learning_rate": 8.317483185446815e-06, "loss": 0.6942, "step": 5553 }, { "epoch": 0.5673135852911134, "grad_norm": 1.4207245341357648, "learning_rate": 8.314222108323033e-06, "loss": 0.6795, "step": 5554 }, { "epoch": 0.5674157303370787, "grad_norm": 1.4870848608321372, "learning_rate": 8.310961215708731e-06, "loss": 0.7063, "step": 5555 }, { "epoch": 0.5675178753830439, "grad_norm": 1.4859237206022573, "learning_rate": 8.307700507960817e-06, "loss": 0.6861, "step": 5556 }, { "epoch": 0.5676200204290092, "grad_norm": 1.5191927850781612, "learning_rate": 8.304439985436172e-06, "loss": 0.5831, "step": 5557 }, { "epoch": 0.5677221654749744, "grad_norm": 1.4209278700463186, "learning_rate": 8.301179648491669e-06, "loss": 0.6666, "step": 5558 }, { "epoch": 0.5678243105209397, "grad_norm": 1.4714484289948508, "learning_rate": 8.297919497484148e-06, "loss": 0.781, "step": 5559 }, { "epoch": 0.567926455566905, "grad_norm": 1.3288912110969469, "learning_rate": 8.294659532770437e-06, "loss": 0.6737, "step": 5560 }, { "epoch": 0.5680286006128703, "grad_norm": 1.419918056426505, "learning_rate": 8.291399754707346e-06, "loss": 0.7343, "step": 5561 }, { "epoch": 0.5681307456588356, "grad_norm": 1.4191260281212257, "learning_rate": 8.288140163651652e-06, "loss": 0.7239, "step": 5562 }, { "epoch": 0.5682328907048008, "grad_norm": 1.4969376217971684, "learning_rate": 8.284880759960128e-06, "loss": 0.7231, "step": 5563 }, { "epoch": 0.568335035750766, "grad_norm": 1.4014358980120416, "learning_rate": 8.281621543989508e-06, "loss": 0.6127, "step": 5564 }, { "epoch": 0.5684371807967313, "grad_norm": 1.4588840308043796, "learning_rate": 8.278362516096524e-06, "loss": 0.67, "step": 5565 }, { "epoch": 0.5685393258426966, "grad_norm": 1.534157437069656, "learning_rate": 8.275103676637872e-06, "loss": 0.7857, "step": 5566 }, { "epoch": 0.5686414708886619, "grad_norm": 1.443239945167279, "learning_rate": 8.27184502597024e-06, "loss": 0.7843, "step": 5567 }, { "epoch": 0.5687436159346272, "grad_norm": 1.4194112044353462, "learning_rate": 8.268586564450282e-06, "loss": 0.7269, "step": 5568 }, { "epoch": 0.5688457609805925, "grad_norm": 1.2209366142601779, "learning_rate": 8.265328292434644e-06, "loss": 0.5803, "step": 5569 }, { "epoch": 0.5689479060265578, "grad_norm": 1.2860033587050994, "learning_rate": 8.262070210279949e-06, "loss": 0.6097, "step": 5570 }, { "epoch": 0.5690500510725229, "grad_norm": 1.5168856969051356, "learning_rate": 8.258812318342789e-06, "loss": 0.7797, "step": 5571 }, { "epoch": 0.5691521961184882, "grad_norm": 1.4309638441955725, "learning_rate": 8.255554616979748e-06, "loss": 0.581, "step": 5572 }, { "epoch": 0.5692543411644535, "grad_norm": 1.556464091114335, "learning_rate": 8.252297106547382e-06, "loss": 0.7073, "step": 5573 }, { "epoch": 0.5693564862104188, "grad_norm": 1.2960146281569849, "learning_rate": 8.249039787402224e-06, "loss": 0.6786, "step": 5574 }, { "epoch": 0.5694586312563841, "grad_norm": 1.3832064420949621, "learning_rate": 8.245782659900796e-06, "loss": 0.6894, "step": 5575 }, { "epoch": 0.5695607763023494, "grad_norm": 1.4977078386221925, "learning_rate": 8.242525724399584e-06, "loss": 0.7585, "step": 5576 }, { "epoch": 0.5696629213483146, "grad_norm": 1.5602924146920711, "learning_rate": 8.239268981255069e-06, "loss": 0.7245, "step": 5577 }, { "epoch": 0.5697650663942799, "grad_norm": 1.3789628662314972, "learning_rate": 8.236012430823703e-06, "loss": 0.5995, "step": 5578 }, { "epoch": 0.5698672114402451, "grad_norm": 1.3379489313129043, "learning_rate": 8.232756073461915e-06, "loss": 0.5899, "step": 5579 }, { "epoch": 0.5699693564862104, "grad_norm": 1.3440329832094646, "learning_rate": 8.229499909526117e-06, "loss": 0.6151, "step": 5580 }, { "epoch": 0.5700715015321757, "grad_norm": 1.6416064035886635, "learning_rate": 8.226243939372698e-06, "loss": 0.7515, "step": 5581 }, { "epoch": 0.570173646578141, "grad_norm": 1.5116545250869746, "learning_rate": 8.222988163358028e-06, "loss": 0.6849, "step": 5582 }, { "epoch": 0.5702757916241062, "grad_norm": 1.6471497263817951, "learning_rate": 8.219732581838447e-06, "loss": 0.7142, "step": 5583 }, { "epoch": 0.5703779366700715, "grad_norm": 1.537439691228659, "learning_rate": 8.216477195170285e-06, "loss": 0.7651, "step": 5584 }, { "epoch": 0.5704800817160368, "grad_norm": 1.4430194669165006, "learning_rate": 8.21322200370985e-06, "loss": 0.6656, "step": 5585 }, { "epoch": 0.5705822267620021, "grad_norm": 1.4725616895800748, "learning_rate": 8.209967007813419e-06, "loss": 0.6001, "step": 5586 }, { "epoch": 0.5706843718079673, "grad_norm": 1.4823640667033064, "learning_rate": 8.206712207837257e-06, "loss": 0.8307, "step": 5587 }, { "epoch": 0.5707865168539326, "grad_norm": 1.537808750675819, "learning_rate": 8.2034576041376e-06, "loss": 0.8112, "step": 5588 }, { "epoch": 0.5708886618998978, "grad_norm": 1.4653883943868213, "learning_rate": 8.200203197070673e-06, "loss": 0.7844, "step": 5589 }, { "epoch": 0.5709908069458631, "grad_norm": 1.5254797649357665, "learning_rate": 8.196948986992667e-06, "loss": 0.7117, "step": 5590 }, { "epoch": 0.5710929519918284, "grad_norm": 1.6050926131275574, "learning_rate": 8.193694974259759e-06, "loss": 0.7696, "step": 5591 }, { "epoch": 0.5711950970377937, "grad_norm": 1.3376944891028861, "learning_rate": 8.1904411592281e-06, "loss": 0.6248, "step": 5592 }, { "epoch": 0.571297242083759, "grad_norm": 1.5811762962144547, "learning_rate": 8.18718754225383e-06, "loss": 0.6773, "step": 5593 }, { "epoch": 0.5713993871297242, "grad_norm": 1.578578112276457, "learning_rate": 8.183934123693052e-06, "loss": 0.7359, "step": 5594 }, { "epoch": 0.5715015321756894, "grad_norm": 1.4489308227038415, "learning_rate": 8.18068090390186e-06, "loss": 0.7416, "step": 5595 }, { "epoch": 0.5716036772216547, "grad_norm": 1.416339811888468, "learning_rate": 8.177427883236316e-06, "loss": 0.613, "step": 5596 }, { "epoch": 0.57170582226762, "grad_norm": 1.456391453880688, "learning_rate": 8.17417506205247e-06, "loss": 0.6652, "step": 5597 }, { "epoch": 0.5718079673135853, "grad_norm": 1.610523099205285, "learning_rate": 8.170922440706342e-06, "loss": 0.6861, "step": 5598 }, { "epoch": 0.5719101123595506, "grad_norm": 1.4646832894356274, "learning_rate": 8.167670019553934e-06, "loss": 0.6328, "step": 5599 }, { "epoch": 0.5720122574055159, "grad_norm": 1.533819052881376, "learning_rate": 8.164417798951224e-06, "loss": 0.6548, "step": 5600 }, { "epoch": 0.5721144024514812, "grad_norm": 1.6023117174354504, "learning_rate": 8.161165779254174e-06, "loss": 0.732, "step": 5601 }, { "epoch": 0.5722165474974463, "grad_norm": 1.5340524639347695, "learning_rate": 8.15791396081872e-06, "loss": 0.7461, "step": 5602 }, { "epoch": 0.5723186925434116, "grad_norm": 1.4974045811774361, "learning_rate": 8.154662344000769e-06, "loss": 0.7844, "step": 5603 }, { "epoch": 0.5724208375893769, "grad_norm": 1.37114688318297, "learning_rate": 8.15141092915622e-06, "loss": 0.7016, "step": 5604 }, { "epoch": 0.5725229826353422, "grad_norm": 1.3934491876109605, "learning_rate": 8.148159716640938e-06, "loss": 0.6403, "step": 5605 }, { "epoch": 0.5726251276813075, "grad_norm": 1.5270139773915146, "learning_rate": 8.144908706810772e-06, "loss": 0.6498, "step": 5606 }, { "epoch": 0.5727272727272728, "grad_norm": 1.5281764668417457, "learning_rate": 8.141657900021544e-06, "loss": 0.7074, "step": 5607 }, { "epoch": 0.572829417773238, "grad_norm": 1.4273756275210643, "learning_rate": 8.13840729662906e-06, "loss": 0.7398, "step": 5608 }, { "epoch": 0.5729315628192033, "grad_norm": 1.5579555294669083, "learning_rate": 8.135156896989103e-06, "loss": 0.6671, "step": 5609 }, { "epoch": 0.5730337078651685, "grad_norm": 1.517223092753926, "learning_rate": 8.131906701457427e-06, "loss": 0.5893, "step": 5610 }, { "epoch": 0.5731358529111338, "grad_norm": 1.4562633032354642, "learning_rate": 8.128656710389769e-06, "loss": 0.6092, "step": 5611 }, { "epoch": 0.5732379979570991, "grad_norm": 1.589556494506836, "learning_rate": 8.125406924141846e-06, "loss": 0.7022, "step": 5612 }, { "epoch": 0.5733401430030644, "grad_norm": 1.4223103158637547, "learning_rate": 8.12215734306934e-06, "loss": 0.807, "step": 5613 }, { "epoch": 0.5734422880490296, "grad_norm": 1.436318568788936, "learning_rate": 8.118907967527933e-06, "loss": 0.7285, "step": 5614 }, { "epoch": 0.5735444330949949, "grad_norm": 1.4105874159738752, "learning_rate": 8.11565879787326e-06, "loss": 0.6907, "step": 5615 }, { "epoch": 0.5736465781409602, "grad_norm": 1.2552510874679075, "learning_rate": 8.112409834460949e-06, "loss": 0.6898, "step": 5616 }, { "epoch": 0.5737487231869254, "grad_norm": 1.5381584408876339, "learning_rate": 8.109161077646602e-06, "loss": 0.7167, "step": 5617 }, { "epoch": 0.5738508682328907, "grad_norm": 1.5725350551623212, "learning_rate": 8.105912527785797e-06, "loss": 0.7369, "step": 5618 }, { "epoch": 0.573953013278856, "grad_norm": 1.5268710241750048, "learning_rate": 8.10266418523409e-06, "loss": 0.7832, "step": 5619 }, { "epoch": 0.5740551583248212, "grad_norm": 1.4219351563476499, "learning_rate": 8.099416050347013e-06, "loss": 0.7062, "step": 5620 }, { "epoch": 0.5741573033707865, "grad_norm": 1.582483450045852, "learning_rate": 8.09616812348008e-06, "loss": 0.7934, "step": 5621 }, { "epoch": 0.5742594484167518, "grad_norm": 1.5381008434198968, "learning_rate": 8.092920404988771e-06, "loss": 0.8042, "step": 5622 }, { "epoch": 0.5743615934627171, "grad_norm": 1.428464280513046, "learning_rate": 8.08967289522856e-06, "loss": 0.7316, "step": 5623 }, { "epoch": 0.5744637385086824, "grad_norm": 1.4431869477896824, "learning_rate": 8.086425594554882e-06, "loss": 0.6957, "step": 5624 }, { "epoch": 0.5745658835546475, "grad_norm": 1.4477018724438273, "learning_rate": 8.083178503323156e-06, "loss": 0.6335, "step": 5625 }, { "epoch": 0.5746680286006128, "grad_norm": 1.2804855625032063, "learning_rate": 8.079931621888788e-06, "loss": 0.669, "step": 5626 }, { "epoch": 0.5747701736465781, "grad_norm": 1.2175355735284705, "learning_rate": 8.076684950607143e-06, "loss": 0.5338, "step": 5627 }, { "epoch": 0.5748723186925434, "grad_norm": 1.4883113317634589, "learning_rate": 8.073438489833572e-06, "loss": 0.6681, "step": 5628 }, { "epoch": 0.5749744637385087, "grad_norm": 1.3559301533091768, "learning_rate": 8.070192239923403e-06, "loss": 0.7148, "step": 5629 }, { "epoch": 0.575076608784474, "grad_norm": 1.404713633374025, "learning_rate": 8.06694620123194e-06, "loss": 0.6721, "step": 5630 }, { "epoch": 0.5751787538304393, "grad_norm": 1.317258386008676, "learning_rate": 8.063700374114465e-06, "loss": 0.5983, "step": 5631 }, { "epoch": 0.5752808988764045, "grad_norm": 1.4909351428117925, "learning_rate": 8.060454758926231e-06, "loss": 0.7412, "step": 5632 }, { "epoch": 0.5753830439223697, "grad_norm": 1.3777339864925924, "learning_rate": 8.057209356022479e-06, "loss": 0.7076, "step": 5633 }, { "epoch": 0.575485188968335, "grad_norm": 1.5536966310960205, "learning_rate": 8.05396416575842e-06, "loss": 0.784, "step": 5634 }, { "epoch": 0.5755873340143003, "grad_norm": 1.4816599492126254, "learning_rate": 8.050719188489238e-06, "loss": 0.693, "step": 5635 }, { "epoch": 0.5756894790602656, "grad_norm": 1.4859062637553826, "learning_rate": 8.047474424570102e-06, "loss": 0.7171, "step": 5636 }, { "epoch": 0.5757916241062309, "grad_norm": 1.3286036844400726, "learning_rate": 8.044229874356153e-06, "loss": 0.6409, "step": 5637 }, { "epoch": 0.5758937691521961, "grad_norm": 1.5573625662073798, "learning_rate": 8.040985538202506e-06, "loss": 0.7284, "step": 5638 }, { "epoch": 0.5759959141981614, "grad_norm": 1.5523779803189943, "learning_rate": 8.037741416464258e-06, "loss": 0.7295, "step": 5639 }, { "epoch": 0.5760980592441267, "grad_norm": 1.2871225483809485, "learning_rate": 8.034497509496477e-06, "loss": 0.69, "step": 5640 }, { "epoch": 0.5762002042900919, "grad_norm": 1.4060997871871321, "learning_rate": 8.031253817654216e-06, "loss": 0.7999, "step": 5641 }, { "epoch": 0.5763023493360572, "grad_norm": 1.4468961881177353, "learning_rate": 8.028010341292496e-06, "loss": 0.6694, "step": 5642 }, { "epoch": 0.5764044943820225, "grad_norm": 1.6859879145002146, "learning_rate": 8.02476708076632e-06, "loss": 0.7437, "step": 5643 }, { "epoch": 0.5765066394279877, "grad_norm": 1.539003912969649, "learning_rate": 8.021524036430662e-06, "loss": 0.7777, "step": 5644 }, { "epoch": 0.576608784473953, "grad_norm": 1.332350836527585, "learning_rate": 8.018281208640477e-06, "loss": 0.6686, "step": 5645 }, { "epoch": 0.5767109295199183, "grad_norm": 1.4736025610737644, "learning_rate": 8.015038597750694e-06, "loss": 0.6822, "step": 5646 }, { "epoch": 0.5768130745658836, "grad_norm": 1.4286372783490593, "learning_rate": 8.011796204116218e-06, "loss": 0.6398, "step": 5647 }, { "epoch": 0.5769152196118488, "grad_norm": 1.4752399836300267, "learning_rate": 8.00855402809193e-06, "loss": 0.6808, "step": 5648 }, { "epoch": 0.5770173646578141, "grad_norm": 1.460300262926344, "learning_rate": 8.005312070032693e-06, "loss": 0.746, "step": 5649 }, { "epoch": 0.5771195097037793, "grad_norm": 1.6087117566746612, "learning_rate": 8.002070330293337e-06, "loss": 0.7085, "step": 5650 }, { "epoch": 0.5772216547497446, "grad_norm": 1.3668984543984017, "learning_rate": 7.998828809228678e-06, "loss": 0.6678, "step": 5651 }, { "epoch": 0.5773237997957099, "grad_norm": 1.4319599078008105, "learning_rate": 7.995587507193494e-06, "loss": 0.6641, "step": 5652 }, { "epoch": 0.5774259448416752, "grad_norm": 1.4645757724847759, "learning_rate": 7.992346424542556e-06, "loss": 0.6628, "step": 5653 }, { "epoch": 0.5775280898876405, "grad_norm": 1.4685932317783474, "learning_rate": 7.989105561630598e-06, "loss": 0.7543, "step": 5654 }, { "epoch": 0.5776302349336058, "grad_norm": 1.4580521186699307, "learning_rate": 7.985864918812336e-06, "loss": 0.6402, "step": 5655 }, { "epoch": 0.5777323799795709, "grad_norm": 1.4889346291318515, "learning_rate": 7.982624496442456e-06, "loss": 0.6625, "step": 5656 }, { "epoch": 0.5778345250255362, "grad_norm": 1.6421527768513642, "learning_rate": 7.97938429487563e-06, "loss": 0.7435, "step": 5657 }, { "epoch": 0.5779366700715015, "grad_norm": 1.5417131501484904, "learning_rate": 7.976144314466501e-06, "loss": 0.6393, "step": 5658 }, { "epoch": 0.5780388151174668, "grad_norm": 1.3807027274192551, "learning_rate": 7.972904555569683e-06, "loss": 0.7074, "step": 5659 }, { "epoch": 0.5781409601634321, "grad_norm": 1.3511251046107053, "learning_rate": 7.969665018539771e-06, "loss": 0.6119, "step": 5660 }, { "epoch": 0.5782431052093974, "grad_norm": 1.7011235081288032, "learning_rate": 7.966425703731334e-06, "loss": 0.9171, "step": 5661 }, { "epoch": 0.5783452502553627, "grad_norm": 1.5274559972007, "learning_rate": 7.96318661149892e-06, "loss": 0.7116, "step": 5662 }, { "epoch": 0.5784473953013279, "grad_norm": 1.466882545657048, "learning_rate": 7.959947742197047e-06, "loss": 0.7302, "step": 5663 }, { "epoch": 0.5785495403472931, "grad_norm": 1.366570514116782, "learning_rate": 7.956709096180206e-06, "loss": 0.6748, "step": 5664 }, { "epoch": 0.5786516853932584, "grad_norm": 1.647443402906696, "learning_rate": 7.953470673802879e-06, "loss": 0.6963, "step": 5665 }, { "epoch": 0.5787538304392237, "grad_norm": 1.5027295314178175, "learning_rate": 7.95023247541951e-06, "loss": 0.7173, "step": 5666 }, { "epoch": 0.578855975485189, "grad_norm": 1.5274770136651497, "learning_rate": 7.946994501384518e-06, "loss": 0.6343, "step": 5667 }, { "epoch": 0.5789581205311543, "grad_norm": 1.549228383719072, "learning_rate": 7.943756752052307e-06, "loss": 0.7185, "step": 5668 }, { "epoch": 0.5790602655771195, "grad_norm": 1.4127040833564875, "learning_rate": 7.940519227777246e-06, "loss": 0.7594, "step": 5669 }, { "epoch": 0.5791624106230848, "grad_norm": 1.5320751243211286, "learning_rate": 7.937281928913688e-06, "loss": 0.6973, "step": 5670 }, { "epoch": 0.57926455566905, "grad_norm": 1.561218651689877, "learning_rate": 7.934044855815955e-06, "loss": 0.7799, "step": 5671 }, { "epoch": 0.5793667007150153, "grad_norm": 1.51958735641364, "learning_rate": 7.930808008838342e-06, "loss": 0.61, "step": 5672 }, { "epoch": 0.5794688457609806, "grad_norm": 1.5703503778999657, "learning_rate": 7.927571388335135e-06, "loss": 0.7601, "step": 5673 }, { "epoch": 0.5795709908069459, "grad_norm": 1.4492185089026643, "learning_rate": 7.924334994660577e-06, "loss": 0.5953, "step": 5674 }, { "epoch": 0.5796731358529111, "grad_norm": 1.3981217725898551, "learning_rate": 7.921098828168897e-06, "loss": 0.7107, "step": 5675 }, { "epoch": 0.5797752808988764, "grad_norm": 1.5133735062563882, "learning_rate": 7.917862889214292e-06, "loss": 0.6741, "step": 5676 }, { "epoch": 0.5798774259448417, "grad_norm": 1.4447121674112442, "learning_rate": 7.91462717815094e-06, "loss": 0.6765, "step": 5677 }, { "epoch": 0.579979570990807, "grad_norm": 1.570056760420568, "learning_rate": 7.911391695332988e-06, "loss": 0.8258, "step": 5678 }, { "epoch": 0.5800817160367722, "grad_norm": 1.3898283333925954, "learning_rate": 7.908156441114567e-06, "loss": 0.7303, "step": 5679 }, { "epoch": 0.5801838610827375, "grad_norm": 1.6167578452871458, "learning_rate": 7.90492141584977e-06, "loss": 0.7587, "step": 5680 }, { "epoch": 0.5802860061287027, "grad_norm": 1.372384946572026, "learning_rate": 7.901686619892685e-06, "loss": 0.585, "step": 5681 }, { "epoch": 0.580388151174668, "grad_norm": 1.5623582957132773, "learning_rate": 7.898452053597349e-06, "loss": 0.71, "step": 5682 }, { "epoch": 0.5804902962206333, "grad_norm": 1.4270688781977934, "learning_rate": 7.895217717317798e-06, "loss": 0.673, "step": 5683 }, { "epoch": 0.5805924412665986, "grad_norm": 1.464707999415032, "learning_rate": 7.891983611408026e-06, "loss": 0.7398, "step": 5684 }, { "epoch": 0.5806945863125639, "grad_norm": 1.4235345217358386, "learning_rate": 7.88874973622201e-06, "loss": 0.778, "step": 5685 }, { "epoch": 0.5807967313585292, "grad_norm": 1.5200852314676636, "learning_rate": 7.8855160921137e-06, "loss": 0.7258, "step": 5686 }, { "epoch": 0.5808988764044943, "grad_norm": 1.3818079001734629, "learning_rate": 7.88228267943702e-06, "loss": 0.5681, "step": 5687 }, { "epoch": 0.5810010214504596, "grad_norm": 1.493462708442748, "learning_rate": 7.879049498545864e-06, "loss": 0.7186, "step": 5688 }, { "epoch": 0.5811031664964249, "grad_norm": 1.5894152323926831, "learning_rate": 7.875816549794113e-06, "loss": 0.7169, "step": 5689 }, { "epoch": 0.5812053115423902, "grad_norm": 1.5425002409969844, "learning_rate": 7.872583833535616e-06, "loss": 0.8022, "step": 5690 }, { "epoch": 0.5813074565883555, "grad_norm": 1.474689486914503, "learning_rate": 7.86935135012419e-06, "loss": 0.7248, "step": 5691 }, { "epoch": 0.5814096016343208, "grad_norm": 1.402083891475651, "learning_rate": 7.866119099913638e-06, "loss": 0.5782, "step": 5692 }, { "epoch": 0.581511746680286, "grad_norm": 1.4577953624324582, "learning_rate": 7.862887083257728e-06, "loss": 0.6844, "step": 5693 }, { "epoch": 0.5816138917262513, "grad_norm": 1.4292870536491014, "learning_rate": 7.859655300510209e-06, "loss": 0.7003, "step": 5694 }, { "epoch": 0.5817160367722165, "grad_norm": 1.44065877597086, "learning_rate": 7.856423752024798e-06, "loss": 0.6851, "step": 5695 }, { "epoch": 0.5818181818181818, "grad_norm": 1.3317982467231464, "learning_rate": 7.85319243815519e-06, "loss": 0.6063, "step": 5696 }, { "epoch": 0.5819203268641471, "grad_norm": 1.50295511294295, "learning_rate": 7.84996135925506e-06, "loss": 0.7441, "step": 5697 }, { "epoch": 0.5820224719101124, "grad_norm": 1.3623730636270392, "learning_rate": 7.846730515678047e-06, "loss": 0.7867, "step": 5698 }, { "epoch": 0.5821246169560776, "grad_norm": 1.375922109924535, "learning_rate": 7.843499907777772e-06, "loss": 0.551, "step": 5699 }, { "epoch": 0.5822267620020429, "grad_norm": 1.4527912164592507, "learning_rate": 7.840269535907826e-06, "loss": 0.7223, "step": 5700 }, { "epoch": 0.5823289070480082, "grad_norm": 1.4786742798360644, "learning_rate": 7.837039400421773e-06, "loss": 0.71, "step": 5701 }, { "epoch": 0.5824310520939734, "grad_norm": 1.3067269156803258, "learning_rate": 7.833809501673155e-06, "loss": 0.6546, "step": 5702 }, { "epoch": 0.5825331971399387, "grad_norm": 1.3647659121303959, "learning_rate": 7.830579840015486e-06, "loss": 0.6541, "step": 5703 }, { "epoch": 0.582635342185904, "grad_norm": 1.4445183638176506, "learning_rate": 7.827350415802254e-06, "loss": 0.7103, "step": 5704 }, { "epoch": 0.5827374872318692, "grad_norm": 1.4962103606159176, "learning_rate": 7.824121229386925e-06, "loss": 0.7842, "step": 5705 }, { "epoch": 0.5828396322778345, "grad_norm": 1.4182254129509393, "learning_rate": 7.820892281122932e-06, "loss": 0.6714, "step": 5706 }, { "epoch": 0.5829417773237998, "grad_norm": 1.4270658988553722, "learning_rate": 7.81766357136369e-06, "loss": 0.6168, "step": 5707 }, { "epoch": 0.5830439223697651, "grad_norm": 1.3658979392186075, "learning_rate": 7.814435100462576e-06, "loss": 0.6913, "step": 5708 }, { "epoch": 0.5831460674157304, "grad_norm": 1.574813197761062, "learning_rate": 7.811206868772956e-06, "loss": 0.6508, "step": 5709 }, { "epoch": 0.5832482124616956, "grad_norm": 1.3890903426933534, "learning_rate": 7.807978876648155e-06, "loss": 0.662, "step": 5710 }, { "epoch": 0.5833503575076608, "grad_norm": 1.288814289744943, "learning_rate": 7.804751124441486e-06, "loss": 0.7052, "step": 5711 }, { "epoch": 0.5834525025536261, "grad_norm": 1.424970927454983, "learning_rate": 7.801523612506219e-06, "loss": 0.62, "step": 5712 }, { "epoch": 0.5835546475995914, "grad_norm": 1.3897015927427192, "learning_rate": 7.798296341195615e-06, "loss": 0.7511, "step": 5713 }, { "epoch": 0.5836567926455567, "grad_norm": 1.4849355068113859, "learning_rate": 7.7950693108629e-06, "loss": 0.7324, "step": 5714 }, { "epoch": 0.583758937691522, "grad_norm": 1.4632220767221882, "learning_rate": 7.791842521861273e-06, "loss": 0.6898, "step": 5715 }, { "epoch": 0.5838610827374873, "grad_norm": 1.486605349852115, "learning_rate": 7.788615974543911e-06, "loss": 0.6347, "step": 5716 }, { "epoch": 0.5839632277834526, "grad_norm": 1.4947044668385794, "learning_rate": 7.785389669263959e-06, "loss": 0.7894, "step": 5717 }, { "epoch": 0.5840653728294177, "grad_norm": 1.418817451071997, "learning_rate": 7.782163606374536e-06, "loss": 0.7046, "step": 5718 }, { "epoch": 0.584167517875383, "grad_norm": 1.477788347568906, "learning_rate": 7.778937786228742e-06, "loss": 0.7997, "step": 5719 }, { "epoch": 0.5842696629213483, "grad_norm": 1.5241703170172283, "learning_rate": 7.775712209179638e-06, "loss": 0.618, "step": 5720 }, { "epoch": 0.5843718079673136, "grad_norm": 1.3768086295740194, "learning_rate": 7.772486875580272e-06, "loss": 0.7101, "step": 5721 }, { "epoch": 0.5844739530132789, "grad_norm": 1.565074841950299, "learning_rate": 7.769261785783658e-06, "loss": 0.6457, "step": 5722 }, { "epoch": 0.5845760980592442, "grad_norm": 1.5386399844653846, "learning_rate": 7.76603694014278e-06, "loss": 0.7032, "step": 5723 }, { "epoch": 0.5846782431052094, "grad_norm": 1.4017717932230207, "learning_rate": 7.762812339010606e-06, "loss": 0.6271, "step": 5724 }, { "epoch": 0.5847803881511746, "grad_norm": 1.4419555308996117, "learning_rate": 7.759587982740064e-06, "loss": 0.7152, "step": 5725 }, { "epoch": 0.5848825331971399, "grad_norm": 1.6202475039226216, "learning_rate": 7.756363871684067e-06, "loss": 0.6838, "step": 5726 }, { "epoch": 0.5849846782431052, "grad_norm": 1.5656159122736155, "learning_rate": 7.753140006195492e-06, "loss": 0.7997, "step": 5727 }, { "epoch": 0.5850868232890705, "grad_norm": 1.4247788608531322, "learning_rate": 7.749916386627192e-06, "loss": 0.6514, "step": 5728 }, { "epoch": 0.5851889683350358, "grad_norm": 1.4193072615235633, "learning_rate": 7.746693013332003e-06, "loss": 0.6873, "step": 5729 }, { "epoch": 0.585291113381001, "grad_norm": 1.4838095393907869, "learning_rate": 7.743469886662715e-06, "loss": 0.6546, "step": 5730 }, { "epoch": 0.5853932584269663, "grad_norm": 1.4479339088576935, "learning_rate": 7.74024700697211e-06, "loss": 0.7141, "step": 5731 }, { "epoch": 0.5854954034729316, "grad_norm": 1.363378970493064, "learning_rate": 7.737024374612928e-06, "loss": 0.6987, "step": 5732 }, { "epoch": 0.5855975485188968, "grad_norm": 1.427744650188317, "learning_rate": 7.733801989937892e-06, "loss": 0.6942, "step": 5733 }, { "epoch": 0.5856996935648621, "grad_norm": 1.4391906172074227, "learning_rate": 7.730579853299691e-06, "loss": 0.7181, "step": 5734 }, { "epoch": 0.5858018386108274, "grad_norm": 1.4363305326814972, "learning_rate": 7.72735796505099e-06, "loss": 0.7135, "step": 5735 }, { "epoch": 0.5859039836567926, "grad_norm": 1.46357004625461, "learning_rate": 7.724136325544426e-06, "loss": 0.6862, "step": 5736 }, { "epoch": 0.5860061287027579, "grad_norm": 1.2960813380500738, "learning_rate": 7.720914935132618e-06, "loss": 0.7029, "step": 5737 }, { "epoch": 0.5861082737487232, "grad_norm": 1.5072538710238959, "learning_rate": 7.717693794168137e-06, "loss": 0.7662, "step": 5738 }, { "epoch": 0.5862104187946885, "grad_norm": 1.5029503113115303, "learning_rate": 7.71447290300355e-06, "loss": 0.7585, "step": 5739 }, { "epoch": 0.5863125638406538, "grad_norm": 1.4023798565232715, "learning_rate": 7.711252261991376e-06, "loss": 0.6887, "step": 5740 }, { "epoch": 0.586414708886619, "grad_norm": 1.3173696103105035, "learning_rate": 7.708031871484123e-06, "loss": 0.7161, "step": 5741 }, { "epoch": 0.5865168539325842, "grad_norm": 1.42169585958349, "learning_rate": 7.70481173183426e-06, "loss": 0.7269, "step": 5742 }, { "epoch": 0.5866189989785495, "grad_norm": 1.4560680288781076, "learning_rate": 7.70159184339424e-06, "loss": 0.741, "step": 5743 }, { "epoch": 0.5867211440245148, "grad_norm": 1.4816116590415376, "learning_rate": 7.698372206516472e-06, "loss": 0.7315, "step": 5744 }, { "epoch": 0.5868232890704801, "grad_norm": 1.4312738855062197, "learning_rate": 7.695152821553355e-06, "loss": 0.7005, "step": 5745 }, { "epoch": 0.5869254341164454, "grad_norm": 1.4659484310979962, "learning_rate": 7.691933688857254e-06, "loss": 0.774, "step": 5746 }, { "epoch": 0.5870275791624107, "grad_norm": 1.367307255945504, "learning_rate": 7.6887148087805e-06, "loss": 0.6445, "step": 5747 }, { "epoch": 0.587129724208376, "grad_norm": 1.5573391282646336, "learning_rate": 7.685496181675405e-06, "loss": 0.7286, "step": 5748 }, { "epoch": 0.5872318692543411, "grad_norm": 1.388106700766112, "learning_rate": 7.682277807894246e-06, "loss": 0.5863, "step": 5749 }, { "epoch": 0.5873340143003064, "grad_norm": 1.4125534021288657, "learning_rate": 7.679059687789281e-06, "loss": 0.6085, "step": 5750 }, { "epoch": 0.5874361593462717, "grad_norm": 1.48549400081952, "learning_rate": 7.675841821712731e-06, "loss": 0.7007, "step": 5751 }, { "epoch": 0.587538304392237, "grad_norm": 1.431591080380305, "learning_rate": 7.672624210016792e-06, "loss": 0.7538, "step": 5752 }, { "epoch": 0.5876404494382023, "grad_norm": 1.5095338713887558, "learning_rate": 7.66940685305364e-06, "loss": 0.7852, "step": 5753 }, { "epoch": 0.5877425944841675, "grad_norm": 1.5579880489233626, "learning_rate": 7.666189751175414e-06, "loss": 0.6927, "step": 5754 }, { "epoch": 0.5878447395301328, "grad_norm": 1.4907030530031022, "learning_rate": 7.662972904734227e-06, "loss": 0.6848, "step": 5755 }, { "epoch": 0.587946884576098, "grad_norm": 1.4234210016742586, "learning_rate": 7.659756314082167e-06, "loss": 0.7078, "step": 5756 }, { "epoch": 0.5880490296220633, "grad_norm": 1.5315874608161262, "learning_rate": 7.65653997957129e-06, "loss": 0.7393, "step": 5757 }, { "epoch": 0.5881511746680286, "grad_norm": 1.2921813577920969, "learning_rate": 7.653323901553625e-06, "loss": 0.6086, "step": 5758 }, { "epoch": 0.5882533197139939, "grad_norm": 1.5021323272474654, "learning_rate": 7.650108080381175e-06, "loss": 0.7231, "step": 5759 }, { "epoch": 0.5883554647599591, "grad_norm": 1.3350425349220059, "learning_rate": 7.646892516405911e-06, "loss": 0.7548, "step": 5760 }, { "epoch": 0.5884576098059244, "grad_norm": 1.3695159320305166, "learning_rate": 7.643677209979788e-06, "loss": 0.6397, "step": 5761 }, { "epoch": 0.5885597548518897, "grad_norm": 1.407791823542624, "learning_rate": 7.640462161454712e-06, "loss": 0.7314, "step": 5762 }, { "epoch": 0.588661899897855, "grad_norm": 1.4864092848524708, "learning_rate": 7.637247371182579e-06, "loss": 0.7112, "step": 5763 }, { "epoch": 0.5887640449438202, "grad_norm": 1.406465018365304, "learning_rate": 7.634032839515246e-06, "loss": 0.6085, "step": 5764 }, { "epoch": 0.5888661899897855, "grad_norm": 1.548813137918193, "learning_rate": 7.63081856680455e-06, "loss": 0.7779, "step": 5765 }, { "epoch": 0.5889683350357507, "grad_norm": 1.4307257018883277, "learning_rate": 7.627604553402291e-06, "loss": 0.6931, "step": 5766 }, { "epoch": 0.589070480081716, "grad_norm": 1.483409059446225, "learning_rate": 7.624390799660248e-06, "loss": 0.7135, "step": 5767 }, { "epoch": 0.5891726251276813, "grad_norm": 1.5442503422502547, "learning_rate": 7.621177305930162e-06, "loss": 0.7104, "step": 5768 }, { "epoch": 0.5892747701736466, "grad_norm": 1.4097107421758208, "learning_rate": 7.6179640725637596e-06, "loss": 0.7218, "step": 5769 }, { "epoch": 0.5893769152196119, "grad_norm": 1.5448612925622855, "learning_rate": 7.61475109991273e-06, "loss": 0.7752, "step": 5770 }, { "epoch": 0.5894790602655772, "grad_norm": 1.5660689682465545, "learning_rate": 7.611538388328734e-06, "loss": 0.7952, "step": 5771 }, { "epoch": 0.5895812053115423, "grad_norm": 1.5737212085221757, "learning_rate": 7.608325938163402e-06, "loss": 0.7108, "step": 5772 }, { "epoch": 0.5896833503575076, "grad_norm": 1.675708624857963, "learning_rate": 7.605113749768344e-06, "loss": 0.7589, "step": 5773 }, { "epoch": 0.5897854954034729, "grad_norm": 1.4736430180881663, "learning_rate": 7.60190182349513e-06, "loss": 0.7185, "step": 5774 }, { "epoch": 0.5898876404494382, "grad_norm": 1.479642262455952, "learning_rate": 7.598690159695314e-06, "loss": 0.6768, "step": 5775 }, { "epoch": 0.5899897854954035, "grad_norm": 1.5333942808553918, "learning_rate": 7.595478758720407e-06, "loss": 0.7368, "step": 5776 }, { "epoch": 0.5900919305413688, "grad_norm": 1.5949677671150675, "learning_rate": 7.5922676209219056e-06, "loss": 0.6986, "step": 5777 }, { "epoch": 0.590194075587334, "grad_norm": 1.4054217225454153, "learning_rate": 7.589056746651271e-06, "loss": 0.5658, "step": 5778 }, { "epoch": 0.5902962206332993, "grad_norm": 1.4962813120257945, "learning_rate": 7.5858461362599315e-06, "loss": 0.6529, "step": 5779 }, { "epoch": 0.5903983656792645, "grad_norm": 1.4521759411565687, "learning_rate": 7.582635790099293e-06, "loss": 0.7192, "step": 5780 }, { "epoch": 0.5905005107252298, "grad_norm": 1.4617757146894208, "learning_rate": 7.5794257085207265e-06, "loss": 0.6485, "step": 5781 }, { "epoch": 0.5906026557711951, "grad_norm": 1.3946528316804043, "learning_rate": 7.5762158918755844e-06, "loss": 0.6889, "step": 5782 }, { "epoch": 0.5907048008171604, "grad_norm": 1.7392021232281059, "learning_rate": 7.5730063405151755e-06, "loss": 0.7305, "step": 5783 }, { "epoch": 0.5908069458631257, "grad_norm": 1.476686831978223, "learning_rate": 7.569797054790789e-06, "loss": 0.802, "step": 5784 }, { "epoch": 0.5909090909090909, "grad_norm": 1.5975339102967892, "learning_rate": 7.566588035053688e-06, "loss": 0.7294, "step": 5785 }, { "epoch": 0.5910112359550562, "grad_norm": 1.5180944667469816, "learning_rate": 7.563379281655098e-06, "loss": 0.6873, "step": 5786 }, { "epoch": 0.5911133810010214, "grad_norm": 1.3952954998488591, "learning_rate": 7.560170794946221e-06, "loss": 0.6587, "step": 5787 }, { "epoch": 0.5912155260469867, "grad_norm": 2.5466930445511706, "learning_rate": 7.5569625752782276e-06, "loss": 0.7983, "step": 5788 }, { "epoch": 0.591317671092952, "grad_norm": 1.5079319659720216, "learning_rate": 7.553754623002256e-06, "loss": 0.7015, "step": 5789 }, { "epoch": 0.5914198161389173, "grad_norm": 1.2701662170402548, "learning_rate": 7.550546938469424e-06, "loss": 0.6452, "step": 5790 }, { "epoch": 0.5915219611848825, "grad_norm": 1.4320667155022613, "learning_rate": 7.54733952203081e-06, "loss": 0.628, "step": 5791 }, { "epoch": 0.5916241062308478, "grad_norm": 1.3872358715113562, "learning_rate": 7.544132374037467e-06, "loss": 0.6578, "step": 5792 }, { "epoch": 0.5917262512768131, "grad_norm": 1.562221409030555, "learning_rate": 7.540925494840427e-06, "loss": 0.7384, "step": 5793 }, { "epoch": 0.5918283963227784, "grad_norm": 1.3129676870599571, "learning_rate": 7.537718884790679e-06, "loss": 0.6119, "step": 5794 }, { "epoch": 0.5919305413687436, "grad_norm": 1.5074772944146426, "learning_rate": 7.534512544239192e-06, "loss": 0.6569, "step": 5795 }, { "epoch": 0.5920326864147089, "grad_norm": 1.6234323581300998, "learning_rate": 7.531306473536897e-06, "loss": 0.6378, "step": 5796 }, { "epoch": 0.5921348314606741, "grad_norm": 1.4546561105750946, "learning_rate": 7.5281006730347065e-06, "loss": 0.7374, "step": 5797 }, { "epoch": 0.5922369765066394, "grad_norm": 1.5629803179138624, "learning_rate": 7.524895143083491e-06, "loss": 0.7448, "step": 5798 }, { "epoch": 0.5923391215526047, "grad_norm": 1.491121890942575, "learning_rate": 7.521689884034104e-06, "loss": 0.6947, "step": 5799 }, { "epoch": 0.59244126659857, "grad_norm": 1.4620494522018919, "learning_rate": 7.518484896237356e-06, "loss": 0.6123, "step": 5800 }, { "epoch": 0.5925434116445353, "grad_norm": 1.5181010734176568, "learning_rate": 7.515280180044041e-06, "loss": 0.7676, "step": 5801 }, { "epoch": 0.5926455566905006, "grad_norm": 1.5533313086862486, "learning_rate": 7.512075735804919e-06, "loss": 0.7942, "step": 5802 }, { "epoch": 0.5927477017364657, "grad_norm": 1.2923887603955424, "learning_rate": 7.508871563870712e-06, "loss": 0.5818, "step": 5803 }, { "epoch": 0.592849846782431, "grad_norm": 1.4557670720280915, "learning_rate": 7.505667664592125e-06, "loss": 0.7453, "step": 5804 }, { "epoch": 0.5929519918283963, "grad_norm": 1.4093005374900336, "learning_rate": 7.502464038319822e-06, "loss": 0.6805, "step": 5805 }, { "epoch": 0.5930541368743616, "grad_norm": 1.4220418970001532, "learning_rate": 7.499260685404443e-06, "loss": 0.7014, "step": 5806 }, { "epoch": 0.5931562819203269, "grad_norm": 1.502801408614704, "learning_rate": 7.496057606196599e-06, "loss": 0.6548, "step": 5807 }, { "epoch": 0.5932584269662922, "grad_norm": 1.4601744284068825, "learning_rate": 7.4928548010468635e-06, "loss": 0.6828, "step": 5808 }, { "epoch": 0.5933605720122574, "grad_norm": 1.4496218637181033, "learning_rate": 7.489652270305792e-06, "loss": 0.6693, "step": 5809 }, { "epoch": 0.5934627170582226, "grad_norm": 1.5806909622590437, "learning_rate": 7.486450014323905e-06, "loss": 0.7726, "step": 5810 }, { "epoch": 0.5935648621041879, "grad_norm": 1.3492198744988257, "learning_rate": 7.483248033451684e-06, "loss": 0.7009, "step": 5811 }, { "epoch": 0.5936670071501532, "grad_norm": 1.5419031075159988, "learning_rate": 7.4800463280395944e-06, "loss": 0.724, "step": 5812 }, { "epoch": 0.5937691521961185, "grad_norm": 1.406955428244615, "learning_rate": 7.47684489843806e-06, "loss": 0.639, "step": 5813 }, { "epoch": 0.5938712972420838, "grad_norm": 1.4551558255379338, "learning_rate": 7.473643744997483e-06, "loss": 0.7321, "step": 5814 }, { "epoch": 0.593973442288049, "grad_norm": 1.491777370028234, "learning_rate": 7.470442868068231e-06, "loss": 0.7593, "step": 5815 }, { "epoch": 0.5940755873340143, "grad_norm": 1.4138634018268215, "learning_rate": 7.467242268000636e-06, "loss": 0.7877, "step": 5816 }, { "epoch": 0.5941777323799796, "grad_norm": 1.4756494480856852, "learning_rate": 7.464041945145017e-06, "loss": 0.708, "step": 5817 }, { "epoch": 0.5942798774259448, "grad_norm": 1.5069085261007462, "learning_rate": 7.460841899851643e-06, "loss": 0.7937, "step": 5818 }, { "epoch": 0.5943820224719101, "grad_norm": 1.4658054590771001, "learning_rate": 7.457642132470766e-06, "loss": 0.7068, "step": 5819 }, { "epoch": 0.5944841675178754, "grad_norm": 1.4350850426497235, "learning_rate": 7.454442643352599e-06, "loss": 0.7412, "step": 5820 }, { "epoch": 0.5945863125638406, "grad_norm": 1.695287321265646, "learning_rate": 7.45124343284733e-06, "loss": 0.6436, "step": 5821 }, { "epoch": 0.5946884576098059, "grad_norm": 1.5581514956614742, "learning_rate": 7.448044501305114e-06, "loss": 0.6489, "step": 5822 }, { "epoch": 0.5947906026557712, "grad_norm": 1.456584404414506, "learning_rate": 7.444845849076075e-06, "loss": 0.7394, "step": 5823 }, { "epoch": 0.5948927477017365, "grad_norm": 1.5271384689288559, "learning_rate": 7.441647476510305e-06, "loss": 0.7278, "step": 5824 }, { "epoch": 0.5949948927477018, "grad_norm": 1.3904740897323202, "learning_rate": 7.438449383957877e-06, "loss": 0.7813, "step": 5825 }, { "epoch": 0.595097037793667, "grad_norm": 1.5797853566570927, "learning_rate": 7.435251571768817e-06, "loss": 0.8489, "step": 5826 }, { "epoch": 0.5951991828396322, "grad_norm": 1.4532963611050633, "learning_rate": 7.432054040293131e-06, "loss": 0.6717, "step": 5827 }, { "epoch": 0.5953013278855975, "grad_norm": 1.3227154822064804, "learning_rate": 7.428856789880787e-06, "loss": 0.6837, "step": 5828 }, { "epoch": 0.5954034729315628, "grad_norm": 1.3784683925882522, "learning_rate": 7.425659820881732e-06, "loss": 0.6048, "step": 5829 }, { "epoch": 0.5955056179775281, "grad_norm": 1.4804097094920576, "learning_rate": 7.42246313364587e-06, "loss": 0.7372, "step": 5830 }, { "epoch": 0.5956077630234934, "grad_norm": 1.4097054303879506, "learning_rate": 7.419266728523084e-06, "loss": 0.721, "step": 5831 }, { "epoch": 0.5957099080694587, "grad_norm": 1.4773703195568362, "learning_rate": 7.416070605863219e-06, "loss": 0.6545, "step": 5832 }, { "epoch": 0.595812053115424, "grad_norm": 1.6724671679688483, "learning_rate": 7.412874766016099e-06, "loss": 0.7634, "step": 5833 }, { "epoch": 0.5959141981613891, "grad_norm": 1.5530302740637776, "learning_rate": 7.409679209331507e-06, "loss": 0.7557, "step": 5834 }, { "epoch": 0.5960163432073544, "grad_norm": 1.5128275772744901, "learning_rate": 7.406483936159199e-06, "loss": 0.6323, "step": 5835 }, { "epoch": 0.5961184882533197, "grad_norm": 1.4496759319084689, "learning_rate": 7.403288946848901e-06, "loss": 0.7238, "step": 5836 }, { "epoch": 0.596220633299285, "grad_norm": 1.3898133737338034, "learning_rate": 7.400094241750303e-06, "loss": 0.7051, "step": 5837 }, { "epoch": 0.5963227783452503, "grad_norm": 1.5120177333362457, "learning_rate": 7.396899821213072e-06, "loss": 0.6855, "step": 5838 }, { "epoch": 0.5964249233912156, "grad_norm": 1.450229976850798, "learning_rate": 7.393705685586838e-06, "loss": 0.666, "step": 5839 }, { "epoch": 0.5965270684371808, "grad_norm": 1.4057197508075616, "learning_rate": 7.390511835221199e-06, "loss": 0.7484, "step": 5840 }, { "epoch": 0.596629213483146, "grad_norm": 1.5479776705699844, "learning_rate": 7.387318270465722e-06, "loss": 0.733, "step": 5841 }, { "epoch": 0.5967313585291113, "grad_norm": 1.4607955555803671, "learning_rate": 7.384124991669954e-06, "loss": 0.6595, "step": 5842 }, { "epoch": 0.5968335035750766, "grad_norm": 1.3757148862539543, "learning_rate": 7.380931999183394e-06, "loss": 0.5806, "step": 5843 }, { "epoch": 0.5969356486210419, "grad_norm": 1.564140087412455, "learning_rate": 7.377739293355519e-06, "loss": 0.7638, "step": 5844 }, { "epoch": 0.5970377936670072, "grad_norm": 1.4236940859507847, "learning_rate": 7.374546874535771e-06, "loss": 0.7112, "step": 5845 }, { "epoch": 0.5971399387129724, "grad_norm": 1.4786527251152497, "learning_rate": 7.371354743073567e-06, "loss": 0.6516, "step": 5846 }, { "epoch": 0.5972420837589377, "grad_norm": 1.480869051169739, "learning_rate": 7.368162899318282e-06, "loss": 0.6439, "step": 5847 }, { "epoch": 0.597344228804903, "grad_norm": 1.426462840529589, "learning_rate": 7.36497134361927e-06, "loss": 0.7216, "step": 5848 }, { "epoch": 0.5974463738508682, "grad_norm": 1.2904536840636613, "learning_rate": 7.361780076325841e-06, "loss": 0.6744, "step": 5849 }, { "epoch": 0.5975485188968335, "grad_norm": 1.538850862338203, "learning_rate": 7.3585890977872924e-06, "loss": 0.6295, "step": 5850 }, { "epoch": 0.5976506639427988, "grad_norm": 1.7105226628936505, "learning_rate": 7.355398408352874e-06, "loss": 0.6409, "step": 5851 }, { "epoch": 0.597752808988764, "grad_norm": 1.5491906068221017, "learning_rate": 7.352208008371808e-06, "loss": 0.6745, "step": 5852 }, { "epoch": 0.5978549540347293, "grad_norm": 1.4498433771474881, "learning_rate": 7.349017898193286e-06, "loss": 0.6423, "step": 5853 }, { "epoch": 0.5979570990806946, "grad_norm": 1.3399637374443385, "learning_rate": 7.345828078166466e-06, "loss": 0.678, "step": 5854 }, { "epoch": 0.5980592441266599, "grad_norm": 1.370888361134912, "learning_rate": 7.342638548640482e-06, "loss": 0.7288, "step": 5855 }, { "epoch": 0.5981613891726252, "grad_norm": 1.3007171741981092, "learning_rate": 7.339449309964424e-06, "loss": 0.6071, "step": 5856 }, { "epoch": 0.5982635342185904, "grad_norm": 1.4352387008486323, "learning_rate": 7.336260362487351e-06, "loss": 0.6248, "step": 5857 }, { "epoch": 0.5983656792645556, "grad_norm": 1.4117526808498664, "learning_rate": 7.3330717065583105e-06, "loss": 0.5995, "step": 5858 }, { "epoch": 0.5984678243105209, "grad_norm": 1.4328852879476324, "learning_rate": 7.329883342526293e-06, "loss": 0.7509, "step": 5859 }, { "epoch": 0.5985699693564862, "grad_norm": 1.6384448766209287, "learning_rate": 7.3266952707402675e-06, "loss": 0.6928, "step": 5860 }, { "epoch": 0.5986721144024515, "grad_norm": 1.3871534334114641, "learning_rate": 7.323507491549173e-06, "loss": 0.5918, "step": 5861 }, { "epoch": 0.5987742594484168, "grad_norm": 1.5034207825241412, "learning_rate": 7.320320005301911e-06, "loss": 0.7017, "step": 5862 }, { "epoch": 0.5988764044943821, "grad_norm": 1.5614707879590108, "learning_rate": 7.317132812347359e-06, "loss": 0.766, "step": 5863 }, { "epoch": 0.5989785495403472, "grad_norm": 1.4148598715368401, "learning_rate": 7.31394591303435e-06, "loss": 0.6969, "step": 5864 }, { "epoch": 0.5990806945863125, "grad_norm": 1.5643572833666173, "learning_rate": 7.310759307711697e-06, "loss": 0.7568, "step": 5865 }, { "epoch": 0.5991828396322778, "grad_norm": 1.5113285530337168, "learning_rate": 7.307572996728177e-06, "loss": 0.6829, "step": 5866 }, { "epoch": 0.5992849846782431, "grad_norm": 1.4630314000231337, "learning_rate": 7.30438698043253e-06, "loss": 0.6925, "step": 5867 }, { "epoch": 0.5993871297242084, "grad_norm": 1.4150843070869774, "learning_rate": 7.301201259173474e-06, "loss": 0.7361, "step": 5868 }, { "epoch": 0.5994892747701737, "grad_norm": 1.664065993774351, "learning_rate": 7.298015833299679e-06, "loss": 0.7802, "step": 5869 }, { "epoch": 0.599591419816139, "grad_norm": 1.6287793720113375, "learning_rate": 7.2948307031598005e-06, "loss": 0.7481, "step": 5870 }, { "epoch": 0.5996935648621042, "grad_norm": 1.6506500561459407, "learning_rate": 7.291645869102446e-06, "loss": 0.847, "step": 5871 }, { "epoch": 0.5997957099080694, "grad_norm": 1.4595888537852602, "learning_rate": 7.288461331476205e-06, "loss": 0.8376, "step": 5872 }, { "epoch": 0.5998978549540347, "grad_norm": 1.343409059190042, "learning_rate": 7.285277090629617e-06, "loss": 0.7081, "step": 5873 }, { "epoch": 0.6, "grad_norm": 1.5003265921407023, "learning_rate": 7.282093146911208e-06, "loss": 0.7311, "step": 5874 }, { "epoch": 0.6001021450459653, "grad_norm": 1.4733068011069301, "learning_rate": 7.278909500669462e-06, "loss": 0.6455, "step": 5875 }, { "epoch": 0.6002042900919305, "grad_norm": 1.3783274711739495, "learning_rate": 7.27572615225283e-06, "loss": 0.7031, "step": 5876 }, { "epoch": 0.6003064351378958, "grad_norm": 1.435450382534867, "learning_rate": 7.272543102009728e-06, "loss": 0.7771, "step": 5877 }, { "epoch": 0.6004085801838611, "grad_norm": 1.3584899451012589, "learning_rate": 7.269360350288548e-06, "loss": 0.6913, "step": 5878 }, { "epoch": 0.6005107252298264, "grad_norm": 1.421750549692087, "learning_rate": 7.26617789743764e-06, "loss": 0.6962, "step": 5879 }, { "epoch": 0.6006128702757916, "grad_norm": 1.3129733482632258, "learning_rate": 7.2629957438053285e-06, "loss": 0.6996, "step": 5880 }, { "epoch": 0.6007150153217569, "grad_norm": 1.4372971112290582, "learning_rate": 7.259813889739897e-06, "loss": 0.6093, "step": 5881 }, { "epoch": 0.6008171603677221, "grad_norm": 1.493073979194371, "learning_rate": 7.256632335589609e-06, "loss": 0.7358, "step": 5882 }, { "epoch": 0.6009193054136874, "grad_norm": 1.5282753675722793, "learning_rate": 7.253451081702686e-06, "loss": 0.7078, "step": 5883 }, { "epoch": 0.6010214504596527, "grad_norm": 1.4899207091470166, "learning_rate": 7.2502701284273145e-06, "loss": 0.7203, "step": 5884 }, { "epoch": 0.601123595505618, "grad_norm": 1.403918949914807, "learning_rate": 7.247089476111655e-06, "loss": 0.7449, "step": 5885 }, { "epoch": 0.6012257405515833, "grad_norm": 1.334592331410426, "learning_rate": 7.2439091251038295e-06, "loss": 0.5974, "step": 5886 }, { "epoch": 0.6013278855975486, "grad_norm": 1.334817126493994, "learning_rate": 7.2407290757519345e-06, "loss": 0.6539, "step": 5887 }, { "epoch": 0.6014300306435137, "grad_norm": 1.5259312321091565, "learning_rate": 7.237549328404021e-06, "loss": 0.7872, "step": 5888 }, { "epoch": 0.601532175689479, "grad_norm": 1.3974426306369905, "learning_rate": 7.234369883408116e-06, "loss": 0.6334, "step": 5889 }, { "epoch": 0.6016343207354443, "grad_norm": 1.4541248673278022, "learning_rate": 7.23119074111222e-06, "loss": 0.7618, "step": 5890 }, { "epoch": 0.6017364657814096, "grad_norm": 1.4514054763066233, "learning_rate": 7.228011901864283e-06, "loss": 0.6537, "step": 5891 }, { "epoch": 0.6018386108273749, "grad_norm": 1.409127258091539, "learning_rate": 7.224833366012236e-06, "loss": 0.6423, "step": 5892 }, { "epoch": 0.6019407558733402, "grad_norm": 1.5697394140134555, "learning_rate": 7.221655133903971e-06, "loss": 0.7172, "step": 5893 }, { "epoch": 0.6020429009193055, "grad_norm": 1.552407886324837, "learning_rate": 7.218477205887344e-06, "loss": 0.6877, "step": 5894 }, { "epoch": 0.6021450459652706, "grad_norm": 1.5063556753206147, "learning_rate": 7.215299582310187e-06, "loss": 0.7238, "step": 5895 }, { "epoch": 0.6022471910112359, "grad_norm": 1.3751718840272076, "learning_rate": 7.212122263520287e-06, "loss": 0.665, "step": 5896 }, { "epoch": 0.6023493360572012, "grad_norm": 1.4101370274030278, "learning_rate": 7.208945249865404e-06, "loss": 0.6644, "step": 5897 }, { "epoch": 0.6024514811031665, "grad_norm": 1.4362162088640555, "learning_rate": 7.205768541693271e-06, "loss": 0.6734, "step": 5898 }, { "epoch": 0.6025536261491318, "grad_norm": 1.3280863216388417, "learning_rate": 7.202592139351574e-06, "loss": 0.623, "step": 5899 }, { "epoch": 0.6026557711950971, "grad_norm": 1.4321336287865873, "learning_rate": 7.199416043187978e-06, "loss": 0.6589, "step": 5900 }, { "epoch": 0.6027579162410623, "grad_norm": 1.5841810161295078, "learning_rate": 7.196240253550102e-06, "loss": 0.7485, "step": 5901 }, { "epoch": 0.6028600612870276, "grad_norm": 1.4603486447179619, "learning_rate": 7.193064770785545e-06, "loss": 0.6523, "step": 5902 }, { "epoch": 0.6029622063329928, "grad_norm": 1.4129413337559316, "learning_rate": 7.189889595241859e-06, "loss": 0.6737, "step": 5903 }, { "epoch": 0.6030643513789581, "grad_norm": 1.342943263142394, "learning_rate": 7.186714727266575e-06, "loss": 0.7003, "step": 5904 }, { "epoch": 0.6031664964249234, "grad_norm": 1.345900513431136, "learning_rate": 7.183540167207179e-06, "loss": 0.6616, "step": 5905 }, { "epoch": 0.6032686414708887, "grad_norm": 1.5392394438778376, "learning_rate": 7.180365915411131e-06, "loss": 0.7534, "step": 5906 }, { "epoch": 0.6033707865168539, "grad_norm": 1.4282662313286736, "learning_rate": 7.1771919722258586e-06, "loss": 0.586, "step": 5907 }, { "epoch": 0.6034729315628192, "grad_norm": 1.4230758702527313, "learning_rate": 7.174018337998747e-06, "loss": 0.5617, "step": 5908 }, { "epoch": 0.6035750766087845, "grad_norm": 1.4602701258456388, "learning_rate": 7.170845013077156e-06, "loss": 0.5712, "step": 5909 }, { "epoch": 0.6036772216547498, "grad_norm": 1.3836139050315057, "learning_rate": 7.167671997808406e-06, "loss": 0.7285, "step": 5910 }, { "epoch": 0.603779366700715, "grad_norm": 1.3636126912004216, "learning_rate": 7.164499292539783e-06, "loss": 0.6279, "step": 5911 }, { "epoch": 0.6038815117466803, "grad_norm": 1.3978859328454079, "learning_rate": 7.161326897618547e-06, "loss": 0.6997, "step": 5912 }, { "epoch": 0.6039836567926455, "grad_norm": 1.4979157354049246, "learning_rate": 7.158154813391911e-06, "loss": 0.7615, "step": 5913 }, { "epoch": 0.6040858018386108, "grad_norm": 1.4759052852895458, "learning_rate": 7.154983040207071e-06, "loss": 0.6707, "step": 5914 }, { "epoch": 0.6041879468845761, "grad_norm": 1.4444922606576425, "learning_rate": 7.151811578411176e-06, "loss": 0.7194, "step": 5915 }, { "epoch": 0.6042900919305414, "grad_norm": 1.5180061079976073, "learning_rate": 7.148640428351341e-06, "loss": 0.6778, "step": 5916 }, { "epoch": 0.6043922369765067, "grad_norm": 1.6038497285708615, "learning_rate": 7.1454695903746575e-06, "loss": 0.8262, "step": 5917 }, { "epoch": 0.604494382022472, "grad_norm": 1.4051256729905608, "learning_rate": 7.142299064828169e-06, "loss": 0.7348, "step": 5918 }, { "epoch": 0.6045965270684371, "grad_norm": 1.4003392105507957, "learning_rate": 7.139128852058894e-06, "loss": 0.6837, "step": 5919 }, { "epoch": 0.6046986721144024, "grad_norm": 1.5345839514618114, "learning_rate": 7.135958952413815e-06, "loss": 0.7187, "step": 5920 }, { "epoch": 0.6048008171603677, "grad_norm": 1.47841244282769, "learning_rate": 7.132789366239876e-06, "loss": 0.6616, "step": 5921 }, { "epoch": 0.604902962206333, "grad_norm": 1.4603720587270017, "learning_rate": 7.129620093883998e-06, "loss": 0.7182, "step": 5922 }, { "epoch": 0.6050051072522983, "grad_norm": 1.4935846022160644, "learning_rate": 7.126451135693052e-06, "loss": 0.7415, "step": 5923 }, { "epoch": 0.6051072522982636, "grad_norm": 1.5549311554787175, "learning_rate": 7.1232824920138884e-06, "loss": 0.7075, "step": 5924 }, { "epoch": 0.6052093973442288, "grad_norm": 1.2929420000903817, "learning_rate": 7.120114163193312e-06, "loss": 0.6061, "step": 5925 }, { "epoch": 0.605311542390194, "grad_norm": 1.242069455290526, "learning_rate": 7.116946149578106e-06, "loss": 0.6061, "step": 5926 }, { "epoch": 0.6054136874361593, "grad_norm": 1.4296596349888733, "learning_rate": 7.113778451515003e-06, "loss": 0.6942, "step": 5927 }, { "epoch": 0.6055158324821246, "grad_norm": 1.4499425587375494, "learning_rate": 7.110611069350713e-06, "loss": 0.7488, "step": 5928 }, { "epoch": 0.6056179775280899, "grad_norm": 1.480204362686207, "learning_rate": 7.107444003431906e-06, "loss": 0.665, "step": 5929 }, { "epoch": 0.6057201225740552, "grad_norm": 1.6288706572593974, "learning_rate": 7.104277254105225e-06, "loss": 0.7617, "step": 5930 }, { "epoch": 0.6058222676200204, "grad_norm": 1.4117823480389378, "learning_rate": 7.101110821717267e-06, "loss": 0.7345, "step": 5931 }, { "epoch": 0.6059244126659857, "grad_norm": 1.374198163451614, "learning_rate": 7.097944706614607e-06, "loss": 0.6384, "step": 5932 }, { "epoch": 0.606026557711951, "grad_norm": 2.150011834438242, "learning_rate": 7.09477890914377e-06, "loss": 0.6861, "step": 5933 }, { "epoch": 0.6061287027579162, "grad_norm": 1.365551514911846, "learning_rate": 7.09161342965126e-06, "loss": 0.5959, "step": 5934 }, { "epoch": 0.6062308478038815, "grad_norm": 1.5081651464651011, "learning_rate": 7.088448268483539e-06, "loss": 0.6964, "step": 5935 }, { "epoch": 0.6063329928498468, "grad_norm": 1.4641862085607433, "learning_rate": 7.085283425987037e-06, "loss": 0.7471, "step": 5936 }, { "epoch": 0.606435137895812, "grad_norm": 1.423325550546707, "learning_rate": 7.082118902508142e-06, "loss": 0.7259, "step": 5937 }, { "epoch": 0.6065372829417773, "grad_norm": 1.4216140331174967, "learning_rate": 7.0789546983932224e-06, "loss": 0.6027, "step": 5938 }, { "epoch": 0.6066394279877426, "grad_norm": 1.5072423510122697, "learning_rate": 7.075790813988599e-06, "loss": 0.7399, "step": 5939 }, { "epoch": 0.6067415730337079, "grad_norm": 1.482619911549146, "learning_rate": 7.072627249640559e-06, "loss": 0.7052, "step": 5940 }, { "epoch": 0.6068437180796732, "grad_norm": 1.3255449778024178, "learning_rate": 7.069464005695359e-06, "loss": 0.6583, "step": 5941 }, { "epoch": 0.6069458631256384, "grad_norm": 1.4585769395036152, "learning_rate": 7.0663010824992166e-06, "loss": 0.7185, "step": 5942 }, { "epoch": 0.6070480081716036, "grad_norm": 1.4400790221792514, "learning_rate": 7.063138480398316e-06, "loss": 0.6996, "step": 5943 }, { "epoch": 0.6071501532175689, "grad_norm": 1.6113066803988352, "learning_rate": 7.059976199738805e-06, "loss": 0.6919, "step": 5944 }, { "epoch": 0.6072522982635342, "grad_norm": 1.4169487563860494, "learning_rate": 7.056814240866796e-06, "loss": 0.7229, "step": 5945 }, { "epoch": 0.6073544433094995, "grad_norm": 1.3642339601865991, "learning_rate": 7.053652604128376e-06, "loss": 0.7745, "step": 5946 }, { "epoch": 0.6074565883554648, "grad_norm": 1.5863125276646959, "learning_rate": 7.05049128986958e-06, "loss": 0.7111, "step": 5947 }, { "epoch": 0.6075587334014301, "grad_norm": 1.4413117639999633, "learning_rate": 7.047330298436417e-06, "loss": 0.7088, "step": 5948 }, { "epoch": 0.6076608784473952, "grad_norm": 1.4887816423957854, "learning_rate": 7.044169630174862e-06, "loss": 0.7436, "step": 5949 }, { "epoch": 0.6077630234933605, "grad_norm": 1.4203235773642278, "learning_rate": 7.041009285430849e-06, "loss": 0.653, "step": 5950 }, { "epoch": 0.6078651685393258, "grad_norm": 1.4360534734724038, "learning_rate": 7.037849264550282e-06, "loss": 0.7616, "step": 5951 }, { "epoch": 0.6079673135852911, "grad_norm": 1.4449357041843944, "learning_rate": 7.034689567879026e-06, "loss": 0.8129, "step": 5952 }, { "epoch": 0.6080694586312564, "grad_norm": 1.8014754582260786, "learning_rate": 7.03153019576291e-06, "loss": 0.7532, "step": 5953 }, { "epoch": 0.6081716036772217, "grad_norm": 1.537709437039263, "learning_rate": 7.028371148547737e-06, "loss": 0.6204, "step": 5954 }, { "epoch": 0.608273748723187, "grad_norm": 1.4367170795663489, "learning_rate": 7.02521242657926e-06, "loss": 0.6748, "step": 5955 }, { "epoch": 0.6083758937691522, "grad_norm": 1.3520404608469325, "learning_rate": 7.022054030203206e-06, "loss": 0.6715, "step": 5956 }, { "epoch": 0.6084780388151174, "grad_norm": 1.5990293519928394, "learning_rate": 7.01889595976526e-06, "loss": 0.7127, "step": 5957 }, { "epoch": 0.6085801838610827, "grad_norm": 1.4299562860740695, "learning_rate": 7.01573821561108e-06, "loss": 0.6813, "step": 5958 }, { "epoch": 0.608682328907048, "grad_norm": 1.5606205131949153, "learning_rate": 7.012580798086278e-06, "loss": 0.6967, "step": 5959 }, { "epoch": 0.6087844739530133, "grad_norm": 1.6765703956489642, "learning_rate": 7.00942370753644e-06, "loss": 0.7797, "step": 5960 }, { "epoch": 0.6088866189989786, "grad_norm": 1.519227804228802, "learning_rate": 7.006266944307105e-06, "loss": 0.7846, "step": 5961 }, { "epoch": 0.6089887640449438, "grad_norm": 1.5044952396912175, "learning_rate": 7.003110508743789e-06, "loss": 0.7219, "step": 5962 }, { "epoch": 0.6090909090909091, "grad_norm": 1.3218294881781498, "learning_rate": 6.9999544011919664e-06, "loss": 0.6193, "step": 5963 }, { "epoch": 0.6091930541368744, "grad_norm": 1.5291194366283811, "learning_rate": 6.996798621997074e-06, "loss": 0.7157, "step": 5964 }, { "epoch": 0.6092951991828396, "grad_norm": 1.4854553444988114, "learning_rate": 6.993643171504509e-06, "loss": 0.7228, "step": 5965 }, { "epoch": 0.6093973442288049, "grad_norm": 1.5204966632121497, "learning_rate": 6.990488050059644e-06, "loss": 0.7149, "step": 5966 }, { "epoch": 0.6094994892747702, "grad_norm": 1.6388165026381813, "learning_rate": 6.987333258007805e-06, "loss": 0.8201, "step": 5967 }, { "epoch": 0.6096016343207354, "grad_norm": 1.383754035183041, "learning_rate": 6.984178795694288e-06, "loss": 0.7046, "step": 5968 }, { "epoch": 0.6097037793667007, "grad_norm": 1.500056803110432, "learning_rate": 6.981024663464347e-06, "loss": 0.7486, "step": 5969 }, { "epoch": 0.609805924412666, "grad_norm": 1.378524200367225, "learning_rate": 6.977870861663209e-06, "loss": 0.649, "step": 5970 }, { "epoch": 0.6099080694586313, "grad_norm": 1.5246033586121945, "learning_rate": 6.974717390636059e-06, "loss": 0.7265, "step": 5971 }, { "epoch": 0.6100102145045966, "grad_norm": 1.50034403507857, "learning_rate": 6.971564250728044e-06, "loss": 0.6507, "step": 5972 }, { "epoch": 0.6101123595505618, "grad_norm": 1.3708884567524493, "learning_rate": 6.968411442284279e-06, "loss": 0.7255, "step": 5973 }, { "epoch": 0.610214504596527, "grad_norm": 1.5479899644877473, "learning_rate": 6.965258965649841e-06, "loss": 0.7221, "step": 5974 }, { "epoch": 0.6103166496424923, "grad_norm": 1.412269002629425, "learning_rate": 6.96210682116977e-06, "loss": 0.6496, "step": 5975 }, { "epoch": 0.6104187946884576, "grad_norm": 1.462584429374096, "learning_rate": 6.958955009189069e-06, "loss": 0.7176, "step": 5976 }, { "epoch": 0.6105209397344229, "grad_norm": 1.4649820438546874, "learning_rate": 6.955803530052705e-06, "loss": 0.8149, "step": 5977 }, { "epoch": 0.6106230847803882, "grad_norm": 1.6030227392476382, "learning_rate": 6.952652384105614e-06, "loss": 0.7472, "step": 5978 }, { "epoch": 0.6107252298263535, "grad_norm": 1.5979450842648746, "learning_rate": 6.9495015716926875e-06, "loss": 0.7451, "step": 5979 }, { "epoch": 0.6108273748723186, "grad_norm": 1.5256350986575982, "learning_rate": 6.946351093158788e-06, "loss": 0.6909, "step": 5980 }, { "epoch": 0.6109295199182839, "grad_norm": 1.5821816437332996, "learning_rate": 6.943200948848732e-06, "loss": 0.7128, "step": 5981 }, { "epoch": 0.6110316649642492, "grad_norm": 1.4049180638821632, "learning_rate": 6.940051139107307e-06, "loss": 0.7159, "step": 5982 }, { "epoch": 0.6111338100102145, "grad_norm": 1.489407985814746, "learning_rate": 6.936901664279263e-06, "loss": 0.8012, "step": 5983 }, { "epoch": 0.6112359550561798, "grad_norm": 1.479161321048802, "learning_rate": 6.933752524709308e-06, "loss": 0.7092, "step": 5984 }, { "epoch": 0.6113381001021451, "grad_norm": 1.5259428801694348, "learning_rate": 6.9306037207421196e-06, "loss": 0.6165, "step": 5985 }, { "epoch": 0.6114402451481104, "grad_norm": 1.451341130766712, "learning_rate": 6.92745525272234e-06, "loss": 0.6468, "step": 5986 }, { "epoch": 0.6115423901940756, "grad_norm": 1.4186920855738687, "learning_rate": 6.924307120994567e-06, "loss": 0.6673, "step": 5987 }, { "epoch": 0.6116445352400408, "grad_norm": 1.5723166087547982, "learning_rate": 6.921159325903368e-06, "loss": 0.6815, "step": 5988 }, { "epoch": 0.6117466802860061, "grad_norm": 1.5937039163973152, "learning_rate": 6.918011867793268e-06, "loss": 0.7112, "step": 5989 }, { "epoch": 0.6118488253319714, "grad_norm": 1.753364603103956, "learning_rate": 6.914864747008762e-06, "loss": 0.579, "step": 5990 }, { "epoch": 0.6119509703779367, "grad_norm": 1.4531015053929697, "learning_rate": 6.911717963894302e-06, "loss": 0.7525, "step": 5991 }, { "epoch": 0.612053115423902, "grad_norm": 1.4787407748575119, "learning_rate": 6.908571518794307e-06, "loss": 0.6808, "step": 5992 }, { "epoch": 0.6121552604698672, "grad_norm": 1.5117284380836764, "learning_rate": 6.9054254120531524e-06, "loss": 0.6392, "step": 5993 }, { "epoch": 0.6122574055158325, "grad_norm": 1.4821468687066737, "learning_rate": 6.902279644015188e-06, "loss": 0.7268, "step": 5994 }, { "epoch": 0.6123595505617978, "grad_norm": 1.6372951399038573, "learning_rate": 6.8991342150247205e-06, "loss": 0.6142, "step": 5995 }, { "epoch": 0.612461695607763, "grad_norm": 1.4764270552280503, "learning_rate": 6.895989125426014e-06, "loss": 0.7137, "step": 5996 }, { "epoch": 0.6125638406537283, "grad_norm": 1.347014414032496, "learning_rate": 6.892844375563308e-06, "loss": 0.5718, "step": 5997 }, { "epoch": 0.6126659856996935, "grad_norm": 1.397411636577818, "learning_rate": 6.889699965780787e-06, "loss": 0.6611, "step": 5998 }, { "epoch": 0.6127681307456588, "grad_norm": 1.450436577550693, "learning_rate": 6.886555896422617e-06, "loss": 0.6935, "step": 5999 }, { "epoch": 0.6128702757916241, "grad_norm": 1.480619808957545, "learning_rate": 6.883412167832917e-06, "loss": 0.6834, "step": 6000 }, { "epoch": 0.6129724208375894, "grad_norm": 1.3228613907663964, "learning_rate": 6.880268780355763e-06, "loss": 0.6882, "step": 6001 }, { "epoch": 0.6130745658835547, "grad_norm": 1.466578899662363, "learning_rate": 6.87712573433521e-06, "loss": 0.6492, "step": 6002 }, { "epoch": 0.6131767109295199, "grad_norm": 1.5822021029167004, "learning_rate": 6.873983030115265e-06, "loss": 0.8076, "step": 6003 }, { "epoch": 0.6132788559754851, "grad_norm": 1.6039432440574921, "learning_rate": 6.8708406680398945e-06, "loss": 0.7018, "step": 6004 }, { "epoch": 0.6133810010214504, "grad_norm": 1.564217085038011, "learning_rate": 6.867698648453036e-06, "loss": 0.7354, "step": 6005 }, { "epoch": 0.6134831460674157, "grad_norm": 1.580602857393017, "learning_rate": 6.864556971698584e-06, "loss": 0.7381, "step": 6006 }, { "epoch": 0.613585291113381, "grad_norm": 1.7003034464864017, "learning_rate": 6.861415638120397e-06, "loss": 0.7432, "step": 6007 }, { "epoch": 0.6136874361593463, "grad_norm": 1.5240361507996587, "learning_rate": 6.8582746480622954e-06, "loss": 0.7169, "step": 6008 }, { "epoch": 0.6137895812053116, "grad_norm": 1.4840637097960456, "learning_rate": 6.855134001868061e-06, "loss": 0.6734, "step": 6009 }, { "epoch": 0.6138917262512769, "grad_norm": 1.4413475462488778, "learning_rate": 6.851993699881446e-06, "loss": 0.7434, "step": 6010 }, { "epoch": 0.613993871297242, "grad_norm": 1.3617512119022661, "learning_rate": 6.848853742446153e-06, "loss": 0.7777, "step": 6011 }, { "epoch": 0.6140960163432073, "grad_norm": 1.5202258508681286, "learning_rate": 6.845714129905855e-06, "loss": 0.6412, "step": 6012 }, { "epoch": 0.6141981613891726, "grad_norm": 1.4433463303460763, "learning_rate": 6.842574862604181e-06, "loss": 0.715, "step": 6013 }, { "epoch": 0.6143003064351379, "grad_norm": 1.5720118182756222, "learning_rate": 6.8394359408847315e-06, "loss": 0.7436, "step": 6014 }, { "epoch": 0.6144024514811032, "grad_norm": 1.4734469310118399, "learning_rate": 6.836297365091058e-06, "loss": 0.7534, "step": 6015 }, { "epoch": 0.6145045965270685, "grad_norm": 1.6581505768586497, "learning_rate": 6.833159135566683e-06, "loss": 0.6505, "step": 6016 }, { "epoch": 0.6146067415730337, "grad_norm": 1.5623848571026056, "learning_rate": 6.830021252655084e-06, "loss": 0.8233, "step": 6017 }, { "epoch": 0.614708886618999, "grad_norm": 1.5556822704679165, "learning_rate": 6.826883716699711e-06, "loss": 0.7013, "step": 6018 }, { "epoch": 0.6148110316649642, "grad_norm": 1.4043130206828793, "learning_rate": 6.823746528043962e-06, "loss": 0.6657, "step": 6019 }, { "epoch": 0.6149131767109295, "grad_norm": 1.535910934451778, "learning_rate": 6.82060968703121e-06, "loss": 0.7036, "step": 6020 }, { "epoch": 0.6150153217568948, "grad_norm": 1.5058246996551714, "learning_rate": 6.81747319400478e-06, "loss": 0.763, "step": 6021 }, { "epoch": 0.6151174668028601, "grad_norm": 1.515266693349947, "learning_rate": 6.814337049307966e-06, "loss": 0.7719, "step": 6022 }, { "epoch": 0.6152196118488253, "grad_norm": 1.6934495835496437, "learning_rate": 6.81120125328402e-06, "loss": 0.723, "step": 6023 }, { "epoch": 0.6153217568947906, "grad_norm": 1.4364644137202367, "learning_rate": 6.808065806276158e-06, "loss": 0.6603, "step": 6024 }, { "epoch": 0.6154239019407559, "grad_norm": 1.4582650876582257, "learning_rate": 6.804930708627553e-06, "loss": 0.6027, "step": 6025 }, { "epoch": 0.6155260469867212, "grad_norm": 1.41261136126919, "learning_rate": 6.801795960681348e-06, "loss": 0.6802, "step": 6026 }, { "epoch": 0.6156281920326864, "grad_norm": 1.4778627832345463, "learning_rate": 6.798661562780642e-06, "loss": 0.7586, "step": 6027 }, { "epoch": 0.6157303370786517, "grad_norm": 1.4136594091595078, "learning_rate": 6.795527515268494e-06, "loss": 0.656, "step": 6028 }, { "epoch": 0.6158324821246169, "grad_norm": 1.5071295563302154, "learning_rate": 6.792393818487933e-06, "loss": 0.68, "step": 6029 }, { "epoch": 0.6159346271705822, "grad_norm": 1.4904188938039284, "learning_rate": 6.78926047278194e-06, "loss": 0.6769, "step": 6030 }, { "epoch": 0.6160367722165475, "grad_norm": 1.5816343283680228, "learning_rate": 6.786127478493463e-06, "loss": 0.7574, "step": 6031 }, { "epoch": 0.6161389172625128, "grad_norm": 1.4066042305488502, "learning_rate": 6.7829948359654085e-06, "loss": 0.7378, "step": 6032 }, { "epoch": 0.6162410623084781, "grad_norm": 1.3822098245855512, "learning_rate": 6.779862545540645e-06, "loss": 0.6457, "step": 6033 }, { "epoch": 0.6163432073544433, "grad_norm": 1.4866280478571343, "learning_rate": 6.776730607562011e-06, "loss": 0.7453, "step": 6034 }, { "epoch": 0.6164453524004085, "grad_norm": 1.4304136550420243, "learning_rate": 6.7735990223722946e-06, "loss": 0.6577, "step": 6035 }, { "epoch": 0.6165474974463738, "grad_norm": 1.3540797154977005, "learning_rate": 6.770467790314248e-06, "loss": 0.6715, "step": 6036 }, { "epoch": 0.6166496424923391, "grad_norm": 1.3863200111804435, "learning_rate": 6.767336911730589e-06, "loss": 0.6749, "step": 6037 }, { "epoch": 0.6167517875383044, "grad_norm": 1.5307327834890467, "learning_rate": 6.764206386963992e-06, "loss": 0.7428, "step": 6038 }, { "epoch": 0.6168539325842697, "grad_norm": 1.492832825968607, "learning_rate": 6.761076216357099e-06, "loss": 0.6406, "step": 6039 }, { "epoch": 0.616956077630235, "grad_norm": 1.95631672833207, "learning_rate": 6.7579464002525055e-06, "loss": 0.7984, "step": 6040 }, { "epoch": 0.6170582226762003, "grad_norm": 1.4105278933788215, "learning_rate": 6.7548169389927696e-06, "loss": 0.688, "step": 6041 }, { "epoch": 0.6171603677221654, "grad_norm": 1.4704117349641852, "learning_rate": 6.7516878329204216e-06, "loss": 0.7668, "step": 6042 }, { "epoch": 0.6172625127681307, "grad_norm": 1.4288420107464077, "learning_rate": 6.7485590823779364e-06, "loss": 0.757, "step": 6043 }, { "epoch": 0.617364657814096, "grad_norm": 1.4247579470447873, "learning_rate": 6.745430687707761e-06, "loss": 0.6385, "step": 6044 }, { "epoch": 0.6174668028600613, "grad_norm": 1.5092677217279975, "learning_rate": 6.742302649252299e-06, "loss": 0.6657, "step": 6045 }, { "epoch": 0.6175689479060266, "grad_norm": 1.41659047436331, "learning_rate": 6.73917496735392e-06, "loss": 0.6909, "step": 6046 }, { "epoch": 0.6176710929519919, "grad_norm": 1.5660725850153576, "learning_rate": 6.7360476423549434e-06, "loss": 0.7386, "step": 6047 }, { "epoch": 0.6177732379979571, "grad_norm": 1.4524900168355066, "learning_rate": 6.732920674597665e-06, "loss": 0.6381, "step": 6048 }, { "epoch": 0.6178753830439224, "grad_norm": 1.4670458519782945, "learning_rate": 6.7297940644243245e-06, "loss": 0.6669, "step": 6049 }, { "epoch": 0.6179775280898876, "grad_norm": 1.376312966653919, "learning_rate": 6.7266678121771375e-06, "loss": 0.6741, "step": 6050 }, { "epoch": 0.6180796731358529, "grad_norm": 1.45597254893423, "learning_rate": 6.723541918198278e-06, "loss": 0.6212, "step": 6051 }, { "epoch": 0.6181818181818182, "grad_norm": 1.4291707979706016, "learning_rate": 6.720416382829867e-06, "loss": 0.6923, "step": 6052 }, { "epoch": 0.6182839632277835, "grad_norm": 1.4371727848437101, "learning_rate": 6.7172912064140065e-06, "loss": 0.7665, "step": 6053 }, { "epoch": 0.6183861082737487, "grad_norm": 1.6518260307164343, "learning_rate": 6.714166389292743e-06, "loss": 0.7503, "step": 6054 }, { "epoch": 0.618488253319714, "grad_norm": 1.499842965752922, "learning_rate": 6.7110419318080884e-06, "loss": 0.8056, "step": 6055 }, { "epoch": 0.6185903983656793, "grad_norm": 1.4816027524854394, "learning_rate": 6.707917834302024e-06, "loss": 0.8149, "step": 6056 }, { "epoch": 0.6186925434116445, "grad_norm": 1.5426118667475097, "learning_rate": 6.704794097116474e-06, "loss": 0.6889, "step": 6057 }, { "epoch": 0.6187946884576098, "grad_norm": 1.4294025819458678, "learning_rate": 6.701670720593341e-06, "loss": 0.6921, "step": 6058 }, { "epoch": 0.618896833503575, "grad_norm": 1.5660300238482336, "learning_rate": 6.6985477050744805e-06, "loss": 0.7242, "step": 6059 }, { "epoch": 0.6189989785495403, "grad_norm": 1.3925819059229059, "learning_rate": 6.695425050901705e-06, "loss": 0.5613, "step": 6060 }, { "epoch": 0.6191011235955056, "grad_norm": 1.4295065181841535, "learning_rate": 6.692302758416795e-06, "loss": 0.6763, "step": 6061 }, { "epoch": 0.6192032686414709, "grad_norm": 1.5153497676290344, "learning_rate": 6.689180827961481e-06, "loss": 0.663, "step": 6062 }, { "epoch": 0.6193054136874362, "grad_norm": 1.500692153504031, "learning_rate": 6.6860592598774685e-06, "loss": 0.5768, "step": 6063 }, { "epoch": 0.6194075587334015, "grad_norm": 1.4361191982467998, "learning_rate": 6.682938054506409e-06, "loss": 0.7274, "step": 6064 }, { "epoch": 0.6195097037793666, "grad_norm": 1.4775141073975566, "learning_rate": 6.679817212189921e-06, "loss": 0.7486, "step": 6065 }, { "epoch": 0.6196118488253319, "grad_norm": 1.506458475723345, "learning_rate": 6.676696733269588e-06, "loss": 0.7372, "step": 6066 }, { "epoch": 0.6197139938712972, "grad_norm": 1.460369331967241, "learning_rate": 6.673576618086941e-06, "loss": 0.5962, "step": 6067 }, { "epoch": 0.6198161389172625, "grad_norm": 1.2949803696878728, "learning_rate": 6.670456866983486e-06, "loss": 0.564, "step": 6068 }, { "epoch": 0.6199182839632278, "grad_norm": 1.758863773646398, "learning_rate": 6.667337480300675e-06, "loss": 0.738, "step": 6069 }, { "epoch": 0.6200204290091931, "grad_norm": 1.5500809275919858, "learning_rate": 6.664218458379933e-06, "loss": 0.6816, "step": 6070 }, { "epoch": 0.6201225740551584, "grad_norm": 1.4066602194317719, "learning_rate": 6.661099801562636e-06, "loss": 0.6525, "step": 6071 }, { "epoch": 0.6202247191011236, "grad_norm": 1.3009317908415803, "learning_rate": 6.657981510190119e-06, "loss": 0.7116, "step": 6072 }, { "epoch": 0.6203268641470888, "grad_norm": 1.575424484722834, "learning_rate": 6.654863584603684e-06, "loss": 0.7053, "step": 6073 }, { "epoch": 0.6204290091930541, "grad_norm": 1.3368640242122787, "learning_rate": 6.651746025144597e-06, "loss": 0.6669, "step": 6074 }, { "epoch": 0.6205311542390194, "grad_norm": 1.4088063879810369, "learning_rate": 6.648628832154066e-06, "loss": 0.7192, "step": 6075 }, { "epoch": 0.6206332992849847, "grad_norm": 1.435331346167685, "learning_rate": 6.645512005973278e-06, "loss": 0.6027, "step": 6076 }, { "epoch": 0.62073544433095, "grad_norm": 1.4768219350941822, "learning_rate": 6.642395546943365e-06, "loss": 0.7019, "step": 6077 }, { "epoch": 0.6208375893769152, "grad_norm": 1.543670492926742, "learning_rate": 6.639279455405432e-06, "loss": 0.6407, "step": 6078 }, { "epoch": 0.6209397344228805, "grad_norm": 1.688668873676125, "learning_rate": 6.636163731700531e-06, "loss": 0.6674, "step": 6079 }, { "epoch": 0.6210418794688458, "grad_norm": 1.3895951000534057, "learning_rate": 6.633048376169685e-06, "loss": 0.5355, "step": 6080 }, { "epoch": 0.621144024514811, "grad_norm": 1.5113002930997002, "learning_rate": 6.629933389153867e-06, "loss": 0.7938, "step": 6081 }, { "epoch": 0.6212461695607763, "grad_norm": 1.537745442511612, "learning_rate": 6.626818770994017e-06, "loss": 0.682, "step": 6082 }, { "epoch": 0.6213483146067416, "grad_norm": 1.7132838972110638, "learning_rate": 6.6237045220310335e-06, "loss": 0.7049, "step": 6083 }, { "epoch": 0.6214504596527068, "grad_norm": 1.4599340642780694, "learning_rate": 6.620590642605771e-06, "loss": 0.6635, "step": 6084 }, { "epoch": 0.6215526046986721, "grad_norm": 1.5891035170209347, "learning_rate": 6.617477133059048e-06, "loss": 0.7097, "step": 6085 }, { "epoch": 0.6216547497446374, "grad_norm": 1.6559760622480633, "learning_rate": 6.6143639937316364e-06, "loss": 0.742, "step": 6086 }, { "epoch": 0.6217568947906027, "grad_norm": 1.5919220585917173, "learning_rate": 6.611251224964273e-06, "loss": 0.6516, "step": 6087 }, { "epoch": 0.6218590398365679, "grad_norm": 1.4540915195089683, "learning_rate": 6.608138827097654e-06, "loss": 0.7436, "step": 6088 }, { "epoch": 0.6219611848825332, "grad_norm": 1.4878140228498602, "learning_rate": 6.6050268004724285e-06, "loss": 0.6584, "step": 6089 }, { "epoch": 0.6220633299284984, "grad_norm": 1.5403716067395332, "learning_rate": 6.601915145429214e-06, "loss": 0.7143, "step": 6090 }, { "epoch": 0.6221654749744637, "grad_norm": 1.4553378596096787, "learning_rate": 6.598803862308585e-06, "loss": 0.7485, "step": 6091 }, { "epoch": 0.622267620020429, "grad_norm": 1.4094091074894952, "learning_rate": 6.595692951451067e-06, "loss": 0.6686, "step": 6092 }, { "epoch": 0.6223697650663943, "grad_norm": 1.6500652180930637, "learning_rate": 6.5925824131971595e-06, "loss": 0.676, "step": 6093 }, { "epoch": 0.6224719101123596, "grad_norm": 1.3390705563168666, "learning_rate": 6.589472247887305e-06, "loss": 0.5841, "step": 6094 }, { "epoch": 0.6225740551583249, "grad_norm": 1.49096187117337, "learning_rate": 6.586362455861918e-06, "loss": 0.7096, "step": 6095 }, { "epoch": 0.62267620020429, "grad_norm": 1.4917426621288992, "learning_rate": 6.5832530374613635e-06, "loss": 0.6601, "step": 6096 }, { "epoch": 0.6227783452502553, "grad_norm": 1.6903309468479464, "learning_rate": 6.5801439930259714e-06, "loss": 0.7086, "step": 6097 }, { "epoch": 0.6228804902962206, "grad_norm": 1.5570198304384162, "learning_rate": 6.57703532289603e-06, "loss": 0.7381, "step": 6098 }, { "epoch": 0.6229826353421859, "grad_norm": 1.54621313987679, "learning_rate": 6.573927027411784e-06, "loss": 0.6884, "step": 6099 }, { "epoch": 0.6230847803881512, "grad_norm": 1.3510007195205616, "learning_rate": 6.570819106913441e-06, "loss": 0.6559, "step": 6100 }, { "epoch": 0.6231869254341165, "grad_norm": 1.4204563608987426, "learning_rate": 6.5677115617411595e-06, "loss": 0.6985, "step": 6101 }, { "epoch": 0.6232890704800818, "grad_norm": 1.496205072591101, "learning_rate": 6.5646043922350665e-06, "loss": 0.7232, "step": 6102 }, { "epoch": 0.623391215526047, "grad_norm": 1.3192881547599211, "learning_rate": 6.5614975987352404e-06, "loss": 0.7113, "step": 6103 }, { "epoch": 0.6234933605720122, "grad_norm": 1.4570154971656228, "learning_rate": 6.558391181581727e-06, "loss": 0.6575, "step": 6104 }, { "epoch": 0.6235955056179775, "grad_norm": 1.3962310531998048, "learning_rate": 6.555285141114515e-06, "loss": 0.7013, "step": 6105 }, { "epoch": 0.6236976506639428, "grad_norm": 1.522001929479248, "learning_rate": 6.552179477673577e-06, "loss": 0.6985, "step": 6106 }, { "epoch": 0.6237997957099081, "grad_norm": 1.4548704517087414, "learning_rate": 6.549074191598821e-06, "loss": 0.728, "step": 6107 }, { "epoch": 0.6239019407558734, "grad_norm": 1.388947331995313, "learning_rate": 6.545969283230125e-06, "loss": 0.6588, "step": 6108 }, { "epoch": 0.6240040858018386, "grad_norm": 1.5921301098645846, "learning_rate": 6.542864752907321e-06, "loss": 0.7473, "step": 6109 }, { "epoch": 0.6241062308478039, "grad_norm": 1.5357758512747757, "learning_rate": 6.539760600970205e-06, "loss": 0.7124, "step": 6110 }, { "epoch": 0.6242083758937692, "grad_norm": 1.5694724590235667, "learning_rate": 6.5366568277585254e-06, "loss": 0.6955, "step": 6111 }, { "epoch": 0.6243105209397344, "grad_norm": 1.556913179252182, "learning_rate": 6.533553433611994e-06, "loss": 0.7037, "step": 6112 }, { "epoch": 0.6244126659856997, "grad_norm": 1.5414589304335253, "learning_rate": 6.530450418870278e-06, "loss": 0.6921, "step": 6113 }, { "epoch": 0.624514811031665, "grad_norm": 1.4780229950873949, "learning_rate": 6.527347783873003e-06, "loss": 0.6105, "step": 6114 }, { "epoch": 0.6246169560776302, "grad_norm": 1.5167098436271296, "learning_rate": 6.524245528959763e-06, "loss": 0.702, "step": 6115 }, { "epoch": 0.6247191011235955, "grad_norm": 1.412337467423917, "learning_rate": 6.521143654470091e-06, "loss": 0.731, "step": 6116 }, { "epoch": 0.6248212461695608, "grad_norm": 1.4414654883539222, "learning_rate": 6.5180421607434964e-06, "loss": 0.7116, "step": 6117 }, { "epoch": 0.6249233912155261, "grad_norm": 1.5384967019497453, "learning_rate": 6.514941048119434e-06, "loss": 0.7711, "step": 6118 }, { "epoch": 0.6250255362614913, "grad_norm": 1.6264825953342996, "learning_rate": 6.511840316937329e-06, "loss": 0.7375, "step": 6119 }, { "epoch": 0.6251276813074566, "grad_norm": 1.544268452745341, "learning_rate": 6.508739967536553e-06, "loss": 0.7036, "step": 6120 }, { "epoch": 0.6252298263534218, "grad_norm": 1.488165588337334, "learning_rate": 6.505640000256439e-06, "loss": 0.7173, "step": 6121 }, { "epoch": 0.6253319713993871, "grad_norm": 1.390294984734998, "learning_rate": 6.50254041543629e-06, "loss": 0.7382, "step": 6122 }, { "epoch": 0.6254341164453524, "grad_norm": 1.4789159006002561, "learning_rate": 6.4994412134153505e-06, "loss": 0.6836, "step": 6123 }, { "epoch": 0.6255362614913177, "grad_norm": 1.4985027068779089, "learning_rate": 6.496342394532834e-06, "loss": 0.652, "step": 6124 }, { "epoch": 0.625638406537283, "grad_norm": 1.5859858167308005, "learning_rate": 6.493243959127906e-06, "loss": 0.7614, "step": 6125 }, { "epoch": 0.6257405515832483, "grad_norm": 1.7279997497800577, "learning_rate": 6.490145907539689e-06, "loss": 0.787, "step": 6126 }, { "epoch": 0.6258426966292134, "grad_norm": 1.519391082354374, "learning_rate": 6.487048240107275e-06, "loss": 0.6974, "step": 6127 }, { "epoch": 0.6259448416751787, "grad_norm": 1.540289547940538, "learning_rate": 6.483950957169695e-06, "loss": 0.757, "step": 6128 }, { "epoch": 0.626046986721144, "grad_norm": 1.4242659175660761, "learning_rate": 6.4808540590659555e-06, "loss": 0.6387, "step": 6129 }, { "epoch": 0.6261491317671093, "grad_norm": 1.4959259914799132, "learning_rate": 6.477757546135015e-06, "loss": 0.6499, "step": 6130 }, { "epoch": 0.6262512768130746, "grad_norm": 1.530240054016163, "learning_rate": 6.474661418715784e-06, "loss": 0.6252, "step": 6131 }, { "epoch": 0.6263534218590399, "grad_norm": 1.5543998527613867, "learning_rate": 6.471565677147142e-06, "loss": 0.6421, "step": 6132 }, { "epoch": 0.6264555669050051, "grad_norm": 1.3142449970090542, "learning_rate": 6.468470321767914e-06, "loss": 0.7445, "step": 6133 }, { "epoch": 0.6265577119509704, "grad_norm": 1.4687246598647847, "learning_rate": 6.465375352916894e-06, "loss": 0.6106, "step": 6134 }, { "epoch": 0.6266598569969356, "grad_norm": 1.490175162611309, "learning_rate": 6.4622807709328216e-06, "loss": 0.791, "step": 6135 }, { "epoch": 0.6267620020429009, "grad_norm": 1.3795828066046198, "learning_rate": 6.4591865761544085e-06, "loss": 0.6047, "step": 6136 }, { "epoch": 0.6268641470888662, "grad_norm": 1.6217155999779487, "learning_rate": 6.4560927689203055e-06, "loss": 0.6734, "step": 6137 }, { "epoch": 0.6269662921348315, "grad_norm": 1.548370911051616, "learning_rate": 6.4529993495691426e-06, "loss": 0.6208, "step": 6138 }, { "epoch": 0.6270684371807967, "grad_norm": 1.415130929828379, "learning_rate": 6.449906318439494e-06, "loss": 0.5849, "step": 6139 }, { "epoch": 0.627170582226762, "grad_norm": 1.415237121981588, "learning_rate": 6.44681367586989e-06, "loss": 0.7854, "step": 6140 }, { "epoch": 0.6272727272727273, "grad_norm": 1.4886019830601929, "learning_rate": 6.443721422198827e-06, "loss": 0.6364, "step": 6141 }, { "epoch": 0.6273748723186925, "grad_norm": 1.5229314779038945, "learning_rate": 6.440629557764752e-06, "loss": 0.8162, "step": 6142 }, { "epoch": 0.6274770173646578, "grad_norm": 1.2937330380822516, "learning_rate": 6.437538082906071e-06, "loss": 0.5845, "step": 6143 }, { "epoch": 0.6275791624106231, "grad_norm": 1.368139203411507, "learning_rate": 6.434446997961149e-06, "loss": 0.686, "step": 6144 }, { "epoch": 0.6276813074565883, "grad_norm": 1.5627474790929743, "learning_rate": 6.431356303268303e-06, "loss": 0.6666, "step": 6145 }, { "epoch": 0.6277834525025536, "grad_norm": 1.39128874288251, "learning_rate": 6.428265999165817e-06, "loss": 0.6475, "step": 6146 }, { "epoch": 0.6278855975485189, "grad_norm": 1.535923035945346, "learning_rate": 6.4251760859919275e-06, "loss": 0.7689, "step": 6147 }, { "epoch": 0.6279877425944842, "grad_norm": 1.4480834384293195, "learning_rate": 6.422086564084823e-06, "loss": 0.787, "step": 6148 }, { "epoch": 0.6280898876404495, "grad_norm": 1.5499550928607904, "learning_rate": 6.418997433782657e-06, "loss": 0.6551, "step": 6149 }, { "epoch": 0.6281920326864147, "grad_norm": 1.4702956737794057, "learning_rate": 6.415908695423534e-06, "loss": 0.7196, "step": 6150 }, { "epoch": 0.6282941777323799, "grad_norm": 1.534264929854412, "learning_rate": 6.412820349345522e-06, "loss": 0.7261, "step": 6151 }, { "epoch": 0.6283963227783452, "grad_norm": 1.5514897825533918, "learning_rate": 6.409732395886639e-06, "loss": 0.6872, "step": 6152 }, { "epoch": 0.6284984678243105, "grad_norm": 1.5145864060263614, "learning_rate": 6.406644835384861e-06, "loss": 0.6604, "step": 6153 }, { "epoch": 0.6286006128702758, "grad_norm": 1.405545308550319, "learning_rate": 6.403557668178133e-06, "loss": 0.6975, "step": 6154 }, { "epoch": 0.6287027579162411, "grad_norm": 1.409986528115615, "learning_rate": 6.400470894604339e-06, "loss": 0.7668, "step": 6155 }, { "epoch": 0.6288049029622064, "grad_norm": 1.4051823487323436, "learning_rate": 6.397384515001335e-06, "loss": 0.676, "step": 6156 }, { "epoch": 0.6289070480081717, "grad_norm": 1.5031621105998738, "learning_rate": 6.394298529706919e-06, "loss": 0.7039, "step": 6157 }, { "epoch": 0.6290091930541368, "grad_norm": 1.4679787605986283, "learning_rate": 6.3912129390588615e-06, "loss": 0.6971, "step": 6158 }, { "epoch": 0.6291113381001021, "grad_norm": 1.655866605465299, "learning_rate": 6.3881277433948785e-06, "loss": 0.7522, "step": 6159 }, { "epoch": 0.6292134831460674, "grad_norm": 1.4169112026998378, "learning_rate": 6.385042943052648e-06, "loss": 0.6098, "step": 6160 }, { "epoch": 0.6293156281920327, "grad_norm": 1.5088972911438008, "learning_rate": 6.381958538369799e-06, "loss": 0.6875, "step": 6161 }, { "epoch": 0.629417773237998, "grad_norm": 1.393540763856964, "learning_rate": 6.378874529683929e-06, "loss": 0.6638, "step": 6162 }, { "epoch": 0.6295199182839633, "grad_norm": 1.4814868758339288, "learning_rate": 6.375790917332581e-06, "loss": 0.7373, "step": 6163 }, { "epoch": 0.6296220633299285, "grad_norm": 1.4958972215313535, "learning_rate": 6.3727077016532605e-06, "loss": 0.7292, "step": 6164 }, { "epoch": 0.6297242083758938, "grad_norm": 1.382691756925648, "learning_rate": 6.3696248829834216e-06, "loss": 0.7479, "step": 6165 }, { "epoch": 0.629826353421859, "grad_norm": 1.4689183306559397, "learning_rate": 6.366542461660487e-06, "loss": 0.7133, "step": 6166 }, { "epoch": 0.6299284984678243, "grad_norm": 1.6578691864028614, "learning_rate": 6.363460438021828e-06, "loss": 0.5832, "step": 6167 }, { "epoch": 0.6300306435137896, "grad_norm": 1.4716762140685307, "learning_rate": 6.360378812404774e-06, "loss": 0.7267, "step": 6168 }, { "epoch": 0.6301327885597549, "grad_norm": 1.352135729165762, "learning_rate": 6.357297585146607e-06, "loss": 0.6907, "step": 6169 }, { "epoch": 0.6302349336057201, "grad_norm": 1.5351627885458556, "learning_rate": 6.354216756584573e-06, "loss": 0.5625, "step": 6170 }, { "epoch": 0.6303370786516854, "grad_norm": 1.5559090636427537, "learning_rate": 6.351136327055875e-06, "loss": 0.7687, "step": 6171 }, { "epoch": 0.6304392236976507, "grad_norm": 1.4939070298327277, "learning_rate": 6.34805629689766e-06, "loss": 0.7567, "step": 6172 }, { "epoch": 0.6305413687436159, "grad_norm": 1.4624280100821394, "learning_rate": 6.344976666447045e-06, "loss": 0.7369, "step": 6173 }, { "epoch": 0.6306435137895812, "grad_norm": 1.4996062015482285, "learning_rate": 6.341897436041094e-06, "loss": 0.7253, "step": 6174 }, { "epoch": 0.6307456588355465, "grad_norm": 1.4303744556780493, "learning_rate": 6.338818606016832e-06, "loss": 0.6669, "step": 6175 }, { "epoch": 0.6308478038815117, "grad_norm": 1.3145261755249669, "learning_rate": 6.335740176711241e-06, "loss": 0.6468, "step": 6176 }, { "epoch": 0.630949948927477, "grad_norm": 1.2739431330657027, "learning_rate": 6.332662148461248e-06, "loss": 0.6335, "step": 6177 }, { "epoch": 0.6310520939734423, "grad_norm": 1.5297459910209643, "learning_rate": 6.32958452160376e-06, "loss": 0.6628, "step": 6178 }, { "epoch": 0.6311542390194076, "grad_norm": 1.440386659421521, "learning_rate": 6.3265072964756155e-06, "loss": 0.7348, "step": 6179 }, { "epoch": 0.6312563840653729, "grad_norm": 1.4782781421664306, "learning_rate": 6.32343047341362e-06, "loss": 0.6466, "step": 6180 }, { "epoch": 0.631358529111338, "grad_norm": 1.392628288008539, "learning_rate": 6.320354052754536e-06, "loss": 0.7182, "step": 6181 }, { "epoch": 0.6314606741573033, "grad_norm": 1.4904357863526596, "learning_rate": 6.317278034835077e-06, "loss": 0.8133, "step": 6182 }, { "epoch": 0.6315628192032686, "grad_norm": 1.2716580098208816, "learning_rate": 6.314202419991917e-06, "loss": 0.6298, "step": 6183 }, { "epoch": 0.6316649642492339, "grad_norm": 1.5929248804767717, "learning_rate": 6.311127208561683e-06, "loss": 0.7973, "step": 6184 }, { "epoch": 0.6317671092951992, "grad_norm": 1.5075047855181032, "learning_rate": 6.308052400880955e-06, "loss": 0.6926, "step": 6185 }, { "epoch": 0.6318692543411645, "grad_norm": 1.4485528969026127, "learning_rate": 6.304977997286283e-06, "loss": 0.7937, "step": 6186 }, { "epoch": 0.6319713993871298, "grad_norm": 1.5189585560008951, "learning_rate": 6.301903998114154e-06, "loss": 0.6726, "step": 6187 }, { "epoch": 0.632073544433095, "grad_norm": 1.4535747623387782, "learning_rate": 6.298830403701024e-06, "loss": 0.6461, "step": 6188 }, { "epoch": 0.6321756894790602, "grad_norm": 1.4189325564748962, "learning_rate": 6.295757214383296e-06, "loss": 0.8286, "step": 6189 }, { "epoch": 0.6322778345250255, "grad_norm": 1.4963841322450242, "learning_rate": 6.292684430497336e-06, "loss": 0.7421, "step": 6190 }, { "epoch": 0.6323799795709908, "grad_norm": 1.4279436184550074, "learning_rate": 6.289612052379457e-06, "loss": 0.7743, "step": 6191 }, { "epoch": 0.6324821246169561, "grad_norm": 1.4492016880850955, "learning_rate": 6.286540080365939e-06, "loss": 0.7429, "step": 6192 }, { "epoch": 0.6325842696629214, "grad_norm": 1.495249373750305, "learning_rate": 6.283468514793005e-06, "loss": 0.7534, "step": 6193 }, { "epoch": 0.6326864147088866, "grad_norm": 1.5968758595614805, "learning_rate": 6.280397355996843e-06, "loss": 0.8035, "step": 6194 }, { "epoch": 0.6327885597548519, "grad_norm": 1.4433902304256785, "learning_rate": 6.277326604313598e-06, "loss": 0.8289, "step": 6195 }, { "epoch": 0.6328907048008171, "grad_norm": 1.4080068376210237, "learning_rate": 6.2742562600793614e-06, "loss": 0.7034, "step": 6196 }, { "epoch": 0.6329928498467824, "grad_norm": 1.4334213083420997, "learning_rate": 6.2711863236301805e-06, "loss": 0.7401, "step": 6197 }, { "epoch": 0.6330949948927477, "grad_norm": 1.4977396009773944, "learning_rate": 6.268116795302068e-06, "loss": 0.7156, "step": 6198 }, { "epoch": 0.633197139938713, "grad_norm": 1.5021139954309746, "learning_rate": 6.265047675430982e-06, "loss": 0.728, "step": 6199 }, { "epoch": 0.6332992849846782, "grad_norm": 1.5336938535119229, "learning_rate": 6.2619789643528424e-06, "loss": 0.8164, "step": 6200 }, { "epoch": 0.6334014300306435, "grad_norm": 1.356242017092509, "learning_rate": 6.258910662403517e-06, "loss": 0.653, "step": 6201 }, { "epoch": 0.6335035750766088, "grad_norm": 1.3265460923211345, "learning_rate": 6.255842769918838e-06, "loss": 0.7114, "step": 6202 }, { "epoch": 0.6336057201225741, "grad_norm": 1.3879826733753458, "learning_rate": 6.2527752872345895e-06, "loss": 0.6709, "step": 6203 }, { "epoch": 0.6337078651685393, "grad_norm": 1.4680080751645839, "learning_rate": 6.249708214686505e-06, "loss": 0.7722, "step": 6204 }, { "epoch": 0.6338100102145046, "grad_norm": 1.5092491757988253, "learning_rate": 6.24664155261028e-06, "loss": 0.7145, "step": 6205 }, { "epoch": 0.6339121552604698, "grad_norm": 1.578997497195343, "learning_rate": 6.243575301341561e-06, "loss": 0.7269, "step": 6206 }, { "epoch": 0.6340143003064351, "grad_norm": 1.4318419337598796, "learning_rate": 6.2405094612159546e-06, "loss": 0.6452, "step": 6207 }, { "epoch": 0.6341164453524004, "grad_norm": 1.4816845059788193, "learning_rate": 6.237444032569013e-06, "loss": 0.6853, "step": 6208 }, { "epoch": 0.6342185903983657, "grad_norm": 1.5123491478029518, "learning_rate": 6.234379015736253e-06, "loss": 0.6361, "step": 6209 }, { "epoch": 0.634320735444331, "grad_norm": 1.452998072193306, "learning_rate": 6.231314411053147e-06, "loss": 0.6088, "step": 6210 }, { "epoch": 0.6344228804902963, "grad_norm": 1.4471759682705987, "learning_rate": 6.22825021885511e-06, "loss": 0.7437, "step": 6211 }, { "epoch": 0.6345250255362614, "grad_norm": 1.4885112002115297, "learning_rate": 6.225186439477524e-06, "loss": 0.6542, "step": 6212 }, { "epoch": 0.6346271705822267, "grad_norm": 1.4219888059498778, "learning_rate": 6.222123073255724e-06, "loss": 0.6043, "step": 6213 }, { "epoch": 0.634729315628192, "grad_norm": 1.4993694308449521, "learning_rate": 6.2190601205249915e-06, "loss": 0.6974, "step": 6214 }, { "epoch": 0.6348314606741573, "grad_norm": 1.4653977402395704, "learning_rate": 6.215997581620572e-06, "loss": 0.651, "step": 6215 }, { "epoch": 0.6349336057201226, "grad_norm": 1.4504700293284152, "learning_rate": 6.212935456877663e-06, "loss": 0.6603, "step": 6216 }, { "epoch": 0.6350357507660879, "grad_norm": 1.31453360826875, "learning_rate": 6.20987374663141e-06, "loss": 0.577, "step": 6217 }, { "epoch": 0.6351378958120532, "grad_norm": 1.5966027384079478, "learning_rate": 6.20681245121693e-06, "loss": 0.641, "step": 6218 }, { "epoch": 0.6352400408580184, "grad_norm": 1.4582790796757519, "learning_rate": 6.203751570969275e-06, "loss": 0.6733, "step": 6219 }, { "epoch": 0.6353421859039836, "grad_norm": 1.462234736454773, "learning_rate": 6.200691106223466e-06, "loss": 0.7, "step": 6220 }, { "epoch": 0.6354443309499489, "grad_norm": 1.4057295654477358, "learning_rate": 6.197631057314468e-06, "loss": 0.6323, "step": 6221 }, { "epoch": 0.6355464759959142, "grad_norm": 1.5233682182128938, "learning_rate": 6.194571424577209e-06, "loss": 0.8243, "step": 6222 }, { "epoch": 0.6356486210418795, "grad_norm": 1.4220250645864718, "learning_rate": 6.191512208346564e-06, "loss": 0.7041, "step": 6223 }, { "epoch": 0.6357507660878448, "grad_norm": 1.6270115198349078, "learning_rate": 6.188453408957371e-06, "loss": 0.737, "step": 6224 }, { "epoch": 0.63585291113381, "grad_norm": 1.549927916578951, "learning_rate": 6.1853950267444095e-06, "loss": 0.6851, "step": 6225 }, { "epoch": 0.6359550561797753, "grad_norm": 1.4296112451074776, "learning_rate": 6.182337062042431e-06, "loss": 0.6015, "step": 6226 }, { "epoch": 0.6360572012257405, "grad_norm": 1.4719461868049277, "learning_rate": 6.1792795151861285e-06, "loss": 0.6562, "step": 6227 }, { "epoch": 0.6361593462717058, "grad_norm": 1.3184197167047926, "learning_rate": 6.17622238651015e-06, "loss": 0.6735, "step": 6228 }, { "epoch": 0.6362614913176711, "grad_norm": 1.4591541413720452, "learning_rate": 6.173165676349103e-06, "loss": 0.6164, "step": 6229 }, { "epoch": 0.6363636363636364, "grad_norm": 1.4203075982165287, "learning_rate": 6.170109385037546e-06, "loss": 0.7087, "step": 6230 }, { "epoch": 0.6364657814096016, "grad_norm": 1.359520142387967, "learning_rate": 6.167053512909988e-06, "loss": 0.7115, "step": 6231 }, { "epoch": 0.6365679264555669, "grad_norm": 1.3808068490156395, "learning_rate": 6.163998060300904e-06, "loss": 0.6391, "step": 6232 }, { "epoch": 0.6366700715015322, "grad_norm": 1.405883583688255, "learning_rate": 6.160943027544706e-06, "loss": 0.6704, "step": 6233 }, { "epoch": 0.6367722165474975, "grad_norm": 1.3646605696719372, "learning_rate": 6.157888414975773e-06, "loss": 0.6202, "step": 6234 }, { "epoch": 0.6368743615934627, "grad_norm": 1.5158929539816646, "learning_rate": 6.154834222928439e-06, "loss": 0.7368, "step": 6235 }, { "epoch": 0.636976506639428, "grad_norm": 1.6604850432086664, "learning_rate": 6.151780451736983e-06, "loss": 0.6779, "step": 6236 }, { "epoch": 0.6370786516853932, "grad_norm": 1.574391380965864, "learning_rate": 6.148727101735643e-06, "loss": 0.76, "step": 6237 }, { "epoch": 0.6371807967313585, "grad_norm": 1.7394989648284243, "learning_rate": 6.145674173258609e-06, "loss": 0.7345, "step": 6238 }, { "epoch": 0.6372829417773238, "grad_norm": 1.4628235786694361, "learning_rate": 6.142621666640028e-06, "loss": 0.7739, "step": 6239 }, { "epoch": 0.6373850868232891, "grad_norm": 1.5121468234428965, "learning_rate": 6.139569582213997e-06, "loss": 0.7746, "step": 6240 }, { "epoch": 0.6374872318692544, "grad_norm": 1.3744367591821849, "learning_rate": 6.1365179203145705e-06, "loss": 0.6738, "step": 6241 }, { "epoch": 0.6375893769152197, "grad_norm": 1.5669755563045646, "learning_rate": 6.13346668127575e-06, "loss": 0.6892, "step": 6242 }, { "epoch": 0.6376915219611848, "grad_norm": 1.4568635355387474, "learning_rate": 6.1304158654315015e-06, "loss": 0.8537, "step": 6243 }, { "epoch": 0.6377936670071501, "grad_norm": 1.2602954399512702, "learning_rate": 6.127365473115738e-06, "loss": 0.612, "step": 6244 }, { "epoch": 0.6378958120531154, "grad_norm": 1.4469408580318752, "learning_rate": 6.124315504662325e-06, "loss": 0.7272, "step": 6245 }, { "epoch": 0.6379979570990807, "grad_norm": 1.448084307636434, "learning_rate": 6.121265960405085e-06, "loss": 0.7752, "step": 6246 }, { "epoch": 0.638100102145046, "grad_norm": 1.3914522330353567, "learning_rate": 6.118216840677791e-06, "loss": 0.7579, "step": 6247 }, { "epoch": 0.6382022471910113, "grad_norm": 1.5852464799926391, "learning_rate": 6.11516814581417e-06, "loss": 0.7772, "step": 6248 }, { "epoch": 0.6383043922369765, "grad_norm": 1.5236108392706704, "learning_rate": 6.112119876147908e-06, "loss": 0.7219, "step": 6249 }, { "epoch": 0.6384065372829418, "grad_norm": 1.5016271098897103, "learning_rate": 6.1090720320126325e-06, "loss": 0.7993, "step": 6250 }, { "epoch": 0.638508682328907, "grad_norm": 1.4939538218284871, "learning_rate": 6.106024613741941e-06, "loss": 0.6716, "step": 6251 }, { "epoch": 0.6386108273748723, "grad_norm": 1.7935364035761887, "learning_rate": 6.102977621669371e-06, "loss": 0.7911, "step": 6252 }, { "epoch": 0.6387129724208376, "grad_norm": 1.5198210915670876, "learning_rate": 6.099931056128418e-06, "loss": 0.5812, "step": 6253 }, { "epoch": 0.6388151174668029, "grad_norm": 1.4867267152849766, "learning_rate": 6.096884917452531e-06, "loss": 0.739, "step": 6254 }, { "epoch": 0.6389172625127681, "grad_norm": 1.4288751536856423, "learning_rate": 6.093839205975111e-06, "loss": 0.689, "step": 6255 }, { "epoch": 0.6390194075587334, "grad_norm": 1.3179589640284959, "learning_rate": 6.090793922029514e-06, "loss": 0.6719, "step": 6256 }, { "epoch": 0.6391215526046987, "grad_norm": 1.5559147156948698, "learning_rate": 6.087749065949047e-06, "loss": 0.6956, "step": 6257 }, { "epoch": 0.6392236976506639, "grad_norm": 1.5465574184309092, "learning_rate": 6.08470463806697e-06, "loss": 0.7879, "step": 6258 }, { "epoch": 0.6393258426966292, "grad_norm": 1.4421781530341753, "learning_rate": 6.081660638716505e-06, "loss": 0.5971, "step": 6259 }, { "epoch": 0.6394279877425945, "grad_norm": 1.4028764383352599, "learning_rate": 6.0786170682308125e-06, "loss": 0.6198, "step": 6260 }, { "epoch": 0.6395301327885597, "grad_norm": 1.496240994624535, "learning_rate": 6.075573926943016e-06, "loss": 0.6857, "step": 6261 }, { "epoch": 0.639632277834525, "grad_norm": 1.5726742847144117, "learning_rate": 6.072531215186187e-06, "loss": 0.6352, "step": 6262 }, { "epoch": 0.6397344228804903, "grad_norm": 1.5342798755293476, "learning_rate": 6.069488933293357e-06, "loss": 0.7799, "step": 6263 }, { "epoch": 0.6398365679264556, "grad_norm": 1.466962476830368, "learning_rate": 6.066447081597502e-06, "loss": 0.5925, "step": 6264 }, { "epoch": 0.6399387129724209, "grad_norm": 1.516820858944803, "learning_rate": 6.063405660431553e-06, "loss": 0.7411, "step": 6265 }, { "epoch": 0.6400408580183861, "grad_norm": 1.5568608146490341, "learning_rate": 6.060364670128396e-06, "loss": 0.7675, "step": 6266 }, { "epoch": 0.6401430030643513, "grad_norm": 1.437974419050373, "learning_rate": 6.0573241110208755e-06, "loss": 0.6998, "step": 6267 }, { "epoch": 0.6402451481103166, "grad_norm": 1.4162603119460129, "learning_rate": 6.054283983441776e-06, "loss": 0.6653, "step": 6268 }, { "epoch": 0.6403472931562819, "grad_norm": 1.277162464892732, "learning_rate": 6.051244287723846e-06, "loss": 0.5971, "step": 6269 }, { "epoch": 0.6404494382022472, "grad_norm": 1.220602949708139, "learning_rate": 6.048205024199778e-06, "loss": 0.7208, "step": 6270 }, { "epoch": 0.6405515832482125, "grad_norm": 1.3681695386784096, "learning_rate": 6.0451661932022255e-06, "loss": 0.7108, "step": 6271 }, { "epoch": 0.6406537282941778, "grad_norm": 1.658182041622724, "learning_rate": 6.042127795063786e-06, "loss": 0.6594, "step": 6272 }, { "epoch": 0.6407558733401431, "grad_norm": 1.577463958203946, "learning_rate": 6.0390898301170185e-06, "loss": 0.7456, "step": 6273 }, { "epoch": 0.6408580183861082, "grad_norm": 1.3223477041156972, "learning_rate": 6.036052298694424e-06, "loss": 0.6867, "step": 6274 }, { "epoch": 0.6409601634320735, "grad_norm": 1.5616895836045444, "learning_rate": 6.0330152011284696e-06, "loss": 0.7719, "step": 6275 }, { "epoch": 0.6410623084780388, "grad_norm": 1.467252891058547, "learning_rate": 6.029978537751564e-06, "loss": 0.6348, "step": 6276 }, { "epoch": 0.6411644535240041, "grad_norm": 1.469235519563346, "learning_rate": 6.026942308896073e-06, "loss": 0.6996, "step": 6277 }, { "epoch": 0.6412665985699694, "grad_norm": 1.7020674155693478, "learning_rate": 6.023906514894313e-06, "loss": 0.7073, "step": 6278 }, { "epoch": 0.6413687436159347, "grad_norm": 1.5687023916689578, "learning_rate": 6.020871156078554e-06, "loss": 0.7302, "step": 6279 }, { "epoch": 0.6414708886618999, "grad_norm": 1.515846285703917, "learning_rate": 6.017836232781018e-06, "loss": 0.5688, "step": 6280 }, { "epoch": 0.6415730337078651, "grad_norm": 1.5468003853842678, "learning_rate": 6.01480174533388e-06, "loss": 0.6731, "step": 6281 }, { "epoch": 0.6416751787538304, "grad_norm": 1.3861808635196455, "learning_rate": 6.011767694069259e-06, "loss": 0.7177, "step": 6282 }, { "epoch": 0.6417773237997957, "grad_norm": 1.4517288074656356, "learning_rate": 6.008734079319247e-06, "loss": 0.6446, "step": 6283 }, { "epoch": 0.641879468845761, "grad_norm": 1.4919906162776033, "learning_rate": 6.005700901415869e-06, "loss": 0.7099, "step": 6284 }, { "epoch": 0.6419816138917263, "grad_norm": 1.4612823533062915, "learning_rate": 6.002668160691107e-06, "loss": 0.6198, "step": 6285 }, { "epoch": 0.6420837589376915, "grad_norm": 1.5320872605567237, "learning_rate": 5.999635857476897e-06, "loss": 0.809, "step": 6286 }, { "epoch": 0.6421859039836568, "grad_norm": 1.4252250187186526, "learning_rate": 5.996603992105128e-06, "loss": 0.7124, "step": 6287 }, { "epoch": 0.6422880490296221, "grad_norm": 1.479817849230376, "learning_rate": 5.993572564907637e-06, "loss": 0.6795, "step": 6288 }, { "epoch": 0.6423901940755873, "grad_norm": 1.4328461340662038, "learning_rate": 5.9905415762162176e-06, "loss": 0.5986, "step": 6289 }, { "epoch": 0.6424923391215526, "grad_norm": 1.6153171128679744, "learning_rate": 5.987511026362611e-06, "loss": 0.6841, "step": 6290 }, { "epoch": 0.6425944841675179, "grad_norm": 1.474212562965794, "learning_rate": 5.984480915678519e-06, "loss": 0.7023, "step": 6291 }, { "epoch": 0.6426966292134831, "grad_norm": 1.4628158184884703, "learning_rate": 5.981451244495582e-06, "loss": 0.8259, "step": 6292 }, { "epoch": 0.6427987742594484, "grad_norm": 1.415954260290507, "learning_rate": 5.978422013145406e-06, "loss": 0.6458, "step": 6293 }, { "epoch": 0.6429009193054137, "grad_norm": 1.482107150450611, "learning_rate": 5.975393221959535e-06, "loss": 0.684, "step": 6294 }, { "epoch": 0.643003064351379, "grad_norm": 1.4097967647962504, "learning_rate": 5.97236487126948e-06, "loss": 0.7265, "step": 6295 }, { "epoch": 0.6431052093973443, "grad_norm": 1.6066437209036217, "learning_rate": 5.969336961406689e-06, "loss": 0.7774, "step": 6296 }, { "epoch": 0.6432073544433095, "grad_norm": 1.3396529778686272, "learning_rate": 5.966309492702574e-06, "loss": 0.6666, "step": 6297 }, { "epoch": 0.6433094994892747, "grad_norm": 1.6611679074904009, "learning_rate": 5.963282465488488e-06, "loss": 0.7887, "step": 6298 }, { "epoch": 0.64341164453524, "grad_norm": 1.6416909516868807, "learning_rate": 5.960255880095746e-06, "loss": 0.8088, "step": 6299 }, { "epoch": 0.6435137895812053, "grad_norm": 1.409404647165669, "learning_rate": 5.957229736855609e-06, "loss": 0.7443, "step": 6300 }, { "epoch": 0.6436159346271706, "grad_norm": 1.4038165064767516, "learning_rate": 5.9542040360992895e-06, "loss": 0.7249, "step": 6301 }, { "epoch": 0.6437180796731359, "grad_norm": 1.4505670644750825, "learning_rate": 5.95117877815795e-06, "loss": 0.7171, "step": 6302 }, { "epoch": 0.6438202247191012, "grad_norm": 1.424467231678113, "learning_rate": 5.948153963362711e-06, "loss": 0.5964, "step": 6303 }, { "epoch": 0.6439223697650664, "grad_norm": 1.6934730700868863, "learning_rate": 5.945129592044638e-06, "loss": 0.7947, "step": 6304 }, { "epoch": 0.6440245148110316, "grad_norm": 1.5691252449151116, "learning_rate": 5.942105664534752e-06, "loss": 0.7513, "step": 6305 }, { "epoch": 0.6441266598569969, "grad_norm": 1.5759238010677883, "learning_rate": 5.939082181164018e-06, "loss": 0.7166, "step": 6306 }, { "epoch": 0.6442288049029622, "grad_norm": 1.6203843652502852, "learning_rate": 5.9360591422633654e-06, "loss": 0.66, "step": 6307 }, { "epoch": 0.6443309499489275, "grad_norm": 1.467542102367679, "learning_rate": 5.933036548163668e-06, "loss": 0.7398, "step": 6308 }, { "epoch": 0.6444330949948928, "grad_norm": 1.371049671255989, "learning_rate": 5.9300143991957445e-06, "loss": 0.6845, "step": 6309 }, { "epoch": 0.644535240040858, "grad_norm": 1.3700813356801136, "learning_rate": 5.926992695690378e-06, "loss": 0.6713, "step": 6310 }, { "epoch": 0.6446373850868233, "grad_norm": 1.4485422707085727, "learning_rate": 5.92397143797829e-06, "loss": 0.7022, "step": 6311 }, { "epoch": 0.6447395301327885, "grad_norm": 1.4308039619657558, "learning_rate": 5.920950626390163e-06, "loss": 0.6391, "step": 6312 }, { "epoch": 0.6448416751787538, "grad_norm": 1.481273965125313, "learning_rate": 5.917930261256624e-06, "loss": 0.6249, "step": 6313 }, { "epoch": 0.6449438202247191, "grad_norm": 1.594762582758169, "learning_rate": 5.9149103429082535e-06, "loss": 0.8109, "step": 6314 }, { "epoch": 0.6450459652706844, "grad_norm": 1.544382420259511, "learning_rate": 5.91189087167559e-06, "loss": 0.6977, "step": 6315 }, { "epoch": 0.6451481103166496, "grad_norm": 1.5624957361892107, "learning_rate": 5.908871847889108e-06, "loss": 0.7703, "step": 6316 }, { "epoch": 0.6452502553626149, "grad_norm": 1.3960109333197803, "learning_rate": 5.905853271879248e-06, "loss": 0.6361, "step": 6317 }, { "epoch": 0.6453524004085802, "grad_norm": 1.4233643441313804, "learning_rate": 5.902835143976393e-06, "loss": 0.7047, "step": 6318 }, { "epoch": 0.6454545454545455, "grad_norm": 1.3246914761704722, "learning_rate": 5.8998174645108766e-06, "loss": 0.6961, "step": 6319 }, { "epoch": 0.6455566905005107, "grad_norm": 1.3027690246627308, "learning_rate": 5.896800233812989e-06, "loss": 0.7153, "step": 6320 }, { "epoch": 0.645658835546476, "grad_norm": 1.311259140583127, "learning_rate": 5.893783452212965e-06, "loss": 0.7258, "step": 6321 }, { "epoch": 0.6457609805924412, "grad_norm": 1.3147488868040462, "learning_rate": 5.8907671200409944e-06, "loss": 0.7149, "step": 6322 }, { "epoch": 0.6458631256384065, "grad_norm": 1.4902720867640786, "learning_rate": 5.887751237627219e-06, "loss": 0.6608, "step": 6323 }, { "epoch": 0.6459652706843718, "grad_norm": 1.542438356662139, "learning_rate": 5.884735805301729e-06, "loss": 0.83, "step": 6324 }, { "epoch": 0.6460674157303371, "grad_norm": 1.4886161965657752, "learning_rate": 5.881720823394563e-06, "loss": 0.7997, "step": 6325 }, { "epoch": 0.6461695607763024, "grad_norm": 1.5876435627495205, "learning_rate": 5.878706292235712e-06, "loss": 0.7174, "step": 6326 }, { "epoch": 0.6462717058222677, "grad_norm": 1.4868227272801122, "learning_rate": 5.875692212155123e-06, "loss": 0.7195, "step": 6327 }, { "epoch": 0.6463738508682328, "grad_norm": 1.4926683500799103, "learning_rate": 5.872678583482684e-06, "loss": 0.6917, "step": 6328 }, { "epoch": 0.6464759959141981, "grad_norm": 1.5116797716265975, "learning_rate": 5.869665406548242e-06, "loss": 0.7352, "step": 6329 }, { "epoch": 0.6465781409601634, "grad_norm": 1.3896217737963354, "learning_rate": 5.866652681681586e-06, "loss": 0.5781, "step": 6330 }, { "epoch": 0.6466802860061287, "grad_norm": 1.56993081915318, "learning_rate": 5.863640409212467e-06, "loss": 0.7232, "step": 6331 }, { "epoch": 0.646782431052094, "grad_norm": 1.3129115209525009, "learning_rate": 5.860628589470578e-06, "loss": 0.6601, "step": 6332 }, { "epoch": 0.6468845760980593, "grad_norm": 1.5171361512116293, "learning_rate": 5.857617222785563e-06, "loss": 0.6758, "step": 6333 }, { "epoch": 0.6469867211440246, "grad_norm": 1.2263135646165713, "learning_rate": 5.854606309487023e-06, "loss": 0.6683, "step": 6334 }, { "epoch": 0.6470888661899897, "grad_norm": 1.486673194419447, "learning_rate": 5.851595849904499e-06, "loss": 0.8071, "step": 6335 }, { "epoch": 0.647191011235955, "grad_norm": 1.3338496817668883, "learning_rate": 5.848585844367487e-06, "loss": 0.7452, "step": 6336 }, { "epoch": 0.6472931562819203, "grad_norm": 1.441407239701411, "learning_rate": 5.845576293205439e-06, "loss": 0.6698, "step": 6337 }, { "epoch": 0.6473953013278856, "grad_norm": 1.3852217401224993, "learning_rate": 5.842567196747747e-06, "loss": 0.7051, "step": 6338 }, { "epoch": 0.6474974463738509, "grad_norm": 1.6108619144981824, "learning_rate": 5.839558555323764e-06, "loss": 0.7733, "step": 6339 }, { "epoch": 0.6475995914198162, "grad_norm": 1.4394800132962888, "learning_rate": 5.836550369262783e-06, "loss": 0.7789, "step": 6340 }, { "epoch": 0.6477017364657814, "grad_norm": 1.3628402270894475, "learning_rate": 5.833542638894056e-06, "loss": 0.6488, "step": 6341 }, { "epoch": 0.6478038815117467, "grad_norm": 1.509343726814961, "learning_rate": 5.830535364546779e-06, "loss": 0.6341, "step": 6342 }, { "epoch": 0.6479060265577119, "grad_norm": 1.5954579259879267, "learning_rate": 5.827528546550101e-06, "loss": 0.7731, "step": 6343 }, { "epoch": 0.6480081716036772, "grad_norm": 1.4775873272210471, "learning_rate": 5.824522185233115e-06, "loss": 0.5926, "step": 6344 }, { "epoch": 0.6481103166496425, "grad_norm": 1.4302511416703345, "learning_rate": 5.8215162809248785e-06, "loss": 0.7, "step": 6345 }, { "epoch": 0.6482124616956078, "grad_norm": 1.4511007000605698, "learning_rate": 5.818510833954379e-06, "loss": 0.7171, "step": 6346 }, { "epoch": 0.648314606741573, "grad_norm": 1.475167437030065, "learning_rate": 5.815505844650576e-06, "loss": 0.7625, "step": 6347 }, { "epoch": 0.6484167517875383, "grad_norm": 1.4842598560934537, "learning_rate": 5.81250131334236e-06, "loss": 0.6768, "step": 6348 }, { "epoch": 0.6485188968335036, "grad_norm": 1.6331893348035946, "learning_rate": 5.809497240358578e-06, "loss": 0.739, "step": 6349 }, { "epoch": 0.6486210418794689, "grad_norm": 1.3449957211538792, "learning_rate": 5.806493626028033e-06, "loss": 0.6524, "step": 6350 }, { "epoch": 0.6487231869254341, "grad_norm": 1.4835045860482052, "learning_rate": 5.803490470679473e-06, "loss": 0.6641, "step": 6351 }, { "epoch": 0.6488253319713994, "grad_norm": 1.5332059158632318, "learning_rate": 5.80048777464159e-06, "loss": 0.7534, "step": 6352 }, { "epoch": 0.6489274770173646, "grad_norm": 1.5139463503928976, "learning_rate": 5.797485538243034e-06, "loss": 0.6745, "step": 6353 }, { "epoch": 0.6490296220633299, "grad_norm": 1.5535727612526296, "learning_rate": 5.794483761812393e-06, "loss": 0.6875, "step": 6354 }, { "epoch": 0.6491317671092952, "grad_norm": 1.3930562914335123, "learning_rate": 5.7914824456782296e-06, "loss": 0.6241, "step": 6355 }, { "epoch": 0.6492339121552605, "grad_norm": 1.503355955866966, "learning_rate": 5.7884815901690306e-06, "loss": 0.7057, "step": 6356 }, { "epoch": 0.6493360572012258, "grad_norm": 1.678559934401488, "learning_rate": 5.785481195613243e-06, "loss": 0.8101, "step": 6357 }, { "epoch": 0.6494382022471911, "grad_norm": 1.4271361015473665, "learning_rate": 5.782481262339261e-06, "loss": 0.6927, "step": 6358 }, { "epoch": 0.6495403472931562, "grad_norm": 1.4001833232568726, "learning_rate": 5.779481790675426e-06, "loss": 0.5893, "step": 6359 }, { "epoch": 0.6496424923391215, "grad_norm": 1.4763191347456233, "learning_rate": 5.776482780950041e-06, "loss": 0.688, "step": 6360 }, { "epoch": 0.6497446373850868, "grad_norm": 1.3113638951517055, "learning_rate": 5.773484233491342e-06, "loss": 0.6988, "step": 6361 }, { "epoch": 0.6498467824310521, "grad_norm": 1.3460519984892971, "learning_rate": 5.770486148627523e-06, "loss": 0.6623, "step": 6362 }, { "epoch": 0.6499489274770174, "grad_norm": 1.5451060793849392, "learning_rate": 5.76748852668673e-06, "loss": 0.765, "step": 6363 }, { "epoch": 0.6500510725229827, "grad_norm": 1.4721720706511154, "learning_rate": 5.764491367997049e-06, "loss": 0.7641, "step": 6364 }, { "epoch": 0.650153217568948, "grad_norm": 1.462312018560201, "learning_rate": 5.7614946728865275e-06, "loss": 0.6499, "step": 6365 }, { "epoch": 0.6502553626149131, "grad_norm": 1.5262239818356969, "learning_rate": 5.758498441683154e-06, "loss": 0.7359, "step": 6366 }, { "epoch": 0.6503575076608784, "grad_norm": 1.40463504869887, "learning_rate": 5.755502674714865e-06, "loss": 0.801, "step": 6367 }, { "epoch": 0.6504596527068437, "grad_norm": 1.3855909996868994, "learning_rate": 5.752507372309546e-06, "loss": 0.7379, "step": 6368 }, { "epoch": 0.650561797752809, "grad_norm": 1.2718627967339275, "learning_rate": 5.749512534795044e-06, "loss": 0.6066, "step": 6369 }, { "epoch": 0.6506639427987743, "grad_norm": 1.4181372334478919, "learning_rate": 5.7465181624991374e-06, "loss": 0.8462, "step": 6370 }, { "epoch": 0.6507660878447395, "grad_norm": 1.5282786810820324, "learning_rate": 5.7435242557495705e-06, "loss": 0.727, "step": 6371 }, { "epoch": 0.6508682328907048, "grad_norm": 1.5203966197020127, "learning_rate": 5.740530814874023e-06, "loss": 0.7547, "step": 6372 }, { "epoch": 0.6509703779366701, "grad_norm": 1.4331867228001234, "learning_rate": 5.737537840200132e-06, "loss": 0.6602, "step": 6373 }, { "epoch": 0.6510725229826353, "grad_norm": 1.59507792623401, "learning_rate": 5.734545332055471e-06, "loss": 0.7751, "step": 6374 }, { "epoch": 0.6511746680286006, "grad_norm": 1.4134453399559137, "learning_rate": 5.731553290767584e-06, "loss": 0.6088, "step": 6375 }, { "epoch": 0.6512768130745659, "grad_norm": 1.5010996630166384, "learning_rate": 5.728561716663949e-06, "loss": 0.6873, "step": 6376 }, { "epoch": 0.6513789581205311, "grad_norm": 1.4138872034974663, "learning_rate": 5.7255706100719925e-06, "loss": 0.6504, "step": 6377 }, { "epoch": 0.6514811031664964, "grad_norm": 1.5847195954841544, "learning_rate": 5.722579971319091e-06, "loss": 0.7621, "step": 6378 }, { "epoch": 0.6515832482124617, "grad_norm": 1.581836987343748, "learning_rate": 5.719589800732575e-06, "loss": 0.7084, "step": 6379 }, { "epoch": 0.651685393258427, "grad_norm": 1.4856061002120062, "learning_rate": 5.716600098639724e-06, "loss": 0.7705, "step": 6380 }, { "epoch": 0.6517875383043923, "grad_norm": 1.478977177570494, "learning_rate": 5.71361086536776e-06, "loss": 0.712, "step": 6381 }, { "epoch": 0.6518896833503575, "grad_norm": 1.4958918162723975, "learning_rate": 5.710622101243857e-06, "loss": 0.6564, "step": 6382 }, { "epoch": 0.6519918283963227, "grad_norm": 1.3043031060841932, "learning_rate": 5.70763380659513e-06, "loss": 0.7272, "step": 6383 }, { "epoch": 0.652093973442288, "grad_norm": 1.6663607740503847, "learning_rate": 5.704645981748662e-06, "loss": 0.7739, "step": 6384 }, { "epoch": 0.6521961184882533, "grad_norm": 1.572339684632511, "learning_rate": 5.701658627031466e-06, "loss": 0.7304, "step": 6385 }, { "epoch": 0.6522982635342186, "grad_norm": 1.4765424427825464, "learning_rate": 5.698671742770507e-06, "loss": 0.6868, "step": 6386 }, { "epoch": 0.6524004085801839, "grad_norm": 1.5123910337252047, "learning_rate": 5.695685329292708e-06, "loss": 0.7538, "step": 6387 }, { "epoch": 0.6525025536261492, "grad_norm": 1.4264475689472662, "learning_rate": 5.692699386924927e-06, "loss": 0.6709, "step": 6388 }, { "epoch": 0.6526046986721143, "grad_norm": 1.47220997476032, "learning_rate": 5.689713915993985e-06, "loss": 0.7414, "step": 6389 }, { "epoch": 0.6527068437180796, "grad_norm": 1.563475522528441, "learning_rate": 5.68672891682664e-06, "loss": 0.6984, "step": 6390 }, { "epoch": 0.6528089887640449, "grad_norm": 1.5787700972102408, "learning_rate": 5.683744389749602e-06, "loss": 0.6489, "step": 6391 }, { "epoch": 0.6529111338100102, "grad_norm": 1.4193504468392486, "learning_rate": 5.680760335089528e-06, "loss": 0.7097, "step": 6392 }, { "epoch": 0.6530132788559755, "grad_norm": 1.2411623726872238, "learning_rate": 5.6777767531730225e-06, "loss": 0.6962, "step": 6393 }, { "epoch": 0.6531154239019408, "grad_norm": 1.4091315428955322, "learning_rate": 5.6747936443266435e-06, "loss": 0.6075, "step": 6394 }, { "epoch": 0.6532175689479061, "grad_norm": 1.4631515867390752, "learning_rate": 5.6718110088769e-06, "loss": 0.7178, "step": 6395 }, { "epoch": 0.6533197139938713, "grad_norm": 1.2854831506396007, "learning_rate": 5.6688288471502365e-06, "loss": 0.6645, "step": 6396 }, { "epoch": 0.6534218590398365, "grad_norm": 1.4117525668697681, "learning_rate": 5.665847159473053e-06, "loss": 0.6162, "step": 6397 }, { "epoch": 0.6535240040858018, "grad_norm": 1.4588081405168964, "learning_rate": 5.6628659461716965e-06, "loss": 0.7265, "step": 6398 }, { "epoch": 0.6536261491317671, "grad_norm": 1.5537495002960608, "learning_rate": 5.659885207572466e-06, "loss": 0.7343, "step": 6399 }, { "epoch": 0.6537282941777324, "grad_norm": 1.6106729023277113, "learning_rate": 5.656904944001607e-06, "loss": 0.6697, "step": 6400 }, { "epoch": 0.6538304392236977, "grad_norm": 1.5463585793752392, "learning_rate": 5.653925155785305e-06, "loss": 0.7391, "step": 6401 }, { "epoch": 0.6539325842696629, "grad_norm": 1.399071494854687, "learning_rate": 5.6509458432496985e-06, "loss": 0.6772, "step": 6402 }, { "epoch": 0.6540347293156282, "grad_norm": 1.4919613571178558, "learning_rate": 5.64796700672088e-06, "loss": 0.7101, "step": 6403 }, { "epoch": 0.6541368743615935, "grad_norm": 1.4797670762771453, "learning_rate": 5.644988646524889e-06, "loss": 0.6975, "step": 6404 }, { "epoch": 0.6542390194075587, "grad_norm": 1.5498117775501439, "learning_rate": 5.642010762987704e-06, "loss": 0.6865, "step": 6405 }, { "epoch": 0.654341164453524, "grad_norm": 1.4989140822337448, "learning_rate": 5.639033356435257e-06, "loss": 0.6455, "step": 6406 }, { "epoch": 0.6544433094994893, "grad_norm": 1.4019225799138728, "learning_rate": 5.636056427193426e-06, "loss": 0.6001, "step": 6407 }, { "epoch": 0.6545454545454545, "grad_norm": 1.320388791277678, "learning_rate": 5.633079975588035e-06, "loss": 0.7011, "step": 6408 }, { "epoch": 0.6546475995914198, "grad_norm": 1.644115941851424, "learning_rate": 5.630104001944865e-06, "loss": 0.6918, "step": 6409 }, { "epoch": 0.6547497446373851, "grad_norm": 1.6101219848444763, "learning_rate": 5.627128506589633e-06, "loss": 0.6704, "step": 6410 }, { "epoch": 0.6548518896833504, "grad_norm": 1.6161133579459637, "learning_rate": 5.624153489848014e-06, "loss": 0.7291, "step": 6411 }, { "epoch": 0.6549540347293157, "grad_norm": 1.5440675157851353, "learning_rate": 5.621178952045622e-06, "loss": 0.6293, "step": 6412 }, { "epoch": 0.6550561797752809, "grad_norm": 1.7283246226720521, "learning_rate": 5.618204893508021e-06, "loss": 0.697, "step": 6413 }, { "epoch": 0.6551583248212461, "grad_norm": 1.3559492819230137, "learning_rate": 5.615231314560727e-06, "loss": 0.5894, "step": 6414 }, { "epoch": 0.6552604698672114, "grad_norm": 1.5935615385559916, "learning_rate": 5.6122582155292e-06, "loss": 0.6376, "step": 6415 }, { "epoch": 0.6553626149131767, "grad_norm": 1.538253692371307, "learning_rate": 5.609285596738847e-06, "loss": 0.6759, "step": 6416 }, { "epoch": 0.655464759959142, "grad_norm": 1.4012332649924164, "learning_rate": 5.606313458515017e-06, "loss": 0.6324, "step": 6417 }, { "epoch": 0.6555669050051073, "grad_norm": 1.336031351324118, "learning_rate": 5.603341801183017e-06, "loss": 0.5308, "step": 6418 }, { "epoch": 0.6556690500510726, "grad_norm": 1.492685158102753, "learning_rate": 5.600370625068103e-06, "loss": 0.6106, "step": 6419 }, { "epoch": 0.6557711950970377, "grad_norm": 1.4597778620592337, "learning_rate": 5.597399930495466e-06, "loss": 0.795, "step": 6420 }, { "epoch": 0.655873340143003, "grad_norm": 1.4632272190260227, "learning_rate": 5.594429717790251e-06, "loss": 0.613, "step": 6421 }, { "epoch": 0.6559754851889683, "grad_norm": 1.5499079670946054, "learning_rate": 5.591459987277545e-06, "loss": 0.7384, "step": 6422 }, { "epoch": 0.6560776302349336, "grad_norm": 1.3879338002574035, "learning_rate": 5.588490739282396e-06, "loss": 0.6619, "step": 6423 }, { "epoch": 0.6561797752808989, "grad_norm": 1.3718112923058157, "learning_rate": 5.585521974129786e-06, "loss": 0.6934, "step": 6424 }, { "epoch": 0.6562819203268642, "grad_norm": 1.4101973443413434, "learning_rate": 5.582553692144648e-06, "loss": 0.7318, "step": 6425 }, { "epoch": 0.6563840653728295, "grad_norm": 1.466562194126063, "learning_rate": 5.57958589365186e-06, "loss": 0.6801, "step": 6426 }, { "epoch": 0.6564862104187947, "grad_norm": 1.3753948035749377, "learning_rate": 5.576618578976254e-06, "loss": 0.6615, "step": 6427 }, { "epoch": 0.6565883554647599, "grad_norm": 1.4559602828184697, "learning_rate": 5.573651748442599e-06, "loss": 0.7807, "step": 6428 }, { "epoch": 0.6566905005107252, "grad_norm": 1.4531113426015971, "learning_rate": 5.570685402375623e-06, "loss": 0.8324, "step": 6429 }, { "epoch": 0.6567926455566905, "grad_norm": 1.5327802305331653, "learning_rate": 5.567719541099992e-06, "loss": 0.7614, "step": 6430 }, { "epoch": 0.6568947906026558, "grad_norm": 1.3291991390675684, "learning_rate": 5.564754164940322e-06, "loss": 0.6325, "step": 6431 }, { "epoch": 0.656996935648621, "grad_norm": 1.617498900406194, "learning_rate": 5.5617892742211685e-06, "loss": 0.7064, "step": 6432 }, { "epoch": 0.6570990806945863, "grad_norm": 1.4534551761199075, "learning_rate": 5.558824869267051e-06, "loss": 0.669, "step": 6433 }, { "epoch": 0.6572012257405516, "grad_norm": 1.3924221299839745, "learning_rate": 5.555860950402417e-06, "loss": 0.6807, "step": 6434 }, { "epoch": 0.6573033707865169, "grad_norm": 1.4733493077412563, "learning_rate": 5.552897517951678e-06, "loss": 0.709, "step": 6435 }, { "epoch": 0.6574055158324821, "grad_norm": 1.494083039848911, "learning_rate": 5.549934572239177e-06, "loss": 0.7293, "step": 6436 }, { "epoch": 0.6575076608784474, "grad_norm": 1.4418543852146206, "learning_rate": 5.546972113589211e-06, "loss": 0.6049, "step": 6437 }, { "epoch": 0.6576098059244126, "grad_norm": 1.4503013134343983, "learning_rate": 5.544010142326026e-06, "loss": 0.7854, "step": 6438 }, { "epoch": 0.6577119509703779, "grad_norm": 1.620662963952648, "learning_rate": 5.5410486587738096e-06, "loss": 0.861, "step": 6439 }, { "epoch": 0.6578140960163432, "grad_norm": 1.320980141073788, "learning_rate": 5.5380876632566995e-06, "loss": 0.6461, "step": 6440 }, { "epoch": 0.6579162410623085, "grad_norm": 1.4444173840822545, "learning_rate": 5.535127156098776e-06, "loss": 0.5556, "step": 6441 }, { "epoch": 0.6580183861082738, "grad_norm": 1.424762596957635, "learning_rate": 5.532167137624064e-06, "loss": 0.7649, "step": 6442 }, { "epoch": 0.6581205311542391, "grad_norm": 1.443251849291834, "learning_rate": 5.529207608156553e-06, "loss": 0.6466, "step": 6443 }, { "epoch": 0.6582226762002042, "grad_norm": 1.4949742404987731, "learning_rate": 5.526248568020156e-06, "loss": 0.7689, "step": 6444 }, { "epoch": 0.6583248212461695, "grad_norm": 1.5021058512750207, "learning_rate": 5.523290017538745e-06, "loss": 0.7298, "step": 6445 }, { "epoch": 0.6584269662921348, "grad_norm": 1.297823489032694, "learning_rate": 5.520331957036134e-06, "loss": 0.5758, "step": 6446 }, { "epoch": 0.6585291113381001, "grad_norm": 1.2451483847653233, "learning_rate": 5.517374386836081e-06, "loss": 0.4773, "step": 6447 }, { "epoch": 0.6586312563840654, "grad_norm": 1.4070563818357695, "learning_rate": 5.5144173072623e-06, "loss": 0.6623, "step": 6448 }, { "epoch": 0.6587334014300307, "grad_norm": 1.5243881935184953, "learning_rate": 5.511460718638444e-06, "loss": 0.7343, "step": 6449 }, { "epoch": 0.658835546475996, "grad_norm": 1.667082783672821, "learning_rate": 5.508504621288107e-06, "loss": 0.7512, "step": 6450 }, { "epoch": 0.6589376915219611, "grad_norm": 1.7212574845060382, "learning_rate": 5.505549015534846e-06, "loss": 0.841, "step": 6451 }, { "epoch": 0.6590398365679264, "grad_norm": 1.378710960646386, "learning_rate": 5.502593901702145e-06, "loss": 0.7135, "step": 6452 }, { "epoch": 0.6591419816138917, "grad_norm": 1.4052582965138933, "learning_rate": 5.499639280113452e-06, "loss": 0.656, "step": 6453 }, { "epoch": 0.659244126659857, "grad_norm": 1.4718936841890282, "learning_rate": 5.496685151092145e-06, "loss": 0.6765, "step": 6454 }, { "epoch": 0.6593462717058223, "grad_norm": 1.407794575075968, "learning_rate": 5.49373151496156e-06, "loss": 0.5649, "step": 6455 }, { "epoch": 0.6594484167517876, "grad_norm": 1.5143000127292756, "learning_rate": 5.490778372044967e-06, "loss": 0.7044, "step": 6456 }, { "epoch": 0.6595505617977528, "grad_norm": 1.5182534188864187, "learning_rate": 5.487825722665599e-06, "loss": 0.6589, "step": 6457 }, { "epoch": 0.6596527068437181, "grad_norm": 1.4667008927160596, "learning_rate": 5.484873567146617e-06, "loss": 0.7224, "step": 6458 }, { "epoch": 0.6597548518896833, "grad_norm": 1.5326465357974048, "learning_rate": 5.481921905811144e-06, "loss": 0.7052, "step": 6459 }, { "epoch": 0.6598569969356486, "grad_norm": 1.6814841682182398, "learning_rate": 5.478970738982236e-06, "loss": 0.769, "step": 6460 }, { "epoch": 0.6599591419816139, "grad_norm": 1.4830018545780765, "learning_rate": 5.476020066982903e-06, "loss": 0.6657, "step": 6461 }, { "epoch": 0.6600612870275792, "grad_norm": 1.3549759526147498, "learning_rate": 5.473069890136094e-06, "loss": 0.7184, "step": 6462 }, { "epoch": 0.6601634320735444, "grad_norm": 1.438517311815641, "learning_rate": 5.470120208764713e-06, "loss": 0.7584, "step": 6463 }, { "epoch": 0.6602655771195097, "grad_norm": 1.3336390518437293, "learning_rate": 5.467171023191601e-06, "loss": 0.6437, "step": 6464 }, { "epoch": 0.660367722165475, "grad_norm": 1.349422722425278, "learning_rate": 5.464222333739551e-06, "loss": 0.6402, "step": 6465 }, { "epoch": 0.6604698672114403, "grad_norm": 1.4738551892646254, "learning_rate": 5.4612741407312905e-06, "loss": 0.6567, "step": 6466 }, { "epoch": 0.6605720122574055, "grad_norm": 1.5419523999833575, "learning_rate": 5.458326444489509e-06, "loss": 0.7172, "step": 6467 }, { "epoch": 0.6606741573033708, "grad_norm": 1.2470581652385127, "learning_rate": 5.455379245336836e-06, "loss": 0.5738, "step": 6468 }, { "epoch": 0.660776302349336, "grad_norm": 1.3619156841412574, "learning_rate": 5.4524325435958424e-06, "loss": 0.6338, "step": 6469 }, { "epoch": 0.6608784473953013, "grad_norm": 1.473565004636987, "learning_rate": 5.4494863395890426e-06, "loss": 0.6713, "step": 6470 }, { "epoch": 0.6609805924412666, "grad_norm": 1.563945853726889, "learning_rate": 5.4465406336389015e-06, "loss": 0.7009, "step": 6471 }, { "epoch": 0.6610827374872319, "grad_norm": 1.612131754723165, "learning_rate": 5.443595426067831e-06, "loss": 0.7489, "step": 6472 }, { "epoch": 0.6611848825331972, "grad_norm": 1.3962362114749298, "learning_rate": 5.440650717198188e-06, "loss": 0.7396, "step": 6473 }, { "epoch": 0.6612870275791624, "grad_norm": 1.3733358544311225, "learning_rate": 5.437706507352264e-06, "loss": 0.6004, "step": 6474 }, { "epoch": 0.6613891726251276, "grad_norm": 1.4649064634105868, "learning_rate": 5.434762796852315e-06, "loss": 0.6832, "step": 6475 }, { "epoch": 0.6614913176710929, "grad_norm": 1.434765150725726, "learning_rate": 5.431819586020523e-06, "loss": 0.6423, "step": 6476 }, { "epoch": 0.6615934627170582, "grad_norm": 1.3863263050926706, "learning_rate": 5.428876875179032e-06, "loss": 0.6334, "step": 6477 }, { "epoch": 0.6616956077630235, "grad_norm": 1.6001201553599602, "learning_rate": 5.425934664649921e-06, "loss": 0.8054, "step": 6478 }, { "epoch": 0.6617977528089888, "grad_norm": 1.578634973435631, "learning_rate": 5.422992954755217e-06, "loss": 0.7148, "step": 6479 }, { "epoch": 0.6618998978549541, "grad_norm": 1.6205820698479412, "learning_rate": 5.42005174581689e-06, "loss": 0.7558, "step": 6480 }, { "epoch": 0.6620020429009194, "grad_norm": 1.360396711234745, "learning_rate": 5.417111038156855e-06, "loss": 0.6639, "step": 6481 }, { "epoch": 0.6621041879468845, "grad_norm": 1.3258747500264592, "learning_rate": 5.414170832096979e-06, "loss": 0.7072, "step": 6482 }, { "epoch": 0.6622063329928498, "grad_norm": 1.5084639101310708, "learning_rate": 5.411231127959072e-06, "loss": 0.7023, "step": 6483 }, { "epoch": 0.6623084780388151, "grad_norm": 1.4111645373653259, "learning_rate": 5.40829192606488e-06, "loss": 0.7475, "step": 6484 }, { "epoch": 0.6624106230847804, "grad_norm": 1.5271301900245238, "learning_rate": 5.405353226736105e-06, "loss": 0.6814, "step": 6485 }, { "epoch": 0.6625127681307457, "grad_norm": 1.4094411106383697, "learning_rate": 5.4024150302943844e-06, "loss": 0.6854, "step": 6486 }, { "epoch": 0.662614913176711, "grad_norm": 1.4444107854209403, "learning_rate": 5.399477337061313e-06, "loss": 0.7403, "step": 6487 }, { "epoch": 0.6627170582226762, "grad_norm": 1.4123421999324308, "learning_rate": 5.3965401473584186e-06, "loss": 0.6387, "step": 6488 }, { "epoch": 0.6628192032686415, "grad_norm": 1.5440964240956108, "learning_rate": 5.39360346150718e-06, "loss": 0.7416, "step": 6489 }, { "epoch": 0.6629213483146067, "grad_norm": 1.5282751224424966, "learning_rate": 5.3906672798290135e-06, "loss": 0.717, "step": 6490 }, { "epoch": 0.663023493360572, "grad_norm": 1.1771836562756872, "learning_rate": 5.3877316026452944e-06, "loss": 0.6176, "step": 6491 }, { "epoch": 0.6631256384065373, "grad_norm": 1.5891312320059654, "learning_rate": 5.384796430277333e-06, "loss": 0.638, "step": 6492 }, { "epoch": 0.6632277834525026, "grad_norm": 1.485319485215655, "learning_rate": 5.381861763046383e-06, "loss": 0.7164, "step": 6493 }, { "epoch": 0.6633299284984678, "grad_norm": 1.4032204114898663, "learning_rate": 5.378927601273648e-06, "loss": 0.6983, "step": 6494 }, { "epoch": 0.6634320735444331, "grad_norm": 1.3612329496393076, "learning_rate": 5.375993945280273e-06, "loss": 0.7325, "step": 6495 }, { "epoch": 0.6635342185903984, "grad_norm": 1.5981859503469504, "learning_rate": 5.3730607953873436e-06, "loss": 0.7693, "step": 6496 }, { "epoch": 0.6636363636363637, "grad_norm": 1.5333443813825893, "learning_rate": 5.370128151915903e-06, "loss": 0.6943, "step": 6497 }, { "epoch": 0.6637385086823289, "grad_norm": 1.6257901390006935, "learning_rate": 5.367196015186924e-06, "loss": 0.7508, "step": 6498 }, { "epoch": 0.6638406537282941, "grad_norm": 1.445519200534978, "learning_rate": 5.36426438552134e-06, "loss": 0.684, "step": 6499 }, { "epoch": 0.6639427987742594, "grad_norm": 1.4069808746085324, "learning_rate": 5.361333263240012e-06, "loss": 0.658, "step": 6500 }, { "epoch": 0.6640449438202247, "grad_norm": 1.4878350649869472, "learning_rate": 5.358402648663752e-06, "loss": 0.7193, "step": 6501 }, { "epoch": 0.66414708886619, "grad_norm": 1.4000396055031883, "learning_rate": 5.355472542113325e-06, "loss": 0.5534, "step": 6502 }, { "epoch": 0.6642492339121553, "grad_norm": 1.4459998675335881, "learning_rate": 5.3525429439094275e-06, "loss": 0.6397, "step": 6503 }, { "epoch": 0.6643513789581206, "grad_norm": 1.5814478450083063, "learning_rate": 5.34961385437271e-06, "loss": 0.8189, "step": 6504 }, { "epoch": 0.6644535240040857, "grad_norm": 1.5744139873093734, "learning_rate": 5.346685273823755e-06, "loss": 0.733, "step": 6505 }, { "epoch": 0.664555669050051, "grad_norm": 1.5684568120459548, "learning_rate": 5.343757202583104e-06, "loss": 0.7395, "step": 6506 }, { "epoch": 0.6646578140960163, "grad_norm": 1.500171207711971, "learning_rate": 5.340829640971239e-06, "loss": 0.6649, "step": 6507 }, { "epoch": 0.6647599591419816, "grad_norm": 1.4138434026554247, "learning_rate": 5.337902589308579e-06, "loss": 0.6882, "step": 6508 }, { "epoch": 0.6648621041879469, "grad_norm": 1.4934437149639046, "learning_rate": 5.334976047915494e-06, "loss": 0.7525, "step": 6509 }, { "epoch": 0.6649642492339122, "grad_norm": 1.5406119987203275, "learning_rate": 5.332050017112292e-06, "loss": 0.7706, "step": 6510 }, { "epoch": 0.6650663942798775, "grad_norm": 1.5439920515651857, "learning_rate": 5.329124497219232e-06, "loss": 0.7902, "step": 6511 }, { "epoch": 0.6651685393258427, "grad_norm": 1.6255241994993048, "learning_rate": 5.326199488556516e-06, "loss": 0.5789, "step": 6512 }, { "epoch": 0.6652706843718079, "grad_norm": 1.6199711517177522, "learning_rate": 5.323274991444285e-06, "loss": 0.7271, "step": 6513 }, { "epoch": 0.6653728294177732, "grad_norm": 1.686948899869703, "learning_rate": 5.320351006202624e-06, "loss": 0.6075, "step": 6514 }, { "epoch": 0.6654749744637385, "grad_norm": 1.6663570823939364, "learning_rate": 5.317427533151572e-06, "loss": 0.6938, "step": 6515 }, { "epoch": 0.6655771195097038, "grad_norm": 1.638513014635251, "learning_rate": 5.3145045726110984e-06, "loss": 0.7046, "step": 6516 }, { "epoch": 0.6656792645556691, "grad_norm": 1.5384884184510565, "learning_rate": 5.311582124901131e-06, "loss": 0.7302, "step": 6517 }, { "epoch": 0.6657814096016343, "grad_norm": 1.5998308938073411, "learning_rate": 5.308660190341528e-06, "loss": 0.7253, "step": 6518 }, { "epoch": 0.6658835546475996, "grad_norm": 1.4463100253888226, "learning_rate": 5.3057387692521e-06, "loss": 0.6748, "step": 6519 }, { "epoch": 0.6659856996935649, "grad_norm": 1.4107388180518095, "learning_rate": 5.302817861952592e-06, "loss": 0.5955, "step": 6520 }, { "epoch": 0.6660878447395301, "grad_norm": 1.4218335993768003, "learning_rate": 5.299897468762707e-06, "loss": 0.6951, "step": 6521 }, { "epoch": 0.6661899897854954, "grad_norm": 1.4308701748717412, "learning_rate": 5.296977590002077e-06, "loss": 0.668, "step": 6522 }, { "epoch": 0.6662921348314607, "grad_norm": 1.4805881925189268, "learning_rate": 5.294058225990292e-06, "loss": 0.5968, "step": 6523 }, { "epoch": 0.6663942798774259, "grad_norm": 1.3493272474950293, "learning_rate": 5.291139377046874e-06, "loss": 0.6837, "step": 6524 }, { "epoch": 0.6664964249233912, "grad_norm": 1.563174685269643, "learning_rate": 5.288221043491291e-06, "loss": 0.7525, "step": 6525 }, { "epoch": 0.6665985699693565, "grad_norm": 1.439215358013041, "learning_rate": 5.285303225642962e-06, "loss": 0.6924, "step": 6526 }, { "epoch": 0.6667007150153218, "grad_norm": 1.5090365411301374, "learning_rate": 5.282385923821242e-06, "loss": 0.7549, "step": 6527 }, { "epoch": 0.666802860061287, "grad_norm": 1.5130527851200468, "learning_rate": 5.27946913834543e-06, "loss": 0.7345, "step": 6528 }, { "epoch": 0.6669050051072523, "grad_norm": 1.5686150717105887, "learning_rate": 5.276552869534765e-06, "loss": 0.7035, "step": 6529 }, { "epoch": 0.6670071501532175, "grad_norm": 1.2694511967841604, "learning_rate": 5.273637117708444e-06, "loss": 0.6929, "step": 6530 }, { "epoch": 0.6671092951991828, "grad_norm": 1.4093188713704887, "learning_rate": 5.270721883185594e-06, "loss": 0.7178, "step": 6531 }, { "epoch": 0.6672114402451481, "grad_norm": 1.5029835971487457, "learning_rate": 5.267807166285292e-06, "loss": 0.7558, "step": 6532 }, { "epoch": 0.6673135852911134, "grad_norm": 1.4367556022956347, "learning_rate": 5.264892967326552e-06, "loss": 0.6893, "step": 6533 }, { "epoch": 0.6674157303370787, "grad_norm": 1.5494496929827504, "learning_rate": 5.261979286628336e-06, "loss": 0.7603, "step": 6534 }, { "epoch": 0.667517875383044, "grad_norm": 1.5322589830504494, "learning_rate": 5.259066124509545e-06, "loss": 0.7354, "step": 6535 }, { "epoch": 0.6676200204290091, "grad_norm": 1.496147167283038, "learning_rate": 5.256153481289034e-06, "loss": 0.6617, "step": 6536 }, { "epoch": 0.6677221654749744, "grad_norm": 1.3786676288102002, "learning_rate": 5.253241357285588e-06, "loss": 0.7243, "step": 6537 }, { "epoch": 0.6678243105209397, "grad_norm": 1.488728077548782, "learning_rate": 5.25032975281794e-06, "loss": 0.6611, "step": 6538 }, { "epoch": 0.667926455566905, "grad_norm": 1.4599517840004244, "learning_rate": 5.247418668204771e-06, "loss": 0.6775, "step": 6539 }, { "epoch": 0.6680286006128703, "grad_norm": 1.4195890697553577, "learning_rate": 5.244508103764696e-06, "loss": 0.6637, "step": 6540 }, { "epoch": 0.6681307456588356, "grad_norm": 1.4687881668013583, "learning_rate": 5.241598059816286e-06, "loss": 0.6672, "step": 6541 }, { "epoch": 0.6682328907048009, "grad_norm": 1.4630041184733678, "learning_rate": 5.2386885366780425e-06, "loss": 0.6072, "step": 6542 }, { "epoch": 0.6683350357507661, "grad_norm": 1.5330979327950098, "learning_rate": 5.2357795346684145e-06, "loss": 0.7786, "step": 6543 }, { "epoch": 0.6684371807967313, "grad_norm": 1.530092992825617, "learning_rate": 5.2328710541057924e-06, "loss": 0.7351, "step": 6544 }, { "epoch": 0.6685393258426966, "grad_norm": 1.50290595964251, "learning_rate": 5.229963095308516e-06, "loss": 0.7557, "step": 6545 }, { "epoch": 0.6686414708886619, "grad_norm": 1.613286057290504, "learning_rate": 5.227055658594856e-06, "loss": 0.7371, "step": 6546 }, { "epoch": 0.6687436159346272, "grad_norm": 1.4414580684985665, "learning_rate": 5.2241487442830414e-06, "loss": 0.7132, "step": 6547 }, { "epoch": 0.6688457609805925, "grad_norm": 1.434827594414842, "learning_rate": 5.221242352691235e-06, "loss": 0.6117, "step": 6548 }, { "epoch": 0.6689479060265577, "grad_norm": 1.5834954720822438, "learning_rate": 5.218336484137538e-06, "loss": 0.5977, "step": 6549 }, { "epoch": 0.669050051072523, "grad_norm": 1.416561166848323, "learning_rate": 5.215431138939999e-06, "loss": 0.6967, "step": 6550 }, { "epoch": 0.6691521961184883, "grad_norm": 1.4934765919504858, "learning_rate": 5.2125263174166175e-06, "loss": 0.694, "step": 6551 }, { "epoch": 0.6692543411644535, "grad_norm": 1.4895070249210873, "learning_rate": 5.2096220198853235e-06, "loss": 0.5782, "step": 6552 }, { "epoch": 0.6693564862104188, "grad_norm": 1.4889361345992935, "learning_rate": 5.206718246663995e-06, "loss": 0.7017, "step": 6553 }, { "epoch": 0.669458631256384, "grad_norm": 1.5467146273376797, "learning_rate": 5.2038149980704465e-06, "loss": 0.7144, "step": 6554 }, { "epoch": 0.6695607763023493, "grad_norm": 1.4741384646996376, "learning_rate": 5.200912274422445e-06, "loss": 0.5475, "step": 6555 }, { "epoch": 0.6696629213483146, "grad_norm": 1.3351884568966441, "learning_rate": 5.1980100760377e-06, "loss": 0.547, "step": 6556 }, { "epoch": 0.6697650663942799, "grad_norm": 1.3939121797880538, "learning_rate": 5.195108403233855e-06, "loss": 0.7277, "step": 6557 }, { "epoch": 0.6698672114402452, "grad_norm": 1.62938894276569, "learning_rate": 5.192207256328499e-06, "loss": 0.7715, "step": 6558 }, { "epoch": 0.6699693564862104, "grad_norm": 1.4398936694352984, "learning_rate": 5.189306635639161e-06, "loss": 0.7604, "step": 6559 }, { "epoch": 0.6700715015321757, "grad_norm": 1.6527054972011854, "learning_rate": 5.1864065414833245e-06, "loss": 0.7469, "step": 6560 }, { "epoch": 0.6701736465781409, "grad_norm": 1.5103351826028084, "learning_rate": 5.183506974178401e-06, "loss": 0.7293, "step": 6561 }, { "epoch": 0.6702757916241062, "grad_norm": 1.4535778761489895, "learning_rate": 5.180607934041748e-06, "loss": 0.6732, "step": 6562 }, { "epoch": 0.6703779366700715, "grad_norm": 1.679899997469296, "learning_rate": 5.177709421390673e-06, "loss": 0.7865, "step": 6563 }, { "epoch": 0.6704800817160368, "grad_norm": 1.6202467052195313, "learning_rate": 5.174811436542415e-06, "loss": 0.8204, "step": 6564 }, { "epoch": 0.6705822267620021, "grad_norm": 1.5247507487009946, "learning_rate": 5.171913979814167e-06, "loss": 0.683, "step": 6565 }, { "epoch": 0.6706843718079674, "grad_norm": 1.3933838195908217, "learning_rate": 5.1690170515230506e-06, "loss": 0.7376, "step": 6566 }, { "epoch": 0.6707865168539325, "grad_norm": 1.509413770690142, "learning_rate": 5.166120651986142e-06, "loss": 0.6444, "step": 6567 }, { "epoch": 0.6708886618998978, "grad_norm": 1.5152130706510722, "learning_rate": 5.163224781520451e-06, "loss": 0.6644, "step": 6568 }, { "epoch": 0.6709908069458631, "grad_norm": 1.37562656334029, "learning_rate": 5.160329440442926e-06, "loss": 0.5414, "step": 6569 }, { "epoch": 0.6710929519918284, "grad_norm": 1.4609110125963283, "learning_rate": 5.157434629070472e-06, "loss": 0.7248, "step": 6570 }, { "epoch": 0.6711950970377937, "grad_norm": 1.4048633231392489, "learning_rate": 5.154540347719932e-06, "loss": 0.65, "step": 6571 }, { "epoch": 0.671297242083759, "grad_norm": 1.3271921421702595, "learning_rate": 5.15164659670808e-06, "loss": 0.6715, "step": 6572 }, { "epoch": 0.6713993871297242, "grad_norm": 1.45304237135027, "learning_rate": 5.148753376351641e-06, "loss": 0.6591, "step": 6573 }, { "epoch": 0.6715015321756895, "grad_norm": 1.57070151202486, "learning_rate": 5.145860686967274e-06, "loss": 0.6569, "step": 6574 }, { "epoch": 0.6716036772216547, "grad_norm": 1.3965757483142136, "learning_rate": 5.142968528871597e-06, "loss": 0.7426, "step": 6575 }, { "epoch": 0.67170582226762, "grad_norm": 1.626117468109773, "learning_rate": 5.14007690238115e-06, "loss": 0.8063, "step": 6576 }, { "epoch": 0.6718079673135853, "grad_norm": 1.4695694723975783, "learning_rate": 5.137185807812428e-06, "loss": 0.7343, "step": 6577 }, { "epoch": 0.6719101123595506, "grad_norm": 1.3639947068452039, "learning_rate": 5.134295245481857e-06, "loss": 0.6953, "step": 6578 }, { "epoch": 0.6720122574055158, "grad_norm": 1.4759568290668346, "learning_rate": 5.1314052157058144e-06, "loss": 0.6707, "step": 6579 }, { "epoch": 0.6721144024514811, "grad_norm": 1.428728791074938, "learning_rate": 5.128515718800622e-06, "loss": 0.6727, "step": 6580 }, { "epoch": 0.6722165474974464, "grad_norm": 1.3494917940459454, "learning_rate": 5.125626755082529e-06, "loss": 0.6655, "step": 6581 }, { "epoch": 0.6723186925434116, "grad_norm": 1.465630613249248, "learning_rate": 5.122738324867738e-06, "loss": 0.6034, "step": 6582 }, { "epoch": 0.6724208375893769, "grad_norm": 1.5454673622988147, "learning_rate": 5.119850428472389e-06, "loss": 0.7355, "step": 6583 }, { "epoch": 0.6725229826353422, "grad_norm": 1.3711412106030618, "learning_rate": 5.1169630662125595e-06, "loss": 0.5643, "step": 6584 }, { "epoch": 0.6726251276813074, "grad_norm": 1.5420577678172087, "learning_rate": 5.11407623840428e-06, "loss": 0.6618, "step": 6585 }, { "epoch": 0.6727272727272727, "grad_norm": 1.510257622594131, "learning_rate": 5.111189945363511e-06, "loss": 0.6347, "step": 6586 }, { "epoch": 0.672829417773238, "grad_norm": 1.3824948577907834, "learning_rate": 5.108304187406163e-06, "loss": 0.7013, "step": 6587 }, { "epoch": 0.6729315628192033, "grad_norm": 1.4832340409581468, "learning_rate": 5.105418964848083e-06, "loss": 0.7476, "step": 6588 }, { "epoch": 0.6730337078651686, "grad_norm": 1.5023182187315376, "learning_rate": 5.102534278005056e-06, "loss": 0.7392, "step": 6589 }, { "epoch": 0.6731358529111338, "grad_norm": 1.3159532220618566, "learning_rate": 5.09965012719282e-06, "loss": 0.6357, "step": 6590 }, { "epoch": 0.673237997957099, "grad_norm": 1.702468417858951, "learning_rate": 5.096766512727043e-06, "loss": 0.6953, "step": 6591 }, { "epoch": 0.6733401430030643, "grad_norm": 1.5434887732215472, "learning_rate": 5.09388343492334e-06, "loss": 0.7115, "step": 6592 }, { "epoch": 0.6734422880490296, "grad_norm": 1.4608055714610884, "learning_rate": 5.091000894097261e-06, "loss": 0.7106, "step": 6593 }, { "epoch": 0.6735444330949949, "grad_norm": 1.5683751619969664, "learning_rate": 5.088118890564305e-06, "loss": 0.7356, "step": 6594 }, { "epoch": 0.6736465781409602, "grad_norm": 1.5817571525844558, "learning_rate": 5.085237424639915e-06, "loss": 0.7321, "step": 6595 }, { "epoch": 0.6737487231869255, "grad_norm": 1.51906903190527, "learning_rate": 5.082356496639462e-06, "loss": 0.6924, "step": 6596 }, { "epoch": 0.6738508682328908, "grad_norm": 1.3989166392097978, "learning_rate": 5.07947610687827e-06, "loss": 0.6758, "step": 6597 }, { "epoch": 0.6739530132788559, "grad_norm": 1.559384315354782, "learning_rate": 5.0765962556715916e-06, "loss": 0.7788, "step": 6598 }, { "epoch": 0.6740551583248212, "grad_norm": 1.5233535305228334, "learning_rate": 5.0737169433346385e-06, "loss": 0.7204, "step": 6599 }, { "epoch": 0.6741573033707865, "grad_norm": 1.4664687101172342, "learning_rate": 5.070838170182549e-06, "loss": 0.6751, "step": 6600 }, { "epoch": 0.6742594484167518, "grad_norm": 1.5519747408427538, "learning_rate": 5.067959936530407e-06, "loss": 0.7508, "step": 6601 }, { "epoch": 0.6743615934627171, "grad_norm": 1.5653169292582163, "learning_rate": 5.065082242693231e-06, "loss": 0.7521, "step": 6602 }, { "epoch": 0.6744637385086824, "grad_norm": 1.455583437509877, "learning_rate": 5.0622050889859966e-06, "loss": 0.705, "step": 6603 }, { "epoch": 0.6745658835546476, "grad_norm": 1.5595628588487156, "learning_rate": 5.059328475723602e-06, "loss": 0.7317, "step": 6604 }, { "epoch": 0.6746680286006129, "grad_norm": 1.4946461941335332, "learning_rate": 5.056452403220902e-06, "loss": 0.8592, "step": 6605 }, { "epoch": 0.6747701736465781, "grad_norm": 1.4257444736154308, "learning_rate": 5.053576871792678e-06, "loss": 0.7127, "step": 6606 }, { "epoch": 0.6748723186925434, "grad_norm": 1.434121148425506, "learning_rate": 5.050701881753664e-06, "loss": 0.6463, "step": 6607 }, { "epoch": 0.6749744637385087, "grad_norm": 1.4849004445757477, "learning_rate": 5.047827433418522e-06, "loss": 0.6639, "step": 6608 }, { "epoch": 0.675076608784474, "grad_norm": 1.6475417733550277, "learning_rate": 5.04495352710187e-06, "loss": 0.6994, "step": 6609 }, { "epoch": 0.6751787538304392, "grad_norm": 1.298612033368378, "learning_rate": 5.042080163118252e-06, "loss": 0.7191, "step": 6610 }, { "epoch": 0.6752808988764045, "grad_norm": 1.3795673728978026, "learning_rate": 5.039207341782167e-06, "loss": 0.6393, "step": 6611 }, { "epoch": 0.6753830439223698, "grad_norm": 1.4168220936444358, "learning_rate": 5.036335063408044e-06, "loss": 0.606, "step": 6612 }, { "epoch": 0.675485188968335, "grad_norm": 1.5760137326955517, "learning_rate": 5.033463328310253e-06, "loss": 0.8183, "step": 6613 }, { "epoch": 0.6755873340143003, "grad_norm": 1.4497956837022048, "learning_rate": 5.030592136803112e-06, "loss": 0.6718, "step": 6614 }, { "epoch": 0.6756894790602656, "grad_norm": 1.4444671501142599, "learning_rate": 5.027721489200872e-06, "loss": 0.6991, "step": 6615 }, { "epoch": 0.6757916241062308, "grad_norm": 1.392913015668251, "learning_rate": 5.0248513858177266e-06, "loss": 0.5669, "step": 6616 }, { "epoch": 0.6758937691521961, "grad_norm": 1.5204553661403344, "learning_rate": 5.021981826967809e-06, "loss": 0.7106, "step": 6617 }, { "epoch": 0.6759959141981614, "grad_norm": 1.466928484954999, "learning_rate": 5.019112812965197e-06, "loss": 0.7747, "step": 6618 }, { "epoch": 0.6760980592441267, "grad_norm": 1.5203976582229837, "learning_rate": 5.0162443441239085e-06, "loss": 0.7392, "step": 6619 }, { "epoch": 0.676200204290092, "grad_norm": 1.4580340103316611, "learning_rate": 5.013376420757896e-06, "loss": 0.6529, "step": 6620 }, { "epoch": 0.6763023493360572, "grad_norm": 1.456013509016094, "learning_rate": 5.010509043181058e-06, "loss": 0.5973, "step": 6621 }, { "epoch": 0.6764044943820224, "grad_norm": 1.5420054922467161, "learning_rate": 5.007642211707227e-06, "loss": 0.6645, "step": 6622 }, { "epoch": 0.6765066394279877, "grad_norm": 1.5268921001182891, "learning_rate": 5.004775926650178e-06, "loss": 0.7764, "step": 6623 }, { "epoch": 0.676608784473953, "grad_norm": 1.4839674977623316, "learning_rate": 5.001910188323636e-06, "loss": 0.7424, "step": 6624 }, { "epoch": 0.6767109295199183, "grad_norm": 1.57778982725158, "learning_rate": 4.999044997041252e-06, "loss": 0.7599, "step": 6625 }, { "epoch": 0.6768130745658836, "grad_norm": 1.5131135733668533, "learning_rate": 4.996180353116623e-06, "loss": 0.7231, "step": 6626 }, { "epoch": 0.6769152196118489, "grad_norm": 1.5434461817148768, "learning_rate": 4.993316256863286e-06, "loss": 0.76, "step": 6627 }, { "epoch": 0.6770173646578141, "grad_norm": 1.3926953451505821, "learning_rate": 4.990452708594718e-06, "loss": 0.7106, "step": 6628 }, { "epoch": 0.6771195097037793, "grad_norm": 1.5281744489409315, "learning_rate": 4.987589708624341e-06, "loss": 0.7454, "step": 6629 }, { "epoch": 0.6772216547497446, "grad_norm": 1.7266306780241274, "learning_rate": 4.984727257265509e-06, "loss": 0.7728, "step": 6630 }, { "epoch": 0.6773237997957099, "grad_norm": 1.2644585881055908, "learning_rate": 4.9818653548315195e-06, "loss": 0.5821, "step": 6631 }, { "epoch": 0.6774259448416752, "grad_norm": 1.40328094084618, "learning_rate": 4.979004001635606e-06, "loss": 0.7213, "step": 6632 }, { "epoch": 0.6775280898876405, "grad_norm": 1.4476314247451192, "learning_rate": 4.976143197990951e-06, "loss": 0.8015, "step": 6633 }, { "epoch": 0.6776302349336057, "grad_norm": 1.4252608120792556, "learning_rate": 4.973282944210669e-06, "loss": 0.7014, "step": 6634 }, { "epoch": 0.677732379979571, "grad_norm": 1.4685408326294858, "learning_rate": 4.970423240607813e-06, "loss": 0.6852, "step": 6635 }, { "epoch": 0.6778345250255363, "grad_norm": 1.5352684984438767, "learning_rate": 4.967564087495387e-06, "loss": 0.583, "step": 6636 }, { "epoch": 0.6779366700715015, "grad_norm": 1.5257333448327395, "learning_rate": 4.964705485186322e-06, "loss": 0.744, "step": 6637 }, { "epoch": 0.6780388151174668, "grad_norm": 1.6406488744826682, "learning_rate": 4.961847433993492e-06, "loss": 0.7701, "step": 6638 }, { "epoch": 0.6781409601634321, "grad_norm": 1.3964741829908782, "learning_rate": 4.958989934229719e-06, "loss": 0.6519, "step": 6639 }, { "epoch": 0.6782431052093973, "grad_norm": 1.4418400734239152, "learning_rate": 4.956132986207754e-06, "loss": 0.6461, "step": 6640 }, { "epoch": 0.6783452502553626, "grad_norm": 1.4194208621196294, "learning_rate": 4.953276590240293e-06, "loss": 0.7303, "step": 6641 }, { "epoch": 0.6784473953013279, "grad_norm": 1.4864260105518399, "learning_rate": 4.950420746639965e-06, "loss": 0.6842, "step": 6642 }, { "epoch": 0.6785495403472932, "grad_norm": 1.4165569700189224, "learning_rate": 4.94756545571935e-06, "loss": 0.7414, "step": 6643 }, { "epoch": 0.6786516853932584, "grad_norm": 1.6240892196511818, "learning_rate": 4.944710717790964e-06, "loss": 0.8087, "step": 6644 }, { "epoch": 0.6787538304392237, "grad_norm": 1.228910125760652, "learning_rate": 4.941856533167255e-06, "loss": 0.6193, "step": 6645 }, { "epoch": 0.678855975485189, "grad_norm": 1.6021855448018207, "learning_rate": 4.939002902160617e-06, "loss": 0.725, "step": 6646 }, { "epoch": 0.6789581205311542, "grad_norm": 1.496872619388595, "learning_rate": 4.9361498250833775e-06, "loss": 0.8093, "step": 6647 }, { "epoch": 0.6790602655771195, "grad_norm": 1.4258167654538545, "learning_rate": 4.933297302247815e-06, "loss": 0.6564, "step": 6648 }, { "epoch": 0.6791624106230848, "grad_norm": 1.424649667091576, "learning_rate": 4.930445333966136e-06, "loss": 0.7106, "step": 6649 }, { "epoch": 0.6792645556690501, "grad_norm": 1.5427968334313757, "learning_rate": 4.927593920550491e-06, "loss": 0.759, "step": 6650 }, { "epoch": 0.6793667007150154, "grad_norm": 1.5581699739933221, "learning_rate": 4.9247430623129645e-06, "loss": 0.7082, "step": 6651 }, { "epoch": 0.6794688457609805, "grad_norm": 1.4596845818509856, "learning_rate": 4.921892759565589e-06, "loss": 0.7917, "step": 6652 }, { "epoch": 0.6795709908069458, "grad_norm": 1.6775701120037028, "learning_rate": 4.919043012620336e-06, "loss": 0.75, "step": 6653 }, { "epoch": 0.6796731358529111, "grad_norm": 1.3735870159030041, "learning_rate": 4.916193821789107e-06, "loss": 0.6587, "step": 6654 }, { "epoch": 0.6797752808988764, "grad_norm": 1.549705494592493, "learning_rate": 4.91334518738375e-06, "loss": 0.6841, "step": 6655 }, { "epoch": 0.6798774259448417, "grad_norm": 1.4305981604508688, "learning_rate": 4.910497109716048e-06, "loss": 0.7037, "step": 6656 }, { "epoch": 0.679979570990807, "grad_norm": 1.6312959348527254, "learning_rate": 4.907649589097722e-06, "loss": 0.6718, "step": 6657 }, { "epoch": 0.6800817160367723, "grad_norm": 1.5311068184205798, "learning_rate": 4.904802625840441e-06, "loss": 0.6918, "step": 6658 }, { "epoch": 0.6801838610827375, "grad_norm": 1.3627896636893202, "learning_rate": 4.901956220255802e-06, "loss": 0.7811, "step": 6659 }, { "epoch": 0.6802860061287027, "grad_norm": 1.4420947546147513, "learning_rate": 4.899110372655352e-06, "loss": 0.6849, "step": 6660 }, { "epoch": 0.680388151174668, "grad_norm": 1.3150617819247594, "learning_rate": 4.896265083350568e-06, "loss": 0.662, "step": 6661 }, { "epoch": 0.6804902962206333, "grad_norm": 1.462922858278926, "learning_rate": 4.893420352652863e-06, "loss": 0.6594, "step": 6662 }, { "epoch": 0.6805924412665986, "grad_norm": 1.460430423691327, "learning_rate": 4.890576180873605e-06, "loss": 0.7559, "step": 6663 }, { "epoch": 0.6806945863125639, "grad_norm": 1.59406105858736, "learning_rate": 4.887732568324084e-06, "loss": 0.6935, "step": 6664 }, { "epoch": 0.6807967313585291, "grad_norm": 1.5703209928963187, "learning_rate": 4.884889515315537e-06, "loss": 0.7929, "step": 6665 }, { "epoch": 0.6808988764044944, "grad_norm": 1.645252523846712, "learning_rate": 4.8820470221591335e-06, "loss": 0.7196, "step": 6666 }, { "epoch": 0.6810010214504596, "grad_norm": 1.5532248594509683, "learning_rate": 4.87920508916599e-06, "loss": 0.731, "step": 6667 }, { "epoch": 0.6811031664964249, "grad_norm": 1.4603258316768737, "learning_rate": 4.876363716647161e-06, "loss": 0.6922, "step": 6668 }, { "epoch": 0.6812053115423902, "grad_norm": 1.5878906931259673, "learning_rate": 4.8735229049136335e-06, "loss": 0.6597, "step": 6669 }, { "epoch": 0.6813074565883555, "grad_norm": 1.620717516313211, "learning_rate": 4.870682654276337e-06, "loss": 0.7801, "step": 6670 }, { "epoch": 0.6814096016343207, "grad_norm": 1.570211284718985, "learning_rate": 4.867842965046134e-06, "loss": 0.7025, "step": 6671 }, { "epoch": 0.681511746680286, "grad_norm": 1.408406329301186, "learning_rate": 4.865003837533837e-06, "loss": 0.6692, "step": 6672 }, { "epoch": 0.6816138917262513, "grad_norm": 1.5196156166042094, "learning_rate": 4.8621652720501884e-06, "loss": 0.6758, "step": 6673 }, { "epoch": 0.6817160367722166, "grad_norm": 1.439353720681077, "learning_rate": 4.859327268905871e-06, "loss": 0.7435, "step": 6674 }, { "epoch": 0.6818181818181818, "grad_norm": 1.4984924473539427, "learning_rate": 4.8564898284115e-06, "loss": 0.6777, "step": 6675 }, { "epoch": 0.681920326864147, "grad_norm": 1.4387357897987425, "learning_rate": 4.853652950877645e-06, "loss": 0.7449, "step": 6676 }, { "epoch": 0.6820224719101123, "grad_norm": 1.4636392795403104, "learning_rate": 4.850816636614797e-06, "loss": 0.6896, "step": 6677 }, { "epoch": 0.6821246169560776, "grad_norm": 1.2668154360523531, "learning_rate": 4.8479808859333964e-06, "loss": 0.6283, "step": 6678 }, { "epoch": 0.6822267620020429, "grad_norm": 1.470623676503276, "learning_rate": 4.845145699143818e-06, "loss": 0.6737, "step": 6679 }, { "epoch": 0.6823289070480082, "grad_norm": 1.5385561183222132, "learning_rate": 4.842311076556373e-06, "loss": 0.7029, "step": 6680 }, { "epoch": 0.6824310520939735, "grad_norm": 1.5013611336741748, "learning_rate": 4.839477018481309e-06, "loss": 0.7339, "step": 6681 }, { "epoch": 0.6825331971399388, "grad_norm": 1.3831685164489391, "learning_rate": 4.836643525228822e-06, "loss": 0.6541, "step": 6682 }, { "epoch": 0.6826353421859039, "grad_norm": 1.8112592003340724, "learning_rate": 4.833810597109036e-06, "loss": 0.7911, "step": 6683 }, { "epoch": 0.6827374872318692, "grad_norm": 1.5188390671802119, "learning_rate": 4.83097823443202e-06, "loss": 0.6318, "step": 6684 }, { "epoch": 0.6828396322778345, "grad_norm": 1.47318579445148, "learning_rate": 4.828146437507776e-06, "loss": 0.7134, "step": 6685 }, { "epoch": 0.6829417773237998, "grad_norm": 1.406808962296762, "learning_rate": 4.825315206646242e-06, "loss": 0.6433, "step": 6686 }, { "epoch": 0.6830439223697651, "grad_norm": 1.498110994291386, "learning_rate": 4.822484542157306e-06, "loss": 0.6846, "step": 6687 }, { "epoch": 0.6831460674157304, "grad_norm": 1.4645350879203618, "learning_rate": 4.819654444350782e-06, "loss": 0.6449, "step": 6688 }, { "epoch": 0.6832482124616956, "grad_norm": 1.5929618040681501, "learning_rate": 4.816824913536425e-06, "loss": 0.6954, "step": 6689 }, { "epoch": 0.6833503575076609, "grad_norm": 1.5178842337698355, "learning_rate": 4.813995950023932e-06, "loss": 0.711, "step": 6690 }, { "epoch": 0.6834525025536261, "grad_norm": 1.5036647118132327, "learning_rate": 4.811167554122928e-06, "loss": 0.5943, "step": 6691 }, { "epoch": 0.6835546475995914, "grad_norm": 1.4356735651208241, "learning_rate": 4.8083397261429865e-06, "loss": 0.743, "step": 6692 }, { "epoch": 0.6836567926455567, "grad_norm": 1.6297738158493271, "learning_rate": 4.805512466393621e-06, "loss": 0.7049, "step": 6693 }, { "epoch": 0.683758937691522, "grad_norm": 1.4861232710480963, "learning_rate": 4.802685775184272e-06, "loss": 0.6331, "step": 6694 }, { "epoch": 0.6838610827374872, "grad_norm": 1.6436316159506252, "learning_rate": 4.799859652824323e-06, "loss": 0.8186, "step": 6695 }, { "epoch": 0.6839632277834525, "grad_norm": 1.4400975364979185, "learning_rate": 4.797034099623091e-06, "loss": 0.6597, "step": 6696 }, { "epoch": 0.6840653728294178, "grad_norm": 1.357322197603183, "learning_rate": 4.794209115889843e-06, "loss": 0.7439, "step": 6697 }, { "epoch": 0.684167517875383, "grad_norm": 1.370924954165412, "learning_rate": 4.791384701933769e-06, "loss": 0.6024, "step": 6698 }, { "epoch": 0.6842696629213483, "grad_norm": 1.505675658507532, "learning_rate": 4.788560858064002e-06, "loss": 0.6839, "step": 6699 }, { "epoch": 0.6843718079673136, "grad_norm": 1.5510826159942197, "learning_rate": 4.785737584589619e-06, "loss": 0.682, "step": 6700 }, { "epoch": 0.6844739530132788, "grad_norm": 1.6365368398842248, "learning_rate": 4.782914881819623e-06, "loss": 0.7179, "step": 6701 }, { "epoch": 0.6845760980592441, "grad_norm": 1.5877252611737447, "learning_rate": 4.780092750062967e-06, "loss": 0.699, "step": 6702 }, { "epoch": 0.6846782431052094, "grad_norm": 1.584563810122719, "learning_rate": 4.777271189628533e-06, "loss": 0.7084, "step": 6703 }, { "epoch": 0.6847803881511747, "grad_norm": 1.503604752418825, "learning_rate": 4.77445020082514e-06, "loss": 0.7073, "step": 6704 }, { "epoch": 0.68488253319714, "grad_norm": 1.4559730071882393, "learning_rate": 4.771629783961547e-06, "loss": 0.6774, "step": 6705 }, { "epoch": 0.6849846782431052, "grad_norm": 1.4530798548958979, "learning_rate": 4.768809939346454e-06, "loss": 0.7003, "step": 6706 }, { "epoch": 0.6850868232890704, "grad_norm": 1.496299529952143, "learning_rate": 4.765990667288489e-06, "loss": 0.6538, "step": 6707 }, { "epoch": 0.6851889683350357, "grad_norm": 1.527692864965651, "learning_rate": 4.763171968096233e-06, "loss": 0.738, "step": 6708 }, { "epoch": 0.685291113381001, "grad_norm": 1.3640835582031088, "learning_rate": 4.760353842078187e-06, "loss": 0.6269, "step": 6709 }, { "epoch": 0.6853932584269663, "grad_norm": 1.6351615653899563, "learning_rate": 4.757536289542798e-06, "loss": 0.6922, "step": 6710 }, { "epoch": 0.6854954034729316, "grad_norm": 1.6325769371838277, "learning_rate": 4.754719310798446e-06, "loss": 0.7335, "step": 6711 }, { "epoch": 0.6855975485188969, "grad_norm": 1.547190540641468, "learning_rate": 4.751902906153458e-06, "loss": 0.657, "step": 6712 }, { "epoch": 0.6856996935648622, "grad_norm": 1.4775470590933055, "learning_rate": 4.749087075916088e-06, "loss": 0.6832, "step": 6713 }, { "epoch": 0.6858018386108273, "grad_norm": 1.3931627128624873, "learning_rate": 4.746271820394529e-06, "loss": 0.6831, "step": 6714 }, { "epoch": 0.6859039836567926, "grad_norm": 1.9562594884170799, "learning_rate": 4.7434571398969105e-06, "loss": 0.7254, "step": 6715 }, { "epoch": 0.6860061287027579, "grad_norm": 1.3596156401056345, "learning_rate": 4.7406430347313045e-06, "loss": 0.5291, "step": 6716 }, { "epoch": 0.6861082737487232, "grad_norm": 1.460419798711219, "learning_rate": 4.73782950520572e-06, "loss": 0.7014, "step": 6717 }, { "epoch": 0.6862104187946885, "grad_norm": 1.5593519326529974, "learning_rate": 4.735016551628095e-06, "loss": 0.8455, "step": 6718 }, { "epoch": 0.6863125638406538, "grad_norm": 1.5513534553894874, "learning_rate": 4.732204174306312e-06, "loss": 0.7057, "step": 6719 }, { "epoch": 0.686414708886619, "grad_norm": 1.396672613190142, "learning_rate": 4.7293923735481796e-06, "loss": 0.6862, "step": 6720 }, { "epoch": 0.6865168539325842, "grad_norm": 1.5772073339324972, "learning_rate": 4.726581149661462e-06, "loss": 0.7344, "step": 6721 }, { "epoch": 0.6866189989785495, "grad_norm": 1.7539862686874437, "learning_rate": 4.723770502953845e-06, "loss": 0.7465, "step": 6722 }, { "epoch": 0.6867211440245148, "grad_norm": 1.2399727676892098, "learning_rate": 4.7209604337329505e-06, "loss": 0.5424, "step": 6723 }, { "epoch": 0.6868232890704801, "grad_norm": 1.5313293719636298, "learning_rate": 4.7181509423063525e-06, "loss": 0.7396, "step": 6724 }, { "epoch": 0.6869254341164454, "grad_norm": 1.433112636658535, "learning_rate": 4.715342028981541e-06, "loss": 0.6887, "step": 6725 }, { "epoch": 0.6870275791624106, "grad_norm": 1.440273819299479, "learning_rate": 4.712533694065964e-06, "loss": 0.7249, "step": 6726 }, { "epoch": 0.6871297242083759, "grad_norm": 1.4711643861067927, "learning_rate": 4.709725937866989e-06, "loss": 0.7243, "step": 6727 }, { "epoch": 0.6872318692543412, "grad_norm": 1.4802606396596845, "learning_rate": 4.7069187606919286e-06, "loss": 0.7458, "step": 6728 }, { "epoch": 0.6873340143003064, "grad_norm": 1.2461919690492955, "learning_rate": 4.704112162848029e-06, "loss": 0.6586, "step": 6729 }, { "epoch": 0.6874361593462717, "grad_norm": 1.4940429570595293, "learning_rate": 4.701306144642472e-06, "loss": 0.6583, "step": 6730 }, { "epoch": 0.687538304392237, "grad_norm": 1.4740443164617025, "learning_rate": 4.698500706382381e-06, "loss": 0.7566, "step": 6731 }, { "epoch": 0.6876404494382022, "grad_norm": 1.6399334052662342, "learning_rate": 4.695695848374816e-06, "loss": 0.7727, "step": 6732 }, { "epoch": 0.6877425944841675, "grad_norm": 1.3842060966011773, "learning_rate": 4.692891570926768e-06, "loss": 0.6256, "step": 6733 }, { "epoch": 0.6878447395301328, "grad_norm": 1.53419332995097, "learning_rate": 4.690087874345165e-06, "loss": 0.6802, "step": 6734 }, { "epoch": 0.6879468845760981, "grad_norm": 1.475943024993585, "learning_rate": 4.687284758936872e-06, "loss": 0.6802, "step": 6735 }, { "epoch": 0.6880490296220634, "grad_norm": 1.4953806841439174, "learning_rate": 4.684482225008698e-06, "loss": 0.67, "step": 6736 }, { "epoch": 0.6881511746680286, "grad_norm": 1.505365778003549, "learning_rate": 4.6816802728673795e-06, "loss": 0.7716, "step": 6737 }, { "epoch": 0.6882533197139938, "grad_norm": 1.5428755220029613, "learning_rate": 4.67887890281959e-06, "loss": 0.6938, "step": 6738 }, { "epoch": 0.6883554647599591, "grad_norm": 1.329703349164968, "learning_rate": 4.67607811517194e-06, "loss": 0.7307, "step": 6739 }, { "epoch": 0.6884576098059244, "grad_norm": 1.5220102618993503, "learning_rate": 4.67327791023098e-06, "loss": 0.8349, "step": 6740 }, { "epoch": 0.6885597548518897, "grad_norm": 1.4787125015148104, "learning_rate": 4.670478288303198e-06, "loss": 0.6891, "step": 6741 }, { "epoch": 0.688661899897855, "grad_norm": 1.6445839132738922, "learning_rate": 4.66767924969501e-06, "loss": 0.6832, "step": 6742 }, { "epoch": 0.6887640449438203, "grad_norm": 1.5876279047181747, "learning_rate": 4.664880794712773e-06, "loss": 0.7316, "step": 6743 }, { "epoch": 0.6888661899897855, "grad_norm": 1.3801947009980542, "learning_rate": 4.662082923662779e-06, "loss": 0.729, "step": 6744 }, { "epoch": 0.6889683350357507, "grad_norm": 1.585264591113842, "learning_rate": 4.659285636851256e-06, "loss": 0.7225, "step": 6745 }, { "epoch": 0.689070480081716, "grad_norm": 1.535521748135794, "learning_rate": 4.656488934584373e-06, "loss": 0.713, "step": 6746 }, { "epoch": 0.6891726251276813, "grad_norm": 1.3766737283308377, "learning_rate": 4.653692817168226e-06, "loss": 0.6324, "step": 6747 }, { "epoch": 0.6892747701736466, "grad_norm": 1.574540104804747, "learning_rate": 4.6508972849088576e-06, "loss": 0.7868, "step": 6748 }, { "epoch": 0.6893769152196119, "grad_norm": 1.6123208145547114, "learning_rate": 4.648102338112237e-06, "loss": 0.643, "step": 6749 }, { "epoch": 0.6894790602655771, "grad_norm": 1.4709619458645418, "learning_rate": 4.6453079770842695e-06, "loss": 0.7227, "step": 6750 }, { "epoch": 0.6895812053115424, "grad_norm": 1.4740964904163405, "learning_rate": 4.642514202130808e-06, "loss": 0.6539, "step": 6751 }, { "epoch": 0.6896833503575076, "grad_norm": 1.4854651757543027, "learning_rate": 4.639721013557628e-06, "loss": 0.7845, "step": 6752 }, { "epoch": 0.6897854954034729, "grad_norm": 1.470395882509057, "learning_rate": 4.636928411670445e-06, "loss": 0.7604, "step": 6753 }, { "epoch": 0.6898876404494382, "grad_norm": 1.3619495547676008, "learning_rate": 4.6341363967749095e-06, "loss": 0.7, "step": 6754 }, { "epoch": 0.6899897854954035, "grad_norm": 1.5371267942225482, "learning_rate": 4.631344969176613e-06, "loss": 0.7035, "step": 6755 }, { "epoch": 0.6900919305413687, "grad_norm": 1.5499009757428495, "learning_rate": 4.628554129181081e-06, "loss": 0.6739, "step": 6756 }, { "epoch": 0.690194075587334, "grad_norm": 1.551004136175099, "learning_rate": 4.625763877093771e-06, "loss": 0.7334, "step": 6757 }, { "epoch": 0.6902962206332993, "grad_norm": 1.5304277111028575, "learning_rate": 4.622974213220075e-06, "loss": 0.8103, "step": 6758 }, { "epoch": 0.6903983656792646, "grad_norm": 1.4824124887067411, "learning_rate": 4.620185137865323e-06, "loss": 0.7334, "step": 6759 }, { "epoch": 0.6905005107252298, "grad_norm": 1.4368503232086713, "learning_rate": 4.617396651334787e-06, "loss": 0.7893, "step": 6760 }, { "epoch": 0.6906026557711951, "grad_norm": 1.5745773273697659, "learning_rate": 4.614608753933665e-06, "loss": 0.6698, "step": 6761 }, { "epoch": 0.6907048008171603, "grad_norm": 1.5336579134333808, "learning_rate": 4.611821445967094e-06, "loss": 0.6801, "step": 6762 }, { "epoch": 0.6908069458631256, "grad_norm": 1.5908909391837565, "learning_rate": 4.609034727740144e-06, "loss": 0.6461, "step": 6763 }, { "epoch": 0.6909090909090909, "grad_norm": 1.5035990021692525, "learning_rate": 4.606248599557829e-06, "loss": 0.8033, "step": 6764 }, { "epoch": 0.6910112359550562, "grad_norm": 1.4000414953588973, "learning_rate": 4.603463061725086e-06, "loss": 0.6628, "step": 6765 }, { "epoch": 0.6911133810010215, "grad_norm": 1.5166798619516466, "learning_rate": 4.600678114546802e-06, "loss": 0.6355, "step": 6766 }, { "epoch": 0.6912155260469868, "grad_norm": 1.3870179082346747, "learning_rate": 4.597893758327786e-06, "loss": 0.7154, "step": 6767 }, { "epoch": 0.691317671092952, "grad_norm": 1.483211684229176, "learning_rate": 4.595109993372788e-06, "loss": 0.5908, "step": 6768 }, { "epoch": 0.6914198161389172, "grad_norm": 1.554261259595864, "learning_rate": 4.59232681998649e-06, "loss": 0.6731, "step": 6769 }, { "epoch": 0.6915219611848825, "grad_norm": 1.6506411859584873, "learning_rate": 4.58954423847352e-06, "loss": 0.5771, "step": 6770 }, { "epoch": 0.6916241062308478, "grad_norm": 1.5024146222286596, "learning_rate": 4.5867622491384234e-06, "loss": 0.6638, "step": 6771 }, { "epoch": 0.6917262512768131, "grad_norm": 1.519873280517956, "learning_rate": 4.583980852285701e-06, "loss": 0.695, "step": 6772 }, { "epoch": 0.6918283963227784, "grad_norm": 1.543278638382172, "learning_rate": 4.5812000482197725e-06, "loss": 0.7164, "step": 6773 }, { "epoch": 0.6919305413687437, "grad_norm": 1.7728598853237947, "learning_rate": 4.5784198372449974e-06, "loss": 0.7695, "step": 6774 }, { "epoch": 0.6920326864147089, "grad_norm": 1.5863337853201025, "learning_rate": 4.575640219665676e-06, "loss": 0.7183, "step": 6775 }, { "epoch": 0.6921348314606741, "grad_norm": 1.369405799073389, "learning_rate": 4.572861195786038e-06, "loss": 0.682, "step": 6776 }, { "epoch": 0.6922369765066394, "grad_norm": 1.5079931583850443, "learning_rate": 4.5700827659102484e-06, "loss": 0.6408, "step": 6777 }, { "epoch": 0.6923391215526047, "grad_norm": 1.548565726644308, "learning_rate": 4.567304930342407e-06, "loss": 0.7144, "step": 6778 }, { "epoch": 0.69244126659857, "grad_norm": 1.5469566630761717, "learning_rate": 4.564527689386545e-06, "loss": 0.7029, "step": 6779 }, { "epoch": 0.6925434116445353, "grad_norm": 1.4339483761466318, "learning_rate": 4.561751043346647e-06, "loss": 0.7767, "step": 6780 }, { "epoch": 0.6926455566905005, "grad_norm": 1.457798586439289, "learning_rate": 4.55897499252661e-06, "loss": 0.6564, "step": 6781 }, { "epoch": 0.6927477017364658, "grad_norm": 1.2877988227655481, "learning_rate": 4.556199537230276e-06, "loss": 0.6737, "step": 6782 }, { "epoch": 0.692849846782431, "grad_norm": 1.332956315392737, "learning_rate": 4.553424677761419e-06, "loss": 0.6441, "step": 6783 }, { "epoch": 0.6929519918283963, "grad_norm": 1.5229127198105892, "learning_rate": 4.550650414423747e-06, "loss": 0.7253, "step": 6784 }, { "epoch": 0.6930541368743616, "grad_norm": 1.3709148348665277, "learning_rate": 4.547876747520911e-06, "loss": 0.6405, "step": 6785 }, { "epoch": 0.6931562819203269, "grad_norm": 1.5462928232319892, "learning_rate": 4.545103677356489e-06, "loss": 0.7494, "step": 6786 }, { "epoch": 0.6932584269662921, "grad_norm": 1.389197771720662, "learning_rate": 4.5423312042339876e-06, "loss": 0.6425, "step": 6787 }, { "epoch": 0.6933605720122574, "grad_norm": 1.4993160423413212, "learning_rate": 4.539559328456867e-06, "loss": 0.6509, "step": 6788 }, { "epoch": 0.6934627170582227, "grad_norm": 1.5994802172836868, "learning_rate": 4.536788050328502e-06, "loss": 0.6251, "step": 6789 }, { "epoch": 0.693564862104188, "grad_norm": 1.6486208739643695, "learning_rate": 4.534017370152218e-06, "loss": 0.7125, "step": 6790 }, { "epoch": 0.6936670071501532, "grad_norm": 1.4943118369009607, "learning_rate": 4.531247288231265e-06, "loss": 0.675, "step": 6791 }, { "epoch": 0.6937691521961185, "grad_norm": 1.548715625290685, "learning_rate": 4.528477804868829e-06, "loss": 0.667, "step": 6792 }, { "epoch": 0.6938712972420837, "grad_norm": 1.4068601735992454, "learning_rate": 4.525708920368029e-06, "loss": 0.6167, "step": 6793 }, { "epoch": 0.693973442288049, "grad_norm": 1.576602950420474, "learning_rate": 4.5229406350319285e-06, "loss": 0.7461, "step": 6794 }, { "epoch": 0.6940755873340143, "grad_norm": 1.4440606292743543, "learning_rate": 4.520172949163509e-06, "loss": 0.6371, "step": 6795 }, { "epoch": 0.6941777323799796, "grad_norm": 1.6045921964400394, "learning_rate": 4.517405863065706e-06, "loss": 0.7217, "step": 6796 }, { "epoch": 0.6942798774259449, "grad_norm": 1.5416770513302898, "learning_rate": 4.5146393770413724e-06, "loss": 0.6997, "step": 6797 }, { "epoch": 0.6943820224719102, "grad_norm": 1.3208797193866764, "learning_rate": 4.511873491393304e-06, "loss": 0.6503, "step": 6798 }, { "epoch": 0.6944841675178753, "grad_norm": 1.5476251422551661, "learning_rate": 4.509108206424223e-06, "loss": 0.7272, "step": 6799 }, { "epoch": 0.6945863125638406, "grad_norm": 1.4396853103810245, "learning_rate": 4.5063435224368e-06, "loss": 0.5444, "step": 6800 }, { "epoch": 0.6946884576098059, "grad_norm": 1.4631912127787892, "learning_rate": 4.503579439733629e-06, "loss": 0.6514, "step": 6801 }, { "epoch": 0.6947906026557712, "grad_norm": 1.58518884301646, "learning_rate": 4.500815958617239e-06, "loss": 0.7738, "step": 6802 }, { "epoch": 0.6948927477017365, "grad_norm": 1.3724546226795158, "learning_rate": 4.498053079390091e-06, "loss": 0.6166, "step": 6803 }, { "epoch": 0.6949948927477018, "grad_norm": 1.5630562975742832, "learning_rate": 4.495290802354589e-06, "loss": 0.6906, "step": 6804 }, { "epoch": 0.695097037793667, "grad_norm": 1.5008174847983093, "learning_rate": 4.492529127813068e-06, "loss": 0.6745, "step": 6805 }, { "epoch": 0.6951991828396322, "grad_norm": 1.4605168782645175, "learning_rate": 4.489768056067792e-06, "loss": 0.6766, "step": 6806 }, { "epoch": 0.6953013278855975, "grad_norm": 1.50560593680736, "learning_rate": 4.487007587420963e-06, "loss": 0.7417, "step": 6807 }, { "epoch": 0.6954034729315628, "grad_norm": 1.5373706522338397, "learning_rate": 4.484247722174712e-06, "loss": 0.7043, "step": 6808 }, { "epoch": 0.6955056179775281, "grad_norm": 1.531580383076186, "learning_rate": 4.481488460631115e-06, "loss": 0.7957, "step": 6809 }, { "epoch": 0.6956077630234934, "grad_norm": 1.3779029730932686, "learning_rate": 4.478729803092171e-06, "loss": 0.6066, "step": 6810 }, { "epoch": 0.6957099080694586, "grad_norm": 2.4343603660073705, "learning_rate": 4.475971749859813e-06, "loss": 0.7859, "step": 6811 }, { "epoch": 0.6958120531154239, "grad_norm": 1.4829636500444363, "learning_rate": 4.473214301235921e-06, "loss": 0.6414, "step": 6812 }, { "epoch": 0.6959141981613892, "grad_norm": 1.5804713553243892, "learning_rate": 4.470457457522291e-06, "loss": 0.6887, "step": 6813 }, { "epoch": 0.6960163432073544, "grad_norm": 1.3389988806746864, "learning_rate": 4.467701219020667e-06, "loss": 0.6037, "step": 6814 }, { "epoch": 0.6961184882533197, "grad_norm": 1.6110851358640677, "learning_rate": 4.464945586032719e-06, "loss": 0.7449, "step": 6815 }, { "epoch": 0.696220633299285, "grad_norm": 1.4116805551395035, "learning_rate": 4.462190558860054e-06, "loss": 0.6191, "step": 6816 }, { "epoch": 0.6963227783452502, "grad_norm": 1.5306103476090789, "learning_rate": 4.45943613780421e-06, "loss": 0.7304, "step": 6817 }, { "epoch": 0.6964249233912155, "grad_norm": 1.4304422432659412, "learning_rate": 4.456682323166656e-06, "loss": 0.6608, "step": 6818 }, { "epoch": 0.6965270684371808, "grad_norm": 1.387485509871996, "learning_rate": 4.453929115248803e-06, "loss": 0.6742, "step": 6819 }, { "epoch": 0.6966292134831461, "grad_norm": 1.4655215858268449, "learning_rate": 4.4511765143519955e-06, "loss": 0.6303, "step": 6820 }, { "epoch": 0.6967313585291114, "grad_norm": 1.3684655845039935, "learning_rate": 4.448424520777503e-06, "loss": 0.629, "step": 6821 }, { "epoch": 0.6968335035750766, "grad_norm": 1.50780324349912, "learning_rate": 4.4456731348265345e-06, "loss": 0.8013, "step": 6822 }, { "epoch": 0.6969356486210418, "grad_norm": 1.6324324350891788, "learning_rate": 4.442922356800226e-06, "loss": 0.7644, "step": 6823 }, { "epoch": 0.6970377936670071, "grad_norm": 1.4603877755238088, "learning_rate": 4.44017218699966e-06, "loss": 0.7205, "step": 6824 }, { "epoch": 0.6971399387129724, "grad_norm": 1.5417780828124856, "learning_rate": 4.437422625725839e-06, "loss": 0.8077, "step": 6825 }, { "epoch": 0.6972420837589377, "grad_norm": 1.3933072464651688, "learning_rate": 4.434673673279707e-06, "loss": 0.6998, "step": 6826 }, { "epoch": 0.697344228804903, "grad_norm": 1.508865392691754, "learning_rate": 4.431925329962133e-06, "loss": 0.7974, "step": 6827 }, { "epoch": 0.6974463738508683, "grad_norm": 1.5528196401118837, "learning_rate": 4.429177596073928e-06, "loss": 0.6934, "step": 6828 }, { "epoch": 0.6975485188968336, "grad_norm": 1.4996057078826062, "learning_rate": 4.426430471915839e-06, "loss": 0.7067, "step": 6829 }, { "epoch": 0.6976506639427987, "grad_norm": 1.4715660573342602, "learning_rate": 4.423683957788535e-06, "loss": 0.7131, "step": 6830 }, { "epoch": 0.697752808988764, "grad_norm": 1.4553917884317094, "learning_rate": 4.420938053992625e-06, "loss": 0.605, "step": 6831 }, { "epoch": 0.6978549540347293, "grad_norm": 1.5675002383686016, "learning_rate": 4.41819276082865e-06, "loss": 0.7439, "step": 6832 }, { "epoch": 0.6979570990806946, "grad_norm": 1.423942422371826, "learning_rate": 4.41544807859708e-06, "loss": 0.7102, "step": 6833 }, { "epoch": 0.6980592441266599, "grad_norm": 1.4989983801327054, "learning_rate": 4.412704007598329e-06, "loss": 0.6979, "step": 6834 }, { "epoch": 0.6981613891726252, "grad_norm": 1.6122564019126269, "learning_rate": 4.40996054813273e-06, "loss": 0.8116, "step": 6835 }, { "epoch": 0.6982635342185904, "grad_norm": 1.5216957910952824, "learning_rate": 4.407217700500565e-06, "loss": 0.7415, "step": 6836 }, { "epoch": 0.6983656792645556, "grad_norm": 1.6023715853634912, "learning_rate": 4.4044754650020364e-06, "loss": 0.6932, "step": 6837 }, { "epoch": 0.6984678243105209, "grad_norm": 1.6164665136924132, "learning_rate": 4.401733841937279e-06, "loss": 0.6726, "step": 6838 }, { "epoch": 0.6985699693564862, "grad_norm": 1.37887163323791, "learning_rate": 4.398992831606374e-06, "loss": 0.6674, "step": 6839 }, { "epoch": 0.6986721144024515, "grad_norm": 1.4392720418203955, "learning_rate": 4.396252434309322e-06, "loss": 0.7381, "step": 6840 }, { "epoch": 0.6987742594484168, "grad_norm": 1.4570259042514484, "learning_rate": 4.3935126503460615e-06, "loss": 0.6417, "step": 6841 }, { "epoch": 0.698876404494382, "grad_norm": 1.5996022268624903, "learning_rate": 4.3907734800164594e-06, "loss": 0.7228, "step": 6842 }, { "epoch": 0.6989785495403473, "grad_norm": 1.5816727774641341, "learning_rate": 4.3880349236203245e-06, "loss": 0.7383, "step": 6843 }, { "epoch": 0.6990806945863126, "grad_norm": 1.512896962122419, "learning_rate": 4.385296981457396e-06, "loss": 0.6839, "step": 6844 }, { "epoch": 0.6991828396322778, "grad_norm": 1.4808097566520873, "learning_rate": 4.382559653827342e-06, "loss": 0.687, "step": 6845 }, { "epoch": 0.6992849846782431, "grad_norm": 1.4820606024713598, "learning_rate": 4.379822941029762e-06, "loss": 0.6584, "step": 6846 }, { "epoch": 0.6993871297242084, "grad_norm": 1.4276576022115857, "learning_rate": 4.377086843364189e-06, "loss": 0.6382, "step": 6847 }, { "epoch": 0.6994892747701736, "grad_norm": 1.298809824069812, "learning_rate": 4.374351361130097e-06, "loss": 0.5897, "step": 6848 }, { "epoch": 0.6995914198161389, "grad_norm": 1.4530968951386005, "learning_rate": 4.371616494626884e-06, "loss": 0.6452, "step": 6849 }, { "epoch": 0.6996935648621042, "grad_norm": 1.5924035859429548, "learning_rate": 4.3688822441538815e-06, "loss": 0.7587, "step": 6850 }, { "epoch": 0.6997957099080695, "grad_norm": 1.5350968776949536, "learning_rate": 4.366148610010352e-06, "loss": 0.6546, "step": 6851 }, { "epoch": 0.6998978549540348, "grad_norm": 1.4959940604051245, "learning_rate": 4.3634155924955e-06, "loss": 0.6761, "step": 6852 }, { "epoch": 0.7, "grad_norm": 1.3715700521176932, "learning_rate": 4.360683191908451e-06, "loss": 0.6209, "step": 6853 }, { "epoch": 0.7001021450459652, "grad_norm": 1.6486401280220049, "learning_rate": 4.357951408548272e-06, "loss": 0.8114, "step": 6854 }, { "epoch": 0.7002042900919305, "grad_norm": 1.5473517479074097, "learning_rate": 4.355220242713958e-06, "loss": 0.6874, "step": 6855 }, { "epoch": 0.7003064351378958, "grad_norm": 1.4934656023474913, "learning_rate": 4.3524896947044345e-06, "loss": 0.6962, "step": 6856 }, { "epoch": 0.7004085801838611, "grad_norm": 1.5908546904137781, "learning_rate": 4.3497597648185595e-06, "loss": 0.6728, "step": 6857 }, { "epoch": 0.7005107252298264, "grad_norm": 1.4895237427281691, "learning_rate": 4.347030453355132e-06, "loss": 0.7172, "step": 6858 }, { "epoch": 0.7006128702757917, "grad_norm": 1.4994178324754317, "learning_rate": 4.344301760612871e-06, "loss": 0.6547, "step": 6859 }, { "epoch": 0.7007150153217568, "grad_norm": 1.6105977295987985, "learning_rate": 4.341573686890439e-06, "loss": 0.7568, "step": 6860 }, { "epoch": 0.7008171603677221, "grad_norm": 1.435604422655134, "learning_rate": 4.338846232486423e-06, "loss": 0.8171, "step": 6861 }, { "epoch": 0.7009193054136874, "grad_norm": 1.4530488259824883, "learning_rate": 4.336119397699341e-06, "loss": 0.7082, "step": 6862 }, { "epoch": 0.7010214504596527, "grad_norm": 1.522581258810552, "learning_rate": 4.3333931828276545e-06, "loss": 0.7314, "step": 6863 }, { "epoch": 0.701123595505618, "grad_norm": 1.465312359074145, "learning_rate": 4.330667588169743e-06, "loss": 0.6522, "step": 6864 }, { "epoch": 0.7012257405515833, "grad_norm": 1.5613916215177015, "learning_rate": 4.327942614023929e-06, "loss": 0.6868, "step": 6865 }, { "epoch": 0.7013278855975486, "grad_norm": 1.50412780534582, "learning_rate": 4.325218260688461e-06, "loss": 0.7757, "step": 6866 }, { "epoch": 0.7014300306435138, "grad_norm": 1.4053414029349975, "learning_rate": 4.322494528461512e-06, "loss": 0.6582, "step": 6867 }, { "epoch": 0.701532175689479, "grad_norm": 1.3978757912291284, "learning_rate": 4.319771417641212e-06, "loss": 0.7862, "step": 6868 }, { "epoch": 0.7016343207354443, "grad_norm": 1.3690172022526992, "learning_rate": 4.317048928525601e-06, "loss": 0.7281, "step": 6869 }, { "epoch": 0.7017364657814096, "grad_norm": 1.6706524631067317, "learning_rate": 4.314327061412657e-06, "loss": 0.7186, "step": 6870 }, { "epoch": 0.7018386108273749, "grad_norm": 1.3303549259325627, "learning_rate": 4.311605816600288e-06, "loss": 0.7078, "step": 6871 }, { "epoch": 0.7019407558733401, "grad_norm": 1.3520839907710007, "learning_rate": 4.308885194386335e-06, "loss": 0.4908, "step": 6872 }, { "epoch": 0.7020429009193054, "grad_norm": 1.5570065059996687, "learning_rate": 4.306165195068578e-06, "loss": 0.7567, "step": 6873 }, { "epoch": 0.7021450459652707, "grad_norm": 1.3878115998411766, "learning_rate": 4.303445818944718e-06, "loss": 0.6341, "step": 6874 }, { "epoch": 0.702247191011236, "grad_norm": 1.514493281075276, "learning_rate": 4.3007270663123916e-06, "loss": 0.7526, "step": 6875 }, { "epoch": 0.7023493360572012, "grad_norm": 1.388361325332902, "learning_rate": 4.298008937469172e-06, "loss": 0.7016, "step": 6876 }, { "epoch": 0.7024514811031665, "grad_norm": 1.4907063128873075, "learning_rate": 4.295291432712556e-06, "loss": 0.7577, "step": 6877 }, { "epoch": 0.7025536261491317, "grad_norm": 1.5049483647748787, "learning_rate": 4.292574552339981e-06, "loss": 0.7872, "step": 6878 }, { "epoch": 0.702655771195097, "grad_norm": 1.6883457230224006, "learning_rate": 4.289858296648809e-06, "loss": 0.7188, "step": 6879 }, { "epoch": 0.7027579162410623, "grad_norm": 1.5472129190231299, "learning_rate": 4.287142665936336e-06, "loss": 0.7458, "step": 6880 }, { "epoch": 0.7028600612870276, "grad_norm": 1.5132961111446008, "learning_rate": 4.284427660499786e-06, "loss": 0.6451, "step": 6881 }, { "epoch": 0.7029622063329929, "grad_norm": 1.5492297674414648, "learning_rate": 4.281713280636324e-06, "loss": 0.6612, "step": 6882 }, { "epoch": 0.7030643513789582, "grad_norm": 1.3573816775271157, "learning_rate": 4.278999526643034e-06, "loss": 0.6041, "step": 6883 }, { "epoch": 0.7031664964249233, "grad_norm": 1.4751164839055932, "learning_rate": 4.276286398816946e-06, "loss": 0.6709, "step": 6884 }, { "epoch": 0.7032686414708886, "grad_norm": 1.5169584769255473, "learning_rate": 4.27357389745501e-06, "loss": 0.7304, "step": 6885 }, { "epoch": 0.7033707865168539, "grad_norm": 1.342609572756083, "learning_rate": 4.2708620228541105e-06, "loss": 0.6573, "step": 6886 }, { "epoch": 0.7034729315628192, "grad_norm": 1.4762555109216227, "learning_rate": 4.268150775311061e-06, "loss": 0.6281, "step": 6887 }, { "epoch": 0.7035750766087845, "grad_norm": 1.3985719796100031, "learning_rate": 4.2654401551226156e-06, "loss": 0.6348, "step": 6888 }, { "epoch": 0.7036772216547498, "grad_norm": 1.4927722265263943, "learning_rate": 4.26273016258545e-06, "loss": 0.6519, "step": 6889 }, { "epoch": 0.7037793667007151, "grad_norm": 1.544805630759808, "learning_rate": 4.260020797996175e-06, "loss": 0.7031, "step": 6890 }, { "epoch": 0.7038815117466802, "grad_norm": 1.4298455386768272, "learning_rate": 4.257312061651329e-06, "loss": 0.7162, "step": 6891 }, { "epoch": 0.7039836567926455, "grad_norm": 1.4985224227709653, "learning_rate": 4.254603953847387e-06, "loss": 0.6963, "step": 6892 }, { "epoch": 0.7040858018386108, "grad_norm": 1.3714373532174218, "learning_rate": 4.251896474880758e-06, "loss": 0.7687, "step": 6893 }, { "epoch": 0.7041879468845761, "grad_norm": 1.615170701940534, "learning_rate": 4.249189625047773e-06, "loss": 0.7103, "step": 6894 }, { "epoch": 0.7042900919305414, "grad_norm": 1.475600933192139, "learning_rate": 4.246483404644702e-06, "loss": 0.6687, "step": 6895 }, { "epoch": 0.7043922369765067, "grad_norm": 1.4405323194950397, "learning_rate": 4.243777813967733e-06, "loss": 0.6387, "step": 6896 }, { "epoch": 0.7044943820224719, "grad_norm": 1.2871270164363433, "learning_rate": 4.241072853313006e-06, "loss": 0.6883, "step": 6897 }, { "epoch": 0.7045965270684372, "grad_norm": 1.4923302086763857, "learning_rate": 4.2383685229765755e-06, "loss": 0.6945, "step": 6898 }, { "epoch": 0.7046986721144024, "grad_norm": 1.5667679334535802, "learning_rate": 4.235664823254431e-06, "loss": 0.7438, "step": 6899 }, { "epoch": 0.7048008171603677, "grad_norm": 1.6150210127153137, "learning_rate": 4.2329617544424976e-06, "loss": 0.7016, "step": 6900 }, { "epoch": 0.704902962206333, "grad_norm": 1.3653693254244708, "learning_rate": 4.230259316836622e-06, "loss": 0.651, "step": 6901 }, { "epoch": 0.7050051072522983, "grad_norm": 1.518331945787565, "learning_rate": 4.2275575107325965e-06, "loss": 0.6647, "step": 6902 }, { "epoch": 0.7051072522982635, "grad_norm": 1.4338161491876356, "learning_rate": 4.22485633642613e-06, "loss": 0.728, "step": 6903 }, { "epoch": 0.7052093973442288, "grad_norm": 1.2339149468194266, "learning_rate": 4.22215579421287e-06, "loss": 0.6812, "step": 6904 }, { "epoch": 0.7053115423901941, "grad_norm": 1.3312497716484102, "learning_rate": 4.219455884388391e-06, "loss": 0.7013, "step": 6905 }, { "epoch": 0.7054136874361594, "grad_norm": 1.3893373807235514, "learning_rate": 4.216756607248197e-06, "loss": 0.6227, "step": 6906 }, { "epoch": 0.7055158324821246, "grad_norm": 1.4223983699624907, "learning_rate": 4.214057963087727e-06, "loss": 0.6653, "step": 6907 }, { "epoch": 0.7056179775280899, "grad_norm": 1.3985984317691704, "learning_rate": 4.211359952202357e-06, "loss": 0.7557, "step": 6908 }, { "epoch": 0.7057201225740551, "grad_norm": 1.554109838648449, "learning_rate": 4.208662574887379e-06, "loss": 0.703, "step": 6909 }, { "epoch": 0.7058222676200204, "grad_norm": 1.2245517088798967, "learning_rate": 4.205965831438024e-06, "loss": 0.6619, "step": 6910 }, { "epoch": 0.7059244126659857, "grad_norm": 1.7681143736486562, "learning_rate": 4.203269722149448e-06, "loss": 0.6726, "step": 6911 }, { "epoch": 0.706026557711951, "grad_norm": 1.4262529315357486, "learning_rate": 4.20057424731675e-06, "loss": 0.6832, "step": 6912 }, { "epoch": 0.7061287027579163, "grad_norm": 1.4871599430438882, "learning_rate": 4.197879407234947e-06, "loss": 0.6949, "step": 6913 }, { "epoch": 0.7062308478038815, "grad_norm": 1.627684395736922, "learning_rate": 4.195185202198992e-06, "loss": 0.8037, "step": 6914 }, { "epoch": 0.7063329928498467, "grad_norm": 1.3533222196916344, "learning_rate": 4.192491632503765e-06, "loss": 0.753, "step": 6915 }, { "epoch": 0.706435137895812, "grad_norm": 1.56137148764183, "learning_rate": 4.1897986984440784e-06, "loss": 0.7667, "step": 6916 }, { "epoch": 0.7065372829417773, "grad_norm": 1.4538822772656388, "learning_rate": 4.187106400314683e-06, "loss": 0.5975, "step": 6917 }, { "epoch": 0.7066394279877426, "grad_norm": 1.4345386327078853, "learning_rate": 4.1844147384102486e-06, "loss": 0.6025, "step": 6918 }, { "epoch": 0.7067415730337079, "grad_norm": 1.48088715885521, "learning_rate": 4.181723713025376e-06, "loss": 0.7334, "step": 6919 }, { "epoch": 0.7068437180796732, "grad_norm": 1.485899176001851, "learning_rate": 4.179033324454605e-06, "loss": 0.6834, "step": 6920 }, { "epoch": 0.7069458631256385, "grad_norm": 1.5269414957077465, "learning_rate": 4.176343572992392e-06, "loss": 0.8186, "step": 6921 }, { "epoch": 0.7070480081716036, "grad_norm": 1.4123607121690231, "learning_rate": 4.173654458933141e-06, "loss": 0.7038, "step": 6922 }, { "epoch": 0.7071501532175689, "grad_norm": 1.4741140082472883, "learning_rate": 4.170965982571171e-06, "loss": 0.6858, "step": 6923 }, { "epoch": 0.7072522982635342, "grad_norm": 1.4369479066319717, "learning_rate": 4.168278144200743e-06, "loss": 0.7296, "step": 6924 }, { "epoch": 0.7073544433094995, "grad_norm": 1.3466378817008398, "learning_rate": 4.16559094411604e-06, "loss": 0.6677, "step": 6925 }, { "epoch": 0.7074565883554648, "grad_norm": 1.4729974792095448, "learning_rate": 4.1629043826111745e-06, "loss": 0.6488, "step": 6926 }, { "epoch": 0.70755873340143, "grad_norm": 1.656646769248252, "learning_rate": 4.1602184599802e-06, "loss": 0.6709, "step": 6927 }, { "epoch": 0.7076608784473953, "grad_norm": 1.3342365848423112, "learning_rate": 4.157533176517087e-06, "loss": 0.6062, "step": 6928 }, { "epoch": 0.7077630234933606, "grad_norm": 1.5572655294927515, "learning_rate": 4.154848532515743e-06, "loss": 0.6751, "step": 6929 }, { "epoch": 0.7078651685393258, "grad_norm": 1.4551539307134356, "learning_rate": 4.152164528269999e-06, "loss": 0.6958, "step": 6930 }, { "epoch": 0.7079673135852911, "grad_norm": 1.5365208776514503, "learning_rate": 4.149481164073626e-06, "loss": 0.6269, "step": 6931 }, { "epoch": 0.7080694586312564, "grad_norm": 1.6637751151269546, "learning_rate": 4.146798440220324e-06, "loss": 0.7469, "step": 6932 }, { "epoch": 0.7081716036772217, "grad_norm": 1.3399627605260522, "learning_rate": 4.144116357003713e-06, "loss": 0.638, "step": 6933 }, { "epoch": 0.7082737487231869, "grad_norm": 1.4809522081192772, "learning_rate": 4.1414349147173514e-06, "loss": 0.6571, "step": 6934 }, { "epoch": 0.7083758937691522, "grad_norm": 1.5454271114375617, "learning_rate": 4.138754113654719e-06, "loss": 0.695, "step": 6935 }, { "epoch": 0.7084780388151175, "grad_norm": 1.567514451263671, "learning_rate": 4.1360739541092396e-06, "loss": 0.7454, "step": 6936 }, { "epoch": 0.7085801838610828, "grad_norm": 1.3656927578039195, "learning_rate": 4.133394436374255e-06, "loss": 0.6397, "step": 6937 }, { "epoch": 0.708682328907048, "grad_norm": 1.5548160918283194, "learning_rate": 4.130715560743039e-06, "loss": 0.7252, "step": 6938 }, { "epoch": 0.7087844739530132, "grad_norm": 1.5319142486253907, "learning_rate": 4.128037327508794e-06, "loss": 0.6388, "step": 6939 }, { "epoch": 0.7088866189989785, "grad_norm": 1.5094076415842528, "learning_rate": 4.12535973696466e-06, "loss": 0.6951, "step": 6940 }, { "epoch": 0.7089887640449438, "grad_norm": 1.3711593003138105, "learning_rate": 4.122682789403695e-06, "loss": 0.7026, "step": 6941 }, { "epoch": 0.7090909090909091, "grad_norm": 1.5301888201338882, "learning_rate": 4.120006485118901e-06, "loss": 0.7315, "step": 6942 }, { "epoch": 0.7091930541368744, "grad_norm": 1.5458233754864787, "learning_rate": 4.117330824403194e-06, "loss": 0.6448, "step": 6943 }, { "epoch": 0.7092951991828397, "grad_norm": 1.58428243301101, "learning_rate": 4.114655807549429e-06, "loss": 0.7707, "step": 6944 }, { "epoch": 0.7093973442288048, "grad_norm": 1.4589447303945489, "learning_rate": 4.111981434850386e-06, "loss": 0.7338, "step": 6945 }, { "epoch": 0.7094994892747701, "grad_norm": 1.3573813231157297, "learning_rate": 4.1093077065987816e-06, "loss": 0.594, "step": 6946 }, { "epoch": 0.7096016343207354, "grad_norm": 1.4929450602905736, "learning_rate": 4.1066346230872526e-06, "loss": 0.6907, "step": 6947 }, { "epoch": 0.7097037793667007, "grad_norm": 1.4765637889977614, "learning_rate": 4.103962184608373e-06, "loss": 0.707, "step": 6948 }, { "epoch": 0.709805924412666, "grad_norm": 1.4266829655229054, "learning_rate": 4.101290391454643e-06, "loss": 0.7152, "step": 6949 }, { "epoch": 0.7099080694586313, "grad_norm": 1.3393810861270083, "learning_rate": 4.098619243918487e-06, "loss": 0.7704, "step": 6950 }, { "epoch": 0.7100102145045966, "grad_norm": 1.4913386915484104, "learning_rate": 4.095948742292271e-06, "loss": 0.7648, "step": 6951 }, { "epoch": 0.7101123595505618, "grad_norm": 1.4933262446769613, "learning_rate": 4.09327888686828e-06, "loss": 0.7223, "step": 6952 }, { "epoch": 0.710214504596527, "grad_norm": 1.510729726440067, "learning_rate": 4.090609677938731e-06, "loss": 0.6809, "step": 6953 }, { "epoch": 0.7103166496424923, "grad_norm": 1.4359945221295232, "learning_rate": 4.087941115795767e-06, "loss": 0.6382, "step": 6954 }, { "epoch": 0.7104187946884576, "grad_norm": 1.586910192518934, "learning_rate": 4.085273200731468e-06, "loss": 0.6279, "step": 6955 }, { "epoch": 0.7105209397344229, "grad_norm": 1.580053122296865, "learning_rate": 4.082605933037841e-06, "loss": 0.7024, "step": 6956 }, { "epoch": 0.7106230847803882, "grad_norm": 1.4279719561691568, "learning_rate": 4.079939313006819e-06, "loss": 0.7049, "step": 6957 }, { "epoch": 0.7107252298263534, "grad_norm": 1.4984985937706568, "learning_rate": 4.0772733409302634e-06, "loss": 0.7153, "step": 6958 }, { "epoch": 0.7108273748723187, "grad_norm": 1.512320694524567, "learning_rate": 4.0746080170999665e-06, "loss": 0.7081, "step": 6959 }, { "epoch": 0.710929519918284, "grad_norm": 1.365056956827684, "learning_rate": 4.071943341807648e-06, "loss": 0.72, "step": 6960 }, { "epoch": 0.7110316649642492, "grad_norm": 1.5510494384170725, "learning_rate": 4.069279315344964e-06, "loss": 0.7533, "step": 6961 }, { "epoch": 0.7111338100102145, "grad_norm": 1.544704850899328, "learning_rate": 4.066615938003491e-06, "loss": 0.6952, "step": 6962 }, { "epoch": 0.7112359550561798, "grad_norm": 1.4054277916532003, "learning_rate": 4.063953210074733e-06, "loss": 0.6552, "step": 6963 }, { "epoch": 0.711338100102145, "grad_norm": 1.5535030525651847, "learning_rate": 4.0612911318501345e-06, "loss": 0.6126, "step": 6964 }, { "epoch": 0.7114402451481103, "grad_norm": 1.5182672575597311, "learning_rate": 4.058629703621055e-06, "loss": 0.6508, "step": 6965 }, { "epoch": 0.7115423901940756, "grad_norm": 1.4253831232899727, "learning_rate": 4.055968925678797e-06, "loss": 0.6922, "step": 6966 }, { "epoch": 0.7116445352400409, "grad_norm": 1.4661668455035148, "learning_rate": 4.05330879831458e-06, "loss": 0.7007, "step": 6967 }, { "epoch": 0.7117466802860062, "grad_norm": 1.5024060149316576, "learning_rate": 4.050649321819559e-06, "loss": 0.7593, "step": 6968 }, { "epoch": 0.7118488253319714, "grad_norm": 1.546589826827416, "learning_rate": 4.047990496484808e-06, "loss": 0.7128, "step": 6969 }, { "epoch": 0.7119509703779366, "grad_norm": 1.4341931944088089, "learning_rate": 4.045332322601346e-06, "loss": 0.6251, "step": 6970 }, { "epoch": 0.7120531154239019, "grad_norm": 1.6632871188263734, "learning_rate": 4.042674800460106e-06, "loss": 0.7633, "step": 6971 }, { "epoch": 0.7121552604698672, "grad_norm": 1.2719144549393413, "learning_rate": 4.0400179303519616e-06, "loss": 0.7222, "step": 6972 }, { "epoch": 0.7122574055158325, "grad_norm": 1.3800349891909651, "learning_rate": 4.037361712567705e-06, "loss": 0.6266, "step": 6973 }, { "epoch": 0.7123595505617978, "grad_norm": 1.504362942420356, "learning_rate": 4.034706147398061e-06, "loss": 0.6854, "step": 6974 }, { "epoch": 0.7124616956077631, "grad_norm": 1.3387059733069222, "learning_rate": 4.0320512351336806e-06, "loss": 0.6917, "step": 6975 }, { "epoch": 0.7125638406537282, "grad_norm": 1.4098029850031484, "learning_rate": 4.029396976065151e-06, "loss": 0.6511, "step": 6976 }, { "epoch": 0.7126659856996935, "grad_norm": 1.6626313742270857, "learning_rate": 4.026743370482979e-06, "loss": 0.6912, "step": 6977 }, { "epoch": 0.7127681307456588, "grad_norm": 1.3152340121133819, "learning_rate": 4.024090418677607e-06, "loss": 0.5649, "step": 6978 }, { "epoch": 0.7128702757916241, "grad_norm": 1.5206404099244941, "learning_rate": 4.021438120939394e-06, "loss": 0.6973, "step": 6979 }, { "epoch": 0.7129724208375894, "grad_norm": 1.5027527665548193, "learning_rate": 4.0187864775586415e-06, "loss": 0.7196, "step": 6980 }, { "epoch": 0.7130745658835547, "grad_norm": 1.4543889960971648, "learning_rate": 4.016135488825576e-06, "loss": 0.7567, "step": 6981 }, { "epoch": 0.71317671092952, "grad_norm": 1.457362973335699, "learning_rate": 4.01348515503035e-06, "loss": 0.5176, "step": 6982 }, { "epoch": 0.7132788559754852, "grad_norm": 1.514257364983238, "learning_rate": 4.01083547646304e-06, "loss": 0.6429, "step": 6983 }, { "epoch": 0.7133810010214504, "grad_norm": 1.5201850988807657, "learning_rate": 4.008186453413653e-06, "loss": 0.7623, "step": 6984 }, { "epoch": 0.7134831460674157, "grad_norm": 1.4757826354680996, "learning_rate": 4.0055380861721335e-06, "loss": 0.5967, "step": 6985 }, { "epoch": 0.713585291113381, "grad_norm": 1.4068852740322775, "learning_rate": 4.002890375028343e-06, "loss": 0.6404, "step": 6986 }, { "epoch": 0.7136874361593463, "grad_norm": 1.613139122710717, "learning_rate": 4.000243320272073e-06, "loss": 0.7457, "step": 6987 }, { "epoch": 0.7137895812053116, "grad_norm": 1.4987010532850589, "learning_rate": 3.997596922193051e-06, "loss": 0.725, "step": 6988 }, { "epoch": 0.7138917262512768, "grad_norm": 1.4925355664152664, "learning_rate": 3.99495118108092e-06, "loss": 0.6383, "step": 6989 }, { "epoch": 0.7139938712972421, "grad_norm": 1.5000109920763496, "learning_rate": 3.992306097225266e-06, "loss": 0.752, "step": 6990 }, { "epoch": 0.7140960163432074, "grad_norm": 1.5989585905451302, "learning_rate": 3.989661670915591e-06, "loss": 0.7079, "step": 6991 }, { "epoch": 0.7141981613891726, "grad_norm": 1.3948397128365424, "learning_rate": 3.987017902441329e-06, "loss": 0.6121, "step": 6992 }, { "epoch": 0.7143003064351379, "grad_norm": 1.3696154895201689, "learning_rate": 3.984374792091843e-06, "loss": 0.5742, "step": 6993 }, { "epoch": 0.7144024514811032, "grad_norm": 1.5097901420389772, "learning_rate": 3.981732340156419e-06, "loss": 0.6033, "step": 6994 }, { "epoch": 0.7145045965270684, "grad_norm": 1.5133340711156522, "learning_rate": 3.979090546924278e-06, "loss": 0.7838, "step": 6995 }, { "epoch": 0.7146067415730337, "grad_norm": 1.434584662011082, "learning_rate": 3.976449412684571e-06, "loss": 0.6877, "step": 6996 }, { "epoch": 0.714708886618999, "grad_norm": 1.3855418475958892, "learning_rate": 3.973808937726368e-06, "loss": 0.6551, "step": 6997 }, { "epoch": 0.7148110316649643, "grad_norm": 1.4332538025688333, "learning_rate": 3.971169122338668e-06, "loss": 0.761, "step": 6998 }, { "epoch": 0.7149131767109295, "grad_norm": 1.5403945880685816, "learning_rate": 3.968529966810402e-06, "loss": 0.6885, "step": 6999 }, { "epoch": 0.7150153217568948, "grad_norm": 1.544246550440396, "learning_rate": 3.965891471430429e-06, "loss": 0.6154, "step": 7000 }, { "epoch": 0.71511746680286, "grad_norm": 1.6109009217632133, "learning_rate": 3.963253636487534e-06, "loss": 0.6905, "step": 7001 }, { "epoch": 0.7152196118488253, "grad_norm": 1.595923082487574, "learning_rate": 3.960616462270429e-06, "loss": 0.6738, "step": 7002 }, { "epoch": 0.7153217568947906, "grad_norm": 1.448628712174153, "learning_rate": 3.957979949067751e-06, "loss": 0.5547, "step": 7003 }, { "epoch": 0.7154239019407559, "grad_norm": 1.5690766306953075, "learning_rate": 3.95534409716807e-06, "loss": 0.7901, "step": 7004 }, { "epoch": 0.7155260469867212, "grad_norm": 1.5334029487229923, "learning_rate": 3.952708906859887e-06, "loss": 0.6771, "step": 7005 }, { "epoch": 0.7156281920326865, "grad_norm": 1.4864462986841054, "learning_rate": 3.9500743784316206e-06, "loss": 0.717, "step": 7006 }, { "epoch": 0.7157303370786516, "grad_norm": 1.6609678120529443, "learning_rate": 3.947440512171623e-06, "loss": 0.72, "step": 7007 }, { "epoch": 0.7158324821246169, "grad_norm": 1.320077026184007, "learning_rate": 3.9448073083681685e-06, "loss": 0.5906, "step": 7008 }, { "epoch": 0.7159346271705822, "grad_norm": 1.6223332202334915, "learning_rate": 3.942174767309469e-06, "loss": 0.7974, "step": 7009 }, { "epoch": 0.7160367722165475, "grad_norm": 1.6065971506850496, "learning_rate": 3.9395428892836554e-06, "loss": 0.6773, "step": 7010 }, { "epoch": 0.7161389172625128, "grad_norm": 1.3829526396625, "learning_rate": 3.936911674578785e-06, "loss": 0.6503, "step": 7011 }, { "epoch": 0.7162410623084781, "grad_norm": 1.4655438965843948, "learning_rate": 3.934281123482851e-06, "loss": 0.6437, "step": 7012 }, { "epoch": 0.7163432073544433, "grad_norm": 1.303770766939529, "learning_rate": 3.931651236283769e-06, "loss": 0.585, "step": 7013 }, { "epoch": 0.7164453524004086, "grad_norm": 1.5095490572184738, "learning_rate": 3.929022013269376e-06, "loss": 0.7376, "step": 7014 }, { "epoch": 0.7165474974463738, "grad_norm": 1.522119712747717, "learning_rate": 3.926393454727448e-06, "loss": 0.6947, "step": 7015 }, { "epoch": 0.7166496424923391, "grad_norm": 1.475139075591249, "learning_rate": 3.923765560945683e-06, "loss": 0.7413, "step": 7016 }, { "epoch": 0.7167517875383044, "grad_norm": 1.590885631802265, "learning_rate": 3.9211383322117e-06, "loss": 0.7123, "step": 7017 }, { "epoch": 0.7168539325842697, "grad_norm": 1.6443483748199068, "learning_rate": 3.918511768813053e-06, "loss": 0.7947, "step": 7018 }, { "epoch": 0.716956077630235, "grad_norm": 1.4083675300888494, "learning_rate": 3.9158858710372205e-06, "loss": 0.697, "step": 7019 }, { "epoch": 0.7170582226762002, "grad_norm": 1.4583557746783096, "learning_rate": 3.913260639171614e-06, "loss": 0.7127, "step": 7020 }, { "epoch": 0.7171603677221655, "grad_norm": 1.5971502915000542, "learning_rate": 3.910636073503564e-06, "loss": 0.7369, "step": 7021 }, { "epoch": 0.7172625127681308, "grad_norm": 1.414205804962322, "learning_rate": 3.908012174320329e-06, "loss": 0.6655, "step": 7022 }, { "epoch": 0.717364657814096, "grad_norm": 1.404453918439644, "learning_rate": 3.905388941909095e-06, "loss": 0.5964, "step": 7023 }, { "epoch": 0.7174668028600613, "grad_norm": 1.4673374134692105, "learning_rate": 3.902766376556982e-06, "loss": 0.6769, "step": 7024 }, { "epoch": 0.7175689479060265, "grad_norm": 1.404688132525737, "learning_rate": 3.900144478551028e-06, "loss": 0.7689, "step": 7025 }, { "epoch": 0.7176710929519918, "grad_norm": 1.4072704858483545, "learning_rate": 3.897523248178202e-06, "loss": 0.6927, "step": 7026 }, { "epoch": 0.7177732379979571, "grad_norm": 1.444236287226445, "learning_rate": 3.894902685725399e-06, "loss": 0.6627, "step": 7027 }, { "epoch": 0.7178753830439224, "grad_norm": 1.435534095170892, "learning_rate": 3.892282791479438e-06, "loss": 0.7429, "step": 7028 }, { "epoch": 0.7179775280898877, "grad_norm": 1.4310946315005222, "learning_rate": 3.889663565727072e-06, "loss": 0.6564, "step": 7029 }, { "epoch": 0.7180796731358529, "grad_norm": 1.6263096407519817, "learning_rate": 3.887045008754981e-06, "loss": 0.7515, "step": 7030 }, { "epoch": 0.7181818181818181, "grad_norm": 1.4223494455700318, "learning_rate": 3.884427120849761e-06, "loss": 0.674, "step": 7031 }, { "epoch": 0.7182839632277834, "grad_norm": 1.3829281867394458, "learning_rate": 3.881809902297945e-06, "loss": 0.7106, "step": 7032 }, { "epoch": 0.7183861082737487, "grad_norm": 1.5425671289927854, "learning_rate": 3.879193353385985e-06, "loss": 0.7882, "step": 7033 }, { "epoch": 0.718488253319714, "grad_norm": 1.478219515082956, "learning_rate": 3.876577474400269e-06, "loss": 0.7238, "step": 7034 }, { "epoch": 0.7185903983656793, "grad_norm": 1.5372798387160067, "learning_rate": 3.873962265627106e-06, "loss": 0.7272, "step": 7035 }, { "epoch": 0.7186925434116446, "grad_norm": 1.4976429074576547, "learning_rate": 3.871347727352727e-06, "loss": 0.7289, "step": 7036 }, { "epoch": 0.7187946884576099, "grad_norm": 1.552556400668667, "learning_rate": 3.868733859863302e-06, "loss": 0.6202, "step": 7037 }, { "epoch": 0.718896833503575, "grad_norm": 1.5312789556181494, "learning_rate": 3.8661206634449145e-06, "loss": 0.7477, "step": 7038 }, { "epoch": 0.7189989785495403, "grad_norm": 1.4448015782734187, "learning_rate": 3.863508138383587e-06, "loss": 0.6774, "step": 7039 }, { "epoch": 0.7191011235955056, "grad_norm": 1.461959968335165, "learning_rate": 3.860896284965258e-06, "loss": 0.6163, "step": 7040 }, { "epoch": 0.7192032686414709, "grad_norm": 1.4815567449589533, "learning_rate": 3.858285103475796e-06, "loss": 0.5988, "step": 7041 }, { "epoch": 0.7193054136874362, "grad_norm": 1.5629535533684802, "learning_rate": 3.855674594200995e-06, "loss": 0.7161, "step": 7042 }, { "epoch": 0.7194075587334015, "grad_norm": 1.3457463589579566, "learning_rate": 3.853064757426583e-06, "loss": 0.6995, "step": 7043 }, { "epoch": 0.7195097037793667, "grad_norm": 1.5584706421016177, "learning_rate": 3.8504555934382015e-06, "loss": 0.68, "step": 7044 }, { "epoch": 0.719611848825332, "grad_norm": 1.4662350746784676, "learning_rate": 3.847847102521432e-06, "loss": 0.658, "step": 7045 }, { "epoch": 0.7197139938712972, "grad_norm": 1.4188881366868653, "learning_rate": 3.845239284961772e-06, "loss": 0.6267, "step": 7046 }, { "epoch": 0.7198161389172625, "grad_norm": 1.496800561754512, "learning_rate": 3.842632141044648e-06, "loss": 0.7227, "step": 7047 }, { "epoch": 0.7199182839632278, "grad_norm": 1.3977356849715123, "learning_rate": 3.8400256710554105e-06, "loss": 0.6527, "step": 7048 }, { "epoch": 0.720020429009193, "grad_norm": 1.5639474526378097, "learning_rate": 3.8374198752793465e-06, "loss": 0.6285, "step": 7049 }, { "epoch": 0.7201225740551583, "grad_norm": 1.5188239094557237, "learning_rate": 3.83481475400166e-06, "loss": 0.73, "step": 7050 }, { "epoch": 0.7202247191011236, "grad_norm": 1.5438991487309413, "learning_rate": 3.832210307507481e-06, "loss": 0.6782, "step": 7051 }, { "epoch": 0.7203268641470889, "grad_norm": 1.450365324602008, "learning_rate": 3.8296065360818654e-06, "loss": 0.6902, "step": 7052 }, { "epoch": 0.7204290091930541, "grad_norm": 1.5775740274142305, "learning_rate": 3.827003440009801e-06, "loss": 0.7509, "step": 7053 }, { "epoch": 0.7205311542390194, "grad_norm": 1.5936993252326161, "learning_rate": 3.824401019576202e-06, "loss": 0.7231, "step": 7054 }, { "epoch": 0.7206332992849847, "grad_norm": 1.4052472018197752, "learning_rate": 3.821799275065901e-06, "loss": 0.6809, "step": 7055 }, { "epoch": 0.7207354443309499, "grad_norm": 1.6340562175740863, "learning_rate": 3.819198206763662e-06, "loss": 0.6778, "step": 7056 }, { "epoch": 0.7208375893769152, "grad_norm": 1.5534429790957294, "learning_rate": 3.816597814954168e-06, "loss": 0.7628, "step": 7057 }, { "epoch": 0.7209397344228805, "grad_norm": 1.3576150178355606, "learning_rate": 3.8139980999220426e-06, "loss": 0.5862, "step": 7058 }, { "epoch": 0.7210418794688458, "grad_norm": 1.3692708540587757, "learning_rate": 3.8113990619518214e-06, "loss": 0.6034, "step": 7059 }, { "epoch": 0.7211440245148111, "grad_norm": 1.5062612610466932, "learning_rate": 3.808800701327967e-06, "loss": 0.6191, "step": 7060 }, { "epoch": 0.7212461695607763, "grad_norm": 1.481839888144394, "learning_rate": 3.8062030183348806e-06, "loss": 0.7233, "step": 7061 }, { "epoch": 0.7213483146067415, "grad_norm": 1.479973312481816, "learning_rate": 3.8036060132568708e-06, "loss": 0.6779, "step": 7062 }, { "epoch": 0.7214504596527068, "grad_norm": 1.5202882822887256, "learning_rate": 3.8010096863781908e-06, "loss": 0.6729, "step": 7063 }, { "epoch": 0.7215526046986721, "grad_norm": 1.6327802284402848, "learning_rate": 3.798414037983005e-06, "loss": 0.6329, "step": 7064 }, { "epoch": 0.7216547497446374, "grad_norm": 1.5073896989004747, "learning_rate": 3.79581906835541e-06, "loss": 0.6826, "step": 7065 }, { "epoch": 0.7217568947906027, "grad_norm": 1.4239633388829123, "learning_rate": 3.793224777779426e-06, "loss": 0.7346, "step": 7066 }, { "epoch": 0.721859039836568, "grad_norm": 1.4009700114636707, "learning_rate": 3.790631166538997e-06, "loss": 0.72, "step": 7067 }, { "epoch": 0.7219611848825332, "grad_norm": 1.4674404835529677, "learning_rate": 3.7880382349179978e-06, "loss": 0.6512, "step": 7068 }, { "epoch": 0.7220633299284984, "grad_norm": 1.5536633851504968, "learning_rate": 3.7854459832002312e-06, "loss": 0.5836, "step": 7069 }, { "epoch": 0.7221654749744637, "grad_norm": 1.4578115200460853, "learning_rate": 3.782854411669418e-06, "loss": 0.7717, "step": 7070 }, { "epoch": 0.722267620020429, "grad_norm": 1.420378745929112, "learning_rate": 3.780263520609204e-06, "loss": 0.5877, "step": 7071 }, { "epoch": 0.7223697650663943, "grad_norm": 1.6933517286350868, "learning_rate": 3.777673310303164e-06, "loss": 0.7097, "step": 7072 }, { "epoch": 0.7224719101123596, "grad_norm": 1.4256525968299967, "learning_rate": 3.775083781034804e-06, "loss": 0.6869, "step": 7073 }, { "epoch": 0.7225740551583248, "grad_norm": 1.5198555916083616, "learning_rate": 3.7724949330875447e-06, "loss": 0.6888, "step": 7074 }, { "epoch": 0.7226762002042901, "grad_norm": 1.4938551964791031, "learning_rate": 3.7699067667447396e-06, "loss": 0.7049, "step": 7075 }, { "epoch": 0.7227783452502554, "grad_norm": 1.4123405504628173, "learning_rate": 3.767319282289661e-06, "loss": 0.6369, "step": 7076 }, { "epoch": 0.7228804902962206, "grad_norm": 1.6328955447232858, "learning_rate": 3.764732480005513e-06, "loss": 0.6459, "step": 7077 }, { "epoch": 0.7229826353421859, "grad_norm": 1.4193681387451087, "learning_rate": 3.7621463601754273e-06, "loss": 0.7052, "step": 7078 }, { "epoch": 0.7230847803881512, "grad_norm": 1.4869512090919346, "learning_rate": 3.7595609230824525e-06, "loss": 0.649, "step": 7079 }, { "epoch": 0.7231869254341164, "grad_norm": 1.5796632070595396, "learning_rate": 3.7569761690095664e-06, "loss": 0.7175, "step": 7080 }, { "epoch": 0.7232890704800817, "grad_norm": 1.7077753631680226, "learning_rate": 3.754392098239672e-06, "loss": 0.6698, "step": 7081 }, { "epoch": 0.723391215526047, "grad_norm": 1.45969217145936, "learning_rate": 3.7518087110555943e-06, "loss": 0.6231, "step": 7082 }, { "epoch": 0.7234933605720123, "grad_norm": 1.4122795067251015, "learning_rate": 3.7492260077400934e-06, "loss": 0.6069, "step": 7083 }, { "epoch": 0.7235955056179775, "grad_norm": 1.5040644557105953, "learning_rate": 3.746643988575841e-06, "loss": 0.6834, "step": 7084 }, { "epoch": 0.7236976506639428, "grad_norm": 1.532657976039376, "learning_rate": 3.7440626538454484e-06, "loss": 0.7281, "step": 7085 }, { "epoch": 0.723799795709908, "grad_norm": 1.4650438947348665, "learning_rate": 3.741482003831439e-06, "loss": 0.6559, "step": 7086 }, { "epoch": 0.7239019407558733, "grad_norm": 1.4995369927961768, "learning_rate": 3.7389020388162656e-06, "loss": 0.6265, "step": 7087 }, { "epoch": 0.7240040858018386, "grad_norm": 1.5606598861071947, "learning_rate": 3.7363227590823115e-06, "loss": 0.6614, "step": 7088 }, { "epoch": 0.7241062308478039, "grad_norm": 1.5002165668915637, "learning_rate": 3.733744164911879e-06, "loss": 0.6644, "step": 7089 }, { "epoch": 0.7242083758937692, "grad_norm": 1.3573597979686727, "learning_rate": 3.7311662565871966e-06, "loss": 0.6999, "step": 7090 }, { "epoch": 0.7243105209397345, "grad_norm": 1.5651019121910072, "learning_rate": 3.728589034390413e-06, "loss": 0.6785, "step": 7091 }, { "epoch": 0.7244126659856996, "grad_norm": 1.352499112302756, "learning_rate": 3.7260124986036116e-06, "loss": 0.7253, "step": 7092 }, { "epoch": 0.7245148110316649, "grad_norm": 1.4814936750694578, "learning_rate": 3.7234366495088005e-06, "loss": 0.6392, "step": 7093 }, { "epoch": 0.7246169560776302, "grad_norm": 1.4845030672235702, "learning_rate": 3.7208614873879013e-06, "loss": 0.7076, "step": 7094 }, { "epoch": 0.7247191011235955, "grad_norm": 1.580611567656964, "learning_rate": 3.718287012522771e-06, "loss": 0.7371, "step": 7095 }, { "epoch": 0.7248212461695608, "grad_norm": 1.3830700958636837, "learning_rate": 3.7157132251951812e-06, "loss": 0.671, "step": 7096 }, { "epoch": 0.7249233912155261, "grad_norm": 1.6113487156070951, "learning_rate": 3.7131401256868428e-06, "loss": 0.7594, "step": 7097 }, { "epoch": 0.7250255362614914, "grad_norm": 1.3592419887689868, "learning_rate": 3.7105677142793795e-06, "loss": 0.6995, "step": 7098 }, { "epoch": 0.7251276813074566, "grad_norm": 1.3236772279693776, "learning_rate": 3.7079959912543427e-06, "loss": 0.6768, "step": 7099 }, { "epoch": 0.7252298263534218, "grad_norm": 1.3300000601187854, "learning_rate": 3.7054249568932077e-06, "loss": 0.5502, "step": 7100 }, { "epoch": 0.7253319713993871, "grad_norm": 1.467577643493302, "learning_rate": 3.70285461147738e-06, "loss": 0.6946, "step": 7101 }, { "epoch": 0.7254341164453524, "grad_norm": 1.4494053819354962, "learning_rate": 3.7002849552881815e-06, "loss": 0.7189, "step": 7102 }, { "epoch": 0.7255362614913177, "grad_norm": 1.5581260701782937, "learning_rate": 3.697715988606867e-06, "loss": 0.729, "step": 7103 }, { "epoch": 0.725638406537283, "grad_norm": 1.5866894844431803, "learning_rate": 3.6951477117146107e-06, "loss": 0.7443, "step": 7104 }, { "epoch": 0.7257405515832482, "grad_norm": 1.6474687016486003, "learning_rate": 3.6925801248925096e-06, "loss": 0.8058, "step": 7105 }, { "epoch": 0.7258426966292135, "grad_norm": 1.4128404825486578, "learning_rate": 3.690013228421586e-06, "loss": 0.6532, "step": 7106 }, { "epoch": 0.7259448416751788, "grad_norm": 1.6705538661096744, "learning_rate": 3.687447022582794e-06, "loss": 0.6811, "step": 7107 }, { "epoch": 0.726046986721144, "grad_norm": 1.4967103316949135, "learning_rate": 3.684881507657001e-06, "loss": 0.6834, "step": 7108 }, { "epoch": 0.7261491317671093, "grad_norm": 1.6543726034663198, "learning_rate": 3.682316683925011e-06, "loss": 0.8138, "step": 7109 }, { "epoch": 0.7262512768130746, "grad_norm": 1.5044237801891542, "learning_rate": 3.6797525516675414e-06, "loss": 0.6637, "step": 7110 }, { "epoch": 0.7263534218590398, "grad_norm": 1.3778905832099217, "learning_rate": 3.6771891111652347e-06, "loss": 0.5826, "step": 7111 }, { "epoch": 0.7264555669050051, "grad_norm": 1.4630210990796815, "learning_rate": 3.674626362698668e-06, "loss": 0.7679, "step": 7112 }, { "epoch": 0.7265577119509704, "grad_norm": 1.5365308947751668, "learning_rate": 3.672064306548333e-06, "loss": 0.7601, "step": 7113 }, { "epoch": 0.7266598569969357, "grad_norm": 1.449295538844758, "learning_rate": 3.6695029429946484e-06, "loss": 0.7215, "step": 7114 }, { "epoch": 0.7267620020429009, "grad_norm": 1.6272325789129607, "learning_rate": 3.666942272317956e-06, "loss": 0.7152, "step": 7115 }, { "epoch": 0.7268641470888662, "grad_norm": 1.486904217626116, "learning_rate": 3.6643822947985208e-06, "loss": 0.6623, "step": 7116 }, { "epoch": 0.7269662921348314, "grad_norm": 1.4123845721614765, "learning_rate": 3.6618230107165366e-06, "loss": 0.5766, "step": 7117 }, { "epoch": 0.7270684371807967, "grad_norm": 1.3959585615359724, "learning_rate": 3.659264420352122e-06, "loss": 0.6357, "step": 7118 }, { "epoch": 0.727170582226762, "grad_norm": 1.4126078328443816, "learning_rate": 3.656706523985313e-06, "loss": 0.6886, "step": 7119 }, { "epoch": 0.7272727272727273, "grad_norm": 1.6406001983046046, "learning_rate": 3.654149321896073e-06, "loss": 0.7197, "step": 7120 }, { "epoch": 0.7273748723186926, "grad_norm": 1.3952546470205065, "learning_rate": 3.6515928143642876e-06, "loss": 0.652, "step": 7121 }, { "epoch": 0.7274770173646579, "grad_norm": 1.3925626880905309, "learning_rate": 3.649037001669773e-06, "loss": 0.5609, "step": 7122 }, { "epoch": 0.727579162410623, "grad_norm": 1.4561756854550991, "learning_rate": 3.6464818840922623e-06, "loss": 0.6818, "step": 7123 }, { "epoch": 0.7276813074565883, "grad_norm": 1.495220544409117, "learning_rate": 3.6439274619114095e-06, "loss": 0.7383, "step": 7124 }, { "epoch": 0.7277834525025536, "grad_norm": 1.48774184775132, "learning_rate": 3.6413737354068067e-06, "loss": 0.7774, "step": 7125 }, { "epoch": 0.7278855975485189, "grad_norm": 1.624373724228595, "learning_rate": 3.6388207048579537e-06, "loss": 0.6348, "step": 7126 }, { "epoch": 0.7279877425944842, "grad_norm": 1.6616488704099306, "learning_rate": 3.636268370544288e-06, "loss": 0.7529, "step": 7127 }, { "epoch": 0.7280898876404495, "grad_norm": 1.3931581297431792, "learning_rate": 3.6337167327451596e-06, "loss": 0.6658, "step": 7128 }, { "epoch": 0.7281920326864147, "grad_norm": 1.4688661290422278, "learning_rate": 3.631165791739849e-06, "loss": 0.689, "step": 7129 }, { "epoch": 0.72829417773238, "grad_norm": 1.4982968394553628, "learning_rate": 3.6286155478075536e-06, "loss": 0.6903, "step": 7130 }, { "epoch": 0.7283963227783452, "grad_norm": 1.4409055752172903, "learning_rate": 3.626066001227405e-06, "loss": 0.6621, "step": 7131 }, { "epoch": 0.7284984678243105, "grad_norm": 1.4421167579036958, "learning_rate": 3.6235171522784495e-06, "loss": 0.7708, "step": 7132 }, { "epoch": 0.7286006128702758, "grad_norm": 1.3512700613370983, "learning_rate": 3.6209690012396636e-06, "loss": 0.7661, "step": 7133 }, { "epoch": 0.7287027579162411, "grad_norm": 1.546624688665969, "learning_rate": 3.618421548389942e-06, "loss": 0.6801, "step": 7134 }, { "epoch": 0.7288049029622063, "grad_norm": 1.4474796972220918, "learning_rate": 3.615874794008105e-06, "loss": 0.7565, "step": 7135 }, { "epoch": 0.7289070480081716, "grad_norm": 1.5045904675792385, "learning_rate": 3.613328738372893e-06, "loss": 0.7765, "step": 7136 }, { "epoch": 0.7290091930541369, "grad_norm": 1.3356333399849432, "learning_rate": 3.61078338176298e-06, "loss": 0.6626, "step": 7137 }, { "epoch": 0.7291113381001021, "grad_norm": 1.5806846091549118, "learning_rate": 3.608238724456954e-06, "loss": 0.5791, "step": 7138 }, { "epoch": 0.7292134831460674, "grad_norm": 1.5623368907765467, "learning_rate": 3.6056947667333297e-06, "loss": 0.7334, "step": 7139 }, { "epoch": 0.7293156281920327, "grad_norm": 1.4933888209447022, "learning_rate": 3.6031515088705406e-06, "loss": 0.7063, "step": 7140 }, { "epoch": 0.729417773237998, "grad_norm": 1.579515555301018, "learning_rate": 3.600608951146952e-06, "loss": 0.679, "step": 7141 }, { "epoch": 0.7295199182839632, "grad_norm": 1.4636003323639317, "learning_rate": 3.598067093840851e-06, "loss": 0.5558, "step": 7142 }, { "epoch": 0.7296220633299285, "grad_norm": 1.5299345311274193, "learning_rate": 3.595525937230444e-06, "loss": 0.6927, "step": 7143 }, { "epoch": 0.7297242083758938, "grad_norm": 1.6358456360435245, "learning_rate": 3.59298548159386e-06, "loss": 0.7249, "step": 7144 }, { "epoch": 0.7298263534218591, "grad_norm": 1.471630059044617, "learning_rate": 3.590445727209151e-06, "loss": 0.571, "step": 7145 }, { "epoch": 0.7299284984678243, "grad_norm": 1.589561516493663, "learning_rate": 3.5879066743543023e-06, "loss": 0.7061, "step": 7146 }, { "epoch": 0.7300306435137895, "grad_norm": 1.5118799599283819, "learning_rate": 3.58536832330721e-06, "loss": 0.7199, "step": 7147 }, { "epoch": 0.7301327885597548, "grad_norm": 1.7172277757291687, "learning_rate": 3.5828306743456965e-06, "loss": 0.6499, "step": 7148 }, { "epoch": 0.7302349336057201, "grad_norm": 1.4518023115557317, "learning_rate": 3.5802937277475147e-06, "loss": 0.6272, "step": 7149 }, { "epoch": 0.7303370786516854, "grad_norm": 1.4584611492893302, "learning_rate": 3.5777574837903295e-06, "loss": 0.6615, "step": 7150 }, { "epoch": 0.7304392236976507, "grad_norm": 1.6160825393862535, "learning_rate": 3.5752219427517386e-06, "loss": 0.7541, "step": 7151 }, { "epoch": 0.730541368743616, "grad_norm": 1.3967771381085305, "learning_rate": 3.5726871049092593e-06, "loss": 0.6769, "step": 7152 }, { "epoch": 0.7306435137895813, "grad_norm": 1.4304688706193205, "learning_rate": 3.570152970540327e-06, "loss": 0.6651, "step": 7153 }, { "epoch": 0.7307456588355464, "grad_norm": 1.5314516705188512, "learning_rate": 3.567619539922307e-06, "loss": 0.6655, "step": 7154 }, { "epoch": 0.7308478038815117, "grad_norm": 1.5354712002342537, "learning_rate": 3.565086813332481e-06, "loss": 0.7095, "step": 7155 }, { "epoch": 0.730949948927477, "grad_norm": 1.4956568529942784, "learning_rate": 3.5625547910480607e-06, "loss": 0.6174, "step": 7156 }, { "epoch": 0.7310520939734423, "grad_norm": 1.2963747770743712, "learning_rate": 3.5600234733461812e-06, "loss": 0.5643, "step": 7157 }, { "epoch": 0.7311542390194076, "grad_norm": 1.5265117940517414, "learning_rate": 3.557492860503893e-06, "loss": 0.6967, "step": 7158 }, { "epoch": 0.7312563840653729, "grad_norm": 1.445588013400257, "learning_rate": 3.5549629527981733e-06, "loss": 0.7311, "step": 7159 }, { "epoch": 0.7313585291113381, "grad_norm": 1.4786910753124427, "learning_rate": 3.552433750505919e-06, "loss": 0.7194, "step": 7160 }, { "epoch": 0.7314606741573034, "grad_norm": 1.4029233753362451, "learning_rate": 3.5499052539039603e-06, "loss": 0.7547, "step": 7161 }, { "epoch": 0.7315628192032686, "grad_norm": 1.467078022122068, "learning_rate": 3.5473774632690395e-06, "loss": 0.6511, "step": 7162 }, { "epoch": 0.7316649642492339, "grad_norm": 1.4830073117511937, "learning_rate": 3.5448503788778234e-06, "loss": 0.5864, "step": 7163 }, { "epoch": 0.7317671092951992, "grad_norm": 1.511643053701804, "learning_rate": 3.5423240010069004e-06, "loss": 0.6341, "step": 7164 }, { "epoch": 0.7318692543411645, "grad_norm": 1.604933532719099, "learning_rate": 3.5397983299327876e-06, "loss": 0.7123, "step": 7165 }, { "epoch": 0.7319713993871297, "grad_norm": 1.3599871792762637, "learning_rate": 3.537273365931926e-06, "loss": 0.767, "step": 7166 }, { "epoch": 0.732073544433095, "grad_norm": 1.5201300117831562, "learning_rate": 3.5347491092806686e-06, "loss": 0.6454, "step": 7167 }, { "epoch": 0.7321756894790603, "grad_norm": 1.5895356066540456, "learning_rate": 3.532225560255298e-06, "loss": 0.7863, "step": 7168 }, { "epoch": 0.7322778345250255, "grad_norm": 1.461497461510232, "learning_rate": 3.52970271913202e-06, "loss": 0.7136, "step": 7169 }, { "epoch": 0.7323799795709908, "grad_norm": 1.452832453064315, "learning_rate": 3.527180586186956e-06, "loss": 0.622, "step": 7170 }, { "epoch": 0.732482124616956, "grad_norm": 1.5542932723616634, "learning_rate": 3.524659161696161e-06, "loss": 0.6089, "step": 7171 }, { "epoch": 0.7325842696629213, "grad_norm": 1.5202432629788885, "learning_rate": 3.5221384459356022e-06, "loss": 0.7928, "step": 7172 }, { "epoch": 0.7326864147088866, "grad_norm": 1.5314338168712633, "learning_rate": 3.5196184391811785e-06, "loss": 0.6573, "step": 7173 }, { "epoch": 0.7327885597548519, "grad_norm": 1.4972369643738652, "learning_rate": 3.517099141708703e-06, "loss": 0.6848, "step": 7174 }, { "epoch": 0.7328907048008172, "grad_norm": 1.4640711603988523, "learning_rate": 3.5145805537939124e-06, "loss": 0.7297, "step": 7175 }, { "epoch": 0.7329928498467825, "grad_norm": 1.5985663475466105, "learning_rate": 3.512062675712474e-06, "loss": 0.7752, "step": 7176 }, { "epoch": 0.7330949948927477, "grad_norm": 1.4900808781794106, "learning_rate": 3.5095455077399663e-06, "loss": 0.6853, "step": 7177 }, { "epoch": 0.7331971399387129, "grad_norm": 1.4542986060158645, "learning_rate": 3.5070290501518978e-06, "loss": 0.7126, "step": 7178 }, { "epoch": 0.7332992849846782, "grad_norm": 1.5232548757198607, "learning_rate": 3.50451330322369e-06, "loss": 0.6991, "step": 7179 }, { "epoch": 0.7334014300306435, "grad_norm": 1.512530032272857, "learning_rate": 3.5019982672306986e-06, "loss": 0.6829, "step": 7180 }, { "epoch": 0.7335035750766088, "grad_norm": 1.51292559122578, "learning_rate": 3.4994839424481974e-06, "loss": 0.6688, "step": 7181 }, { "epoch": 0.7336057201225741, "grad_norm": 1.59561868685321, "learning_rate": 3.49697032915138e-06, "loss": 0.7978, "step": 7182 }, { "epoch": 0.7337078651685394, "grad_norm": 1.4013467532466148, "learning_rate": 3.494457427615361e-06, "loss": 0.6973, "step": 7183 }, { "epoch": 0.7338100102145046, "grad_norm": 1.4714948005857993, "learning_rate": 3.4919452381151753e-06, "loss": 0.6588, "step": 7184 }, { "epoch": 0.7339121552604698, "grad_norm": 1.3678885165041412, "learning_rate": 3.4894337609257923e-06, "loss": 0.5918, "step": 7185 }, { "epoch": 0.7340143003064351, "grad_norm": 1.3923041447997604, "learning_rate": 3.4869229963220906e-06, "loss": 0.5693, "step": 7186 }, { "epoch": 0.7341164453524004, "grad_norm": 1.345741310302056, "learning_rate": 3.4844129445788754e-06, "loss": 0.6793, "step": 7187 }, { "epoch": 0.7342185903983657, "grad_norm": 1.4503587892912575, "learning_rate": 3.4819036059708687e-06, "loss": 0.6342, "step": 7188 }, { "epoch": 0.734320735444331, "grad_norm": 1.569453285302973, "learning_rate": 3.4793949807727267e-06, "loss": 0.7437, "step": 7189 }, { "epoch": 0.7344228804902962, "grad_norm": 1.519197692524186, "learning_rate": 3.476887069259015e-06, "loss": 0.6797, "step": 7190 }, { "epoch": 0.7345250255362615, "grad_norm": 1.4453106322507356, "learning_rate": 3.47437987170423e-06, "loss": 0.7073, "step": 7191 }, { "epoch": 0.7346271705822267, "grad_norm": 1.4883022686074887, "learning_rate": 3.471873388382785e-06, "loss": 0.7629, "step": 7192 }, { "epoch": 0.734729315628192, "grad_norm": 1.3957535034379795, "learning_rate": 3.4693676195690153e-06, "loss": 0.6807, "step": 7193 }, { "epoch": 0.7348314606741573, "grad_norm": 1.5067281858837211, "learning_rate": 3.4668625655371746e-06, "loss": 0.8111, "step": 7194 }, { "epoch": 0.7349336057201226, "grad_norm": 1.427991742552689, "learning_rate": 3.4643582265614517e-06, "loss": 0.6167, "step": 7195 }, { "epoch": 0.7350357507660878, "grad_norm": 1.4159163593659914, "learning_rate": 3.4618546029159396e-06, "loss": 0.5929, "step": 7196 }, { "epoch": 0.7351378958120531, "grad_norm": 1.6577175125691186, "learning_rate": 3.4593516948746684e-06, "loss": 0.7623, "step": 7197 }, { "epoch": 0.7352400408580184, "grad_norm": 1.495075436371532, "learning_rate": 3.45684950271158e-06, "loss": 0.7823, "step": 7198 }, { "epoch": 0.7353421859039837, "grad_norm": 1.5950963923451145, "learning_rate": 3.4543480267005382e-06, "loss": 0.696, "step": 7199 }, { "epoch": 0.7354443309499489, "grad_norm": 1.458427327218405, "learning_rate": 3.451847267115337e-06, "loss": 0.6556, "step": 7200 }, { "epoch": 0.7355464759959142, "grad_norm": 1.4449637934544628, "learning_rate": 3.4493472242296822e-06, "loss": 0.7478, "step": 7201 }, { "epoch": 0.7356486210418794, "grad_norm": 1.5049990469872028, "learning_rate": 3.446847898317207e-06, "loss": 0.632, "step": 7202 }, { "epoch": 0.7357507660878447, "grad_norm": 1.449369441550173, "learning_rate": 3.444349289651463e-06, "loss": 0.6109, "step": 7203 }, { "epoch": 0.73585291113381, "grad_norm": 1.3839527223524368, "learning_rate": 3.4418513985059177e-06, "loss": 0.6787, "step": 7204 }, { "epoch": 0.7359550561797753, "grad_norm": 1.431790013002358, "learning_rate": 3.439354225153981e-06, "loss": 0.7069, "step": 7205 }, { "epoch": 0.7360572012257406, "grad_norm": 1.4624326882996626, "learning_rate": 3.436857769868963e-06, "loss": 0.6592, "step": 7206 }, { "epoch": 0.7361593462717059, "grad_norm": 1.5310688826032746, "learning_rate": 3.4343620329241032e-06, "loss": 0.6343, "step": 7207 }, { "epoch": 0.736261491317671, "grad_norm": 1.4627653718402227, "learning_rate": 3.4318670145925602e-06, "loss": 0.5845, "step": 7208 }, { "epoch": 0.7363636363636363, "grad_norm": 1.4752384829460108, "learning_rate": 3.429372715147412e-06, "loss": 0.6331, "step": 7209 }, { "epoch": 0.7364657814096016, "grad_norm": 1.5428244207008903, "learning_rate": 3.4268791348616693e-06, "loss": 0.7231, "step": 7210 }, { "epoch": 0.7365679264555669, "grad_norm": 1.5561949164468831, "learning_rate": 3.4243862740082524e-06, "loss": 0.719, "step": 7211 }, { "epoch": 0.7366700715015322, "grad_norm": 1.6050505235409525, "learning_rate": 3.421894132860002e-06, "loss": 0.7217, "step": 7212 }, { "epoch": 0.7367722165474975, "grad_norm": 1.4638893459042792, "learning_rate": 3.4194027116896924e-06, "loss": 0.6663, "step": 7213 }, { "epoch": 0.7368743615934628, "grad_norm": 1.6646856601349274, "learning_rate": 3.416912010770005e-06, "loss": 0.7543, "step": 7214 }, { "epoch": 0.736976506639428, "grad_norm": 1.449352196068496, "learning_rate": 3.4144220303735533e-06, "loss": 0.6968, "step": 7215 }, { "epoch": 0.7370786516853932, "grad_norm": 1.4720725308435278, "learning_rate": 3.4119327707728654e-06, "loss": 0.7161, "step": 7216 }, { "epoch": 0.7371807967313585, "grad_norm": 1.550236175244078, "learning_rate": 3.4094442322403933e-06, "loss": 0.6046, "step": 7217 }, { "epoch": 0.7372829417773238, "grad_norm": 1.5106948286610038, "learning_rate": 3.4069564150485034e-06, "loss": 0.7142, "step": 7218 }, { "epoch": 0.7373850868232891, "grad_norm": 1.4180692817344909, "learning_rate": 3.4044693194694976e-06, "loss": 0.6724, "step": 7219 }, { "epoch": 0.7374872318692544, "grad_norm": 1.5140786796032497, "learning_rate": 3.401982945775583e-06, "loss": 0.6531, "step": 7220 }, { "epoch": 0.7375893769152196, "grad_norm": 1.473493152878974, "learning_rate": 3.3994972942389005e-06, "loss": 0.6631, "step": 7221 }, { "epoch": 0.7376915219611849, "grad_norm": 1.375794971763795, "learning_rate": 3.3970123651315045e-06, "loss": 0.6411, "step": 7222 }, { "epoch": 0.7377936670071501, "grad_norm": 1.270011343452332, "learning_rate": 3.3945281587253708e-06, "loss": 0.6038, "step": 7223 }, { "epoch": 0.7378958120531154, "grad_norm": 1.4326292711052482, "learning_rate": 3.392044675292394e-06, "loss": 0.6222, "step": 7224 }, { "epoch": 0.7379979570990807, "grad_norm": 1.4776069485012797, "learning_rate": 3.3895619151044003e-06, "loss": 0.7445, "step": 7225 }, { "epoch": 0.738100102145046, "grad_norm": 1.4807192178262323, "learning_rate": 3.387079878433126e-06, "loss": 0.6391, "step": 7226 }, { "epoch": 0.7382022471910112, "grad_norm": 1.5574698102052305, "learning_rate": 3.3845985655502313e-06, "loss": 0.6874, "step": 7227 }, { "epoch": 0.7383043922369765, "grad_norm": 1.3324613031301005, "learning_rate": 3.382117976727295e-06, "loss": 0.6133, "step": 7228 }, { "epoch": 0.7384065372829418, "grad_norm": 1.5188391532322971, "learning_rate": 3.379638112235821e-06, "loss": 0.7132, "step": 7229 }, { "epoch": 0.7385086823289071, "grad_norm": 1.3869807491225528, "learning_rate": 3.3771589723472364e-06, "loss": 0.6558, "step": 7230 }, { "epoch": 0.7386108273748723, "grad_norm": 1.4928145566997701, "learning_rate": 3.3746805573328824e-06, "loss": 0.5816, "step": 7231 }, { "epoch": 0.7387129724208376, "grad_norm": 1.4688092257406549, "learning_rate": 3.3722028674640207e-06, "loss": 0.7525, "step": 7232 }, { "epoch": 0.7388151174668028, "grad_norm": 1.4174700517716656, "learning_rate": 3.3697259030118336e-06, "loss": 0.6307, "step": 7233 }, { "epoch": 0.7389172625127681, "grad_norm": 1.4872439760387615, "learning_rate": 3.367249664247434e-06, "loss": 0.752, "step": 7234 }, { "epoch": 0.7390194075587334, "grad_norm": 1.458537152772094, "learning_rate": 3.364774151441844e-06, "loss": 0.6458, "step": 7235 }, { "epoch": 0.7391215526046987, "grad_norm": 1.5278031342504776, "learning_rate": 3.3622993648660063e-06, "loss": 0.6955, "step": 7236 }, { "epoch": 0.739223697650664, "grad_norm": 1.5433173660474409, "learning_rate": 3.3598253047907958e-06, "loss": 0.7985, "step": 7237 }, { "epoch": 0.7393258426966293, "grad_norm": 1.4967563310210548, "learning_rate": 3.3573519714869916e-06, "loss": 0.6981, "step": 7238 }, { "epoch": 0.7394279877425944, "grad_norm": 1.4624890046214614, "learning_rate": 3.3548793652253098e-06, "loss": 0.6894, "step": 7239 }, { "epoch": 0.7395301327885597, "grad_norm": 1.60549234977735, "learning_rate": 3.3524074862763743e-06, "loss": 0.7191, "step": 7240 }, { "epoch": 0.739632277834525, "grad_norm": 1.625812735779238, "learning_rate": 3.349936334910735e-06, "loss": 0.6513, "step": 7241 }, { "epoch": 0.7397344228804903, "grad_norm": 1.5059778448795302, "learning_rate": 3.3474659113988596e-06, "loss": 0.6758, "step": 7242 }, { "epoch": 0.7398365679264556, "grad_norm": 1.4672863320748002, "learning_rate": 3.344996216011135e-06, "loss": 0.7758, "step": 7243 }, { "epoch": 0.7399387129724209, "grad_norm": 1.3542951773587226, "learning_rate": 3.342527249017875e-06, "loss": 0.7247, "step": 7244 }, { "epoch": 0.7400408580183861, "grad_norm": 1.551796242630123, "learning_rate": 3.3400590106893118e-06, "loss": 0.7712, "step": 7245 }, { "epoch": 0.7401430030643513, "grad_norm": 1.358722244171248, "learning_rate": 3.3375915012955916e-06, "loss": 0.6043, "step": 7246 }, { "epoch": 0.7402451481103166, "grad_norm": 1.54651746690138, "learning_rate": 3.3351247211067874e-06, "loss": 0.6831, "step": 7247 }, { "epoch": 0.7403472931562819, "grad_norm": 1.352675279889106, "learning_rate": 3.3326586703928853e-06, "loss": 0.7196, "step": 7248 }, { "epoch": 0.7404494382022472, "grad_norm": 1.551020232396347, "learning_rate": 3.3301933494238013e-06, "loss": 0.7378, "step": 7249 }, { "epoch": 0.7405515832482125, "grad_norm": 1.7399529116090866, "learning_rate": 3.327728758469366e-06, "loss": 0.8401, "step": 7250 }, { "epoch": 0.7406537282941777, "grad_norm": 1.4847729055160444, "learning_rate": 3.3252648977993287e-06, "loss": 0.6599, "step": 7251 }, { "epoch": 0.740755873340143, "grad_norm": 1.4652868008733393, "learning_rate": 3.322801767683357e-06, "loss": 0.6007, "step": 7252 }, { "epoch": 0.7408580183861083, "grad_norm": 1.574100038754834, "learning_rate": 3.3203393683910458e-06, "loss": 0.6654, "step": 7253 }, { "epoch": 0.7409601634320735, "grad_norm": 1.4632781804503752, "learning_rate": 3.3178777001919093e-06, "loss": 0.6738, "step": 7254 }, { "epoch": 0.7410623084780388, "grad_norm": 1.5213587181308414, "learning_rate": 3.315416763355377e-06, "loss": 0.6601, "step": 7255 }, { "epoch": 0.7411644535240041, "grad_norm": 1.513228078561968, "learning_rate": 3.3129565581507973e-06, "loss": 0.7303, "step": 7256 }, { "epoch": 0.7412665985699693, "grad_norm": 1.4967698844966448, "learning_rate": 3.3104970848474437e-06, "loss": 0.6814, "step": 7257 }, { "epoch": 0.7413687436159346, "grad_norm": 1.4222064201541127, "learning_rate": 3.3080383437145026e-06, "loss": 0.7067, "step": 7258 }, { "epoch": 0.7414708886618999, "grad_norm": 1.490917076007063, "learning_rate": 3.305580335021091e-06, "loss": 0.6712, "step": 7259 }, { "epoch": 0.7415730337078652, "grad_norm": 1.390164529893434, "learning_rate": 3.303123059036234e-06, "loss": 0.6398, "step": 7260 }, { "epoch": 0.7416751787538305, "grad_norm": 1.5219966094539366, "learning_rate": 3.3006665160288886e-06, "loss": 0.724, "step": 7261 }, { "epoch": 0.7417773237997957, "grad_norm": 1.4100850582186577, "learning_rate": 3.2982107062679213e-06, "loss": 0.5999, "step": 7262 }, { "epoch": 0.741879468845761, "grad_norm": 1.2663477616788879, "learning_rate": 3.295755630022118e-06, "loss": 0.6376, "step": 7263 }, { "epoch": 0.7419816138917262, "grad_norm": 1.4460492280077328, "learning_rate": 3.2933012875601967e-06, "loss": 0.6815, "step": 7264 }, { "epoch": 0.7420837589376915, "grad_norm": 1.4435069223224857, "learning_rate": 3.2908476791507826e-06, "loss": 0.6855, "step": 7265 }, { "epoch": 0.7421859039836568, "grad_norm": 1.3340386701722808, "learning_rate": 3.2883948050624236e-06, "loss": 0.6297, "step": 7266 }, { "epoch": 0.7422880490296221, "grad_norm": 1.4474932404420011, "learning_rate": 3.285942665563587e-06, "loss": 0.6878, "step": 7267 }, { "epoch": 0.7423901940755874, "grad_norm": 1.6095537947457488, "learning_rate": 3.2834912609226633e-06, "loss": 0.6765, "step": 7268 }, { "epoch": 0.7424923391215527, "grad_norm": 1.3510676801884707, "learning_rate": 3.2810405914079645e-06, "loss": 0.6259, "step": 7269 }, { "epoch": 0.7425944841675178, "grad_norm": 1.4110180195028938, "learning_rate": 3.2785906572877135e-06, "loss": 0.7216, "step": 7270 }, { "epoch": 0.7426966292134831, "grad_norm": 1.4087824912598197, "learning_rate": 3.276141458830057e-06, "loss": 0.6014, "step": 7271 }, { "epoch": 0.7427987742594484, "grad_norm": 1.3623740061496348, "learning_rate": 3.2736929963030596e-06, "loss": 0.5993, "step": 7272 }, { "epoch": 0.7429009193054137, "grad_norm": 1.38695352408741, "learning_rate": 3.271245269974712e-06, "loss": 0.6104, "step": 7273 }, { "epoch": 0.743003064351379, "grad_norm": 1.43493684614806, "learning_rate": 3.268798280112917e-06, "loss": 0.6218, "step": 7274 }, { "epoch": 0.7431052093973443, "grad_norm": 1.5346958965523905, "learning_rate": 3.2663520269855e-06, "loss": 0.7025, "step": 7275 }, { "epoch": 0.7432073544433095, "grad_norm": 1.5065464684883334, "learning_rate": 3.2639065108601995e-06, "loss": 0.6544, "step": 7276 }, { "epoch": 0.7433094994892747, "grad_norm": 1.5840414921562407, "learning_rate": 3.261461732004688e-06, "loss": 0.7125, "step": 7277 }, { "epoch": 0.74341164453524, "grad_norm": 1.430433024442273, "learning_rate": 3.25901769068654e-06, "loss": 0.643, "step": 7278 }, { "epoch": 0.7435137895812053, "grad_norm": 1.5975369393170706, "learning_rate": 3.2565743871732634e-06, "loss": 0.711, "step": 7279 }, { "epoch": 0.7436159346271706, "grad_norm": 1.5894228550897576, "learning_rate": 3.2541318217322782e-06, "loss": 0.6307, "step": 7280 }, { "epoch": 0.7437180796731359, "grad_norm": 1.5935119865717782, "learning_rate": 3.251689994630923e-06, "loss": 0.6238, "step": 7281 }, { "epoch": 0.7438202247191011, "grad_norm": 1.5279066014904128, "learning_rate": 3.249248906136454e-06, "loss": 0.7027, "step": 7282 }, { "epoch": 0.7439223697650664, "grad_norm": 1.4985212237329442, "learning_rate": 3.246808556516058e-06, "loss": 0.628, "step": 7283 }, { "epoch": 0.7440245148110317, "grad_norm": 1.612355435294389, "learning_rate": 3.2443689460368256e-06, "loss": 0.6772, "step": 7284 }, { "epoch": 0.7441266598569969, "grad_norm": 1.3842280240303255, "learning_rate": 3.2419300749657788e-06, "loss": 0.6964, "step": 7285 }, { "epoch": 0.7442288049029622, "grad_norm": 1.5756630060754682, "learning_rate": 3.2394919435698526e-06, "loss": 0.8375, "step": 7286 }, { "epoch": 0.7443309499489275, "grad_norm": 1.5380175848027595, "learning_rate": 3.2370545521158968e-06, "loss": 0.608, "step": 7287 }, { "epoch": 0.7444330949948927, "grad_norm": 1.5228010398601513, "learning_rate": 3.2346179008706936e-06, "loss": 0.7184, "step": 7288 }, { "epoch": 0.744535240040858, "grad_norm": 1.45623032112833, "learning_rate": 3.2321819901009323e-06, "loss": 0.625, "step": 7289 }, { "epoch": 0.7446373850868233, "grad_norm": 1.4247644509637543, "learning_rate": 3.229746820073224e-06, "loss": 0.6352, "step": 7290 }, { "epoch": 0.7447395301327886, "grad_norm": 1.4712841668249657, "learning_rate": 3.2273123910541006e-06, "loss": 0.609, "step": 7291 }, { "epoch": 0.7448416751787539, "grad_norm": 1.5116294547848812, "learning_rate": 3.2248787033100058e-06, "loss": 0.802, "step": 7292 }, { "epoch": 0.744943820224719, "grad_norm": 1.6310981821690147, "learning_rate": 3.2224457571073196e-06, "loss": 0.7314, "step": 7293 }, { "epoch": 0.7450459652706843, "grad_norm": 1.497685927433057, "learning_rate": 3.2200135527123256e-06, "loss": 0.6653, "step": 7294 }, { "epoch": 0.7451481103166496, "grad_norm": 1.5585276616226476, "learning_rate": 3.217582090391228e-06, "loss": 0.6928, "step": 7295 }, { "epoch": 0.7452502553626149, "grad_norm": 1.6122584920978522, "learning_rate": 3.215151370410152e-06, "loss": 0.7437, "step": 7296 }, { "epoch": 0.7453524004085802, "grad_norm": 1.4281443866998378, "learning_rate": 3.2127213930351398e-06, "loss": 0.7053, "step": 7297 }, { "epoch": 0.7454545454545455, "grad_norm": 1.6461917705663103, "learning_rate": 3.2102921585321587e-06, "loss": 0.7238, "step": 7298 }, { "epoch": 0.7455566905005108, "grad_norm": 1.4923356043669864, "learning_rate": 3.207863667167088e-06, "loss": 0.6422, "step": 7299 }, { "epoch": 0.745658835546476, "grad_norm": 1.58967118620895, "learning_rate": 3.2054359192057238e-06, "loss": 0.6988, "step": 7300 }, { "epoch": 0.7457609805924412, "grad_norm": 1.526901592081634, "learning_rate": 3.2030089149137923e-06, "loss": 0.7652, "step": 7301 }, { "epoch": 0.7458631256384065, "grad_norm": 1.515780907718703, "learning_rate": 3.200582654556922e-06, "loss": 0.6502, "step": 7302 }, { "epoch": 0.7459652706843718, "grad_norm": 1.5548364514754278, "learning_rate": 3.198157138400677e-06, "loss": 0.6859, "step": 7303 }, { "epoch": 0.7460674157303371, "grad_norm": 1.6106308380089376, "learning_rate": 3.1957323667105277e-06, "loss": 0.7171, "step": 7304 }, { "epoch": 0.7461695607763024, "grad_norm": 1.4965631492150013, "learning_rate": 3.193308339751866e-06, "loss": 0.7202, "step": 7305 }, { "epoch": 0.7462717058222677, "grad_norm": 1.5051339652885638, "learning_rate": 3.190885057790002e-06, "loss": 0.7318, "step": 7306 }, { "epoch": 0.7463738508682329, "grad_norm": 1.477044628838819, "learning_rate": 3.188462521090171e-06, "loss": 0.6794, "step": 7307 }, { "epoch": 0.7464759959141981, "grad_norm": 1.3917881719476153, "learning_rate": 3.1860407299175145e-06, "loss": 0.7053, "step": 7308 }, { "epoch": 0.7465781409601634, "grad_norm": 1.4609751502695738, "learning_rate": 3.183619684537106e-06, "loss": 0.7413, "step": 7309 }, { "epoch": 0.7466802860061287, "grad_norm": 1.3920167427784755, "learning_rate": 3.1811993852139257e-06, "loss": 0.6667, "step": 7310 }, { "epoch": 0.746782431052094, "grad_norm": 1.442138760045788, "learning_rate": 3.1787798322128794e-06, "loss": 0.658, "step": 7311 }, { "epoch": 0.7468845760980592, "grad_norm": 1.4984865352180206, "learning_rate": 3.1763610257987844e-06, "loss": 0.7049, "step": 7312 }, { "epoch": 0.7469867211440245, "grad_norm": 1.491174203291151, "learning_rate": 3.173942966236386e-06, "loss": 0.7258, "step": 7313 }, { "epoch": 0.7470888661899898, "grad_norm": 1.5426439857598107, "learning_rate": 3.1715256537903404e-06, "loss": 0.5599, "step": 7314 }, { "epoch": 0.7471910112359551, "grad_norm": 1.4738893994846944, "learning_rate": 3.169109088725224e-06, "loss": 0.5835, "step": 7315 }, { "epoch": 0.7472931562819203, "grad_norm": 1.4424190877182772, "learning_rate": 3.1666932713055285e-06, "loss": 0.5675, "step": 7316 }, { "epoch": 0.7473953013278856, "grad_norm": 1.3693468486433427, "learning_rate": 3.1642782017956684e-06, "loss": 0.6681, "step": 7317 }, { "epoch": 0.7474974463738508, "grad_norm": 1.5483712880270935, "learning_rate": 3.16186388045998e-06, "loss": 0.7208, "step": 7318 }, { "epoch": 0.7475995914198161, "grad_norm": 1.4007978433216566, "learning_rate": 3.159450307562707e-06, "loss": 0.5058, "step": 7319 }, { "epoch": 0.7477017364657814, "grad_norm": 1.4125182555935674, "learning_rate": 3.1570374833680173e-06, "loss": 0.6628, "step": 7320 }, { "epoch": 0.7478038815117467, "grad_norm": 1.4670233854278145, "learning_rate": 3.154625408139993e-06, "loss": 0.5997, "step": 7321 }, { "epoch": 0.747906026557712, "grad_norm": 1.5112228224401663, "learning_rate": 3.152214082142644e-06, "loss": 0.7062, "step": 7322 }, { "epoch": 0.7480081716036773, "grad_norm": 1.5362015298202545, "learning_rate": 3.149803505639888e-06, "loss": 0.6824, "step": 7323 }, { "epoch": 0.7481103166496424, "grad_norm": 1.4122900656734656, "learning_rate": 3.1473936788955606e-06, "loss": 0.6665, "step": 7324 }, { "epoch": 0.7482124616956077, "grad_norm": 1.4945991531306546, "learning_rate": 3.1449846021734256e-06, "loss": 0.6677, "step": 7325 }, { "epoch": 0.748314606741573, "grad_norm": 1.574172493101341, "learning_rate": 3.1425762757371514e-06, "loss": 0.6022, "step": 7326 }, { "epoch": 0.7484167517875383, "grad_norm": 1.4955029150719963, "learning_rate": 3.1401686998503377e-06, "loss": 0.7323, "step": 7327 }, { "epoch": 0.7485188968335036, "grad_norm": 1.515072363249974, "learning_rate": 3.1377618747764914e-06, "loss": 0.6286, "step": 7328 }, { "epoch": 0.7486210418794689, "grad_norm": 1.462095536505743, "learning_rate": 3.135355800779042e-06, "loss": 0.6922, "step": 7329 }, { "epoch": 0.7487231869254342, "grad_norm": 1.3026849421733193, "learning_rate": 3.132950478121336e-06, "loss": 0.5579, "step": 7330 }, { "epoch": 0.7488253319713993, "grad_norm": 1.3130148444370955, "learning_rate": 3.1305459070666324e-06, "loss": 0.623, "step": 7331 }, { "epoch": 0.7489274770173646, "grad_norm": 1.4763716259279873, "learning_rate": 3.128142087878118e-06, "loss": 0.7339, "step": 7332 }, { "epoch": 0.7490296220633299, "grad_norm": 1.5549038305097915, "learning_rate": 3.1257390208188954e-06, "loss": 0.7197, "step": 7333 }, { "epoch": 0.7491317671092952, "grad_norm": 1.5393353675810355, "learning_rate": 3.1233367061519782e-06, "loss": 0.658, "step": 7334 }, { "epoch": 0.7492339121552605, "grad_norm": 1.4142178875609257, "learning_rate": 3.1209351441403013e-06, "loss": 0.6252, "step": 7335 }, { "epoch": 0.7493360572012258, "grad_norm": 1.3927300083760394, "learning_rate": 3.1185343350467135e-06, "loss": 0.6814, "step": 7336 }, { "epoch": 0.749438202247191, "grad_norm": 1.432365673442275, "learning_rate": 3.116134279133992e-06, "loss": 0.7229, "step": 7337 }, { "epoch": 0.7495403472931563, "grad_norm": 1.470825030166371, "learning_rate": 3.1137349766648215e-06, "loss": 0.5936, "step": 7338 }, { "epoch": 0.7496424923391215, "grad_norm": 1.3562483228082394, "learning_rate": 3.1113364279018075e-06, "loss": 0.6443, "step": 7339 }, { "epoch": 0.7497446373850868, "grad_norm": 1.5846451805343864, "learning_rate": 3.108938633107469e-06, "loss": 0.7362, "step": 7340 }, { "epoch": 0.7498467824310521, "grad_norm": 1.4437187461839855, "learning_rate": 3.1065415925442487e-06, "loss": 0.7117, "step": 7341 }, { "epoch": 0.7499489274770174, "grad_norm": 1.5059706587337267, "learning_rate": 3.1041453064745073e-06, "loss": 0.6752, "step": 7342 }, { "epoch": 0.7500510725229826, "grad_norm": 1.5932847338564633, "learning_rate": 3.1017497751605184e-06, "loss": 0.7279, "step": 7343 }, { "epoch": 0.7501532175689479, "grad_norm": 1.3606241091957954, "learning_rate": 3.0993549988644733e-06, "loss": 0.6704, "step": 7344 }, { "epoch": 0.7502553626149132, "grad_norm": 1.4568792028542599, "learning_rate": 3.096960977848482e-06, "loss": 0.7453, "step": 7345 }, { "epoch": 0.7503575076608785, "grad_norm": 1.5080967527396887, "learning_rate": 3.0945677123745687e-06, "loss": 0.6659, "step": 7346 }, { "epoch": 0.7504596527068437, "grad_norm": 1.4384596193466221, "learning_rate": 3.092175202704684e-06, "loss": 0.7362, "step": 7347 }, { "epoch": 0.750561797752809, "grad_norm": 1.3483693961574803, "learning_rate": 3.0897834491006818e-06, "loss": 0.6838, "step": 7348 }, { "epoch": 0.7506639427987742, "grad_norm": 1.5851545150928008, "learning_rate": 3.0873924518243504e-06, "loss": 0.8075, "step": 7349 }, { "epoch": 0.7507660878447395, "grad_norm": 1.3054090809092345, "learning_rate": 3.08500221113738e-06, "loss": 0.7067, "step": 7350 }, { "epoch": 0.7508682328907048, "grad_norm": 1.4891763215503115, "learning_rate": 3.082612727301383e-06, "loss": 0.7387, "step": 7351 }, { "epoch": 0.7509703779366701, "grad_norm": 1.486049117147843, "learning_rate": 3.080224000577895e-06, "loss": 0.6871, "step": 7352 }, { "epoch": 0.7510725229826354, "grad_norm": 1.4085850780168814, "learning_rate": 3.0778360312283617e-06, "loss": 0.6649, "step": 7353 }, { "epoch": 0.7511746680286007, "grad_norm": 1.4818951099275164, "learning_rate": 3.0754488195141464e-06, "loss": 0.7912, "step": 7354 }, { "epoch": 0.7512768130745658, "grad_norm": 1.396184903660817, "learning_rate": 3.0730623656965288e-06, "loss": 0.6842, "step": 7355 }, { "epoch": 0.7513789581205311, "grad_norm": 1.5695093846584869, "learning_rate": 3.0706766700367095e-06, "loss": 0.7839, "step": 7356 }, { "epoch": 0.7514811031664964, "grad_norm": 1.3078015936791683, "learning_rate": 3.0682917327958095e-06, "loss": 0.6343, "step": 7357 }, { "epoch": 0.7515832482124617, "grad_norm": 1.5495646021413993, "learning_rate": 3.0659075542348583e-06, "loss": 0.6841, "step": 7358 }, { "epoch": 0.751685393258427, "grad_norm": 1.530334184665937, "learning_rate": 3.063524134614805e-06, "loss": 0.7695, "step": 7359 }, { "epoch": 0.7517875383043923, "grad_norm": 1.6010646702197036, "learning_rate": 3.061141474196513e-06, "loss": 0.6714, "step": 7360 }, { "epoch": 0.7518896833503576, "grad_norm": 1.5282134803130527, "learning_rate": 3.058759573240774e-06, "loss": 0.6667, "step": 7361 }, { "epoch": 0.7519918283963227, "grad_norm": 1.5222213239995666, "learning_rate": 3.0563784320082833e-06, "loss": 0.7305, "step": 7362 }, { "epoch": 0.752093973442288, "grad_norm": 1.486241774941304, "learning_rate": 3.0539980507596588e-06, "loss": 0.6444, "step": 7363 }, { "epoch": 0.7521961184882533, "grad_norm": 1.5667639932440547, "learning_rate": 3.051618429755433e-06, "loss": 0.817, "step": 7364 }, { "epoch": 0.7522982635342186, "grad_norm": 1.398122195975387, "learning_rate": 3.049239569256063e-06, "loss": 0.6346, "step": 7365 }, { "epoch": 0.7524004085801839, "grad_norm": 1.4643337629675643, "learning_rate": 3.046861469521909e-06, "loss": 0.7417, "step": 7366 }, { "epoch": 0.7525025536261492, "grad_norm": 1.3961404005152431, "learning_rate": 3.0444841308132635e-06, "loss": 0.588, "step": 7367 }, { "epoch": 0.7526046986721144, "grad_norm": 1.4747741193082498, "learning_rate": 3.042107553390323e-06, "loss": 0.6993, "step": 7368 }, { "epoch": 0.7527068437180797, "grad_norm": 1.5891928412582947, "learning_rate": 3.0397317375132064e-06, "loss": 0.7577, "step": 7369 }, { "epoch": 0.7528089887640449, "grad_norm": 1.4921189015476584, "learning_rate": 3.0373566834419445e-06, "loss": 0.6711, "step": 7370 }, { "epoch": 0.7529111338100102, "grad_norm": 1.2600322502848067, "learning_rate": 3.034982391436495e-06, "loss": 0.6099, "step": 7371 }, { "epoch": 0.7530132788559755, "grad_norm": 1.4885130462834362, "learning_rate": 3.0326088617567204e-06, "loss": 0.6047, "step": 7372 }, { "epoch": 0.7531154239019408, "grad_norm": 1.5156072995660586, "learning_rate": 3.03023609466241e-06, "loss": 0.7578, "step": 7373 }, { "epoch": 0.753217568947906, "grad_norm": 1.6085434385377166, "learning_rate": 3.027864090413263e-06, "loss": 0.724, "step": 7374 }, { "epoch": 0.7533197139938713, "grad_norm": 1.5651032270204697, "learning_rate": 3.0254928492688905e-06, "loss": 0.7588, "step": 7375 }, { "epoch": 0.7534218590398366, "grad_norm": 1.4929193041921875, "learning_rate": 3.023122371488837e-06, "loss": 0.6857, "step": 7376 }, { "epoch": 0.7535240040858019, "grad_norm": 1.4130015211760834, "learning_rate": 3.0207526573325473e-06, "loss": 0.7327, "step": 7377 }, { "epoch": 0.7536261491317671, "grad_norm": 1.6705119570409108, "learning_rate": 3.018383707059388e-06, "loss": 0.7442, "step": 7378 }, { "epoch": 0.7537282941777323, "grad_norm": 1.5065922576770472, "learning_rate": 3.016015520928639e-06, "loss": 0.7277, "step": 7379 }, { "epoch": 0.7538304392236976, "grad_norm": 1.5787258215061597, "learning_rate": 3.013648099199504e-06, "loss": 0.7471, "step": 7380 }, { "epoch": 0.7539325842696629, "grad_norm": 1.430279911098824, "learning_rate": 3.011281442131102e-06, "loss": 0.7229, "step": 7381 }, { "epoch": 0.7540347293156282, "grad_norm": 1.544452746334185, "learning_rate": 3.008915549982461e-06, "loss": 0.6635, "step": 7382 }, { "epoch": 0.7541368743615935, "grad_norm": 1.3988708005590775, "learning_rate": 3.0065504230125297e-06, "loss": 0.6076, "step": 7383 }, { "epoch": 0.7542390194075588, "grad_norm": 1.582997856886227, "learning_rate": 3.0041860614801734e-06, "loss": 0.7025, "step": 7384 }, { "epoch": 0.754341164453524, "grad_norm": 1.6702792905658461, "learning_rate": 3.0018224656441684e-06, "loss": 0.6837, "step": 7385 }, { "epoch": 0.7544433094994892, "grad_norm": 1.50883689100958, "learning_rate": 2.99945963576322e-06, "loss": 0.7914, "step": 7386 }, { "epoch": 0.7545454545454545, "grad_norm": 1.5125548351810687, "learning_rate": 2.9970975720959372e-06, "loss": 0.7445, "step": 7387 }, { "epoch": 0.7546475995914198, "grad_norm": 1.538047176267832, "learning_rate": 2.994736274900847e-06, "loss": 0.623, "step": 7388 }, { "epoch": 0.7547497446373851, "grad_norm": 1.4895210683350129, "learning_rate": 2.9923757444364e-06, "loss": 0.6346, "step": 7389 }, { "epoch": 0.7548518896833504, "grad_norm": 1.502613610771345, "learning_rate": 2.990015980960952e-06, "loss": 0.6239, "step": 7390 }, { "epoch": 0.7549540347293157, "grad_norm": 1.5176719967855912, "learning_rate": 2.9876569847327873e-06, "loss": 0.6667, "step": 7391 }, { "epoch": 0.755056179775281, "grad_norm": 1.6275282986861968, "learning_rate": 2.9852987560100955e-06, "loss": 0.7237, "step": 7392 }, { "epoch": 0.7551583248212461, "grad_norm": 1.6384351087907165, "learning_rate": 2.9829412950509874e-06, "loss": 0.7894, "step": 7393 }, { "epoch": 0.7552604698672114, "grad_norm": 1.46122284759592, "learning_rate": 2.9805846021134856e-06, "loss": 0.6324, "step": 7394 }, { "epoch": 0.7553626149131767, "grad_norm": 1.663885318224284, "learning_rate": 2.9782286774555367e-06, "loss": 0.721, "step": 7395 }, { "epoch": 0.755464759959142, "grad_norm": 1.5699906893194122, "learning_rate": 2.975873521334993e-06, "loss": 0.7131, "step": 7396 }, { "epoch": 0.7555669050051073, "grad_norm": 1.5305018301446212, "learning_rate": 2.9735191340096335e-06, "loss": 0.6917, "step": 7397 }, { "epoch": 0.7556690500510725, "grad_norm": 1.4032371257552516, "learning_rate": 2.9711655157371444e-06, "loss": 0.7249, "step": 7398 }, { "epoch": 0.7557711950970378, "grad_norm": 1.5201139342252188, "learning_rate": 2.9688126667751303e-06, "loss": 0.6589, "step": 7399 }, { "epoch": 0.7558733401430031, "grad_norm": 1.500631948926616, "learning_rate": 2.9664605873811104e-06, "loss": 0.7535, "step": 7400 }, { "epoch": 0.7559754851889683, "grad_norm": 1.5108203604305874, "learning_rate": 2.964109277812526e-06, "loss": 0.6811, "step": 7401 }, { "epoch": 0.7560776302349336, "grad_norm": 1.3680764431558419, "learning_rate": 2.9617587383267266e-06, "loss": 0.6151, "step": 7402 }, { "epoch": 0.7561797752808989, "grad_norm": 1.5882061152168516, "learning_rate": 2.959408969180981e-06, "loss": 0.7107, "step": 7403 }, { "epoch": 0.7562819203268641, "grad_norm": 1.5603439801066015, "learning_rate": 2.95705997063247e-06, "loss": 0.7559, "step": 7404 }, { "epoch": 0.7563840653728294, "grad_norm": 1.6739827262326474, "learning_rate": 2.9547117429382955e-06, "loss": 0.7459, "step": 7405 }, { "epoch": 0.7564862104187947, "grad_norm": 1.5155734596075967, "learning_rate": 2.952364286355475e-06, "loss": 0.6424, "step": 7406 }, { "epoch": 0.75658835546476, "grad_norm": 1.3215348387500192, "learning_rate": 2.9500176011409365e-06, "loss": 0.577, "step": 7407 }, { "epoch": 0.7566905005107253, "grad_norm": 1.5166281698816015, "learning_rate": 2.9476716875515265e-06, "loss": 0.6843, "step": 7408 }, { "epoch": 0.7567926455566905, "grad_norm": 1.3937676171948743, "learning_rate": 2.945326545844004e-06, "loss": 0.6411, "step": 7409 }, { "epoch": 0.7568947906026557, "grad_norm": 1.6200071846609922, "learning_rate": 2.942982176275052e-06, "loss": 0.6305, "step": 7410 }, { "epoch": 0.756996935648621, "grad_norm": 1.4115235226309217, "learning_rate": 2.9406385791012604e-06, "loss": 0.6409, "step": 7411 }, { "epoch": 0.7570990806945863, "grad_norm": 1.4044946628605268, "learning_rate": 2.9382957545791333e-06, "loss": 0.7496, "step": 7412 }, { "epoch": 0.7572012257405516, "grad_norm": 1.6162198970035375, "learning_rate": 2.935953702965102e-06, "loss": 0.6873, "step": 7413 }, { "epoch": 0.7573033707865169, "grad_norm": 1.431189574773295, "learning_rate": 2.9336124245154995e-06, "loss": 0.6205, "step": 7414 }, { "epoch": 0.7574055158324822, "grad_norm": 1.482056556253484, "learning_rate": 2.9312719194865845e-06, "loss": 0.7205, "step": 7415 }, { "epoch": 0.7575076608784473, "grad_norm": 1.805903595230955, "learning_rate": 2.9289321881345257e-06, "loss": 0.7599, "step": 7416 }, { "epoch": 0.7576098059244126, "grad_norm": 1.3284276573168574, "learning_rate": 2.9265932307154064e-06, "loss": 0.6073, "step": 7417 }, { "epoch": 0.7577119509703779, "grad_norm": 1.3868868341373433, "learning_rate": 2.9242550474852294e-06, "loss": 0.7313, "step": 7418 }, { "epoch": 0.7578140960163432, "grad_norm": 1.4549558391544015, "learning_rate": 2.9219176386999048e-06, "loss": 0.6481, "step": 7419 }, { "epoch": 0.7579162410623085, "grad_norm": 1.3727739847972151, "learning_rate": 2.9195810046152717e-06, "loss": 0.7543, "step": 7420 }, { "epoch": 0.7580183861082738, "grad_norm": 1.513107430084354, "learning_rate": 2.917245145487069e-06, "loss": 0.6979, "step": 7421 }, { "epoch": 0.758120531154239, "grad_norm": 1.4864682381043417, "learning_rate": 2.9149100615709635e-06, "loss": 0.7042, "step": 7422 }, { "epoch": 0.7582226762002043, "grad_norm": 1.3509754291362757, "learning_rate": 2.9125757531225296e-06, "loss": 0.6282, "step": 7423 }, { "epoch": 0.7583248212461695, "grad_norm": 1.568394017620605, "learning_rate": 2.9102422203972546e-06, "loss": 0.6835, "step": 7424 }, { "epoch": 0.7584269662921348, "grad_norm": 1.3114329100927211, "learning_rate": 2.9079094636505533e-06, "loss": 0.584, "step": 7425 }, { "epoch": 0.7585291113381001, "grad_norm": 1.5630330466552067, "learning_rate": 2.9055774831377436e-06, "loss": 0.7948, "step": 7426 }, { "epoch": 0.7586312563840654, "grad_norm": 1.4368909478942313, "learning_rate": 2.9032462791140613e-06, "loss": 0.6769, "step": 7427 }, { "epoch": 0.7587334014300307, "grad_norm": 1.452773688240602, "learning_rate": 2.9009158518346557e-06, "loss": 0.6853, "step": 7428 }, { "epoch": 0.7588355464759959, "grad_norm": 1.512828472508022, "learning_rate": 2.8985862015545973e-06, "loss": 0.7146, "step": 7429 }, { "epoch": 0.7589376915219612, "grad_norm": 1.4306841096048442, "learning_rate": 2.89625732852887e-06, "loss": 0.6882, "step": 7430 }, { "epoch": 0.7590398365679265, "grad_norm": 1.5306789506679488, "learning_rate": 2.893929233012367e-06, "loss": 0.594, "step": 7431 }, { "epoch": 0.7591419816138917, "grad_norm": 1.3675652168530181, "learning_rate": 2.8916019152599017e-06, "loss": 0.6409, "step": 7432 }, { "epoch": 0.759244126659857, "grad_norm": 1.5936042672261972, "learning_rate": 2.889275375526196e-06, "loss": 0.7151, "step": 7433 }, { "epoch": 0.7593462717058223, "grad_norm": 1.5382525355908887, "learning_rate": 2.886949614065897e-06, "loss": 0.6944, "step": 7434 }, { "epoch": 0.7594484167517875, "grad_norm": 1.497449052729323, "learning_rate": 2.884624631133559e-06, "loss": 0.763, "step": 7435 }, { "epoch": 0.7595505617977528, "grad_norm": 1.5548274775949733, "learning_rate": 2.8823004269836517e-06, "loss": 0.6861, "step": 7436 }, { "epoch": 0.7596527068437181, "grad_norm": 1.4865981173243363, "learning_rate": 2.8799770018705587e-06, "loss": 0.7088, "step": 7437 }, { "epoch": 0.7597548518896834, "grad_norm": 1.4000335774005424, "learning_rate": 2.877654356048586e-06, "loss": 0.7364, "step": 7438 }, { "epoch": 0.7598569969356487, "grad_norm": 1.5071074989111395, "learning_rate": 2.8753324897719425e-06, "loss": 0.7147, "step": 7439 }, { "epoch": 0.7599591419816139, "grad_norm": 1.4078020191031093, "learning_rate": 2.8730114032947643e-06, "loss": 0.6415, "step": 7440 }, { "epoch": 0.7600612870275791, "grad_norm": 1.5260236379706307, "learning_rate": 2.8706910968710923e-06, "loss": 0.5871, "step": 7441 }, { "epoch": 0.7601634320735444, "grad_norm": 1.3698127996229998, "learning_rate": 2.8683715707548863e-06, "loss": 0.6525, "step": 7442 }, { "epoch": 0.7602655771195097, "grad_norm": 1.4774106970744387, "learning_rate": 2.8660528252000165e-06, "loss": 0.6196, "step": 7443 }, { "epoch": 0.760367722165475, "grad_norm": 1.431585655150089, "learning_rate": 2.8637348604602765e-06, "loss": 0.7249, "step": 7444 }, { "epoch": 0.7604698672114403, "grad_norm": 1.4239611399007868, "learning_rate": 2.8614176767893644e-06, "loss": 0.6432, "step": 7445 }, { "epoch": 0.7605720122574056, "grad_norm": 1.6015901888784416, "learning_rate": 2.859101274440902e-06, "loss": 0.6907, "step": 7446 }, { "epoch": 0.7606741573033707, "grad_norm": 1.5402087867669139, "learning_rate": 2.856785653668419e-06, "loss": 0.6794, "step": 7447 }, { "epoch": 0.760776302349336, "grad_norm": 1.4990709112836973, "learning_rate": 2.8544708147253585e-06, "loss": 0.6723, "step": 7448 }, { "epoch": 0.7608784473953013, "grad_norm": 1.4290053785281838, "learning_rate": 2.8521567578650867e-06, "loss": 0.6772, "step": 7449 }, { "epoch": 0.7609805924412666, "grad_norm": 1.3486039730762873, "learning_rate": 2.8498434833408762e-06, "loss": 0.6593, "step": 7450 }, { "epoch": 0.7610827374872319, "grad_norm": 1.6297030504294905, "learning_rate": 2.8475309914059157e-06, "loss": 0.6684, "step": 7451 }, { "epoch": 0.7611848825331972, "grad_norm": 1.5306049430840014, "learning_rate": 2.8452192823133096e-06, "loss": 0.7069, "step": 7452 }, { "epoch": 0.7612870275791624, "grad_norm": 1.409766100219532, "learning_rate": 2.8429083563160718e-06, "loss": 0.6406, "step": 7453 }, { "epoch": 0.7613891726251277, "grad_norm": 1.367544172775995, "learning_rate": 2.8405982136671394e-06, "loss": 0.6512, "step": 7454 }, { "epoch": 0.7614913176710929, "grad_norm": 1.5869502903828088, "learning_rate": 2.838288854619361e-06, "loss": 0.77, "step": 7455 }, { "epoch": 0.7615934627170582, "grad_norm": 1.4597399174442398, "learning_rate": 2.835980279425494e-06, "loss": 0.6743, "step": 7456 }, { "epoch": 0.7616956077630235, "grad_norm": 1.428650917932562, "learning_rate": 2.8336724883382137e-06, "loss": 0.5289, "step": 7457 }, { "epoch": 0.7617977528089888, "grad_norm": 1.3248505602502982, "learning_rate": 2.831365481610108e-06, "loss": 0.6505, "step": 7458 }, { "epoch": 0.761899897854954, "grad_norm": 1.4151535911990676, "learning_rate": 2.8290592594936837e-06, "loss": 0.7428, "step": 7459 }, { "epoch": 0.7620020429009193, "grad_norm": 1.4835404460932058, "learning_rate": 2.826753822241356e-06, "loss": 0.6555, "step": 7460 }, { "epoch": 0.7621041879468846, "grad_norm": 1.6097453819487826, "learning_rate": 2.8244491701054555e-06, "loss": 0.8062, "step": 7461 }, { "epoch": 0.7622063329928499, "grad_norm": 1.5719075408084238, "learning_rate": 2.8221453033382306e-06, "loss": 0.7377, "step": 7462 }, { "epoch": 0.7623084780388151, "grad_norm": 1.4120764478716656, "learning_rate": 2.8198422221918387e-06, "loss": 0.6108, "step": 7463 }, { "epoch": 0.7624106230847804, "grad_norm": 1.6434040750966188, "learning_rate": 2.8175399269183556e-06, "loss": 0.6043, "step": 7464 }, { "epoch": 0.7625127681307456, "grad_norm": 1.4799989995851495, "learning_rate": 2.815238417769769e-06, "loss": 0.6473, "step": 7465 }, { "epoch": 0.7626149131767109, "grad_norm": 1.3046272786143347, "learning_rate": 2.8129376949979805e-06, "loss": 0.5871, "step": 7466 }, { "epoch": 0.7627170582226762, "grad_norm": 1.4752261876779604, "learning_rate": 2.8106377588547996e-06, "loss": 0.6863, "step": 7467 }, { "epoch": 0.7628192032686415, "grad_norm": 1.4929004380384108, "learning_rate": 2.808338609591965e-06, "loss": 0.6802, "step": 7468 }, { "epoch": 0.7629213483146068, "grad_norm": 1.373773440059979, "learning_rate": 2.8060402474611128e-06, "loss": 0.733, "step": 7469 }, { "epoch": 0.763023493360572, "grad_norm": 1.4713833482270784, "learning_rate": 2.803742672713807e-06, "loss": 0.6507, "step": 7470 }, { "epoch": 0.7631256384065372, "grad_norm": 1.4034397487136077, "learning_rate": 2.801445885601515e-06, "loss": 0.7204, "step": 7471 }, { "epoch": 0.7632277834525025, "grad_norm": 1.5504528216279785, "learning_rate": 2.7991498863756205e-06, "loss": 0.7103, "step": 7472 }, { "epoch": 0.7633299284984678, "grad_norm": 1.5087620857803632, "learning_rate": 2.7968546752874214e-06, "loss": 0.6467, "step": 7473 }, { "epoch": 0.7634320735444331, "grad_norm": 1.563373337757284, "learning_rate": 2.7945602525881345e-06, "loss": 0.7126, "step": 7474 }, { "epoch": 0.7635342185903984, "grad_norm": 1.4671772582514293, "learning_rate": 2.7922666185288837e-06, "loss": 0.7377, "step": 7475 }, { "epoch": 0.7636363636363637, "grad_norm": 1.3530425895537521, "learning_rate": 2.789973773360708e-06, "loss": 0.7174, "step": 7476 }, { "epoch": 0.763738508682329, "grad_norm": 1.4603876322373197, "learning_rate": 2.7876817173345573e-06, "loss": 0.6659, "step": 7477 }, { "epoch": 0.7638406537282941, "grad_norm": 1.5486962596843985, "learning_rate": 2.785390450701303e-06, "loss": 0.6162, "step": 7478 }, { "epoch": 0.7639427987742594, "grad_norm": 1.5618213122041809, "learning_rate": 2.783099973711728e-06, "loss": 0.7013, "step": 7479 }, { "epoch": 0.7640449438202247, "grad_norm": 1.4928805565947687, "learning_rate": 2.7808102866165243e-06, "loss": 0.6871, "step": 7480 }, { "epoch": 0.76414708886619, "grad_norm": 1.4876570883563174, "learning_rate": 2.7785213896662987e-06, "loss": 0.7634, "step": 7481 }, { "epoch": 0.7642492339121553, "grad_norm": 1.5855894388454, "learning_rate": 2.776233283111569e-06, "loss": 0.7416, "step": 7482 }, { "epoch": 0.7643513789581206, "grad_norm": 1.522498379785394, "learning_rate": 2.773945967202777e-06, "loss": 0.7315, "step": 7483 }, { "epoch": 0.7644535240040858, "grad_norm": 1.374140706227445, "learning_rate": 2.7716594421902674e-06, "loss": 0.6875, "step": 7484 }, { "epoch": 0.7645556690500511, "grad_norm": 1.5176072044157276, "learning_rate": 2.7693737083243e-06, "loss": 0.6176, "step": 7485 }, { "epoch": 0.7646578140960163, "grad_norm": 1.465286480544789, "learning_rate": 2.767088765855054e-06, "loss": 0.7252, "step": 7486 }, { "epoch": 0.7647599591419816, "grad_norm": 1.4324917418480216, "learning_rate": 2.7648046150326113e-06, "loss": 0.6059, "step": 7487 }, { "epoch": 0.7648621041879469, "grad_norm": 1.5877790408839978, "learning_rate": 2.7625212561069826e-06, "loss": 0.7645, "step": 7488 }, { "epoch": 0.7649642492339122, "grad_norm": 1.4613514662155962, "learning_rate": 2.7602386893280786e-06, "loss": 0.6777, "step": 7489 }, { "epoch": 0.7650663942798774, "grad_norm": 1.4025383232396098, "learning_rate": 2.7579569149457266e-06, "loss": 0.7007, "step": 7490 }, { "epoch": 0.7651685393258427, "grad_norm": 1.4609774678610743, "learning_rate": 2.7556759332096694e-06, "loss": 0.7087, "step": 7491 }, { "epoch": 0.765270684371808, "grad_norm": 1.448706019032512, "learning_rate": 2.753395744369559e-06, "loss": 0.6244, "step": 7492 }, { "epoch": 0.7653728294177733, "grad_norm": 1.3290411468165644, "learning_rate": 2.751116348674967e-06, "loss": 0.604, "step": 7493 }, { "epoch": 0.7654749744637385, "grad_norm": 1.317100743286196, "learning_rate": 2.7488377463753755e-06, "loss": 0.661, "step": 7494 }, { "epoch": 0.7655771195097038, "grad_norm": 1.4570245621552407, "learning_rate": 2.746559937720179e-06, "loss": 0.7588, "step": 7495 }, { "epoch": 0.765679264555669, "grad_norm": 1.4616013657384508, "learning_rate": 2.744282922958683e-06, "loss": 0.6718, "step": 7496 }, { "epoch": 0.7657814096016343, "grad_norm": 1.6002785039196028, "learning_rate": 2.7420067023401055e-06, "loss": 0.6644, "step": 7497 }, { "epoch": 0.7658835546475996, "grad_norm": 1.5499481201136127, "learning_rate": 2.7397312761135864e-06, "loss": 0.7273, "step": 7498 }, { "epoch": 0.7659856996935649, "grad_norm": 1.4890135175877488, "learning_rate": 2.7374566445281715e-06, "loss": 0.651, "step": 7499 }, { "epoch": 0.7660878447395302, "grad_norm": 1.397372249896923, "learning_rate": 2.735182807832818e-06, "loss": 0.6331, "step": 7500 }, { "epoch": 0.7661899897854954, "grad_norm": 1.406717338735443, "learning_rate": 2.732909766276396e-06, "loss": 0.6921, "step": 7501 }, { "epoch": 0.7662921348314606, "grad_norm": 1.4415220672406783, "learning_rate": 2.7306375201076963e-06, "loss": 0.6378, "step": 7502 }, { "epoch": 0.7663942798774259, "grad_norm": 1.5594160374417692, "learning_rate": 2.72836606957542e-06, "loss": 0.7122, "step": 7503 }, { "epoch": 0.7664964249233912, "grad_norm": 1.4497392599049908, "learning_rate": 2.726095414928175e-06, "loss": 0.6539, "step": 7504 }, { "epoch": 0.7665985699693565, "grad_norm": 1.529216016344709, "learning_rate": 2.7238255564144854e-06, "loss": 0.5987, "step": 7505 }, { "epoch": 0.7667007150153218, "grad_norm": 1.482709032801827, "learning_rate": 2.721556494282791e-06, "loss": 0.6588, "step": 7506 }, { "epoch": 0.7668028600612871, "grad_norm": 1.4342341980255233, "learning_rate": 2.719288228781437e-06, "loss": 0.6514, "step": 7507 }, { "epoch": 0.7669050051072523, "grad_norm": 1.5900169066365148, "learning_rate": 2.717020760158694e-06, "loss": 0.8167, "step": 7508 }, { "epoch": 0.7670071501532175, "grad_norm": 1.4868839873783102, "learning_rate": 2.714754088662731e-06, "loss": 0.7151, "step": 7509 }, { "epoch": 0.7671092951991828, "grad_norm": 1.5472925782318938, "learning_rate": 2.712488214541642e-06, "loss": 0.753, "step": 7510 }, { "epoch": 0.7672114402451481, "grad_norm": 1.5169657050369079, "learning_rate": 2.7102231380434276e-06, "loss": 0.6872, "step": 7511 }, { "epoch": 0.7673135852911134, "grad_norm": 1.4831163029773318, "learning_rate": 2.7079588594159966e-06, "loss": 0.6376, "step": 7512 }, { "epoch": 0.7674157303370787, "grad_norm": 1.5441842632365448, "learning_rate": 2.7056953789071826e-06, "loss": 0.6884, "step": 7513 }, { "epoch": 0.767517875383044, "grad_norm": 1.5903428649550728, "learning_rate": 2.7034326967647228e-06, "loss": 0.731, "step": 7514 }, { "epoch": 0.7676200204290092, "grad_norm": 1.5789230944885124, "learning_rate": 2.701170813236268e-06, "loss": 0.7097, "step": 7515 }, { "epoch": 0.7677221654749745, "grad_norm": 1.5352716098611365, "learning_rate": 2.698909728569381e-06, "loss": 0.7075, "step": 7516 }, { "epoch": 0.7678243105209397, "grad_norm": 1.3717757136103805, "learning_rate": 2.696649443011541e-06, "loss": 0.5222, "step": 7517 }, { "epoch": 0.767926455566905, "grad_norm": 1.5008977627382827, "learning_rate": 2.6943899568101404e-06, "loss": 0.7349, "step": 7518 }, { "epoch": 0.7680286006128703, "grad_norm": 1.4578786805285304, "learning_rate": 2.6921312702124792e-06, "loss": 0.7027, "step": 7519 }, { "epoch": 0.7681307456588355, "grad_norm": 1.464822780206585, "learning_rate": 2.6898733834657732e-06, "loss": 0.648, "step": 7520 }, { "epoch": 0.7682328907048008, "grad_norm": 1.614545270250516, "learning_rate": 2.687616296817144e-06, "loss": 0.7547, "step": 7521 }, { "epoch": 0.7683350357507661, "grad_norm": 1.412759199973982, "learning_rate": 2.6853600105136392e-06, "loss": 0.7793, "step": 7522 }, { "epoch": 0.7684371807967314, "grad_norm": 1.5145375536468386, "learning_rate": 2.6831045248022068e-06, "loss": 0.5748, "step": 7523 }, { "epoch": 0.7685393258426966, "grad_norm": 1.357646530383558, "learning_rate": 2.6808498399297113e-06, "loss": 0.649, "step": 7524 }, { "epoch": 0.7686414708886619, "grad_norm": 1.340401378469464, "learning_rate": 2.6785959561429264e-06, "loss": 0.6371, "step": 7525 }, { "epoch": 0.7687436159346271, "grad_norm": 1.5109008779893367, "learning_rate": 2.6763428736885477e-06, "loss": 0.749, "step": 7526 }, { "epoch": 0.7688457609805924, "grad_norm": 1.4268138130774322, "learning_rate": 2.6740905928131712e-06, "loss": 0.6787, "step": 7527 }, { "epoch": 0.7689479060265577, "grad_norm": 1.3867794435906604, "learning_rate": 2.6718391137633138e-06, "loss": 0.7069, "step": 7528 }, { "epoch": 0.769050051072523, "grad_norm": 1.4807100385710656, "learning_rate": 2.669588436785401e-06, "loss": 0.5362, "step": 7529 }, { "epoch": 0.7691521961184883, "grad_norm": 1.497960398518875, "learning_rate": 2.6673385621257698e-06, "loss": 0.7432, "step": 7530 }, { "epoch": 0.7692543411644536, "grad_norm": 1.372783096873257, "learning_rate": 2.6650894900306667e-06, "loss": 0.6242, "step": 7531 }, { "epoch": 0.7693564862104187, "grad_norm": 1.6108390917590352, "learning_rate": 2.6628412207462616e-06, "loss": 0.6651, "step": 7532 }, { "epoch": 0.769458631256384, "grad_norm": 1.4706447661449698, "learning_rate": 2.660593754518622e-06, "loss": 0.5825, "step": 7533 }, { "epoch": 0.7695607763023493, "grad_norm": 1.5474869320658484, "learning_rate": 2.6583470915937403e-06, "loss": 0.7041, "step": 7534 }, { "epoch": 0.7696629213483146, "grad_norm": 1.4532029343508164, "learning_rate": 2.656101232217514e-06, "loss": 0.6687, "step": 7535 }, { "epoch": 0.7697650663942799, "grad_norm": 1.427826472608391, "learning_rate": 2.6538561766357486e-06, "loss": 0.7307, "step": 7536 }, { "epoch": 0.7698672114402452, "grad_norm": 1.449006821514821, "learning_rate": 2.651611925094174e-06, "loss": 0.6744, "step": 7537 }, { "epoch": 0.7699693564862105, "grad_norm": 1.564866030599005, "learning_rate": 2.649368477838422e-06, "loss": 0.5883, "step": 7538 }, { "epoch": 0.7700715015321757, "grad_norm": 1.5395065801551826, "learning_rate": 2.6471258351140393e-06, "loss": 0.5955, "step": 7539 }, { "epoch": 0.7701736465781409, "grad_norm": 1.6246055267562316, "learning_rate": 2.6448839971664853e-06, "loss": 0.6826, "step": 7540 }, { "epoch": 0.7702757916241062, "grad_norm": 1.5117148746997864, "learning_rate": 2.6426429642411235e-06, "loss": 0.6492, "step": 7541 }, { "epoch": 0.7703779366700715, "grad_norm": 1.5138880704219884, "learning_rate": 2.6404027365832473e-06, "loss": 0.7324, "step": 7542 }, { "epoch": 0.7704800817160368, "grad_norm": 1.4896248288183176, "learning_rate": 2.638163314438048e-06, "loss": 0.6586, "step": 7543 }, { "epoch": 0.770582226762002, "grad_norm": 1.4400288205613612, "learning_rate": 2.6359246980506293e-06, "loss": 0.646, "step": 7544 }, { "epoch": 0.7706843718079673, "grad_norm": 1.4597150643872812, "learning_rate": 2.6336868876660104e-06, "loss": 0.6293, "step": 7545 }, { "epoch": 0.7707865168539326, "grad_norm": 1.510403378345157, "learning_rate": 2.631449883529119e-06, "loss": 0.6754, "step": 7546 }, { "epoch": 0.7708886618998979, "grad_norm": 1.5858149207337373, "learning_rate": 2.6292136858848006e-06, "loss": 0.7348, "step": 7547 }, { "epoch": 0.7709908069458631, "grad_norm": 1.4434889761626102, "learning_rate": 2.6269782949778066e-06, "loss": 0.7862, "step": 7548 }, { "epoch": 0.7710929519918284, "grad_norm": 1.5675751178491457, "learning_rate": 2.6247437110527984e-06, "loss": 0.7084, "step": 7549 }, { "epoch": 0.7711950970377937, "grad_norm": 1.4935930227943943, "learning_rate": 2.6225099343543593e-06, "loss": 0.6616, "step": 7550 }, { "epoch": 0.7712972420837589, "grad_norm": 1.5252268690979696, "learning_rate": 2.620276965126971e-06, "loss": 0.6467, "step": 7551 }, { "epoch": 0.7713993871297242, "grad_norm": 1.3865577879508761, "learning_rate": 2.618044803615041e-06, "loss": 0.6129, "step": 7552 }, { "epoch": 0.7715015321756895, "grad_norm": 1.3724344870142149, "learning_rate": 2.615813450062875e-06, "loss": 0.5971, "step": 7553 }, { "epoch": 0.7716036772216548, "grad_norm": 1.4986142043516903, "learning_rate": 2.613582904714699e-06, "loss": 0.641, "step": 7554 }, { "epoch": 0.77170582226762, "grad_norm": 1.5344258851324915, "learning_rate": 2.611353167814643e-06, "loss": 0.7846, "step": 7555 }, { "epoch": 0.7718079673135853, "grad_norm": 1.3651701702339099, "learning_rate": 2.6091242396067586e-06, "loss": 0.5861, "step": 7556 }, { "epoch": 0.7719101123595505, "grad_norm": 1.5376752242556553, "learning_rate": 2.6068961203349997e-06, "loss": 0.6837, "step": 7557 }, { "epoch": 0.7720122574055158, "grad_norm": 1.5233326085928105, "learning_rate": 2.604668810243238e-06, "loss": 0.674, "step": 7558 }, { "epoch": 0.7721144024514811, "grad_norm": 1.7229607075030353, "learning_rate": 2.6024423095752547e-06, "loss": 0.7216, "step": 7559 }, { "epoch": 0.7722165474974464, "grad_norm": 1.6084217351509287, "learning_rate": 2.6002166185747403e-06, "loss": 0.6364, "step": 7560 }, { "epoch": 0.7723186925434117, "grad_norm": 1.4218962991240265, "learning_rate": 2.5979917374852935e-06, "loss": 0.6961, "step": 7561 }, { "epoch": 0.772420837589377, "grad_norm": 1.4656733956316854, "learning_rate": 2.595767666550437e-06, "loss": 0.5589, "step": 7562 }, { "epoch": 0.7725229826353421, "grad_norm": 1.528048160897142, "learning_rate": 2.5935444060135938e-06, "loss": 0.6968, "step": 7563 }, { "epoch": 0.7726251276813074, "grad_norm": 1.482536158161189, "learning_rate": 2.591321956118099e-06, "loss": 0.6833, "step": 7564 }, { "epoch": 0.7727272727272727, "grad_norm": 1.6896649908100947, "learning_rate": 2.5891003171072014e-06, "loss": 0.7131, "step": 7565 }, { "epoch": 0.772829417773238, "grad_norm": 1.4242507648558607, "learning_rate": 2.586879489224061e-06, "loss": 0.7124, "step": 7566 }, { "epoch": 0.7729315628192033, "grad_norm": 1.6088421900468373, "learning_rate": 2.5846594727117537e-06, "loss": 0.6358, "step": 7567 }, { "epoch": 0.7730337078651686, "grad_norm": 1.5067738200233267, "learning_rate": 2.5824402678132576e-06, "loss": 0.7074, "step": 7568 }, { "epoch": 0.7731358529111338, "grad_norm": 1.416804467247809, "learning_rate": 2.580221874771467e-06, "loss": 0.6683, "step": 7569 }, { "epoch": 0.7732379979570991, "grad_norm": 1.5147699063257296, "learning_rate": 2.5780042938291817e-06, "loss": 0.7125, "step": 7570 }, { "epoch": 0.7733401430030643, "grad_norm": 1.3546254665536068, "learning_rate": 2.5757875252291266e-06, "loss": 0.5713, "step": 7571 }, { "epoch": 0.7734422880490296, "grad_norm": 1.5794784190956486, "learning_rate": 2.573571569213922e-06, "loss": 0.6644, "step": 7572 }, { "epoch": 0.7735444330949949, "grad_norm": 1.5533172552031582, "learning_rate": 2.571356426026105e-06, "loss": 0.6402, "step": 7573 }, { "epoch": 0.7736465781409602, "grad_norm": 1.554171903370183, "learning_rate": 2.5691420959081295e-06, "loss": 0.7905, "step": 7574 }, { "epoch": 0.7737487231869254, "grad_norm": 1.4843270149620513, "learning_rate": 2.566928579102349e-06, "loss": 0.6466, "step": 7575 }, { "epoch": 0.7738508682328907, "grad_norm": 1.7730460244938047, "learning_rate": 2.5647158758510414e-06, "loss": 0.7165, "step": 7576 }, { "epoch": 0.773953013278856, "grad_norm": 1.5373337920144932, "learning_rate": 2.562503986396385e-06, "loss": 0.6663, "step": 7577 }, { "epoch": 0.7740551583248212, "grad_norm": 1.5770840037033396, "learning_rate": 2.5602929109804717e-06, "loss": 0.7249, "step": 7578 }, { "epoch": 0.7741573033707865, "grad_norm": 1.3823815536036115, "learning_rate": 2.558082649845307e-06, "loss": 0.6508, "step": 7579 }, { "epoch": 0.7742594484167518, "grad_norm": 1.608982361327682, "learning_rate": 2.5558732032328013e-06, "loss": 0.73, "step": 7580 }, { "epoch": 0.774361593462717, "grad_norm": 1.4351901003319678, "learning_rate": 2.553664571384783e-06, "loss": 0.6713, "step": 7581 }, { "epoch": 0.7744637385086823, "grad_norm": 1.4472968123956922, "learning_rate": 2.5514567545429914e-06, "loss": 0.7065, "step": 7582 }, { "epoch": 0.7745658835546476, "grad_norm": 1.5267743672428034, "learning_rate": 2.54924975294907e-06, "loss": 0.6673, "step": 7583 }, { "epoch": 0.7746680286006129, "grad_norm": 1.6842849837530693, "learning_rate": 2.547043566844577e-06, "loss": 0.6996, "step": 7584 }, { "epoch": 0.7747701736465782, "grad_norm": 1.5499116395696109, "learning_rate": 2.5448381964709777e-06, "loss": 0.8077, "step": 7585 }, { "epoch": 0.7748723186925434, "grad_norm": 1.529752465699179, "learning_rate": 2.5426336420696586e-06, "loss": 0.7524, "step": 7586 }, { "epoch": 0.7749744637385086, "grad_norm": 1.5834104831618432, "learning_rate": 2.5404299038819036e-06, "loss": 0.7769, "step": 7587 }, { "epoch": 0.7750766087844739, "grad_norm": 1.5286150675161132, "learning_rate": 2.538226982148917e-06, "loss": 0.6937, "step": 7588 }, { "epoch": 0.7751787538304392, "grad_norm": 1.469695878784593, "learning_rate": 2.5360248771118036e-06, "loss": 0.7045, "step": 7589 }, { "epoch": 0.7752808988764045, "grad_norm": 1.6513951261113482, "learning_rate": 2.5338235890115905e-06, "loss": 0.7059, "step": 7590 }, { "epoch": 0.7753830439223698, "grad_norm": 1.4783175317189345, "learning_rate": 2.5316231180892127e-06, "loss": 0.5918, "step": 7591 }, { "epoch": 0.7754851889683351, "grad_norm": 1.338950286394361, "learning_rate": 2.529423464585509e-06, "loss": 0.5098, "step": 7592 }, { "epoch": 0.7755873340143004, "grad_norm": 1.406579206030611, "learning_rate": 2.527224628741234e-06, "loss": 0.6693, "step": 7593 }, { "epoch": 0.7756894790602655, "grad_norm": 1.337700529423796, "learning_rate": 2.525026610797051e-06, "loss": 0.652, "step": 7594 }, { "epoch": 0.7757916241062308, "grad_norm": 1.4092099099407729, "learning_rate": 2.5228294109935323e-06, "loss": 0.6822, "step": 7595 }, { "epoch": 0.7758937691521961, "grad_norm": 1.4722013992232574, "learning_rate": 2.520633029571169e-06, "loss": 0.6622, "step": 7596 }, { "epoch": 0.7759959141981614, "grad_norm": 1.4936439600304174, "learning_rate": 2.5184374667703494e-06, "loss": 0.7578, "step": 7597 }, { "epoch": 0.7760980592441267, "grad_norm": 1.6265080971670343, "learning_rate": 2.5162427228313856e-06, "loss": 0.7249, "step": 7598 }, { "epoch": 0.776200204290092, "grad_norm": 1.5547549053836527, "learning_rate": 2.5140487979944907e-06, "loss": 0.6888, "step": 7599 }, { "epoch": 0.7763023493360572, "grad_norm": 1.5441945973243612, "learning_rate": 2.5118556924997882e-06, "loss": 0.7252, "step": 7600 }, { "epoch": 0.7764044943820225, "grad_norm": 1.4859112984741925, "learning_rate": 2.5096634065873215e-06, "loss": 0.701, "step": 7601 }, { "epoch": 0.7765066394279877, "grad_norm": 1.6939475727356772, "learning_rate": 2.507471940497035e-06, "loss": 0.6758, "step": 7602 }, { "epoch": 0.776608784473953, "grad_norm": 1.4976764380426018, "learning_rate": 2.5052812944687854e-06, "loss": 0.7398, "step": 7603 }, { "epoch": 0.7767109295199183, "grad_norm": 1.3857948933455269, "learning_rate": 2.503091468742337e-06, "loss": 0.6992, "step": 7604 }, { "epoch": 0.7768130745658836, "grad_norm": 1.4347429297035994, "learning_rate": 2.50090246355737e-06, "loss": 0.6946, "step": 7605 }, { "epoch": 0.7769152196118488, "grad_norm": 1.502427772819888, "learning_rate": 2.498714279153477e-06, "loss": 0.6293, "step": 7606 }, { "epoch": 0.7770173646578141, "grad_norm": 1.4880544900763362, "learning_rate": 2.4965269157701533e-06, "loss": 0.7517, "step": 7607 }, { "epoch": 0.7771195097037794, "grad_norm": 1.4335461118431672, "learning_rate": 2.494340373646805e-06, "loss": 0.6687, "step": 7608 }, { "epoch": 0.7772216547497446, "grad_norm": 1.6035790374291685, "learning_rate": 2.4921546530227515e-06, "loss": 0.6078, "step": 7609 }, { "epoch": 0.7773237997957099, "grad_norm": 1.488469466015601, "learning_rate": 2.4899697541372224e-06, "loss": 0.6302, "step": 7610 }, { "epoch": 0.7774259448416752, "grad_norm": 1.5170960014944703, "learning_rate": 2.487785677229357e-06, "loss": 0.7064, "step": 7611 }, { "epoch": 0.7775280898876404, "grad_norm": 1.4380095436649278, "learning_rate": 2.4856024225382027e-06, "loss": 0.6981, "step": 7612 }, { "epoch": 0.7776302349336057, "grad_norm": 1.4389450598710658, "learning_rate": 2.4834199903027157e-06, "loss": 0.5713, "step": 7613 }, { "epoch": 0.777732379979571, "grad_norm": 1.6116819205263988, "learning_rate": 2.481238380761769e-06, "loss": 0.6738, "step": 7614 }, { "epoch": 0.7778345250255363, "grad_norm": 1.6546075890814558, "learning_rate": 2.4790575941541374e-06, "loss": 0.7098, "step": 7615 }, { "epoch": 0.7779366700715016, "grad_norm": 1.5638639184473528, "learning_rate": 2.476877630718514e-06, "loss": 0.7438, "step": 7616 }, { "epoch": 0.7780388151174668, "grad_norm": 1.33827279086134, "learning_rate": 2.4746984906934934e-06, "loss": 0.717, "step": 7617 }, { "epoch": 0.778140960163432, "grad_norm": 1.4094009629834923, "learning_rate": 2.4725201743175854e-06, "loss": 0.6304, "step": 7618 }, { "epoch": 0.7782431052093973, "grad_norm": 1.7314463038745098, "learning_rate": 2.4703426818292055e-06, "loss": 0.6905, "step": 7619 }, { "epoch": 0.7783452502553626, "grad_norm": 1.5702428441509972, "learning_rate": 2.468166013466686e-06, "loss": 0.7202, "step": 7620 }, { "epoch": 0.7784473953013279, "grad_norm": 1.6683688231958707, "learning_rate": 2.4659901694682597e-06, "loss": 0.6697, "step": 7621 }, { "epoch": 0.7785495403472932, "grad_norm": 1.388103610440498, "learning_rate": 2.463815150072081e-06, "loss": 0.6895, "step": 7622 }, { "epoch": 0.7786516853932585, "grad_norm": 1.487254299221795, "learning_rate": 2.4616409555162012e-06, "loss": 0.6981, "step": 7623 }, { "epoch": 0.7787538304392237, "grad_norm": 1.515056821045475, "learning_rate": 2.4594675860385873e-06, "loss": 0.6662, "step": 7624 }, { "epoch": 0.7788559754851889, "grad_norm": 1.4267759597723066, "learning_rate": 2.457295041877121e-06, "loss": 0.6674, "step": 7625 }, { "epoch": 0.7789581205311542, "grad_norm": 1.3765671190295736, "learning_rate": 2.455123323269586e-06, "loss": 0.6682, "step": 7626 }, { "epoch": 0.7790602655771195, "grad_norm": 1.3672344889990873, "learning_rate": 2.452952430453677e-06, "loss": 0.7113, "step": 7627 }, { "epoch": 0.7791624106230848, "grad_norm": 1.540548363282977, "learning_rate": 2.4507823636670016e-06, "loss": 0.7496, "step": 7628 }, { "epoch": 0.7792645556690501, "grad_norm": 1.55100357333724, "learning_rate": 2.4486131231470665e-06, "loss": 0.6543, "step": 7629 }, { "epoch": 0.7793667007150153, "grad_norm": 1.4784275752553009, "learning_rate": 2.4464447091313103e-06, "loss": 0.6619, "step": 7630 }, { "epoch": 0.7794688457609806, "grad_norm": 1.4783956474879465, "learning_rate": 2.4442771218570618e-06, "loss": 0.675, "step": 7631 }, { "epoch": 0.7795709908069459, "grad_norm": 1.4683689840493102, "learning_rate": 2.4421103615615626e-06, "loss": 0.6474, "step": 7632 }, { "epoch": 0.7796731358529111, "grad_norm": 1.7554036087838423, "learning_rate": 2.4399444284819685e-06, "loss": 0.6144, "step": 7633 }, { "epoch": 0.7797752808988764, "grad_norm": 1.3290933715433075, "learning_rate": 2.437779322855337e-06, "loss": 0.5352, "step": 7634 }, { "epoch": 0.7798774259448417, "grad_norm": 1.6188972321603414, "learning_rate": 2.4356150449186487e-06, "loss": 0.6766, "step": 7635 }, { "epoch": 0.779979570990807, "grad_norm": 1.4314166944686777, "learning_rate": 2.43345159490878e-06, "loss": 0.6879, "step": 7636 }, { "epoch": 0.7800817160367722, "grad_norm": 1.6147653985890673, "learning_rate": 2.43128897306252e-06, "loss": 0.7007, "step": 7637 }, { "epoch": 0.7801838610827375, "grad_norm": 1.447919503119416, "learning_rate": 2.429127179616575e-06, "loss": 0.6918, "step": 7638 }, { "epoch": 0.7802860061287028, "grad_norm": 1.3470491877991122, "learning_rate": 2.426966214807549e-06, "loss": 0.63, "step": 7639 }, { "epoch": 0.780388151174668, "grad_norm": 1.3820116857530786, "learning_rate": 2.424806078871966e-06, "loss": 0.7285, "step": 7640 }, { "epoch": 0.7804902962206333, "grad_norm": 1.603025158894778, "learning_rate": 2.422646772046252e-06, "loss": 0.7288, "step": 7641 }, { "epoch": 0.7805924412665985, "grad_norm": 1.5172059781463059, "learning_rate": 2.420488294566745e-06, "loss": 0.6552, "step": 7642 }, { "epoch": 0.7806945863125638, "grad_norm": 1.477034566542824, "learning_rate": 2.4183306466696877e-06, "loss": 0.661, "step": 7643 }, { "epoch": 0.7807967313585291, "grad_norm": 1.3917112776727087, "learning_rate": 2.4161738285912427e-06, "loss": 0.7714, "step": 7644 }, { "epoch": 0.7808988764044944, "grad_norm": 1.4742150368416518, "learning_rate": 2.4140178405674685e-06, "loss": 0.6272, "step": 7645 }, { "epoch": 0.7810010214504597, "grad_norm": 1.4376822368932918, "learning_rate": 2.411862682834346e-06, "loss": 0.6434, "step": 7646 }, { "epoch": 0.781103166496425, "grad_norm": 1.5077904883888897, "learning_rate": 2.4097083556277555e-06, "loss": 0.7089, "step": 7647 }, { "epoch": 0.7812053115423901, "grad_norm": 1.4974472962238985, "learning_rate": 2.4075548591834897e-06, "loss": 0.5656, "step": 7648 }, { "epoch": 0.7813074565883554, "grad_norm": 1.445861665764525, "learning_rate": 2.405402193737246e-06, "loss": 0.7199, "step": 7649 }, { "epoch": 0.7814096016343207, "grad_norm": 1.312592755924976, "learning_rate": 2.4032503595246437e-06, "loss": 0.7204, "step": 7650 }, { "epoch": 0.781511746680286, "grad_norm": 1.577097964607172, "learning_rate": 2.4010993567811956e-06, "loss": 0.692, "step": 7651 }, { "epoch": 0.7816138917262513, "grad_norm": 1.5185926617116954, "learning_rate": 2.398949185742334e-06, "loss": 0.6134, "step": 7652 }, { "epoch": 0.7817160367722166, "grad_norm": 1.5451330602267526, "learning_rate": 2.3967998466433916e-06, "loss": 0.6931, "step": 7653 }, { "epoch": 0.7818181818181819, "grad_norm": 1.318621785866221, "learning_rate": 2.394651339719618e-06, "loss": 0.6825, "step": 7654 }, { "epoch": 0.7819203268641471, "grad_norm": 1.4521904128742185, "learning_rate": 2.3925036652061717e-06, "loss": 0.5794, "step": 7655 }, { "epoch": 0.7820224719101123, "grad_norm": 1.532754558659476, "learning_rate": 2.3903568233381146e-06, "loss": 0.7097, "step": 7656 }, { "epoch": 0.7821246169560776, "grad_norm": 1.4848551096716975, "learning_rate": 2.388210814350419e-06, "loss": 0.7281, "step": 7657 }, { "epoch": 0.7822267620020429, "grad_norm": 1.5240290141762678, "learning_rate": 2.386065638477966e-06, "loss": 0.7469, "step": 7658 }, { "epoch": 0.7823289070480082, "grad_norm": 1.5137068931192816, "learning_rate": 2.38392129595555e-06, "loss": 0.6739, "step": 7659 }, { "epoch": 0.7824310520939735, "grad_norm": 1.4835196680465237, "learning_rate": 2.3817777870178692e-06, "loss": 0.7589, "step": 7660 }, { "epoch": 0.7825331971399387, "grad_norm": 1.4313653341396373, "learning_rate": 2.3796351118995287e-06, "loss": 0.6427, "step": 7661 }, { "epoch": 0.782635342185904, "grad_norm": 1.6172344529486073, "learning_rate": 2.377493270835051e-06, "loss": 0.7277, "step": 7662 }, { "epoch": 0.7827374872318692, "grad_norm": 1.3378542726162284, "learning_rate": 2.3753522640588567e-06, "loss": 0.6077, "step": 7663 }, { "epoch": 0.7828396322778345, "grad_norm": 1.6194442006199115, "learning_rate": 2.373212091805287e-06, "loss": 0.7198, "step": 7664 }, { "epoch": 0.7829417773237998, "grad_norm": 1.5822903909886965, "learning_rate": 2.371072754308581e-06, "loss": 0.7089, "step": 7665 }, { "epoch": 0.783043922369765, "grad_norm": 1.4514494964980613, "learning_rate": 2.36893425180289e-06, "loss": 0.6891, "step": 7666 }, { "epoch": 0.7831460674157303, "grad_norm": 1.508272499985356, "learning_rate": 2.3667965845222774e-06, "loss": 0.6542, "step": 7667 }, { "epoch": 0.7832482124616956, "grad_norm": 1.5027684876561473, "learning_rate": 2.364659752700705e-06, "loss": 0.6781, "step": 7668 }, { "epoch": 0.7833503575076609, "grad_norm": 1.7554138002883881, "learning_rate": 2.362523756572058e-06, "loss": 0.7918, "step": 7669 }, { "epoch": 0.7834525025536262, "grad_norm": 1.4273235942365043, "learning_rate": 2.3603885963701225e-06, "loss": 0.681, "step": 7670 }, { "epoch": 0.7835546475995914, "grad_norm": 1.523686588231696, "learning_rate": 2.3582542723285904e-06, "loss": 0.6414, "step": 7671 }, { "epoch": 0.7836567926455567, "grad_norm": 1.5645708272699634, "learning_rate": 2.356120784681065e-06, "loss": 0.7478, "step": 7672 }, { "epoch": 0.7837589376915219, "grad_norm": 1.5715813585413854, "learning_rate": 2.353988133661056e-06, "loss": 0.8196, "step": 7673 }, { "epoch": 0.7838610827374872, "grad_norm": 1.4354206242403733, "learning_rate": 2.3518563195019893e-06, "loss": 0.6108, "step": 7674 }, { "epoch": 0.7839632277834525, "grad_norm": 1.386126888076502, "learning_rate": 2.3497253424371892e-06, "loss": 0.6445, "step": 7675 }, { "epoch": 0.7840653728294178, "grad_norm": 1.4041361724344759, "learning_rate": 2.3475952026998927e-06, "loss": 0.6445, "step": 7676 }, { "epoch": 0.7841675178753831, "grad_norm": 1.6572468984466966, "learning_rate": 2.3454659005232425e-06, "loss": 0.745, "step": 7677 }, { "epoch": 0.7842696629213484, "grad_norm": 1.5299766591611008, "learning_rate": 2.343337436140295e-06, "loss": 0.6797, "step": 7678 }, { "epoch": 0.7843718079673135, "grad_norm": 1.376831705725251, "learning_rate": 2.3412098097840154e-06, "loss": 0.6109, "step": 7679 }, { "epoch": 0.7844739530132788, "grad_norm": 1.4169714742399724, "learning_rate": 2.3390830216872697e-06, "loss": 0.6675, "step": 7680 }, { "epoch": 0.7845760980592441, "grad_norm": 1.3636722903648457, "learning_rate": 2.3369570720828372e-06, "loss": 0.6785, "step": 7681 }, { "epoch": 0.7846782431052094, "grad_norm": 1.4925143166837687, "learning_rate": 2.3348319612034042e-06, "loss": 0.836, "step": 7682 }, { "epoch": 0.7847803881511747, "grad_norm": 1.3346006933195858, "learning_rate": 2.3327076892815626e-06, "loss": 0.6976, "step": 7683 }, { "epoch": 0.78488253319714, "grad_norm": 1.4023282393407008, "learning_rate": 2.3305842565498203e-06, "loss": 0.6069, "step": 7684 }, { "epoch": 0.7849846782431052, "grad_norm": 1.5711373378897706, "learning_rate": 2.3284616632405842e-06, "loss": 0.7301, "step": 7685 }, { "epoch": 0.7850868232890705, "grad_norm": 1.4996578332272632, "learning_rate": 2.3263399095861785e-06, "loss": 0.6176, "step": 7686 }, { "epoch": 0.7851889683350357, "grad_norm": 1.5191855583785092, "learning_rate": 2.3242189958188264e-06, "loss": 0.576, "step": 7687 }, { "epoch": 0.785291113381001, "grad_norm": 1.5242311616271274, "learning_rate": 2.3220989221706626e-06, "loss": 0.5946, "step": 7688 }, { "epoch": 0.7853932584269663, "grad_norm": 1.6345507436252389, "learning_rate": 2.3199796888737338e-06, "loss": 0.7623, "step": 7689 }, { "epoch": 0.7854954034729316, "grad_norm": 1.466363102995361, "learning_rate": 2.317861296159991e-06, "loss": 0.6504, "step": 7690 }, { "epoch": 0.7855975485188968, "grad_norm": 1.3461696448152913, "learning_rate": 2.3157437442612927e-06, "loss": 0.6434, "step": 7691 }, { "epoch": 0.7856996935648621, "grad_norm": 1.4707618637234265, "learning_rate": 2.3136270334094035e-06, "loss": 0.7655, "step": 7692 }, { "epoch": 0.7858018386108274, "grad_norm": 1.5984912603227295, "learning_rate": 2.311511163836001e-06, "loss": 0.7024, "step": 7693 }, { "epoch": 0.7859039836567926, "grad_norm": 1.4768427932076362, "learning_rate": 2.3093961357726723e-06, "loss": 0.656, "step": 7694 }, { "epoch": 0.7860061287027579, "grad_norm": 1.5417951071721385, "learning_rate": 2.307281949450905e-06, "loss": 0.7571, "step": 7695 }, { "epoch": 0.7861082737487232, "grad_norm": 1.416543992798943, "learning_rate": 2.3051686051020983e-06, "loss": 0.7372, "step": 7696 }, { "epoch": 0.7862104187946884, "grad_norm": 1.5643679749839585, "learning_rate": 2.303056102957557e-06, "loss": 0.6975, "step": 7697 }, { "epoch": 0.7863125638406537, "grad_norm": 1.462697740373663, "learning_rate": 2.3009444432485007e-06, "loss": 0.7783, "step": 7698 }, { "epoch": 0.786414708886619, "grad_norm": 1.5875406803774808, "learning_rate": 2.2988336262060485e-06, "loss": 0.7481, "step": 7699 }, { "epoch": 0.7865168539325843, "grad_norm": 1.555921731479916, "learning_rate": 2.2967236520612322e-06, "loss": 0.7263, "step": 7700 }, { "epoch": 0.7866189989785496, "grad_norm": 1.4674329237488177, "learning_rate": 2.2946145210449864e-06, "loss": 0.5993, "step": 7701 }, { "epoch": 0.7867211440245148, "grad_norm": 1.4915759610494845, "learning_rate": 2.292506233388162e-06, "loss": 0.7766, "step": 7702 }, { "epoch": 0.78682328907048, "grad_norm": 1.415489653003323, "learning_rate": 2.2903987893215086e-06, "loss": 0.6813, "step": 7703 }, { "epoch": 0.7869254341164453, "grad_norm": 1.6817373728547307, "learning_rate": 2.2882921890756906e-06, "loss": 0.7087, "step": 7704 }, { "epoch": 0.7870275791624106, "grad_norm": 1.4575124396260595, "learning_rate": 2.2861864328812744e-06, "loss": 0.6379, "step": 7705 }, { "epoch": 0.7871297242083759, "grad_norm": 1.5891209368904666, "learning_rate": 2.2840815209687374e-06, "loss": 0.7641, "step": 7706 }, { "epoch": 0.7872318692543412, "grad_norm": 1.38507553831139, "learning_rate": 2.28197745356846e-06, "loss": 0.6488, "step": 7707 }, { "epoch": 0.7873340143003065, "grad_norm": 1.6155673762899823, "learning_rate": 2.2798742309107403e-06, "loss": 0.641, "step": 7708 }, { "epoch": 0.7874361593462718, "grad_norm": 1.4748210167259121, "learning_rate": 2.2777718532257697e-06, "loss": 0.7034, "step": 7709 }, { "epoch": 0.7875383043922369, "grad_norm": 1.3663874776661888, "learning_rate": 2.2756703207436627e-06, "loss": 0.6579, "step": 7710 }, { "epoch": 0.7876404494382022, "grad_norm": 1.54911097365704, "learning_rate": 2.27356963369443e-06, "loss": 0.6903, "step": 7711 }, { "epoch": 0.7877425944841675, "grad_norm": 1.601233941436273, "learning_rate": 2.2714697923079887e-06, "loss": 0.6352, "step": 7712 }, { "epoch": 0.7878447395301328, "grad_norm": 1.4373921554482778, "learning_rate": 2.2693707968141763e-06, "loss": 0.6822, "step": 7713 }, { "epoch": 0.7879468845760981, "grad_norm": 1.4424820628253012, "learning_rate": 2.267272647442724e-06, "loss": 0.7219, "step": 7714 }, { "epoch": 0.7880490296220634, "grad_norm": 1.533168956869607, "learning_rate": 2.265175344423276e-06, "loss": 0.7286, "step": 7715 }, { "epoch": 0.7881511746680286, "grad_norm": 1.41928161159938, "learning_rate": 2.2630788879853815e-06, "loss": 0.6958, "step": 7716 }, { "epoch": 0.7882533197139938, "grad_norm": 1.5728201144796565, "learning_rate": 2.2609832783585018e-06, "loss": 0.6634, "step": 7717 }, { "epoch": 0.7883554647599591, "grad_norm": 1.6185838848821745, "learning_rate": 2.2588885157720053e-06, "loss": 0.6313, "step": 7718 }, { "epoch": 0.7884576098059244, "grad_norm": 1.445847426059235, "learning_rate": 2.2567946004551612e-06, "loss": 0.7044, "step": 7719 }, { "epoch": 0.7885597548518897, "grad_norm": 1.5788010115964295, "learning_rate": 2.254701532637151e-06, "loss": 0.7247, "step": 7720 }, { "epoch": 0.788661899897855, "grad_norm": 1.5785515159173298, "learning_rate": 2.2526093125470627e-06, "loss": 0.6895, "step": 7721 }, { "epoch": 0.7887640449438202, "grad_norm": 1.5414168102281451, "learning_rate": 2.2505179404138876e-06, "loss": 0.7424, "step": 7722 }, { "epoch": 0.7888661899897855, "grad_norm": 1.4583423290212223, "learning_rate": 2.2484274164665333e-06, "loss": 0.6, "step": 7723 }, { "epoch": 0.7889683350357508, "grad_norm": 1.502839995420565, "learning_rate": 2.246337740933806e-06, "loss": 0.717, "step": 7724 }, { "epoch": 0.789070480081716, "grad_norm": 1.494109657510985, "learning_rate": 2.244248914044421e-06, "loss": 0.6619, "step": 7725 }, { "epoch": 0.7891726251276813, "grad_norm": 1.4023564978251515, "learning_rate": 2.2421609360270047e-06, "loss": 0.6693, "step": 7726 }, { "epoch": 0.7892747701736466, "grad_norm": 1.3483217304961572, "learning_rate": 2.2400738071100845e-06, "loss": 0.5749, "step": 7727 }, { "epoch": 0.7893769152196118, "grad_norm": 1.5477708024937964, "learning_rate": 2.237987527522102e-06, "loss": 0.786, "step": 7728 }, { "epoch": 0.7894790602655771, "grad_norm": 1.4838746471082362, "learning_rate": 2.2359020974913993e-06, "loss": 0.6476, "step": 7729 }, { "epoch": 0.7895812053115424, "grad_norm": 1.6475066396148115, "learning_rate": 2.2338175172462283e-06, "loss": 0.7322, "step": 7730 }, { "epoch": 0.7896833503575077, "grad_norm": 1.4992696030548056, "learning_rate": 2.2317337870147447e-06, "loss": 0.7326, "step": 7731 }, { "epoch": 0.789785495403473, "grad_norm": 1.5950771049805939, "learning_rate": 2.2296509070250204e-06, "loss": 0.72, "step": 7732 }, { "epoch": 0.7898876404494382, "grad_norm": 1.5274755871763535, "learning_rate": 2.2275688775050207e-06, "loss": 0.8128, "step": 7733 }, { "epoch": 0.7899897854954034, "grad_norm": 2.3044514493104735, "learning_rate": 2.2254876986826325e-06, "loss": 0.6297, "step": 7734 }, { "epoch": 0.7900919305413687, "grad_norm": 1.458553851396111, "learning_rate": 2.2234073707856396e-06, "loss": 0.693, "step": 7735 }, { "epoch": 0.790194075587334, "grad_norm": 1.4160446000964098, "learning_rate": 2.2213278940417324e-06, "loss": 0.6243, "step": 7736 }, { "epoch": 0.7902962206332993, "grad_norm": 1.5466232503962603, "learning_rate": 2.2192492686785118e-06, "loss": 0.7368, "step": 7737 }, { "epoch": 0.7903983656792646, "grad_norm": 1.6364154664122268, "learning_rate": 2.217171494923488e-06, "loss": 0.7579, "step": 7738 }, { "epoch": 0.7905005107252299, "grad_norm": 1.429565326953612, "learning_rate": 2.215094573004072e-06, "loss": 0.7286, "step": 7739 }, { "epoch": 0.7906026557711952, "grad_norm": 1.5316915083205112, "learning_rate": 2.2130185031475846e-06, "loss": 0.7297, "step": 7740 }, { "epoch": 0.7907048008171603, "grad_norm": 1.4328271417827034, "learning_rate": 2.2109432855812506e-06, "loss": 0.6855, "step": 7741 }, { "epoch": 0.7908069458631256, "grad_norm": 1.5359671614090045, "learning_rate": 2.2088689205322065e-06, "loss": 0.6877, "step": 7742 }, { "epoch": 0.7909090909090909, "grad_norm": 1.3718872049803184, "learning_rate": 2.2067954082274957e-06, "loss": 0.6588, "step": 7743 }, { "epoch": 0.7910112359550562, "grad_norm": 1.254225192578895, "learning_rate": 2.2047227488940612e-06, "loss": 0.6267, "step": 7744 }, { "epoch": 0.7911133810010215, "grad_norm": 1.4011968923465954, "learning_rate": 2.2026509427587605e-06, "loss": 0.6158, "step": 7745 }, { "epoch": 0.7912155260469868, "grad_norm": 1.6006798977682952, "learning_rate": 2.200579990048347e-06, "loss": 0.7223, "step": 7746 }, { "epoch": 0.791317671092952, "grad_norm": 1.5850799455521105, "learning_rate": 2.1985098909894966e-06, "loss": 0.543, "step": 7747 }, { "epoch": 0.7914198161389172, "grad_norm": 1.5306118364914878, "learning_rate": 2.196440645808778e-06, "loss": 0.7339, "step": 7748 }, { "epoch": 0.7915219611848825, "grad_norm": 1.4610803300551756, "learning_rate": 2.194372254732671e-06, "loss": 0.7584, "step": 7749 }, { "epoch": 0.7916241062308478, "grad_norm": 1.4929412606662422, "learning_rate": 2.1923047179875657e-06, "loss": 0.6773, "step": 7750 }, { "epoch": 0.7917262512768131, "grad_norm": 1.4306215611670807, "learning_rate": 2.190238035799751e-06, "loss": 0.671, "step": 7751 }, { "epoch": 0.7918283963227783, "grad_norm": 1.5143401090958584, "learning_rate": 2.1881722083954315e-06, "loss": 0.7536, "step": 7752 }, { "epoch": 0.7919305413687436, "grad_norm": 1.3999228204357042, "learning_rate": 2.186107236000712e-06, "loss": 0.7278, "step": 7753 }, { "epoch": 0.7920326864147089, "grad_norm": 1.7467814304123583, "learning_rate": 2.1840431188416023e-06, "loss": 0.6505, "step": 7754 }, { "epoch": 0.7921348314606742, "grad_norm": 1.4953040765986458, "learning_rate": 2.181979857144024e-06, "loss": 0.6531, "step": 7755 }, { "epoch": 0.7922369765066394, "grad_norm": 1.7642953030621489, "learning_rate": 2.1799174511337986e-06, "loss": 0.7327, "step": 7756 }, { "epoch": 0.7923391215526047, "grad_norm": 1.542536512103279, "learning_rate": 2.177855901036661e-06, "loss": 0.6371, "step": 7757 }, { "epoch": 0.79244126659857, "grad_norm": 1.4940264897239741, "learning_rate": 2.1757952070782507e-06, "loss": 0.6538, "step": 7758 }, { "epoch": 0.7925434116445352, "grad_norm": 1.5707196521044482, "learning_rate": 2.17373536948411e-06, "loss": 0.7362, "step": 7759 }, { "epoch": 0.7926455566905005, "grad_norm": 1.616765095131558, "learning_rate": 2.17167638847969e-06, "loss": 0.7563, "step": 7760 }, { "epoch": 0.7927477017364658, "grad_norm": 1.4381821085408717, "learning_rate": 2.169618264290344e-06, "loss": 0.6801, "step": 7761 }, { "epoch": 0.7928498467824311, "grad_norm": 1.4715761686181472, "learning_rate": 2.1675609971413402e-06, "loss": 0.6975, "step": 7762 }, { "epoch": 0.7929519918283964, "grad_norm": 1.5236353731762902, "learning_rate": 2.1655045872578475e-06, "loss": 0.7424, "step": 7763 }, { "epoch": 0.7930541368743615, "grad_norm": 1.5206858183983176, "learning_rate": 2.1634490348649372e-06, "loss": 0.7655, "step": 7764 }, { "epoch": 0.7931562819203268, "grad_norm": 1.4346918407738651, "learning_rate": 2.1613943401875924e-06, "loss": 0.7352, "step": 7765 }, { "epoch": 0.7932584269662921, "grad_norm": 1.4833452816491084, "learning_rate": 2.1593405034506998e-06, "loss": 0.6923, "step": 7766 }, { "epoch": 0.7933605720122574, "grad_norm": 1.420371761572305, "learning_rate": 2.157287524879058e-06, "loss": 0.5678, "step": 7767 }, { "epoch": 0.7934627170582227, "grad_norm": 1.5075347096621476, "learning_rate": 2.1552354046973646e-06, "loss": 0.6416, "step": 7768 }, { "epoch": 0.793564862104188, "grad_norm": 1.5575647830266741, "learning_rate": 2.1531841431302234e-06, "loss": 0.7177, "step": 7769 }, { "epoch": 0.7936670071501533, "grad_norm": 1.5433175771119019, "learning_rate": 2.151133740402148e-06, "loss": 0.7175, "step": 7770 }, { "epoch": 0.7937691521961185, "grad_norm": 1.7005725423204792, "learning_rate": 2.1490841967375532e-06, "loss": 0.607, "step": 7771 }, { "epoch": 0.7938712972420837, "grad_norm": 1.5178772110503094, "learning_rate": 2.147035512360768e-06, "loss": 0.7173, "step": 7772 }, { "epoch": 0.793973442288049, "grad_norm": 1.4680008683515775, "learning_rate": 2.1449876874960163e-06, "loss": 0.7326, "step": 7773 }, { "epoch": 0.7940755873340143, "grad_norm": 1.495083306898762, "learning_rate": 2.1429407223674403e-06, "loss": 0.5941, "step": 7774 }, { "epoch": 0.7941777323799796, "grad_norm": 1.4657531382249784, "learning_rate": 2.1408946171990785e-06, "loss": 0.6601, "step": 7775 }, { "epoch": 0.7942798774259449, "grad_norm": 1.433108704608721, "learning_rate": 2.1388493722148763e-06, "loss": 0.629, "step": 7776 }, { "epoch": 0.7943820224719101, "grad_norm": 1.435341016400755, "learning_rate": 2.136804987638691e-06, "loss": 0.6752, "step": 7777 }, { "epoch": 0.7944841675178754, "grad_norm": 1.4858876831898125, "learning_rate": 2.1347614636942815e-06, "loss": 0.6933, "step": 7778 }, { "epoch": 0.7945863125638406, "grad_norm": 1.5384439538433965, "learning_rate": 2.13271880060531e-06, "loss": 0.6353, "step": 7779 }, { "epoch": 0.7946884576098059, "grad_norm": 1.4775748546862888, "learning_rate": 2.130676998595347e-06, "loss": 0.6364, "step": 7780 }, { "epoch": 0.7947906026557712, "grad_norm": 1.4768966528031424, "learning_rate": 2.1286360578878693e-06, "loss": 0.7569, "step": 7781 }, { "epoch": 0.7948927477017365, "grad_norm": 1.5743931378204703, "learning_rate": 2.126595978706265e-06, "loss": 0.6567, "step": 7782 }, { "epoch": 0.7949948927477017, "grad_norm": 1.473910644840203, "learning_rate": 2.1245567612738162e-06, "loss": 0.6736, "step": 7783 }, { "epoch": 0.795097037793667, "grad_norm": 1.804966063752203, "learning_rate": 2.1225184058137193e-06, "loss": 0.7526, "step": 7784 }, { "epoch": 0.7951991828396323, "grad_norm": 1.4447521422290606, "learning_rate": 2.120480912549069e-06, "loss": 0.7293, "step": 7785 }, { "epoch": 0.7953013278855976, "grad_norm": 1.4685435216149456, "learning_rate": 2.118444281702876e-06, "loss": 0.8328, "step": 7786 }, { "epoch": 0.7954034729315628, "grad_norm": 1.674530418337665, "learning_rate": 2.1164085134980495e-06, "loss": 0.7127, "step": 7787 }, { "epoch": 0.7955056179775281, "grad_norm": 1.2504057617445306, "learning_rate": 2.114373608157404e-06, "loss": 0.5616, "step": 7788 }, { "epoch": 0.7956077630234933, "grad_norm": 1.5912585076784915, "learning_rate": 2.1123395659036596e-06, "loss": 0.7491, "step": 7789 }, { "epoch": 0.7957099080694586, "grad_norm": 1.4060460282399452, "learning_rate": 2.1103063869594486e-06, "loss": 0.667, "step": 7790 }, { "epoch": 0.7958120531154239, "grad_norm": 1.4262370083762665, "learning_rate": 2.108274071547297e-06, "loss": 0.6282, "step": 7791 }, { "epoch": 0.7959141981613892, "grad_norm": 1.4336262958184738, "learning_rate": 2.1062426198896514e-06, "loss": 0.7297, "step": 7792 }, { "epoch": 0.7960163432073545, "grad_norm": 1.6402344100111523, "learning_rate": 2.10421203220885e-06, "loss": 0.7123, "step": 7793 }, { "epoch": 0.7961184882533198, "grad_norm": 1.6616797050760206, "learning_rate": 2.1021823087271432e-06, "loss": 0.7789, "step": 7794 }, { "epoch": 0.7962206332992849, "grad_norm": 1.4345402458261807, "learning_rate": 2.100153449666682e-06, "loss": 0.6388, "step": 7795 }, { "epoch": 0.7963227783452502, "grad_norm": 1.468227603833657, "learning_rate": 2.0981254552495334e-06, "loss": 0.6402, "step": 7796 }, { "epoch": 0.7964249233912155, "grad_norm": 1.5579346912602685, "learning_rate": 2.0960983256976565e-06, "loss": 0.737, "step": 7797 }, { "epoch": 0.7965270684371808, "grad_norm": 3.482558951292091, "learning_rate": 2.0940720612329258e-06, "loss": 0.7463, "step": 7798 }, { "epoch": 0.7966292134831461, "grad_norm": 1.4661638893927695, "learning_rate": 2.0920466620771174e-06, "loss": 0.611, "step": 7799 }, { "epoch": 0.7967313585291114, "grad_norm": 1.5498746242493828, "learning_rate": 2.0900221284519074e-06, "loss": 0.7438, "step": 7800 }, { "epoch": 0.7968335035750767, "grad_norm": 1.4277591910811147, "learning_rate": 2.0879984605788882e-06, "loss": 0.5957, "step": 7801 }, { "epoch": 0.7969356486210418, "grad_norm": 1.390686777835806, "learning_rate": 2.085975658679551e-06, "loss": 0.6177, "step": 7802 }, { "epoch": 0.7970377936670071, "grad_norm": 1.5217939386383543, "learning_rate": 2.0839537229752893e-06, "loss": 0.6949, "step": 7803 }, { "epoch": 0.7971399387129724, "grad_norm": 1.5078474293340662, "learning_rate": 2.081932653687405e-06, "loss": 0.7715, "step": 7804 }, { "epoch": 0.7972420837589377, "grad_norm": 1.5915791364209657, "learning_rate": 2.079912451037107e-06, "loss": 0.6397, "step": 7805 }, { "epoch": 0.797344228804903, "grad_norm": 1.5338049938419043, "learning_rate": 2.077893115245512e-06, "loss": 0.7746, "step": 7806 }, { "epoch": 0.7974463738508683, "grad_norm": 1.4118878727361974, "learning_rate": 2.0758746465336333e-06, "loss": 0.712, "step": 7807 }, { "epoch": 0.7975485188968335, "grad_norm": 1.598702725140203, "learning_rate": 2.073857045122395e-06, "loss": 0.7753, "step": 7808 }, { "epoch": 0.7976506639427988, "grad_norm": 1.5440358595469432, "learning_rate": 2.0718403112326224e-06, "loss": 0.6232, "step": 7809 }, { "epoch": 0.797752808988764, "grad_norm": 1.4412255520040955, "learning_rate": 2.069824445085048e-06, "loss": 0.6509, "step": 7810 }, { "epoch": 0.7978549540347293, "grad_norm": 1.5233929953072618, "learning_rate": 2.0678094469003152e-06, "loss": 0.7029, "step": 7811 }, { "epoch": 0.7979570990806946, "grad_norm": 1.5344864200997315, "learning_rate": 2.065795316898962e-06, "loss": 0.71, "step": 7812 }, { "epoch": 0.7980592441266599, "grad_norm": 1.4710654671240766, "learning_rate": 2.0637820553014385e-06, "loss": 0.7155, "step": 7813 }, { "epoch": 0.7981613891726251, "grad_norm": 1.4652225766492732, "learning_rate": 2.0617696623280937e-06, "loss": 0.7055, "step": 7814 }, { "epoch": 0.7982635342185904, "grad_norm": 1.489376405220086, "learning_rate": 2.059758138199187e-06, "loss": 0.6514, "step": 7815 }, { "epoch": 0.7983656792645557, "grad_norm": 1.519029822602198, "learning_rate": 2.0577474831348864e-06, "loss": 0.7517, "step": 7816 }, { "epoch": 0.798467824310521, "grad_norm": 1.6589740526821566, "learning_rate": 2.0557376973552544e-06, "loss": 0.6488, "step": 7817 }, { "epoch": 0.7985699693564862, "grad_norm": 1.4441806248944669, "learning_rate": 2.053728781080264e-06, "loss": 0.7254, "step": 7818 }, { "epoch": 0.7986721144024514, "grad_norm": 1.4006448883095295, "learning_rate": 2.0517207345297897e-06, "loss": 0.6432, "step": 7819 }, { "epoch": 0.7987742594484167, "grad_norm": 1.7425232741713046, "learning_rate": 2.0497135579236195e-06, "loss": 0.7133, "step": 7820 }, { "epoch": 0.798876404494382, "grad_norm": 1.5223818585166318, "learning_rate": 2.0477072514814354e-06, "loss": 0.691, "step": 7821 }, { "epoch": 0.7989785495403473, "grad_norm": 1.566802337374586, "learning_rate": 2.045701815422829e-06, "loss": 0.724, "step": 7822 }, { "epoch": 0.7990806945863126, "grad_norm": 1.429739979681164, "learning_rate": 2.043697249967301e-06, "loss": 0.636, "step": 7823 }, { "epoch": 0.7991828396322779, "grad_norm": 1.5745168034946238, "learning_rate": 2.041693555334249e-06, "loss": 0.6891, "step": 7824 }, { "epoch": 0.7992849846782432, "grad_norm": 1.3969928437003056, "learning_rate": 2.039690731742976e-06, "loss": 0.6785, "step": 7825 }, { "epoch": 0.7993871297242083, "grad_norm": 1.395234069145252, "learning_rate": 2.0376887794126986e-06, "loss": 0.6319, "step": 7826 }, { "epoch": 0.7994892747701736, "grad_norm": 1.4144523298206955, "learning_rate": 2.0356876985625285e-06, "loss": 0.5293, "step": 7827 }, { "epoch": 0.7995914198161389, "grad_norm": 1.5673781311751296, "learning_rate": 2.0336874894114856e-06, "loss": 0.6844, "step": 7828 }, { "epoch": 0.7996935648621042, "grad_norm": 1.4040287387446788, "learning_rate": 2.0316881521784916e-06, "loss": 0.7407, "step": 7829 }, { "epoch": 0.7997957099080695, "grad_norm": 1.468170969782854, "learning_rate": 2.0296896870823767e-06, "loss": 0.6657, "step": 7830 }, { "epoch": 0.7998978549540348, "grad_norm": 1.4338864919497423, "learning_rate": 2.0276920943418777e-06, "loss": 0.7278, "step": 7831 }, { "epoch": 0.8, "grad_norm": 1.4287010507776465, "learning_rate": 2.02569537417563e-06, "loss": 0.7813, "step": 7832 }, { "epoch": 0.8001021450459652, "grad_norm": 1.479377825599003, "learning_rate": 2.0236995268021753e-06, "loss": 0.6252, "step": 7833 }, { "epoch": 0.8002042900919305, "grad_norm": 1.3965300952698099, "learning_rate": 2.021704552439959e-06, "loss": 0.6533, "step": 7834 }, { "epoch": 0.8003064351378958, "grad_norm": 1.5008063754573753, "learning_rate": 2.0197104513073364e-06, "loss": 0.8283, "step": 7835 }, { "epoch": 0.8004085801838611, "grad_norm": 1.5756820422433289, "learning_rate": 2.017717223622561e-06, "loss": 0.7619, "step": 7836 }, { "epoch": 0.8005107252298264, "grad_norm": 1.3299366697271333, "learning_rate": 2.0157248696037913e-06, "loss": 0.6385, "step": 7837 }, { "epoch": 0.8006128702757916, "grad_norm": 1.4951029011046026, "learning_rate": 2.0137333894690913e-06, "loss": 0.6253, "step": 7838 }, { "epoch": 0.8007150153217569, "grad_norm": 1.407405901496055, "learning_rate": 2.011742783436432e-06, "loss": 0.8059, "step": 7839 }, { "epoch": 0.8008171603677222, "grad_norm": 1.5685550241561312, "learning_rate": 2.0097530517236887e-06, "loss": 0.7203, "step": 7840 }, { "epoch": 0.8009193054136874, "grad_norm": 1.5488988160344208, "learning_rate": 2.007764194548636e-06, "loss": 0.5405, "step": 7841 }, { "epoch": 0.8010214504596527, "grad_norm": 1.6215162095236308, "learning_rate": 2.0057762121289557e-06, "loss": 0.6855, "step": 7842 }, { "epoch": 0.801123595505618, "grad_norm": 1.476000901025003, "learning_rate": 2.0037891046822343e-06, "loss": 0.741, "step": 7843 }, { "epoch": 0.8012257405515832, "grad_norm": 1.3818520490469952, "learning_rate": 2.0018028724259588e-06, "loss": 0.6146, "step": 7844 }, { "epoch": 0.8013278855975485, "grad_norm": 1.5172993527121303, "learning_rate": 1.999817515577529e-06, "loss": 0.6863, "step": 7845 }, { "epoch": 0.8014300306435138, "grad_norm": 1.4830161140553875, "learning_rate": 1.9978330343542384e-06, "loss": 0.6809, "step": 7846 }, { "epoch": 0.8015321756894791, "grad_norm": 1.3914812293102228, "learning_rate": 1.9958494289732957e-06, "loss": 0.7178, "step": 7847 }, { "epoch": 0.8016343207354444, "grad_norm": 1.3899691250074713, "learning_rate": 1.993866699651803e-06, "loss": 0.6144, "step": 7848 }, { "epoch": 0.8017364657814096, "grad_norm": 1.507668945810921, "learning_rate": 1.991884846606771e-06, "loss": 0.788, "step": 7849 }, { "epoch": 0.8018386108273748, "grad_norm": 1.5818428599766068, "learning_rate": 1.9899038700551178e-06, "loss": 0.7728, "step": 7850 }, { "epoch": 0.8019407558733401, "grad_norm": 1.623485864049958, "learning_rate": 1.987923770213662e-06, "loss": 0.7417, "step": 7851 }, { "epoch": 0.8020429009193054, "grad_norm": 1.4559210447437414, "learning_rate": 1.9859445472991257e-06, "loss": 0.6484, "step": 7852 }, { "epoch": 0.8021450459652707, "grad_norm": 1.3207792022236151, "learning_rate": 1.983966201528135e-06, "loss": 0.5143, "step": 7853 }, { "epoch": 0.802247191011236, "grad_norm": 1.5882542768427277, "learning_rate": 1.9819887331172204e-06, "loss": 0.6981, "step": 7854 }, { "epoch": 0.8023493360572013, "grad_norm": 1.4830620488288764, "learning_rate": 1.9800121422828233e-06, "loss": 0.6929, "step": 7855 }, { "epoch": 0.8024514811031664, "grad_norm": 1.412441450711497, "learning_rate": 1.978036429241279e-06, "loss": 0.6024, "step": 7856 }, { "epoch": 0.8025536261491317, "grad_norm": 1.4299424048926375, "learning_rate": 1.9760615942088303e-06, "loss": 0.6576, "step": 7857 }, { "epoch": 0.802655771195097, "grad_norm": 1.2771392906672054, "learning_rate": 1.9740876374016218e-06, "loss": 0.6277, "step": 7858 }, { "epoch": 0.8027579162410623, "grad_norm": 1.5305675571129098, "learning_rate": 1.972114559035708e-06, "loss": 0.6761, "step": 7859 }, { "epoch": 0.8028600612870276, "grad_norm": 1.5722089861990158, "learning_rate": 1.970142359327044e-06, "loss": 0.8413, "step": 7860 }, { "epoch": 0.8029622063329929, "grad_norm": 1.4898777718418432, "learning_rate": 1.968171038491485e-06, "loss": 0.6995, "step": 7861 }, { "epoch": 0.8030643513789582, "grad_norm": 1.658860581799003, "learning_rate": 1.966200596744794e-06, "loss": 0.7807, "step": 7862 }, { "epoch": 0.8031664964249234, "grad_norm": 1.5518478295504126, "learning_rate": 1.9642310343026405e-06, "loss": 0.6348, "step": 7863 }, { "epoch": 0.8032686414708886, "grad_norm": 1.535489079308936, "learning_rate": 1.9622623513805894e-06, "loss": 0.6936, "step": 7864 }, { "epoch": 0.8033707865168539, "grad_norm": 1.3781665758697053, "learning_rate": 1.9602945481941194e-06, "loss": 0.6291, "step": 7865 }, { "epoch": 0.8034729315628192, "grad_norm": 1.4477381102309401, "learning_rate": 1.958327624958606e-06, "loss": 0.7067, "step": 7866 }, { "epoch": 0.8035750766087845, "grad_norm": 1.3350438482889797, "learning_rate": 1.956361581889329e-06, "loss": 0.654, "step": 7867 }, { "epoch": 0.8036772216547498, "grad_norm": 1.4554463691795991, "learning_rate": 1.9543964192014707e-06, "loss": 0.6267, "step": 7868 }, { "epoch": 0.803779366700715, "grad_norm": 1.3856215847927158, "learning_rate": 1.952432137110125e-06, "loss": 0.622, "step": 7869 }, { "epoch": 0.8038815117466803, "grad_norm": 1.5511593394798093, "learning_rate": 1.95046873583028e-06, "loss": 0.748, "step": 7870 }, { "epoch": 0.8039836567926456, "grad_norm": 1.4244741164085335, "learning_rate": 1.9485062155768344e-06, "loss": 0.6314, "step": 7871 }, { "epoch": 0.8040858018386108, "grad_norm": 1.406015533786862, "learning_rate": 1.946544576564585e-06, "loss": 0.5779, "step": 7872 }, { "epoch": 0.8041879468845761, "grad_norm": 1.3767782878573327, "learning_rate": 1.9445838190082334e-06, "loss": 0.6728, "step": 7873 }, { "epoch": 0.8042900919305414, "grad_norm": 1.4741064952387142, "learning_rate": 1.942623943122388e-06, "loss": 0.673, "step": 7874 }, { "epoch": 0.8043922369765066, "grad_norm": 1.314622080331586, "learning_rate": 1.940664949121559e-06, "loss": 0.6857, "step": 7875 }, { "epoch": 0.8044943820224719, "grad_norm": 1.7046901476973353, "learning_rate": 1.938706837220159e-06, "loss": 0.6651, "step": 7876 }, { "epoch": 0.8045965270684372, "grad_norm": 1.4262506013747607, "learning_rate": 1.9367496076325033e-06, "loss": 0.6365, "step": 7877 }, { "epoch": 0.8046986721144025, "grad_norm": 1.4257702533989989, "learning_rate": 1.934793260572809e-06, "loss": 0.5918, "step": 7878 }, { "epoch": 0.8048008171603678, "grad_norm": 1.7041732961066027, "learning_rate": 1.932837796255205e-06, "loss": 0.6626, "step": 7879 }, { "epoch": 0.804902962206333, "grad_norm": 1.383811858906262, "learning_rate": 1.9308832148937175e-06, "loss": 0.6692, "step": 7880 }, { "epoch": 0.8050051072522982, "grad_norm": 1.369167082064326, "learning_rate": 1.928929516702276e-06, "loss": 0.6775, "step": 7881 }, { "epoch": 0.8051072522982635, "grad_norm": 1.6522479135479788, "learning_rate": 1.926976701894713e-06, "loss": 0.6464, "step": 7882 }, { "epoch": 0.8052093973442288, "grad_norm": 1.533123067540322, "learning_rate": 1.9250247706847635e-06, "loss": 0.675, "step": 7883 }, { "epoch": 0.8053115423901941, "grad_norm": 1.5250416963307527, "learning_rate": 1.9230737232860718e-06, "loss": 0.7414, "step": 7884 }, { "epoch": 0.8054136874361594, "grad_norm": 1.5228670946620295, "learning_rate": 1.92112355991218e-06, "loss": 0.7282, "step": 7885 }, { "epoch": 0.8055158324821247, "grad_norm": 1.5578649434320615, "learning_rate": 1.9191742807765323e-06, "loss": 0.7192, "step": 7886 }, { "epoch": 0.8056179775280898, "grad_norm": 1.5396840309612243, "learning_rate": 1.917225886092483e-06, "loss": 0.745, "step": 7887 }, { "epoch": 0.8057201225740551, "grad_norm": 1.475875451024162, "learning_rate": 1.9152783760732785e-06, "loss": 0.6995, "step": 7888 }, { "epoch": 0.8058222676200204, "grad_norm": 1.5239632145627844, "learning_rate": 1.9133317509320837e-06, "loss": 0.7923, "step": 7889 }, { "epoch": 0.8059244126659857, "grad_norm": 1.765683970603823, "learning_rate": 1.9113860108819513e-06, "loss": 0.6844, "step": 7890 }, { "epoch": 0.806026557711951, "grad_norm": 1.4589596888832825, "learning_rate": 1.9094411561358485e-06, "loss": 0.7314, "step": 7891 }, { "epoch": 0.8061287027579163, "grad_norm": 1.5959397508765965, "learning_rate": 1.9074971869066337e-06, "loss": 0.6925, "step": 7892 }, { "epoch": 0.8062308478038815, "grad_norm": 1.3503961303506125, "learning_rate": 1.9055541034070835e-06, "loss": 0.6406, "step": 7893 }, { "epoch": 0.8063329928498468, "grad_norm": 1.44041817746977, "learning_rate": 1.9036119058498637e-06, "loss": 0.6599, "step": 7894 }, { "epoch": 0.806435137895812, "grad_norm": 1.4683504265177987, "learning_rate": 1.9016705944475544e-06, "loss": 0.6885, "step": 7895 }, { "epoch": 0.8065372829417773, "grad_norm": 1.4871009334409362, "learning_rate": 1.8997301694126303e-06, "loss": 0.6982, "step": 7896 }, { "epoch": 0.8066394279877426, "grad_norm": 1.3450239755714446, "learning_rate": 1.8977906309574733e-06, "loss": 0.6026, "step": 7897 }, { "epoch": 0.8067415730337079, "grad_norm": 1.4546195268425013, "learning_rate": 1.8958519792943631e-06, "loss": 0.634, "step": 7898 }, { "epoch": 0.8068437180796731, "grad_norm": 1.5034998113985705, "learning_rate": 1.8939142146354927e-06, "loss": 0.6906, "step": 7899 }, { "epoch": 0.8069458631256384, "grad_norm": 1.4837782354910563, "learning_rate": 1.8919773371929485e-06, "loss": 0.6728, "step": 7900 }, { "epoch": 0.8070480081716037, "grad_norm": 1.348778695904617, "learning_rate": 1.8900413471787238e-06, "loss": 0.7286, "step": 7901 }, { "epoch": 0.807150153217569, "grad_norm": 1.4728710949084605, "learning_rate": 1.8881062448047105e-06, "loss": 0.6788, "step": 7902 }, { "epoch": 0.8072522982635342, "grad_norm": 1.672686682683323, "learning_rate": 1.8861720302827091e-06, "loss": 0.641, "step": 7903 }, { "epoch": 0.8073544433094995, "grad_norm": 1.5156966434767025, "learning_rate": 1.8842387038244248e-06, "loss": 0.7404, "step": 7904 }, { "epoch": 0.8074565883554647, "grad_norm": 1.4814875906895417, "learning_rate": 1.882306265641457e-06, "loss": 0.6202, "step": 7905 }, { "epoch": 0.80755873340143, "grad_norm": 1.545100705588449, "learning_rate": 1.8803747159453134e-06, "loss": 0.72, "step": 7906 }, { "epoch": 0.8076608784473953, "grad_norm": 1.553555430444274, "learning_rate": 1.8784440549474004e-06, "loss": 0.6529, "step": 7907 }, { "epoch": 0.8077630234933606, "grad_norm": 1.4648502374514578, "learning_rate": 1.8765142828590355e-06, "loss": 0.6535, "step": 7908 }, { "epoch": 0.8078651685393259, "grad_norm": 1.5225389019818745, "learning_rate": 1.874585399891431e-06, "loss": 0.6364, "step": 7909 }, { "epoch": 0.8079673135852911, "grad_norm": 1.3958729195569635, "learning_rate": 1.8726574062557012e-06, "loss": 0.6797, "step": 7910 }, { "epoch": 0.8080694586312563, "grad_norm": 1.2868002028588237, "learning_rate": 1.870730302162872e-06, "loss": 0.6603, "step": 7911 }, { "epoch": 0.8081716036772216, "grad_norm": 1.5641896140628415, "learning_rate": 1.86880408782386e-06, "loss": 0.7456, "step": 7912 }, { "epoch": 0.8082737487231869, "grad_norm": 1.5537834301057654, "learning_rate": 1.8668787634494977e-06, "loss": 0.6884, "step": 7913 }, { "epoch": 0.8083758937691522, "grad_norm": 1.8912484030472259, "learning_rate": 1.8649543292505091e-06, "loss": 0.7589, "step": 7914 }, { "epoch": 0.8084780388151175, "grad_norm": 1.4358158549150455, "learning_rate": 1.863030785437525e-06, "loss": 0.7381, "step": 7915 }, { "epoch": 0.8085801838610828, "grad_norm": 1.5004073353923482, "learning_rate": 1.861108132221079e-06, "loss": 0.6894, "step": 7916 }, { "epoch": 0.808682328907048, "grad_norm": 1.5386572871918545, "learning_rate": 1.859186369811603e-06, "loss": 0.6716, "step": 7917 }, { "epoch": 0.8087844739530132, "grad_norm": 1.45145770741655, "learning_rate": 1.8572654984194395e-06, "loss": 0.6968, "step": 7918 }, { "epoch": 0.8088866189989785, "grad_norm": 1.5539913209580558, "learning_rate": 1.8553455182548296e-06, "loss": 0.6104, "step": 7919 }, { "epoch": 0.8089887640449438, "grad_norm": 1.3410486680365905, "learning_rate": 1.8534264295279147e-06, "loss": 0.6389, "step": 7920 }, { "epoch": 0.8090909090909091, "grad_norm": 1.2980081262901544, "learning_rate": 1.851508232448741e-06, "loss": 0.6786, "step": 7921 }, { "epoch": 0.8091930541368744, "grad_norm": 1.5485028065506046, "learning_rate": 1.849590927227254e-06, "loss": 0.6797, "step": 7922 }, { "epoch": 0.8092951991828397, "grad_norm": 1.4579903362829445, "learning_rate": 1.8476745140733077e-06, "loss": 0.6679, "step": 7923 }, { "epoch": 0.8093973442288049, "grad_norm": 1.431220913720757, "learning_rate": 1.8457589931966524e-06, "loss": 0.6918, "step": 7924 }, { "epoch": 0.8094994892747702, "grad_norm": 1.4342173299359275, "learning_rate": 1.8438443648069438e-06, "loss": 0.6399, "step": 7925 }, { "epoch": 0.8096016343207354, "grad_norm": 1.5700348943244125, "learning_rate": 1.8419306291137374e-06, "loss": 0.7745, "step": 7926 }, { "epoch": 0.8097037793667007, "grad_norm": 1.733029895278668, "learning_rate": 1.8400177863264934e-06, "loss": 0.7366, "step": 7927 }, { "epoch": 0.809805924412666, "grad_norm": 1.648336832311013, "learning_rate": 1.8381058366545778e-06, "loss": 0.715, "step": 7928 }, { "epoch": 0.8099080694586313, "grad_norm": 1.4661661728975928, "learning_rate": 1.836194780307251e-06, "loss": 0.7344, "step": 7929 }, { "epoch": 0.8100102145045965, "grad_norm": 1.7310958176408033, "learning_rate": 1.8342846174936812e-06, "loss": 0.7719, "step": 7930 }, { "epoch": 0.8101123595505618, "grad_norm": 1.5382464051656972, "learning_rate": 1.8323753484229345e-06, "loss": 0.7742, "step": 7931 }, { "epoch": 0.8102145045965271, "grad_norm": 1.5591911258345108, "learning_rate": 1.8304669733039815e-06, "loss": 0.6613, "step": 7932 }, { "epoch": 0.8103166496424924, "grad_norm": 1.4359448935836214, "learning_rate": 1.8285594923456985e-06, "loss": 0.6323, "step": 7933 }, { "epoch": 0.8104187946884576, "grad_norm": 1.5158351227814322, "learning_rate": 1.826652905756855e-06, "loss": 0.6783, "step": 7934 }, { "epoch": 0.8105209397344229, "grad_norm": 1.426404921507915, "learning_rate": 1.8247472137461354e-06, "loss": 0.675, "step": 7935 }, { "epoch": 0.8106230847803881, "grad_norm": 1.462823116399566, "learning_rate": 1.8228424165221148e-06, "loss": 0.7208, "step": 7936 }, { "epoch": 0.8107252298263534, "grad_norm": 1.6508443109566289, "learning_rate": 1.8209385142932722e-06, "loss": 0.7414, "step": 7937 }, { "epoch": 0.8108273748723187, "grad_norm": 1.470469224493644, "learning_rate": 1.8190355072679955e-06, "loss": 0.618, "step": 7938 }, { "epoch": 0.810929519918284, "grad_norm": 1.5006714561173804, "learning_rate": 1.817133395654569e-06, "loss": 0.7605, "step": 7939 }, { "epoch": 0.8110316649642493, "grad_norm": 1.4202852696306167, "learning_rate": 1.8152321796611795e-06, "loss": 0.7237, "step": 7940 }, { "epoch": 0.8111338100102145, "grad_norm": 1.4820735550254496, "learning_rate": 1.8133318594959127e-06, "loss": 0.6766, "step": 7941 }, { "epoch": 0.8112359550561797, "grad_norm": 1.443814795037324, "learning_rate": 1.8114324353667633e-06, "loss": 0.6379, "step": 7942 }, { "epoch": 0.811338100102145, "grad_norm": 1.5418020654124236, "learning_rate": 1.8095339074816264e-06, "loss": 0.6774, "step": 7943 }, { "epoch": 0.8114402451481103, "grad_norm": 1.5022210483368588, "learning_rate": 1.8076362760482956e-06, "loss": 0.6925, "step": 7944 }, { "epoch": 0.8115423901940756, "grad_norm": 1.34420492719122, "learning_rate": 1.8057395412744672e-06, "loss": 0.7079, "step": 7945 }, { "epoch": 0.8116445352400409, "grad_norm": 1.484745545525041, "learning_rate": 1.8038437033677381e-06, "loss": 0.6916, "step": 7946 }, { "epoch": 0.8117466802860062, "grad_norm": 1.524257887649823, "learning_rate": 1.8019487625356125e-06, "loss": 0.7285, "step": 7947 }, { "epoch": 0.8118488253319714, "grad_norm": 1.4591004516769996, "learning_rate": 1.8000547189854921e-06, "loss": 0.6556, "step": 7948 }, { "epoch": 0.8119509703779366, "grad_norm": 1.4466771995277452, "learning_rate": 1.7981615729246804e-06, "loss": 0.7212, "step": 7949 }, { "epoch": 0.8120531154239019, "grad_norm": 1.5152753876360099, "learning_rate": 1.7962693245603813e-06, "loss": 0.6432, "step": 7950 }, { "epoch": 0.8121552604698672, "grad_norm": 1.4223410322362229, "learning_rate": 1.7943779740997081e-06, "loss": 0.6375, "step": 7951 }, { "epoch": 0.8122574055158325, "grad_norm": 1.4052379961453423, "learning_rate": 1.7924875217496628e-06, "loss": 0.6403, "step": 7952 }, { "epoch": 0.8123595505617978, "grad_norm": 1.4867767262620033, "learning_rate": 1.7905979677171648e-06, "loss": 0.65, "step": 7953 }, { "epoch": 0.812461695607763, "grad_norm": 1.4383118836889621, "learning_rate": 1.7887093122090238e-06, "loss": 0.6201, "step": 7954 }, { "epoch": 0.8125638406537283, "grad_norm": 1.3333697799039885, "learning_rate": 1.7868215554319524e-06, "loss": 0.678, "step": 7955 }, { "epoch": 0.8126659856996936, "grad_norm": 1.4250342459219705, "learning_rate": 1.7849346975925663e-06, "loss": 0.649, "step": 7956 }, { "epoch": 0.8127681307456588, "grad_norm": 1.6062665012308408, "learning_rate": 1.7830487388973873e-06, "loss": 0.7567, "step": 7957 }, { "epoch": 0.8128702757916241, "grad_norm": 1.4070349300697151, "learning_rate": 1.781163679552831e-06, "loss": 0.6264, "step": 7958 }, { "epoch": 0.8129724208375894, "grad_norm": 1.5890624660871187, "learning_rate": 1.7792795197652212e-06, "loss": 0.753, "step": 7959 }, { "epoch": 0.8130745658835546, "grad_norm": 1.4959304316405335, "learning_rate": 1.77739625974078e-06, "loss": 0.7473, "step": 7960 }, { "epoch": 0.8131767109295199, "grad_norm": 1.426543241922162, "learning_rate": 1.775513899685628e-06, "loss": 0.6915, "step": 7961 }, { "epoch": 0.8132788559754852, "grad_norm": 1.5997129597596234, "learning_rate": 1.7736324398057959e-06, "loss": 0.7321, "step": 7962 }, { "epoch": 0.8133810010214505, "grad_norm": 1.368155297263089, "learning_rate": 1.7717518803072087e-06, "loss": 0.6691, "step": 7963 }, { "epoch": 0.8134831460674158, "grad_norm": 1.4458812247504478, "learning_rate": 1.7698722213956943e-06, "loss": 0.6933, "step": 7964 }, { "epoch": 0.813585291113381, "grad_norm": 1.4594710639090436, "learning_rate": 1.7679934632769812e-06, "loss": 0.7478, "step": 7965 }, { "epoch": 0.8136874361593462, "grad_norm": 1.5303612311546528, "learning_rate": 1.7661156061566985e-06, "loss": 0.7417, "step": 7966 }, { "epoch": 0.8137895812053115, "grad_norm": 1.4609730584893192, "learning_rate": 1.7642386502403875e-06, "loss": 0.7137, "step": 7967 }, { "epoch": 0.8138917262512768, "grad_norm": 1.6602929442118024, "learning_rate": 1.7623625957334767e-06, "loss": 0.7384, "step": 7968 }, { "epoch": 0.8139938712972421, "grad_norm": 1.5775511671755067, "learning_rate": 1.7604874428413022e-06, "loss": 0.7189, "step": 7969 }, { "epoch": 0.8140960163432074, "grad_norm": 1.6028287070045681, "learning_rate": 1.7586131917690996e-06, "loss": 0.7424, "step": 7970 }, { "epoch": 0.8141981613891727, "grad_norm": 1.4257714808375646, "learning_rate": 1.7567398427220062e-06, "loss": 0.7238, "step": 7971 }, { "epoch": 0.8143003064351378, "grad_norm": 1.428431088335843, "learning_rate": 1.7548673959050654e-06, "loss": 0.6986, "step": 7972 }, { "epoch": 0.8144024514811031, "grad_norm": 1.412206966343264, "learning_rate": 1.7529958515232149e-06, "loss": 0.6455, "step": 7973 }, { "epoch": 0.8145045965270684, "grad_norm": 1.3634755109936474, "learning_rate": 1.7511252097812948e-06, "loss": 0.6548, "step": 7974 }, { "epoch": 0.8146067415730337, "grad_norm": 1.6398673407439053, "learning_rate": 1.7492554708840514e-06, "loss": 0.7374, "step": 7975 }, { "epoch": 0.814708886618999, "grad_norm": 1.4343336537508875, "learning_rate": 1.7473866350361256e-06, "loss": 0.6379, "step": 7976 }, { "epoch": 0.8148110316649643, "grad_norm": 1.3744558549715546, "learning_rate": 1.7455187024420662e-06, "loss": 0.673, "step": 7977 }, { "epoch": 0.8149131767109296, "grad_norm": 1.5028474784407904, "learning_rate": 1.743651673306318e-06, "loss": 0.7562, "step": 7978 }, { "epoch": 0.8150153217568948, "grad_norm": 1.4273108881268894, "learning_rate": 1.741785547833229e-06, "loss": 0.7721, "step": 7979 }, { "epoch": 0.81511746680286, "grad_norm": 1.4828473332250276, "learning_rate": 1.7399203262270447e-06, "loss": 0.6029, "step": 7980 }, { "epoch": 0.8152196118488253, "grad_norm": 1.586794582438247, "learning_rate": 1.7380560086919196e-06, "loss": 0.7166, "step": 7981 }, { "epoch": 0.8153217568947906, "grad_norm": 1.3648965712591843, "learning_rate": 1.7361925954319003e-06, "loss": 0.618, "step": 7982 }, { "epoch": 0.8154239019407559, "grad_norm": 1.5343647647186425, "learning_rate": 1.7343300866509426e-06, "loss": 0.6, "step": 7983 }, { "epoch": 0.8155260469867212, "grad_norm": 1.4454439852683993, "learning_rate": 1.7324684825528982e-06, "loss": 0.6846, "step": 7984 }, { "epoch": 0.8156281920326864, "grad_norm": 1.589671921118025, "learning_rate": 1.730607783341518e-06, "loss": 0.7539, "step": 7985 }, { "epoch": 0.8157303370786517, "grad_norm": 1.3656294592986191, "learning_rate": 1.7287479892204572e-06, "loss": 0.5753, "step": 7986 }, { "epoch": 0.815832482124617, "grad_norm": 1.4227661619110714, "learning_rate": 1.7268891003932753e-06, "loss": 0.6567, "step": 7987 }, { "epoch": 0.8159346271705822, "grad_norm": 1.8317641131320619, "learning_rate": 1.725031117063427e-06, "loss": 0.8709, "step": 7988 }, { "epoch": 0.8160367722165475, "grad_norm": 1.5219043725969479, "learning_rate": 1.723174039434269e-06, "loss": 0.8022, "step": 7989 }, { "epoch": 0.8161389172625128, "grad_norm": 1.5051377183444266, "learning_rate": 1.721317867709057e-06, "loss": 0.6678, "step": 7990 }, { "epoch": 0.816241062308478, "grad_norm": 1.591724202947102, "learning_rate": 1.7194626020909532e-06, "loss": 0.694, "step": 7991 }, { "epoch": 0.8163432073544433, "grad_norm": 1.369392228141755, "learning_rate": 1.7176082427830198e-06, "loss": 0.7399, "step": 7992 }, { "epoch": 0.8164453524004086, "grad_norm": 1.4050256967612185, "learning_rate": 1.7157547899882155e-06, "loss": 0.586, "step": 7993 }, { "epoch": 0.8165474974463739, "grad_norm": 1.5161216006757114, "learning_rate": 1.713902243909402e-06, "loss": 0.6881, "step": 7994 }, { "epoch": 0.8166496424923391, "grad_norm": 1.568385117457169, "learning_rate": 1.712050604749339e-06, "loss": 0.69, "step": 7995 }, { "epoch": 0.8167517875383044, "grad_norm": 1.583993008667526, "learning_rate": 1.7101998727106938e-06, "loss": 0.6624, "step": 7996 }, { "epoch": 0.8168539325842696, "grad_norm": 1.4016847403491144, "learning_rate": 1.7083500479960292e-06, "loss": 0.658, "step": 7997 }, { "epoch": 0.8169560776302349, "grad_norm": 1.5733702102729963, "learning_rate": 1.7065011308078062e-06, "loss": 0.6432, "step": 7998 }, { "epoch": 0.8170582226762002, "grad_norm": 1.5326362048040314, "learning_rate": 1.7046531213483953e-06, "loss": 0.7044, "step": 7999 }, { "epoch": 0.8171603677221655, "grad_norm": 1.3846577361600956, "learning_rate": 1.7028060198200568e-06, "loss": 0.6446, "step": 8000 }, { "epoch": 0.8172625127681308, "grad_norm": 1.5068825977677331, "learning_rate": 1.7009598264249626e-06, "loss": 0.7935, "step": 8001 }, { "epoch": 0.8173646578140961, "grad_norm": 1.3413378608236013, "learning_rate": 1.6991145413651778e-06, "loss": 0.6713, "step": 8002 }, { "epoch": 0.8174668028600612, "grad_norm": 1.4468841209413241, "learning_rate": 1.6972701648426693e-06, "loss": 0.6289, "step": 8003 }, { "epoch": 0.8175689479060265, "grad_norm": 1.4014668987421202, "learning_rate": 1.695426697059306e-06, "loss": 0.7557, "step": 8004 }, { "epoch": 0.8176710929519918, "grad_norm": 1.5265277883019224, "learning_rate": 1.6935841382168527e-06, "loss": 0.7633, "step": 8005 }, { "epoch": 0.8177732379979571, "grad_norm": 1.4407500763661103, "learning_rate": 1.6917424885169832e-06, "loss": 0.6888, "step": 8006 }, { "epoch": 0.8178753830439224, "grad_norm": 1.3939769220695781, "learning_rate": 1.6899017481612678e-06, "loss": 0.7354, "step": 8007 }, { "epoch": 0.8179775280898877, "grad_norm": 1.6343383738154178, "learning_rate": 1.6880619173511748e-06, "loss": 0.8354, "step": 8008 }, { "epoch": 0.818079673135853, "grad_norm": 1.5620359390454892, "learning_rate": 1.6862229962880762e-06, "loss": 0.6092, "step": 8009 }, { "epoch": 0.8181818181818182, "grad_norm": 1.587466748414701, "learning_rate": 1.6843849851732385e-06, "loss": 0.7723, "step": 8010 }, { "epoch": 0.8182839632277834, "grad_norm": 1.381452032875976, "learning_rate": 1.682547884207839e-06, "loss": 0.6425, "step": 8011 }, { "epoch": 0.8183861082737487, "grad_norm": 1.700186596589639, "learning_rate": 1.6807116935929491e-06, "loss": 0.7651, "step": 8012 }, { "epoch": 0.818488253319714, "grad_norm": 1.5748054651105816, "learning_rate": 1.6788764135295377e-06, "loss": 0.6691, "step": 8013 }, { "epoch": 0.8185903983656793, "grad_norm": 1.5176831660640564, "learning_rate": 1.6770420442184765e-06, "loss": 0.6788, "step": 8014 }, { "epoch": 0.8186925434116445, "grad_norm": 1.54067703369349, "learning_rate": 1.6752085858605416e-06, "loss": 0.6585, "step": 8015 }, { "epoch": 0.8187946884576098, "grad_norm": 1.3861119891961795, "learning_rate": 1.673376038656408e-06, "loss": 0.589, "step": 8016 }, { "epoch": 0.8188968335035751, "grad_norm": 1.2826141347021762, "learning_rate": 1.6715444028066463e-06, "loss": 0.6013, "step": 8017 }, { "epoch": 0.8189989785495404, "grad_norm": 1.6498472966622095, "learning_rate": 1.6697136785117307e-06, "loss": 0.7429, "step": 8018 }, { "epoch": 0.8191011235955056, "grad_norm": 1.4555512125186842, "learning_rate": 1.6678838659720353e-06, "loss": 0.6784, "step": 8019 }, { "epoch": 0.8192032686414709, "grad_norm": 1.550441510101218, "learning_rate": 1.666054965387831e-06, "loss": 0.7041, "step": 8020 }, { "epoch": 0.8193054136874361, "grad_norm": 1.4590390443304897, "learning_rate": 1.6642269769592978e-06, "loss": 0.6652, "step": 8021 }, { "epoch": 0.8194075587334014, "grad_norm": 1.805244553495291, "learning_rate": 1.6623999008865055e-06, "loss": 0.7808, "step": 8022 }, { "epoch": 0.8195097037793667, "grad_norm": 1.4407135472508246, "learning_rate": 1.6605737373694332e-06, "loss": 0.7324, "step": 8023 }, { "epoch": 0.819611848825332, "grad_norm": 1.6032452874921865, "learning_rate": 1.6587484866079528e-06, "loss": 0.7143, "step": 8024 }, { "epoch": 0.8197139938712973, "grad_norm": 1.4601865129573774, "learning_rate": 1.6569241488018373e-06, "loss": 0.6749, "step": 8025 }, { "epoch": 0.8198161389172625, "grad_norm": 1.4190417545020593, "learning_rate": 1.6551007241507667e-06, "loss": 0.7113, "step": 8026 }, { "epoch": 0.8199182839632277, "grad_norm": 1.5552960256500907, "learning_rate": 1.6532782128543135e-06, "loss": 0.5919, "step": 8027 }, { "epoch": 0.820020429009193, "grad_norm": 1.4225992003830799, "learning_rate": 1.6514566151119526e-06, "loss": 0.7428, "step": 8028 }, { "epoch": 0.8201225740551583, "grad_norm": 1.4629221048171699, "learning_rate": 1.649635931123056e-06, "loss": 0.7327, "step": 8029 }, { "epoch": 0.8202247191011236, "grad_norm": 1.6162658350818524, "learning_rate": 1.6478161610869026e-06, "loss": 0.7273, "step": 8030 }, { "epoch": 0.8203268641470889, "grad_norm": 1.6195539289532868, "learning_rate": 1.645997305202668e-06, "loss": 0.7651, "step": 8031 }, { "epoch": 0.8204290091930542, "grad_norm": 1.5338828765134138, "learning_rate": 1.6441793636694259e-06, "loss": 0.605, "step": 8032 }, { "epoch": 0.8205311542390195, "grad_norm": 1.4530702857021864, "learning_rate": 1.6423623366861519e-06, "loss": 0.6014, "step": 8033 }, { "epoch": 0.8206332992849846, "grad_norm": 1.677579327574725, "learning_rate": 1.6405462244517167e-06, "loss": 0.6106, "step": 8034 }, { "epoch": 0.8207354443309499, "grad_norm": 1.5250263496829697, "learning_rate": 1.6387310271649005e-06, "loss": 0.6463, "step": 8035 }, { "epoch": 0.8208375893769152, "grad_norm": 1.6666564207397836, "learning_rate": 1.6369167450243761e-06, "loss": 0.8187, "step": 8036 }, { "epoch": 0.8209397344228805, "grad_norm": 1.533884955634993, "learning_rate": 1.6351033782287163e-06, "loss": 0.71, "step": 8037 }, { "epoch": 0.8210418794688458, "grad_norm": 1.536911361800758, "learning_rate": 1.6332909269763952e-06, "loss": 0.7066, "step": 8038 }, { "epoch": 0.821144024514811, "grad_norm": 1.3162474454341764, "learning_rate": 1.631479391465789e-06, "loss": 0.65, "step": 8039 }, { "epoch": 0.8212461695607763, "grad_norm": 1.4383768944053172, "learning_rate": 1.6296687718951677e-06, "loss": 0.7751, "step": 8040 }, { "epoch": 0.8213483146067416, "grad_norm": 1.4374625784179178, "learning_rate": 1.6278590684627094e-06, "loss": 0.5932, "step": 8041 }, { "epoch": 0.8214504596527068, "grad_norm": 1.4662672539692108, "learning_rate": 1.626050281366486e-06, "loss": 0.6711, "step": 8042 }, { "epoch": 0.8215526046986721, "grad_norm": 1.4336486510731772, "learning_rate": 1.6242424108044697e-06, "loss": 0.6622, "step": 8043 }, { "epoch": 0.8216547497446374, "grad_norm": 1.723983258685338, "learning_rate": 1.6224354569745294e-06, "loss": 0.752, "step": 8044 }, { "epoch": 0.8217568947906027, "grad_norm": 1.4421408122236568, "learning_rate": 1.6206294200744444e-06, "loss": 0.7447, "step": 8045 }, { "epoch": 0.8218590398365679, "grad_norm": 1.3984127387809175, "learning_rate": 1.61882430030188e-06, "loss": 0.5685, "step": 8046 }, { "epoch": 0.8219611848825332, "grad_norm": 1.475654081888823, "learning_rate": 1.6170200978544137e-06, "loss": 0.6479, "step": 8047 }, { "epoch": 0.8220633299284985, "grad_norm": 1.4680537357339023, "learning_rate": 1.6152168129295143e-06, "loss": 0.8026, "step": 8048 }, { "epoch": 0.8221654749744637, "grad_norm": 1.518214472174791, "learning_rate": 1.6134144457245493e-06, "loss": 0.6477, "step": 8049 }, { "epoch": 0.822267620020429, "grad_norm": 1.4166217488112398, "learning_rate": 1.6116129964367932e-06, "loss": 0.6504, "step": 8050 }, { "epoch": 0.8223697650663943, "grad_norm": 1.5368412228323969, "learning_rate": 1.609812465263415e-06, "loss": 0.6949, "step": 8051 }, { "epoch": 0.8224719101123595, "grad_norm": 1.4154965142983382, "learning_rate": 1.6080128524014825e-06, "loss": 0.62, "step": 8052 }, { "epoch": 0.8225740551583248, "grad_norm": 1.4214435690364278, "learning_rate": 1.6062141580479663e-06, "loss": 0.7587, "step": 8053 }, { "epoch": 0.8226762002042901, "grad_norm": 1.443646930356721, "learning_rate": 1.6044163823997284e-06, "loss": 0.6913, "step": 8054 }, { "epoch": 0.8227783452502554, "grad_norm": 1.5119383988814488, "learning_rate": 1.602619525653546e-06, "loss": 0.7308, "step": 8055 }, { "epoch": 0.8228804902962207, "grad_norm": 1.400605649521305, "learning_rate": 1.600823588006082e-06, "loss": 0.7269, "step": 8056 }, { "epoch": 0.8229826353421859, "grad_norm": 1.3815686815134187, "learning_rate": 1.5990285696539031e-06, "loss": 0.6954, "step": 8057 }, { "epoch": 0.8230847803881511, "grad_norm": 1.5197407934105915, "learning_rate": 1.597234470793475e-06, "loss": 0.9029, "step": 8058 }, { "epoch": 0.8231869254341164, "grad_norm": 1.417217389050144, "learning_rate": 1.5954412916211604e-06, "loss": 0.6137, "step": 8059 }, { "epoch": 0.8232890704800817, "grad_norm": 1.5327541937041673, "learning_rate": 1.5936490323332288e-06, "loss": 0.7317, "step": 8060 }, { "epoch": 0.823391215526047, "grad_norm": 1.56224927334137, "learning_rate": 1.5918576931258423e-06, "loss": 0.8155, "step": 8061 }, { "epoch": 0.8234933605720123, "grad_norm": 1.3413555421539292, "learning_rate": 1.5900672741950606e-06, "loss": 0.6917, "step": 8062 }, { "epoch": 0.8235955056179776, "grad_norm": 1.6104158371848574, "learning_rate": 1.5882777757368529e-06, "loss": 0.711, "step": 8063 }, { "epoch": 0.8236976506639428, "grad_norm": 1.5099048543031726, "learning_rate": 1.5864891979470731e-06, "loss": 0.6882, "step": 8064 }, { "epoch": 0.823799795709908, "grad_norm": 1.4412289091663713, "learning_rate": 1.5847015410214905e-06, "loss": 0.5785, "step": 8065 }, { "epoch": 0.8239019407558733, "grad_norm": 1.4527119126630987, "learning_rate": 1.5829148051557598e-06, "loss": 0.6907, "step": 8066 }, { "epoch": 0.8240040858018386, "grad_norm": 1.4480130380562979, "learning_rate": 1.581128990545443e-06, "loss": 0.6955, "step": 8067 }, { "epoch": 0.8241062308478039, "grad_norm": 1.5497488509131556, "learning_rate": 1.5793440973859942e-06, "loss": 0.7859, "step": 8068 }, { "epoch": 0.8242083758937692, "grad_norm": 1.523893576356234, "learning_rate": 1.5775601258727768e-06, "loss": 0.7549, "step": 8069 }, { "epoch": 0.8243105209397344, "grad_norm": 1.5303109961997998, "learning_rate": 1.5757770762010438e-06, "loss": 0.6069, "step": 8070 }, { "epoch": 0.8244126659856997, "grad_norm": 1.5061910204237077, "learning_rate": 1.5739949485659533e-06, "loss": 0.6478, "step": 8071 }, { "epoch": 0.824514811031665, "grad_norm": 1.4162350538099635, "learning_rate": 1.5722137431625606e-06, "loss": 0.6661, "step": 8072 }, { "epoch": 0.8246169560776302, "grad_norm": 1.505348775066783, "learning_rate": 1.5704334601858196e-06, "loss": 0.6522, "step": 8073 }, { "epoch": 0.8247191011235955, "grad_norm": 1.6089387026742075, "learning_rate": 1.5686540998305789e-06, "loss": 0.7021, "step": 8074 }, { "epoch": 0.8248212461695608, "grad_norm": 1.5175669889076127, "learning_rate": 1.566875662291597e-06, "loss": 0.6856, "step": 8075 }, { "epoch": 0.824923391215526, "grad_norm": 1.5352235054873198, "learning_rate": 1.5650981477635219e-06, "loss": 0.6367, "step": 8076 }, { "epoch": 0.8250255362614913, "grad_norm": 1.37583573139196, "learning_rate": 1.5633215564409054e-06, "loss": 0.571, "step": 8077 }, { "epoch": 0.8251276813074566, "grad_norm": 1.6331157663418598, "learning_rate": 1.5615458885181923e-06, "loss": 0.6756, "step": 8078 }, { "epoch": 0.8252298263534219, "grad_norm": 1.5080650415302426, "learning_rate": 1.5597711441897335e-06, "loss": 0.6381, "step": 8079 }, { "epoch": 0.8253319713993871, "grad_norm": 1.6040951449330934, "learning_rate": 1.5579973236497781e-06, "loss": 0.7291, "step": 8080 }, { "epoch": 0.8254341164453524, "grad_norm": 1.4701275500989344, "learning_rate": 1.5562244270924708e-06, "loss": 0.6621, "step": 8081 }, { "epoch": 0.8255362614913176, "grad_norm": 1.5604656056658814, "learning_rate": 1.5544524547118545e-06, "loss": 0.649, "step": 8082 }, { "epoch": 0.8256384065372829, "grad_norm": 1.4907088569773226, "learning_rate": 1.5526814067018715e-06, "loss": 0.6688, "step": 8083 }, { "epoch": 0.8257405515832482, "grad_norm": 1.501757899836109, "learning_rate": 1.550911283256369e-06, "loss": 0.7252, "step": 8084 }, { "epoch": 0.8258426966292135, "grad_norm": 1.6728978496297755, "learning_rate": 1.549142084569084e-06, "loss": 0.7446, "step": 8085 }, { "epoch": 0.8259448416751788, "grad_norm": 1.4452981367333813, "learning_rate": 1.5473738108336567e-06, "loss": 0.7121, "step": 8086 }, { "epoch": 0.8260469867211441, "grad_norm": 1.429457661254563, "learning_rate": 1.5456064622436296e-06, "loss": 0.6063, "step": 8087 }, { "epoch": 0.8261491317671092, "grad_norm": 1.4512304880192102, "learning_rate": 1.5438400389924335e-06, "loss": 0.6447, "step": 8088 }, { "epoch": 0.8262512768130745, "grad_norm": 1.558338100389552, "learning_rate": 1.5420745412734128e-06, "loss": 0.7563, "step": 8089 }, { "epoch": 0.8263534218590398, "grad_norm": 1.5160083871726717, "learning_rate": 1.5403099692797963e-06, "loss": 0.7114, "step": 8090 }, { "epoch": 0.8264555669050051, "grad_norm": 1.492123892181121, "learning_rate": 1.5385463232047204e-06, "loss": 0.7375, "step": 8091 }, { "epoch": 0.8265577119509704, "grad_norm": 1.6557691409532018, "learning_rate": 1.5367836032412153e-06, "loss": 0.6145, "step": 8092 }, { "epoch": 0.8266598569969357, "grad_norm": 1.5878017494240333, "learning_rate": 1.5350218095822112e-06, "loss": 0.695, "step": 8093 }, { "epoch": 0.826762002042901, "grad_norm": 1.387193819094124, "learning_rate": 1.5332609424205391e-06, "loss": 0.5701, "step": 8094 }, { "epoch": 0.8268641470888662, "grad_norm": 1.6277375090634056, "learning_rate": 1.5315010019489295e-06, "loss": 0.6686, "step": 8095 }, { "epoch": 0.8269662921348314, "grad_norm": 1.6622526157173243, "learning_rate": 1.5297419883600062e-06, "loss": 0.7579, "step": 8096 }, { "epoch": 0.8270684371807967, "grad_norm": 1.6550526240861725, "learning_rate": 1.527983901846295e-06, "loss": 0.8124, "step": 8097 }, { "epoch": 0.827170582226762, "grad_norm": 1.4494175785068921, "learning_rate": 1.5262267426002164e-06, "loss": 0.6293, "step": 8098 }, { "epoch": 0.8272727272727273, "grad_norm": 1.6822110823372496, "learning_rate": 1.524470510814099e-06, "loss": 0.6645, "step": 8099 }, { "epoch": 0.8273748723186926, "grad_norm": 1.4719520368114836, "learning_rate": 1.5227152066801598e-06, "loss": 0.6681, "step": 8100 }, { "epoch": 0.8274770173646578, "grad_norm": 1.5635445254915965, "learning_rate": 1.5209608303905177e-06, "loss": 0.6939, "step": 8101 }, { "epoch": 0.8275791624106231, "grad_norm": 1.4874623860123868, "learning_rate": 1.519207382137189e-06, "loss": 0.6963, "step": 8102 }, { "epoch": 0.8276813074565884, "grad_norm": 1.495710731359258, "learning_rate": 1.517454862112091e-06, "loss": 0.7601, "step": 8103 }, { "epoch": 0.8277834525025536, "grad_norm": 1.5570642158106203, "learning_rate": 1.5157032705070417e-06, "loss": 0.6905, "step": 8104 }, { "epoch": 0.8278855975485189, "grad_norm": 1.4503127087048633, "learning_rate": 1.5139526075137513e-06, "loss": 0.6903, "step": 8105 }, { "epoch": 0.8279877425944842, "grad_norm": 1.4071783301332017, "learning_rate": 1.5122028733238303e-06, "loss": 0.6499, "step": 8106 }, { "epoch": 0.8280898876404494, "grad_norm": 1.4821741294444453, "learning_rate": 1.5104540681287882e-06, "loss": 0.6229, "step": 8107 }, { "epoch": 0.8281920326864147, "grad_norm": 1.6628379335468808, "learning_rate": 1.5087061921200308e-06, "loss": 0.7799, "step": 8108 }, { "epoch": 0.82829417773238, "grad_norm": 1.423496250272961, "learning_rate": 1.5069592454888704e-06, "loss": 0.6158, "step": 8109 }, { "epoch": 0.8283963227783453, "grad_norm": 1.4417200216897927, "learning_rate": 1.505213228426504e-06, "loss": 0.7513, "step": 8110 }, { "epoch": 0.8284984678243105, "grad_norm": 1.3463908866510086, "learning_rate": 1.5034681411240414e-06, "loss": 0.7002, "step": 8111 }, { "epoch": 0.8286006128702758, "grad_norm": 1.5401210939308432, "learning_rate": 1.5017239837724795e-06, "loss": 0.644, "step": 8112 }, { "epoch": 0.828702757916241, "grad_norm": 1.6268378794525058, "learning_rate": 1.4999807565627167e-06, "loss": 0.7819, "step": 8113 }, { "epoch": 0.8288049029622063, "grad_norm": 1.5591847129074905, "learning_rate": 1.4982384596855537e-06, "loss": 0.6801, "step": 8114 }, { "epoch": 0.8289070480081716, "grad_norm": 1.6447593048461076, "learning_rate": 1.4964970933316836e-06, "loss": 0.6697, "step": 8115 }, { "epoch": 0.8290091930541369, "grad_norm": 1.5248259117396603, "learning_rate": 1.4947566576917016e-06, "loss": 0.6449, "step": 8116 }, { "epoch": 0.8291113381001022, "grad_norm": 1.560802603325432, "learning_rate": 1.4930171529560955e-06, "loss": 0.6817, "step": 8117 }, { "epoch": 0.8292134831460675, "grad_norm": 1.3704862435692438, "learning_rate": 1.4912785793152584e-06, "loss": 0.7968, "step": 8118 }, { "epoch": 0.8293156281920326, "grad_norm": 1.5810451100859217, "learning_rate": 1.4895409369594805e-06, "loss": 0.6009, "step": 8119 }, { "epoch": 0.8294177732379979, "grad_norm": 1.5572763229827489, "learning_rate": 1.487804226078946e-06, "loss": 0.7799, "step": 8120 }, { "epoch": 0.8295199182839632, "grad_norm": 1.5738311685225543, "learning_rate": 1.4860684468637376e-06, "loss": 0.7179, "step": 8121 }, { "epoch": 0.8296220633299285, "grad_norm": 1.542633263928474, "learning_rate": 1.4843335995038365e-06, "loss": 0.6726, "step": 8122 }, { "epoch": 0.8297242083758938, "grad_norm": 1.5234966994007288, "learning_rate": 1.4825996841891265e-06, "loss": 0.6665, "step": 8123 }, { "epoch": 0.8298263534218591, "grad_norm": 1.4979572308512723, "learning_rate": 1.4808667011093847e-06, "loss": 0.6412, "step": 8124 }, { "epoch": 0.8299284984678243, "grad_norm": 1.5224455304666085, "learning_rate": 1.479134650454287e-06, "loss": 0.6978, "step": 8125 }, { "epoch": 0.8300306435137896, "grad_norm": 1.2996654420346645, "learning_rate": 1.4774035324134039e-06, "loss": 0.6551, "step": 8126 }, { "epoch": 0.8301327885597548, "grad_norm": 1.7430308606116491, "learning_rate": 1.475673347176213e-06, "loss": 0.7088, "step": 8127 }, { "epoch": 0.8302349336057201, "grad_norm": 1.4430777783549522, "learning_rate": 1.4739440949320793e-06, "loss": 0.6459, "step": 8128 }, { "epoch": 0.8303370786516854, "grad_norm": 1.5432165007359933, "learning_rate": 1.472215775870275e-06, "loss": 0.769, "step": 8129 }, { "epoch": 0.8304392236976507, "grad_norm": 1.6864037795707907, "learning_rate": 1.4704883901799638e-06, "loss": 0.7402, "step": 8130 }, { "epoch": 0.830541368743616, "grad_norm": 1.3665145567453079, "learning_rate": 1.4687619380502094e-06, "loss": 0.5465, "step": 8131 }, { "epoch": 0.8306435137895812, "grad_norm": 1.5685708375511025, "learning_rate": 1.4670364196699704e-06, "loss": 0.6216, "step": 8132 }, { "epoch": 0.8307456588355465, "grad_norm": 1.5225160586101547, "learning_rate": 1.4653118352281104e-06, "loss": 0.6763, "step": 8133 }, { "epoch": 0.8308478038815117, "grad_norm": 1.5847370921095767, "learning_rate": 1.4635881849133827e-06, "loss": 0.842, "step": 8134 }, { "epoch": 0.830949948927477, "grad_norm": 1.4305351955090095, "learning_rate": 1.461865468914444e-06, "loss": 0.5666, "step": 8135 }, { "epoch": 0.8310520939734423, "grad_norm": 1.5064405743143834, "learning_rate": 1.460143687419847e-06, "loss": 0.7135, "step": 8136 }, { "epoch": 0.8311542390194075, "grad_norm": 1.4600573918276587, "learning_rate": 1.458422840618039e-06, "loss": 0.5929, "step": 8137 }, { "epoch": 0.8312563840653728, "grad_norm": 1.5316332444644378, "learning_rate": 1.4567029286973711e-06, "loss": 0.6954, "step": 8138 }, { "epoch": 0.8313585291113381, "grad_norm": 1.6322975956372432, "learning_rate": 1.4549839518460885e-06, "loss": 0.6733, "step": 8139 }, { "epoch": 0.8314606741573034, "grad_norm": 1.7032293639981881, "learning_rate": 1.4532659102523317e-06, "loss": 0.7648, "step": 8140 }, { "epoch": 0.8315628192032687, "grad_norm": 1.3869356671562079, "learning_rate": 1.4515488041041414e-06, "loss": 0.6266, "step": 8141 }, { "epoch": 0.8316649642492339, "grad_norm": 1.5881164408369794, "learning_rate": 1.4498326335894574e-06, "loss": 0.7423, "step": 8142 }, { "epoch": 0.8317671092951991, "grad_norm": 1.478319655150249, "learning_rate": 1.4481173988961183e-06, "loss": 0.6096, "step": 8143 }, { "epoch": 0.8318692543411644, "grad_norm": 1.4361053561800963, "learning_rate": 1.446403100211855e-06, "loss": 0.6312, "step": 8144 }, { "epoch": 0.8319713993871297, "grad_norm": 1.5280127812931918, "learning_rate": 1.4446897377242986e-06, "loss": 0.6373, "step": 8145 }, { "epoch": 0.832073544433095, "grad_norm": 1.4673911052403497, "learning_rate": 1.4429773116209778e-06, "loss": 0.6123, "step": 8146 }, { "epoch": 0.8321756894790603, "grad_norm": 1.4650189906991498, "learning_rate": 1.441265822089316e-06, "loss": 0.5905, "step": 8147 }, { "epoch": 0.8322778345250256, "grad_norm": 1.4337618372504215, "learning_rate": 1.4395552693166425e-06, "loss": 0.6642, "step": 8148 }, { "epoch": 0.8323799795709909, "grad_norm": 1.5808941433839487, "learning_rate": 1.4378456534901751e-06, "loss": 0.7413, "step": 8149 }, { "epoch": 0.832482124616956, "grad_norm": 1.4358912773241022, "learning_rate": 1.436136974797031e-06, "loss": 0.608, "step": 8150 }, { "epoch": 0.8325842696629213, "grad_norm": 1.4151565268533477, "learning_rate": 1.4344292334242306e-06, "loss": 0.6391, "step": 8151 }, { "epoch": 0.8326864147088866, "grad_norm": 1.6498398781341925, "learning_rate": 1.4327224295586818e-06, "loss": 0.6575, "step": 8152 }, { "epoch": 0.8327885597548519, "grad_norm": 1.391153572791816, "learning_rate": 1.4310165633872008e-06, "loss": 0.6756, "step": 8153 }, { "epoch": 0.8328907048008172, "grad_norm": 1.3981727528788448, "learning_rate": 1.4293116350964931e-06, "loss": 0.6239, "step": 8154 }, { "epoch": 0.8329928498467825, "grad_norm": 1.3891598561867289, "learning_rate": 1.4276076448731646e-06, "loss": 0.6216, "step": 8155 }, { "epoch": 0.8330949948927477, "grad_norm": 1.4565438494515128, "learning_rate": 1.4259045929037152e-06, "loss": 0.7296, "step": 8156 }, { "epoch": 0.833197139938713, "grad_norm": 1.564040574334429, "learning_rate": 1.4242024793745491e-06, "loss": 0.6471, "step": 8157 }, { "epoch": 0.8332992849846782, "grad_norm": 1.5621789293755448, "learning_rate": 1.4225013044719615e-06, "loss": 0.8013, "step": 8158 }, { "epoch": 0.8334014300306435, "grad_norm": 1.6261991901627544, "learning_rate": 1.4208010683821494e-06, "loss": 0.7156, "step": 8159 }, { "epoch": 0.8335035750766088, "grad_norm": 1.581343917332976, "learning_rate": 1.4191017712912036e-06, "loss": 0.6173, "step": 8160 }, { "epoch": 0.8336057201225741, "grad_norm": 1.4694883603552782, "learning_rate": 1.4174034133851122e-06, "loss": 0.6275, "step": 8161 }, { "epoch": 0.8337078651685393, "grad_norm": 1.4265547794511055, "learning_rate": 1.4157059948497608e-06, "loss": 0.7316, "step": 8162 }, { "epoch": 0.8338100102145046, "grad_norm": 1.482973350672315, "learning_rate": 1.4140095158709367e-06, "loss": 0.6361, "step": 8163 }, { "epoch": 0.8339121552604699, "grad_norm": 1.6518780881315658, "learning_rate": 1.4123139766343185e-06, "loss": 0.6771, "step": 8164 }, { "epoch": 0.8340143003064351, "grad_norm": 1.55166635753713, "learning_rate": 1.4106193773254828e-06, "loss": 0.7084, "step": 8165 }, { "epoch": 0.8341164453524004, "grad_norm": 1.4673043891868442, "learning_rate": 1.4089257181299042e-06, "loss": 0.62, "step": 8166 }, { "epoch": 0.8342185903983657, "grad_norm": 1.5887228739942647, "learning_rate": 1.4072329992329559e-06, "loss": 0.79, "step": 8167 }, { "epoch": 0.8343207354443309, "grad_norm": 1.5288142582639677, "learning_rate": 1.4055412208199105e-06, "loss": 0.648, "step": 8168 }, { "epoch": 0.8344228804902962, "grad_norm": 1.584683619803911, "learning_rate": 1.403850383075931e-06, "loss": 0.7647, "step": 8169 }, { "epoch": 0.8345250255362615, "grad_norm": 1.4209703937936162, "learning_rate": 1.4021604861860806e-06, "loss": 0.7439, "step": 8170 }, { "epoch": 0.8346271705822268, "grad_norm": 1.5295796253474756, "learning_rate": 1.4004715303353177e-06, "loss": 0.7212, "step": 8171 }, { "epoch": 0.8347293156281921, "grad_norm": 1.5514537122346788, "learning_rate": 1.398783515708504e-06, "loss": 0.6875, "step": 8172 }, { "epoch": 0.8348314606741573, "grad_norm": 1.4069697206909955, "learning_rate": 1.3970964424903922e-06, "loss": 0.6807, "step": 8173 }, { "epoch": 0.8349336057201225, "grad_norm": 1.4758736999932414, "learning_rate": 1.395410310865629e-06, "loss": 0.6668, "step": 8174 }, { "epoch": 0.8350357507660878, "grad_norm": 1.5208154030527188, "learning_rate": 1.3937251210187707e-06, "loss": 0.7003, "step": 8175 }, { "epoch": 0.8351378958120531, "grad_norm": 1.6157769322914122, "learning_rate": 1.392040873134255e-06, "loss": 0.7098, "step": 8176 }, { "epoch": 0.8352400408580184, "grad_norm": 1.5350439844888428, "learning_rate": 1.3903575673964298e-06, "loss": 0.7603, "step": 8177 }, { "epoch": 0.8353421859039837, "grad_norm": 1.404109471351054, "learning_rate": 1.3886752039895313e-06, "loss": 0.6764, "step": 8178 }, { "epoch": 0.835444330949949, "grad_norm": 1.5309618104151284, "learning_rate": 1.3869937830976953e-06, "loss": 0.6784, "step": 8179 }, { "epoch": 0.8355464759959143, "grad_norm": 1.6452933771467642, "learning_rate": 1.385313304904955e-06, "loss": 0.7598, "step": 8180 }, { "epoch": 0.8356486210418794, "grad_norm": 1.4722873622448365, "learning_rate": 1.3836337695952363e-06, "loss": 0.7737, "step": 8181 }, { "epoch": 0.8357507660878447, "grad_norm": 1.531119532443974, "learning_rate": 1.3819551773523687e-06, "loss": 0.7183, "step": 8182 }, { "epoch": 0.83585291113381, "grad_norm": 1.2898799151019351, "learning_rate": 1.3802775283600777e-06, "loss": 0.5687, "step": 8183 }, { "epoch": 0.8359550561797753, "grad_norm": 1.5324765304861911, "learning_rate": 1.3786008228019787e-06, "loss": 0.6958, "step": 8184 }, { "epoch": 0.8360572012257406, "grad_norm": 1.4132021785931987, "learning_rate": 1.3769250608615915e-06, "loss": 0.6944, "step": 8185 }, { "epoch": 0.8361593462717059, "grad_norm": 1.580361450078761, "learning_rate": 1.3752502427223246e-06, "loss": 0.5853, "step": 8186 }, { "epoch": 0.8362614913176711, "grad_norm": 1.4022249026009843, "learning_rate": 1.373576368567493e-06, "loss": 0.6908, "step": 8187 }, { "epoch": 0.8363636363636363, "grad_norm": 1.5632692823789802, "learning_rate": 1.371903438580302e-06, "loss": 0.6976, "step": 8188 }, { "epoch": 0.8364657814096016, "grad_norm": 1.4338842959147313, "learning_rate": 1.3702314529438532e-06, "loss": 0.703, "step": 8189 }, { "epoch": 0.8365679264555669, "grad_norm": 1.530211078486839, "learning_rate": 1.3685604118411456e-06, "loss": 0.6265, "step": 8190 }, { "epoch": 0.8366700715015322, "grad_norm": 1.519003409824781, "learning_rate": 1.3668903154550772e-06, "loss": 0.6647, "step": 8191 }, { "epoch": 0.8367722165474974, "grad_norm": 1.387155137396174, "learning_rate": 1.3652211639684442e-06, "loss": 0.7177, "step": 8192 }, { "epoch": 0.8368743615934627, "grad_norm": 1.5302453217138512, "learning_rate": 1.3635529575639339e-06, "loss": 0.6194, "step": 8193 }, { "epoch": 0.836976506639428, "grad_norm": 1.6940031791141499, "learning_rate": 1.3618856964241312e-06, "loss": 0.7269, "step": 8194 }, { "epoch": 0.8370786516853933, "grad_norm": 1.7065555506550198, "learning_rate": 1.360219380731519e-06, "loss": 0.7965, "step": 8195 }, { "epoch": 0.8371807967313585, "grad_norm": 1.5474877061726986, "learning_rate": 1.358554010668478e-06, "loss": 0.6894, "step": 8196 }, { "epoch": 0.8372829417773238, "grad_norm": 1.5514085998854454, "learning_rate": 1.3568895864172849e-06, "loss": 0.7555, "step": 8197 }, { "epoch": 0.837385086823289, "grad_norm": 1.5319729118185041, "learning_rate": 1.3552261081601091e-06, "loss": 0.7842, "step": 8198 }, { "epoch": 0.8374872318692543, "grad_norm": 1.4912627377211982, "learning_rate": 1.3535635760790223e-06, "loss": 0.6281, "step": 8199 }, { "epoch": 0.8375893769152196, "grad_norm": 1.4679135886781982, "learning_rate": 1.3519019903559882e-06, "loss": 0.6356, "step": 8200 }, { "epoch": 0.8376915219611849, "grad_norm": 1.521538659267852, "learning_rate": 1.3502413511728673e-06, "loss": 0.647, "step": 8201 }, { "epoch": 0.8377936670071502, "grad_norm": 1.4508042696362593, "learning_rate": 1.3485816587114199e-06, "loss": 0.74, "step": 8202 }, { "epoch": 0.8378958120531155, "grad_norm": 1.6743637746816158, "learning_rate": 1.3469229131533002e-06, "loss": 0.7477, "step": 8203 }, { "epoch": 0.8379979570990806, "grad_norm": 1.4225148950206428, "learning_rate": 1.3452651146800588e-06, "loss": 0.6269, "step": 8204 }, { "epoch": 0.8381001021450459, "grad_norm": 1.6255162083045618, "learning_rate": 1.3436082634731396e-06, "loss": 0.7296, "step": 8205 }, { "epoch": 0.8382022471910112, "grad_norm": 1.7366213878172718, "learning_rate": 1.3419523597138884e-06, "loss": 0.7473, "step": 8206 }, { "epoch": 0.8383043922369765, "grad_norm": 1.6207014259653467, "learning_rate": 1.340297403583548e-06, "loss": 0.6347, "step": 8207 }, { "epoch": 0.8384065372829418, "grad_norm": 1.441445096213815, "learning_rate": 1.3386433952632517e-06, "loss": 0.6865, "step": 8208 }, { "epoch": 0.8385086823289071, "grad_norm": 1.57702135907775, "learning_rate": 1.3369903349340308e-06, "loss": 0.7227, "step": 8209 }, { "epoch": 0.8386108273748724, "grad_norm": 1.5627318645714228, "learning_rate": 1.3353382227768142e-06, "loss": 0.6104, "step": 8210 }, { "epoch": 0.8387129724208376, "grad_norm": 1.467750960370785, "learning_rate": 1.3336870589724282e-06, "loss": 0.7352, "step": 8211 }, { "epoch": 0.8388151174668028, "grad_norm": 1.5306048359005233, "learning_rate": 1.332036843701593e-06, "loss": 0.6549, "step": 8212 }, { "epoch": 0.8389172625127681, "grad_norm": 1.5167162399969156, "learning_rate": 1.3303875771449247e-06, "loss": 0.5144, "step": 8213 }, { "epoch": 0.8390194075587334, "grad_norm": 1.5798163480212422, "learning_rate": 1.3287392594829385e-06, "loss": 0.694, "step": 8214 }, { "epoch": 0.8391215526046987, "grad_norm": 1.460791953716969, "learning_rate": 1.3270918908960406e-06, "loss": 0.7846, "step": 8215 }, { "epoch": 0.839223697650664, "grad_norm": 1.290478280059854, "learning_rate": 1.325445471564538e-06, "loss": 0.5491, "step": 8216 }, { "epoch": 0.8393258426966292, "grad_norm": 1.567184774325552, "learning_rate": 1.3238000016686347e-06, "loss": 0.6461, "step": 8217 }, { "epoch": 0.8394279877425945, "grad_norm": 1.589923232042974, "learning_rate": 1.3221554813884275e-06, "loss": 0.7656, "step": 8218 }, { "epoch": 0.8395301327885597, "grad_norm": 1.6273073151214594, "learning_rate": 1.32051191090391e-06, "loss": 0.663, "step": 8219 }, { "epoch": 0.839632277834525, "grad_norm": 1.6166074245705306, "learning_rate": 1.3188692903949685e-06, "loss": 0.6276, "step": 8220 }, { "epoch": 0.8397344228804903, "grad_norm": 1.541100519766431, "learning_rate": 1.3172276200413948e-06, "loss": 0.773, "step": 8221 }, { "epoch": 0.8398365679264556, "grad_norm": 1.5134877497328956, "learning_rate": 1.3155869000228672e-06, "loss": 0.7796, "step": 8222 }, { "epoch": 0.8399387129724208, "grad_norm": 1.5082955275520782, "learning_rate": 1.3139471305189622e-06, "loss": 0.6456, "step": 8223 }, { "epoch": 0.8400408580183861, "grad_norm": 1.5108386164303054, "learning_rate": 1.3123083117091573e-06, "loss": 0.6655, "step": 8224 }, { "epoch": 0.8401430030643514, "grad_norm": 1.5049696983412515, "learning_rate": 1.3106704437728191e-06, "loss": 0.6539, "step": 8225 }, { "epoch": 0.8402451481103167, "grad_norm": 1.647998124348615, "learning_rate": 1.3090335268892175e-06, "loss": 0.7149, "step": 8226 }, { "epoch": 0.8403472931562819, "grad_norm": 1.4826765492861074, "learning_rate": 1.3073975612375111e-06, "loss": 0.6772, "step": 8227 }, { "epoch": 0.8404494382022472, "grad_norm": 1.4323011394663685, "learning_rate": 1.3057625469967572e-06, "loss": 0.5417, "step": 8228 }, { "epoch": 0.8405515832482124, "grad_norm": 1.6227434337334146, "learning_rate": 1.3041284843459078e-06, "loss": 0.6587, "step": 8229 }, { "epoch": 0.8406537282941777, "grad_norm": 1.5499946650575884, "learning_rate": 1.3024953734638169e-06, "loss": 0.6153, "step": 8230 }, { "epoch": 0.840755873340143, "grad_norm": 1.4370497964387507, "learning_rate": 1.3008632145292244e-06, "loss": 0.675, "step": 8231 }, { "epoch": 0.8408580183861083, "grad_norm": 1.48623802203511, "learning_rate": 1.2992320077207744e-06, "loss": 0.6455, "step": 8232 }, { "epoch": 0.8409601634320736, "grad_norm": 1.4482110630727887, "learning_rate": 1.2976017532170037e-06, "loss": 0.7146, "step": 8233 }, { "epoch": 0.8410623084780389, "grad_norm": 1.4427367974474214, "learning_rate": 1.2959724511963434e-06, "loss": 0.6968, "step": 8234 }, { "epoch": 0.841164453524004, "grad_norm": 1.6512229444236333, "learning_rate": 1.2943441018371195e-06, "loss": 0.7645, "step": 8235 }, { "epoch": 0.8412665985699693, "grad_norm": 1.5576136113974224, "learning_rate": 1.2927167053175603e-06, "loss": 0.7044, "step": 8236 }, { "epoch": 0.8413687436159346, "grad_norm": 1.4597564096245685, "learning_rate": 1.291090261815784e-06, "loss": 0.5747, "step": 8237 }, { "epoch": 0.8414708886618999, "grad_norm": 1.593830716972166, "learning_rate": 1.289464771509804e-06, "loss": 0.6773, "step": 8238 }, { "epoch": 0.8415730337078652, "grad_norm": 1.5360531089034826, "learning_rate": 1.287840234577531e-06, "loss": 0.7113, "step": 8239 }, { "epoch": 0.8416751787538305, "grad_norm": 1.4305447624179328, "learning_rate": 1.2862166511967734e-06, "loss": 0.6585, "step": 8240 }, { "epoch": 0.8417773237997958, "grad_norm": 1.4609142021553054, "learning_rate": 1.2845940215452346e-06, "loss": 0.7431, "step": 8241 }, { "epoch": 0.8418794688457609, "grad_norm": 1.3977124218935364, "learning_rate": 1.2829723458005118e-06, "loss": 0.6428, "step": 8242 }, { "epoch": 0.8419816138917262, "grad_norm": 1.653584238364525, "learning_rate": 1.281351624140097e-06, "loss": 0.7709, "step": 8243 }, { "epoch": 0.8420837589376915, "grad_norm": 1.4678037674884628, "learning_rate": 1.2797318567413787e-06, "loss": 0.6809, "step": 8244 }, { "epoch": 0.8421859039836568, "grad_norm": 1.4854574280238055, "learning_rate": 1.2781130437816436e-06, "loss": 0.6845, "step": 8245 }, { "epoch": 0.8422880490296221, "grad_norm": 1.3998907811007884, "learning_rate": 1.2764951854380714e-06, "loss": 0.6528, "step": 8246 }, { "epoch": 0.8423901940755874, "grad_norm": 1.5150038870704903, "learning_rate": 1.2748782818877358e-06, "loss": 0.6328, "step": 8247 }, { "epoch": 0.8424923391215526, "grad_norm": 1.402649116622681, "learning_rate": 1.273262333307611e-06, "loss": 0.5897, "step": 8248 }, { "epoch": 0.8425944841675179, "grad_norm": 1.2636561222401679, "learning_rate": 1.27164733987456e-06, "loss": 0.6364, "step": 8249 }, { "epoch": 0.8426966292134831, "grad_norm": 1.648985617449909, "learning_rate": 1.2700333017653488e-06, "loss": 0.7184, "step": 8250 }, { "epoch": 0.8427987742594484, "grad_norm": 1.263915978062596, "learning_rate": 1.268420219156633e-06, "loss": 0.5707, "step": 8251 }, { "epoch": 0.8429009193054137, "grad_norm": 1.7063304367168142, "learning_rate": 1.2668080922249658e-06, "loss": 0.7565, "step": 8252 }, { "epoch": 0.843003064351379, "grad_norm": 1.4154782817642595, "learning_rate": 1.2651969211467952e-06, "loss": 0.7702, "step": 8253 }, { "epoch": 0.8431052093973442, "grad_norm": 1.5648961577124925, "learning_rate": 1.263586706098462e-06, "loss": 0.6425, "step": 8254 }, { "epoch": 0.8432073544433095, "grad_norm": 1.48461816336958, "learning_rate": 1.2619774472562097e-06, "loss": 0.6729, "step": 8255 }, { "epoch": 0.8433094994892748, "grad_norm": 1.5535651729256321, "learning_rate": 1.260369144796172e-06, "loss": 0.7646, "step": 8256 }, { "epoch": 0.8434116445352401, "grad_norm": 1.3404449509821161, "learning_rate": 1.258761798894379e-06, "loss": 0.6533, "step": 8257 }, { "epoch": 0.8435137895812053, "grad_norm": 1.6411279324971095, "learning_rate": 1.2571554097267546e-06, "loss": 0.7353, "step": 8258 }, { "epoch": 0.8436159346271705, "grad_norm": 1.5559675332493963, "learning_rate": 1.255549977469116e-06, "loss": 0.7454, "step": 8259 }, { "epoch": 0.8437180796731358, "grad_norm": 1.2796119926933718, "learning_rate": 1.2539455022971858e-06, "loss": 0.62, "step": 8260 }, { "epoch": 0.8438202247191011, "grad_norm": 1.4353597823676087, "learning_rate": 1.2523419843865692e-06, "loss": 0.6335, "step": 8261 }, { "epoch": 0.8439223697650664, "grad_norm": 1.6129497741580998, "learning_rate": 1.2507394239127757e-06, "loss": 0.7342, "step": 8262 }, { "epoch": 0.8440245148110317, "grad_norm": 1.4253424198325702, "learning_rate": 1.2491378210512018e-06, "loss": 0.7932, "step": 8263 }, { "epoch": 0.844126659856997, "grad_norm": 1.4982284523372822, "learning_rate": 1.2475371759771482e-06, "loss": 0.6584, "step": 8264 }, { "epoch": 0.8442288049029623, "grad_norm": 1.5270501704278463, "learning_rate": 1.245937488865807e-06, "loss": 0.634, "step": 8265 }, { "epoch": 0.8443309499489274, "grad_norm": 1.3908848541085905, "learning_rate": 1.244338759892263e-06, "loss": 0.5551, "step": 8266 }, { "epoch": 0.8444330949948927, "grad_norm": 1.4993400275080155, "learning_rate": 1.242740989231499e-06, "loss": 0.6868, "step": 8267 }, { "epoch": 0.844535240040858, "grad_norm": 1.5550121340985836, "learning_rate": 1.241144177058392e-06, "loss": 0.7091, "step": 8268 }, { "epoch": 0.8446373850868233, "grad_norm": 1.5192424878636976, "learning_rate": 1.2395483235477112e-06, "loss": 0.6799, "step": 8269 }, { "epoch": 0.8447395301327886, "grad_norm": 1.5909685747378701, "learning_rate": 1.237953428874129e-06, "loss": 0.7335, "step": 8270 }, { "epoch": 0.8448416751787539, "grad_norm": 1.593661788493861, "learning_rate": 1.2363594932122026e-06, "loss": 0.6705, "step": 8271 }, { "epoch": 0.8449438202247191, "grad_norm": 1.4357876486447376, "learning_rate": 1.2347665167363942e-06, "loss": 0.6598, "step": 8272 }, { "epoch": 0.8450459652706843, "grad_norm": 1.4742916599107387, "learning_rate": 1.2331744996210537e-06, "loss": 0.726, "step": 8273 }, { "epoch": 0.8451481103166496, "grad_norm": 1.462129482419131, "learning_rate": 1.231583442040425e-06, "loss": 0.5845, "step": 8274 }, { "epoch": 0.8452502553626149, "grad_norm": 1.5690825916418867, "learning_rate": 1.2299933441686562e-06, "loss": 0.6925, "step": 8275 }, { "epoch": 0.8453524004085802, "grad_norm": 1.4361393036662764, "learning_rate": 1.228404206179783e-06, "loss": 0.7037, "step": 8276 }, { "epoch": 0.8454545454545455, "grad_norm": 1.555600272565985, "learning_rate": 1.226816028247736e-06, "loss": 0.6705, "step": 8277 }, { "epoch": 0.8455566905005107, "grad_norm": 1.3872687460043418, "learning_rate": 1.2252288105463405e-06, "loss": 0.7308, "step": 8278 }, { "epoch": 0.845658835546476, "grad_norm": 1.642763269340115, "learning_rate": 1.2236425532493213e-06, "loss": 0.6662, "step": 8279 }, { "epoch": 0.8457609805924413, "grad_norm": 1.5024050731333365, "learning_rate": 1.222057256530297e-06, "loss": 0.6739, "step": 8280 }, { "epoch": 0.8458631256384065, "grad_norm": 1.4969847513172527, "learning_rate": 1.2204729205627774e-06, "loss": 0.7276, "step": 8281 }, { "epoch": 0.8459652706843718, "grad_norm": 1.4580292038690332, "learning_rate": 1.2188895455201688e-06, "loss": 0.6367, "step": 8282 }, { "epoch": 0.8460674157303371, "grad_norm": 1.6702878164215913, "learning_rate": 1.2173071315757701e-06, "loss": 0.6753, "step": 8283 }, { "epoch": 0.8461695607763023, "grad_norm": 1.3638612131921646, "learning_rate": 1.2157256789027828e-06, "loss": 0.6379, "step": 8284 }, { "epoch": 0.8462717058222676, "grad_norm": 1.3661290707863352, "learning_rate": 1.214145187674296e-06, "loss": 0.5924, "step": 8285 }, { "epoch": 0.8463738508682329, "grad_norm": 1.4734222442963858, "learning_rate": 1.2125656580632939e-06, "loss": 0.6418, "step": 8286 }, { "epoch": 0.8464759959141982, "grad_norm": 1.4844363405598546, "learning_rate": 1.2109870902426558e-06, "loss": 0.7947, "step": 8287 }, { "epoch": 0.8465781409601635, "grad_norm": 1.4409950843028163, "learning_rate": 1.2094094843851612e-06, "loss": 0.6364, "step": 8288 }, { "epoch": 0.8466802860061287, "grad_norm": 1.5554225116992695, "learning_rate": 1.2078328406634765e-06, "loss": 0.5967, "step": 8289 }, { "epoch": 0.8467824310520939, "grad_norm": 1.519089030800187, "learning_rate": 1.2062571592501692e-06, "loss": 0.7747, "step": 8290 }, { "epoch": 0.8468845760980592, "grad_norm": 1.4302614247190841, "learning_rate": 1.2046824403176983e-06, "loss": 0.6194, "step": 8291 }, { "epoch": 0.8469867211440245, "grad_norm": 1.668285188563771, "learning_rate": 1.2031086840384154e-06, "loss": 0.7204, "step": 8292 }, { "epoch": 0.8470888661899898, "grad_norm": 1.4063476492285198, "learning_rate": 1.2015358905845699e-06, "loss": 0.6491, "step": 8293 }, { "epoch": 0.8471910112359551, "grad_norm": 1.3783506569007256, "learning_rate": 1.1999640601283069e-06, "loss": 0.6473, "step": 8294 }, { "epoch": 0.8472931562819204, "grad_norm": 1.6438735016415327, "learning_rate": 1.1983931928416614e-06, "loss": 0.6941, "step": 8295 }, { "epoch": 0.8473953013278857, "grad_norm": 1.513510309176791, "learning_rate": 1.1968232888965692e-06, "loss": 0.623, "step": 8296 }, { "epoch": 0.8474974463738508, "grad_norm": 1.4043294562513438, "learning_rate": 1.195254348464856e-06, "loss": 0.693, "step": 8297 }, { "epoch": 0.8475995914198161, "grad_norm": 1.382566793179503, "learning_rate": 1.19368637171824e-06, "loss": 0.7131, "step": 8298 }, { "epoch": 0.8477017364657814, "grad_norm": 1.5928810802805253, "learning_rate": 1.192119358828343e-06, "loss": 0.8231, "step": 8299 }, { "epoch": 0.8478038815117467, "grad_norm": 1.422601486539526, "learning_rate": 1.1905533099666732e-06, "loss": 0.6791, "step": 8300 }, { "epoch": 0.847906026557712, "grad_norm": 1.5243141136028528, "learning_rate": 1.1889882253046347e-06, "loss": 0.5951, "step": 8301 }, { "epoch": 0.8480081716036773, "grad_norm": 1.5587905641221254, "learning_rate": 1.1874241050135283e-06, "loss": 0.6197, "step": 8302 }, { "epoch": 0.8481103166496425, "grad_norm": 1.6424808663493455, "learning_rate": 1.1858609492645435e-06, "loss": 0.7111, "step": 8303 }, { "epoch": 0.8482124616956077, "grad_norm": 1.6258600924568685, "learning_rate": 1.1842987582287734e-06, "loss": 0.6623, "step": 8304 }, { "epoch": 0.848314606741573, "grad_norm": 1.526471651358364, "learning_rate": 1.1827375320772027e-06, "loss": 0.6383, "step": 8305 }, { "epoch": 0.8484167517875383, "grad_norm": 1.38963719292274, "learning_rate": 1.1811772709807057e-06, "loss": 0.6806, "step": 8306 }, { "epoch": 0.8485188968335036, "grad_norm": 1.575124060407301, "learning_rate": 1.1796179751100533e-06, "loss": 0.6778, "step": 8307 }, { "epoch": 0.8486210418794689, "grad_norm": 1.4866530262146251, "learning_rate": 1.1780596446359105e-06, "loss": 0.5911, "step": 8308 }, { "epoch": 0.8487231869254341, "grad_norm": 1.5062414086633105, "learning_rate": 1.1765022797288418e-06, "loss": 0.6412, "step": 8309 }, { "epoch": 0.8488253319713994, "grad_norm": 1.5753247094481755, "learning_rate": 1.1749458805592983e-06, "loss": 0.6912, "step": 8310 }, { "epoch": 0.8489274770173647, "grad_norm": 1.431872908227566, "learning_rate": 1.1733904472976277e-06, "loss": 0.661, "step": 8311 }, { "epoch": 0.8490296220633299, "grad_norm": 1.3938312565265256, "learning_rate": 1.1718359801140788e-06, "loss": 0.6762, "step": 8312 }, { "epoch": 0.8491317671092952, "grad_norm": 1.6075948936753437, "learning_rate": 1.1702824791787825e-06, "loss": 0.7822, "step": 8313 }, { "epoch": 0.8492339121552605, "grad_norm": 1.5666264133552175, "learning_rate": 1.1687299446617762e-06, "loss": 0.7319, "step": 8314 }, { "epoch": 0.8493360572012257, "grad_norm": 1.4335010181056, "learning_rate": 1.1671783767329824e-06, "loss": 0.7015, "step": 8315 }, { "epoch": 0.849438202247191, "grad_norm": 1.4845499697012328, "learning_rate": 1.1656277755622225e-06, "loss": 0.7232, "step": 8316 }, { "epoch": 0.8495403472931563, "grad_norm": 1.3611465700380552, "learning_rate": 1.1640781413192082e-06, "loss": 0.7713, "step": 8317 }, { "epoch": 0.8496424923391216, "grad_norm": 1.5612776185721151, "learning_rate": 1.1625294741735527e-06, "loss": 0.576, "step": 8318 }, { "epoch": 0.8497446373850869, "grad_norm": 1.4335053682421963, "learning_rate": 1.1609817742947538e-06, "loss": 0.6209, "step": 8319 }, { "epoch": 0.849846782431052, "grad_norm": 1.5300787797336082, "learning_rate": 1.1594350418522115e-06, "loss": 0.7007, "step": 8320 }, { "epoch": 0.8499489274770173, "grad_norm": 1.4669915467675874, "learning_rate": 1.1578892770152162e-06, "loss": 0.6246, "step": 8321 }, { "epoch": 0.8500510725229826, "grad_norm": 1.6326682373033836, "learning_rate": 1.1563444799529522e-06, "loss": 0.7536, "step": 8322 }, { "epoch": 0.8501532175689479, "grad_norm": 1.496430257119027, "learning_rate": 1.1548006508344966e-06, "loss": 0.6003, "step": 8323 }, { "epoch": 0.8502553626149132, "grad_norm": 1.4847971629580081, "learning_rate": 1.1532577898288267e-06, "loss": 0.7235, "step": 8324 }, { "epoch": 0.8503575076608785, "grad_norm": 1.49297185425958, "learning_rate": 1.151715897104807e-06, "loss": 0.6805, "step": 8325 }, { "epoch": 0.8504596527068438, "grad_norm": 1.4577493639847692, "learning_rate": 1.1501749728311994e-06, "loss": 0.6483, "step": 8326 }, { "epoch": 0.8505617977528089, "grad_norm": 1.5627491730819745, "learning_rate": 1.148635017176657e-06, "loss": 0.704, "step": 8327 }, { "epoch": 0.8506639427987742, "grad_norm": 1.4827267529092356, "learning_rate": 1.14709603030973e-06, "loss": 0.6675, "step": 8328 }, { "epoch": 0.8507660878447395, "grad_norm": 1.49504066312726, "learning_rate": 1.1455580123988653e-06, "loss": 0.6509, "step": 8329 }, { "epoch": 0.8508682328907048, "grad_norm": 1.4408648997080775, "learning_rate": 1.1440209636123956e-06, "loss": 0.7024, "step": 8330 }, { "epoch": 0.8509703779366701, "grad_norm": 1.608215344639957, "learning_rate": 1.1424848841185542e-06, "loss": 0.6993, "step": 8331 }, { "epoch": 0.8510725229826354, "grad_norm": 1.5792241908133076, "learning_rate": 1.1409497740854625e-06, "loss": 0.7387, "step": 8332 }, { "epoch": 0.8511746680286006, "grad_norm": 1.6784978736053213, "learning_rate": 1.1394156336811436e-06, "loss": 0.7714, "step": 8333 }, { "epoch": 0.8512768130745659, "grad_norm": 1.4756358463644228, "learning_rate": 1.1378824630735087e-06, "loss": 0.665, "step": 8334 }, { "epoch": 0.8513789581205311, "grad_norm": 1.455234974518297, "learning_rate": 1.1363502624303614e-06, "loss": 0.8013, "step": 8335 }, { "epoch": 0.8514811031664964, "grad_norm": 1.5578478407176866, "learning_rate": 1.1348190319194064e-06, "loss": 0.7288, "step": 8336 }, { "epoch": 0.8515832482124617, "grad_norm": 1.5238399426281883, "learning_rate": 1.1332887717082342e-06, "loss": 0.6823, "step": 8337 }, { "epoch": 0.851685393258427, "grad_norm": 1.5100274966026, "learning_rate": 1.1317594819643362e-06, "loss": 0.6719, "step": 8338 }, { "epoch": 0.8517875383043922, "grad_norm": 1.9348803766528373, "learning_rate": 1.1302311628550933e-06, "loss": 0.7397, "step": 8339 }, { "epoch": 0.8518896833503575, "grad_norm": 1.5639655456892616, "learning_rate": 1.1287038145477791e-06, "loss": 0.6396, "step": 8340 }, { "epoch": 0.8519918283963228, "grad_norm": 1.4829619692123155, "learning_rate": 1.1271774372095646e-06, "loss": 0.7001, "step": 8341 }, { "epoch": 0.8520939734422881, "grad_norm": 1.5711843217568109, "learning_rate": 1.1256520310075103e-06, "loss": 0.6871, "step": 8342 }, { "epoch": 0.8521961184882533, "grad_norm": 1.5996771928356603, "learning_rate": 1.1241275961085751e-06, "loss": 0.6852, "step": 8343 }, { "epoch": 0.8522982635342186, "grad_norm": 1.3985494490443957, "learning_rate": 1.12260413267961e-06, "loss": 0.7239, "step": 8344 }, { "epoch": 0.8524004085801838, "grad_norm": 1.3838674990944548, "learning_rate": 1.1210816408873592e-06, "loss": 0.6503, "step": 8345 }, { "epoch": 0.8525025536261491, "grad_norm": 1.4871811748704775, "learning_rate": 1.1195601208984587e-06, "loss": 0.6034, "step": 8346 }, { "epoch": 0.8526046986721144, "grad_norm": 1.6507273123002324, "learning_rate": 1.118039572879439e-06, "loss": 0.6567, "step": 8347 }, { "epoch": 0.8527068437180797, "grad_norm": 1.631555295937028, "learning_rate": 1.1165199969967277e-06, "loss": 0.8299, "step": 8348 }, { "epoch": 0.852808988764045, "grad_norm": 1.4300233191496894, "learning_rate": 1.1150013934166426e-06, "loss": 0.7062, "step": 8349 }, { "epoch": 0.8529111338100103, "grad_norm": 1.60307805053133, "learning_rate": 1.1134837623053962e-06, "loss": 0.8125, "step": 8350 }, { "epoch": 0.8530132788559754, "grad_norm": 1.4187994086111455, "learning_rate": 1.1119671038290901e-06, "loss": 0.6542, "step": 8351 }, { "epoch": 0.8531154239019407, "grad_norm": 1.7344322105544165, "learning_rate": 1.1104514181537273e-06, "loss": 0.6951, "step": 8352 }, { "epoch": 0.853217568947906, "grad_norm": 1.6365063070819643, "learning_rate": 1.1089367054452028e-06, "loss": 0.7281, "step": 8353 }, { "epoch": 0.8533197139938713, "grad_norm": 1.5333027224389877, "learning_rate": 1.1074229658693003e-06, "loss": 0.7699, "step": 8354 }, { "epoch": 0.8534218590398366, "grad_norm": 1.6694082709363514, "learning_rate": 1.1059101995916988e-06, "loss": 0.6883, "step": 8355 }, { "epoch": 0.8535240040858019, "grad_norm": 1.5817948433424238, "learning_rate": 1.1043984067779723e-06, "loss": 0.7377, "step": 8356 }, { "epoch": 0.8536261491317672, "grad_norm": 1.5384613166492191, "learning_rate": 1.1028875875935863e-06, "loss": 0.6396, "step": 8357 }, { "epoch": 0.8537282941777323, "grad_norm": 1.5048784987433386, "learning_rate": 1.101377742203903e-06, "loss": 0.6462, "step": 8358 }, { "epoch": 0.8538304392236976, "grad_norm": 1.4491982865127386, "learning_rate": 1.0998688707741733e-06, "loss": 0.665, "step": 8359 }, { "epoch": 0.8539325842696629, "grad_norm": 1.5306767462310378, "learning_rate": 1.0983609734695488e-06, "loss": 0.6339, "step": 8360 }, { "epoch": 0.8540347293156282, "grad_norm": 1.4556056141121028, "learning_rate": 1.0968540504550661e-06, "loss": 0.659, "step": 8361 }, { "epoch": 0.8541368743615935, "grad_norm": 1.560099801058066, "learning_rate": 1.095348101895658e-06, "loss": 0.6243, "step": 8362 }, { "epoch": 0.8542390194075588, "grad_norm": 1.6843941265264888, "learning_rate": 1.0938431279561556e-06, "loss": 0.7801, "step": 8363 }, { "epoch": 0.854341164453524, "grad_norm": 1.5789251376130133, "learning_rate": 1.0923391288012764e-06, "loss": 0.7804, "step": 8364 }, { "epoch": 0.8544433094994893, "grad_norm": 1.5706988507324289, "learning_rate": 1.0908361045956352e-06, "loss": 0.6853, "step": 8365 }, { "epoch": 0.8545454545454545, "grad_norm": 1.6974830420637157, "learning_rate": 1.0893340555037356e-06, "loss": 0.7981, "step": 8366 }, { "epoch": 0.8546475995914198, "grad_norm": 1.4685955596527507, "learning_rate": 1.0878329816899813e-06, "loss": 0.7786, "step": 8367 }, { "epoch": 0.8547497446373851, "grad_norm": 1.511055685775897, "learning_rate": 1.086332883318667e-06, "loss": 0.7245, "step": 8368 }, { "epoch": 0.8548518896833504, "grad_norm": 1.4492622970916131, "learning_rate": 1.0848337605539782e-06, "loss": 0.6086, "step": 8369 }, { "epoch": 0.8549540347293156, "grad_norm": 1.5179494988495408, "learning_rate": 1.0833356135599938e-06, "loss": 0.6718, "step": 8370 }, { "epoch": 0.8550561797752809, "grad_norm": 1.4497075829838668, "learning_rate": 1.0818384425006844e-06, "loss": 0.7019, "step": 8371 }, { "epoch": 0.8551583248212462, "grad_norm": 1.5260078580520513, "learning_rate": 1.0803422475399228e-06, "loss": 0.7017, "step": 8372 }, { "epoch": 0.8552604698672115, "grad_norm": 1.4489602047079986, "learning_rate": 1.0788470288414642e-06, "loss": 0.5729, "step": 8373 }, { "epoch": 0.8553626149131767, "grad_norm": 1.6914722426647502, "learning_rate": 1.0773527865689625e-06, "loss": 0.7541, "step": 8374 }, { "epoch": 0.855464759959142, "grad_norm": 1.6822433057494055, "learning_rate": 1.07585952088596e-06, "loss": 0.7944, "step": 8375 }, { "epoch": 0.8555669050051072, "grad_norm": 1.348097100251337, "learning_rate": 1.0743672319559017e-06, "loss": 0.6721, "step": 8376 }, { "epoch": 0.8556690500510725, "grad_norm": 1.4963404354326346, "learning_rate": 1.0728759199421146e-06, "loss": 0.6947, "step": 8377 }, { "epoch": 0.8557711950970378, "grad_norm": 1.6349750018882885, "learning_rate": 1.071385585007828e-06, "loss": 0.7319, "step": 8378 }, { "epoch": 0.8558733401430031, "grad_norm": 1.285242558988449, "learning_rate": 1.0698962273161573e-06, "loss": 0.6166, "step": 8379 }, { "epoch": 0.8559754851889684, "grad_norm": 1.307375444013231, "learning_rate": 1.068407847030114e-06, "loss": 0.5787, "step": 8380 }, { "epoch": 0.8560776302349336, "grad_norm": 1.4056924453508923, "learning_rate": 1.0669204443126002e-06, "loss": 0.6134, "step": 8381 }, { "epoch": 0.8561797752808988, "grad_norm": 1.5096506350933856, "learning_rate": 1.065434019326418e-06, "loss": 0.68, "step": 8382 }, { "epoch": 0.8562819203268641, "grad_norm": 1.5705467604452903, "learning_rate": 1.0639485722342524e-06, "loss": 0.708, "step": 8383 }, { "epoch": 0.8563840653728294, "grad_norm": 1.6086545446162752, "learning_rate": 1.0624641031986903e-06, "loss": 0.7612, "step": 8384 }, { "epoch": 0.8564862104187947, "grad_norm": 1.4667618464165484, "learning_rate": 1.0609806123822076e-06, "loss": 0.6591, "step": 8385 }, { "epoch": 0.85658835546476, "grad_norm": 1.3851260883922176, "learning_rate": 1.0594980999471694e-06, "loss": 0.6937, "step": 8386 }, { "epoch": 0.8566905005107253, "grad_norm": 1.5715958739378904, "learning_rate": 1.0580165660558439e-06, "loss": 0.6998, "step": 8387 }, { "epoch": 0.8567926455566905, "grad_norm": 1.60621104241265, "learning_rate": 1.0565360108703816e-06, "loss": 0.7289, "step": 8388 }, { "epoch": 0.8568947906026557, "grad_norm": 1.5755417222954873, "learning_rate": 1.0550564345528302e-06, "loss": 0.6729, "step": 8389 }, { "epoch": 0.856996935648621, "grad_norm": 1.4740455403740886, "learning_rate": 1.0535778372651318e-06, "loss": 0.6897, "step": 8390 }, { "epoch": 0.8570990806945863, "grad_norm": 1.5783974294431247, "learning_rate": 1.0521002191691153e-06, "loss": 0.7223, "step": 8391 }, { "epoch": 0.8572012257405516, "grad_norm": 1.4022919468166204, "learning_rate": 1.0506235804265153e-06, "loss": 0.6716, "step": 8392 }, { "epoch": 0.8573033707865169, "grad_norm": 1.5579921920885453, "learning_rate": 1.0491479211989464e-06, "loss": 0.6409, "step": 8393 }, { "epoch": 0.8574055158324821, "grad_norm": 1.52293031855573, "learning_rate": 1.0476732416479207e-06, "loss": 0.6377, "step": 8394 }, { "epoch": 0.8575076608784474, "grad_norm": 1.4832990699534652, "learning_rate": 1.0461995419348425e-06, "loss": 0.6686, "step": 8395 }, { "epoch": 0.8576098059244127, "grad_norm": 1.550337822007131, "learning_rate": 1.0447268222210072e-06, "loss": 0.6739, "step": 8396 }, { "epoch": 0.8577119509703779, "grad_norm": 1.6782195940649836, "learning_rate": 1.043255082667608e-06, "loss": 0.6262, "step": 8397 }, { "epoch": 0.8578140960163432, "grad_norm": 1.5755145608127419, "learning_rate": 1.0417843234357283e-06, "loss": 0.6656, "step": 8398 }, { "epoch": 0.8579162410623085, "grad_norm": 1.5443480028971983, "learning_rate": 1.0403145446863394e-06, "loss": 0.7346, "step": 8399 }, { "epoch": 0.8580183861082737, "grad_norm": 1.5297780278216855, "learning_rate": 1.0388457465803148e-06, "loss": 0.6226, "step": 8400 }, { "epoch": 0.858120531154239, "grad_norm": 1.5091107474738008, "learning_rate": 1.0373779292784103e-06, "loss": 0.6427, "step": 8401 }, { "epoch": 0.8582226762002043, "grad_norm": 1.4666599466263683, "learning_rate": 1.0359110929412841e-06, "loss": 0.5913, "step": 8402 }, { "epoch": 0.8583248212461696, "grad_norm": 1.399957793209479, "learning_rate": 1.0344452377294812e-06, "loss": 0.6925, "step": 8403 }, { "epoch": 0.8584269662921349, "grad_norm": 1.5421864070525229, "learning_rate": 1.0329803638034386e-06, "loss": 0.6885, "step": 8404 }, { "epoch": 0.8585291113381001, "grad_norm": 1.6069704570268586, "learning_rate": 1.031516471323487e-06, "loss": 0.8394, "step": 8405 }, { "epoch": 0.8586312563840653, "grad_norm": 1.5521176037173332, "learning_rate": 1.030053560449854e-06, "loss": 0.6796, "step": 8406 }, { "epoch": 0.8587334014300306, "grad_norm": 1.4065750604015383, "learning_rate": 1.0285916313426513e-06, "loss": 0.5919, "step": 8407 }, { "epoch": 0.8588355464759959, "grad_norm": 1.534673093252651, "learning_rate": 1.0271306841618945e-06, "loss": 0.624, "step": 8408 }, { "epoch": 0.8589376915219612, "grad_norm": 1.5298296488323104, "learning_rate": 1.0256707190674797e-06, "loss": 0.7186, "step": 8409 }, { "epoch": 0.8590398365679265, "grad_norm": 1.6006599863585649, "learning_rate": 1.024211736219204e-06, "loss": 0.6672, "step": 8410 }, { "epoch": 0.8591419816138918, "grad_norm": 1.6403547054101784, "learning_rate": 1.0227537357767504e-06, "loss": 0.7179, "step": 8411 }, { "epoch": 0.8592441266598569, "grad_norm": 1.462667240267668, "learning_rate": 1.0212967178997024e-06, "loss": 0.7737, "step": 8412 }, { "epoch": 0.8593462717058222, "grad_norm": 1.5496130041531306, "learning_rate": 1.0198406827475304e-06, "loss": 0.6385, "step": 8413 }, { "epoch": 0.8594484167517875, "grad_norm": 1.4386234041606987, "learning_rate": 1.0183856304795969e-06, "loss": 0.7122, "step": 8414 }, { "epoch": 0.8595505617977528, "grad_norm": 1.4291252637253038, "learning_rate": 1.0169315612551566e-06, "loss": 0.7004, "step": 8415 }, { "epoch": 0.8596527068437181, "grad_norm": 1.409563058017546, "learning_rate": 1.0154784752333625e-06, "loss": 0.695, "step": 8416 }, { "epoch": 0.8597548518896834, "grad_norm": 1.5266882454159405, "learning_rate": 1.0140263725732546e-06, "loss": 0.7491, "step": 8417 }, { "epoch": 0.8598569969356487, "grad_norm": 1.5284731824300424, "learning_rate": 1.0125752534337664e-06, "loss": 0.6461, "step": 8418 }, { "epoch": 0.8599591419816139, "grad_norm": 1.2770840589351984, "learning_rate": 1.0111251179737225e-06, "loss": 0.6503, "step": 8419 }, { "epoch": 0.8600612870275791, "grad_norm": 1.5599679485567866, "learning_rate": 1.0096759663518407e-06, "loss": 0.6957, "step": 8420 }, { "epoch": 0.8601634320735444, "grad_norm": 1.5697499707897458, "learning_rate": 1.0082277987267341e-06, "loss": 0.6834, "step": 8421 }, { "epoch": 0.8602655771195097, "grad_norm": 1.5076440741097168, "learning_rate": 1.0067806152569048e-06, "loss": 0.726, "step": 8422 }, { "epoch": 0.860367722165475, "grad_norm": 1.4948877328610561, "learning_rate": 1.0053344161007461e-06, "loss": 0.6929, "step": 8423 }, { "epoch": 0.8604698672114403, "grad_norm": 1.5395570487238657, "learning_rate": 1.0038892014165491e-06, "loss": 0.6547, "step": 8424 }, { "epoch": 0.8605720122574055, "grad_norm": 1.3567920733803391, "learning_rate": 1.0024449713624885e-06, "loss": 0.5855, "step": 8425 }, { "epoch": 0.8606741573033708, "grad_norm": 1.5776620557456118, "learning_rate": 1.0010017260966409e-06, "loss": 0.829, "step": 8426 }, { "epoch": 0.8607763023493361, "grad_norm": 1.5773438314140968, "learning_rate": 9.99559465776968e-07, "loss": 0.6737, "step": 8427 }, { "epoch": 0.8608784473953013, "grad_norm": 1.5651514097935753, "learning_rate": 9.98118190561328e-07, "loss": 0.7276, "step": 8428 }, { "epoch": 0.8609805924412666, "grad_norm": 1.6425492111090085, "learning_rate": 9.966779006074666e-07, "loss": 0.7719, "step": 8429 }, { "epoch": 0.8610827374872319, "grad_norm": 1.3180986704982216, "learning_rate": 9.952385960730249e-07, "loss": 0.6816, "step": 8430 }, { "epoch": 0.8611848825331971, "grad_norm": 1.3720702559157816, "learning_rate": 9.938002771155363e-07, "loss": 0.6412, "step": 8431 }, { "epoch": 0.8612870275791624, "grad_norm": 1.523881839338781, "learning_rate": 9.923629438924275e-07, "loss": 0.65, "step": 8432 }, { "epoch": 0.8613891726251277, "grad_norm": 1.5530211463408303, "learning_rate": 9.90926596561015e-07, "loss": 0.7387, "step": 8433 }, { "epoch": 0.861491317671093, "grad_norm": 1.504503627322489, "learning_rate": 9.894912352785068e-07, "loss": 0.7267, "step": 8434 }, { "epoch": 0.8615934627170582, "grad_norm": 1.4995518305753064, "learning_rate": 9.880568602020024e-07, "loss": 0.635, "step": 8435 }, { "epoch": 0.8616956077630235, "grad_norm": 1.52295734799373, "learning_rate": 9.866234714884993e-07, "loss": 0.7249, "step": 8436 }, { "epoch": 0.8617977528089887, "grad_norm": 1.4918923691055421, "learning_rate": 9.851910692948808e-07, "loss": 0.6448, "step": 8437 }, { "epoch": 0.861899897854954, "grad_norm": 1.4268301794085236, "learning_rate": 9.837596537779236e-07, "loss": 0.6643, "step": 8438 }, { "epoch": 0.8620020429009193, "grad_norm": 1.5968089462852884, "learning_rate": 9.82329225094296e-07, "loss": 0.7822, "step": 8439 }, { "epoch": 0.8621041879468846, "grad_norm": 1.4662695372847874, "learning_rate": 9.808997834005608e-07, "loss": 0.5761, "step": 8440 }, { "epoch": 0.8622063329928499, "grad_norm": 1.462011915016091, "learning_rate": 9.794713288531732e-07, "loss": 0.6583, "step": 8441 }, { "epoch": 0.8623084780388152, "grad_norm": 1.3541053527917732, "learning_rate": 9.780438616084765e-07, "loss": 0.6328, "step": 8442 }, { "epoch": 0.8624106230847803, "grad_norm": 1.4395147613221053, "learning_rate": 9.766173818227086e-07, "loss": 0.6242, "step": 8443 }, { "epoch": 0.8625127681307456, "grad_norm": 1.4203046476061356, "learning_rate": 9.751918896519974e-07, "loss": 0.6014, "step": 8444 }, { "epoch": 0.8626149131767109, "grad_norm": 1.4173082398051704, "learning_rate": 9.737673852523632e-07, "loss": 0.6746, "step": 8445 }, { "epoch": 0.8627170582226762, "grad_norm": 1.4861742514945837, "learning_rate": 9.723438687797227e-07, "loss": 0.72, "step": 8446 }, { "epoch": 0.8628192032686415, "grad_norm": 1.541930701300977, "learning_rate": 9.709213403898753e-07, "loss": 0.6622, "step": 8447 }, { "epoch": 0.8629213483146068, "grad_norm": 1.6095812558113067, "learning_rate": 9.694998002385235e-07, "loss": 0.6841, "step": 8448 }, { "epoch": 0.863023493360572, "grad_norm": 1.4476702122682548, "learning_rate": 9.68079248481253e-07, "loss": 0.764, "step": 8449 }, { "epoch": 0.8631256384065373, "grad_norm": 1.579290552032581, "learning_rate": 9.66659685273542e-07, "loss": 0.7284, "step": 8450 }, { "epoch": 0.8632277834525025, "grad_norm": 1.4032523648897783, "learning_rate": 9.65241110770766e-07, "loss": 0.818, "step": 8451 }, { "epoch": 0.8633299284984678, "grad_norm": 1.5115535217447795, "learning_rate": 9.638235251281892e-07, "loss": 0.6009, "step": 8452 }, { "epoch": 0.8634320735444331, "grad_norm": 1.7035822410655495, "learning_rate": 9.624069285009641e-07, "loss": 0.6748, "step": 8453 }, { "epoch": 0.8635342185903984, "grad_norm": 1.5572569503985585, "learning_rate": 9.60991321044139e-07, "loss": 0.6837, "step": 8454 }, { "epoch": 0.8636363636363636, "grad_norm": 1.498325662260311, "learning_rate": 9.595767029126525e-07, "loss": 0.6289, "step": 8455 }, { "epoch": 0.8637385086823289, "grad_norm": 1.5135315534106113, "learning_rate": 9.581630742613402e-07, "loss": 0.7445, "step": 8456 }, { "epoch": 0.8638406537282942, "grad_norm": 1.480781254611083, "learning_rate": 9.567504352449198e-07, "loss": 0.7096, "step": 8457 }, { "epoch": 0.8639427987742595, "grad_norm": 1.5788290032879526, "learning_rate": 9.553387860180074e-07, "loss": 0.6986, "step": 8458 }, { "epoch": 0.8640449438202247, "grad_norm": 1.333410470461088, "learning_rate": 9.539281267351063e-07, "loss": 0.6328, "step": 8459 }, { "epoch": 0.86414708886619, "grad_norm": 1.4300906666381414, "learning_rate": 9.525184575506186e-07, "loss": 0.6508, "step": 8460 }, { "epoch": 0.8642492339121552, "grad_norm": 1.53269598830645, "learning_rate": 9.511097786188317e-07, "loss": 0.6998, "step": 8461 }, { "epoch": 0.8643513789581205, "grad_norm": 1.5242243094165946, "learning_rate": 9.497020900939247e-07, "loss": 0.754, "step": 8462 }, { "epoch": 0.8644535240040858, "grad_norm": 1.351601934160555, "learning_rate": 9.482953921299698e-07, "loss": 0.639, "step": 8463 }, { "epoch": 0.8645556690500511, "grad_norm": 1.4912752568954282, "learning_rate": 9.468896848809351e-07, "loss": 0.7049, "step": 8464 }, { "epoch": 0.8646578140960164, "grad_norm": 1.5719527752866476, "learning_rate": 9.454849685006706e-07, "loss": 0.7269, "step": 8465 }, { "epoch": 0.8647599591419816, "grad_norm": 1.5343672445535446, "learning_rate": 9.44081243142928e-07, "loss": 0.7199, "step": 8466 }, { "epoch": 0.8648621041879468, "grad_norm": 1.35141569322651, "learning_rate": 9.426785089613443e-07, "loss": 0.6627, "step": 8467 }, { "epoch": 0.8649642492339121, "grad_norm": 1.6700441846213627, "learning_rate": 9.412767661094502e-07, "loss": 0.6861, "step": 8468 }, { "epoch": 0.8650663942798774, "grad_norm": 1.4135988569194884, "learning_rate": 9.398760147406638e-07, "loss": 0.6371, "step": 8469 }, { "epoch": 0.8651685393258427, "grad_norm": 1.4854479191466268, "learning_rate": 9.384762550083037e-07, "loss": 0.6193, "step": 8470 }, { "epoch": 0.865270684371808, "grad_norm": 1.5333026786863024, "learning_rate": 9.370774870655708e-07, "loss": 0.649, "step": 8471 }, { "epoch": 0.8653728294177733, "grad_norm": 1.4164293776943533, "learning_rate": 9.356797110655624e-07, "loss": 0.725, "step": 8472 }, { "epoch": 0.8654749744637386, "grad_norm": 1.5361906535669616, "learning_rate": 9.342829271612675e-07, "loss": 0.6868, "step": 8473 }, { "epoch": 0.8655771195097037, "grad_norm": 1.492824730050042, "learning_rate": 9.328871355055613e-07, "loss": 0.6268, "step": 8474 }, { "epoch": 0.865679264555669, "grad_norm": 1.492059422984626, "learning_rate": 9.314923362512174e-07, "loss": 0.6288, "step": 8475 }, { "epoch": 0.8657814096016343, "grad_norm": 1.5530588581568225, "learning_rate": 9.300985295508968e-07, "loss": 0.6529, "step": 8476 }, { "epoch": 0.8658835546475996, "grad_norm": 1.470948097266118, "learning_rate": 9.287057155571522e-07, "loss": 0.7235, "step": 8477 }, { "epoch": 0.8659856996935649, "grad_norm": 1.4544606632321349, "learning_rate": 9.27313894422428e-07, "loss": 0.5794, "step": 8478 }, { "epoch": 0.8660878447395302, "grad_norm": 1.4097443726882306, "learning_rate": 9.259230662990559e-07, "loss": 0.6301, "step": 8479 }, { "epoch": 0.8661899897854954, "grad_norm": 1.4896353570747518, "learning_rate": 9.245332313392697e-07, "loss": 0.6299, "step": 8480 }, { "epoch": 0.8662921348314607, "grad_norm": 1.3633757944623672, "learning_rate": 9.231443896951852e-07, "loss": 0.6133, "step": 8481 }, { "epoch": 0.8663942798774259, "grad_norm": 1.4948359827885889, "learning_rate": 9.217565415188124e-07, "loss": 0.7572, "step": 8482 }, { "epoch": 0.8664964249233912, "grad_norm": 1.439120692668659, "learning_rate": 9.203696869620504e-07, "loss": 0.5754, "step": 8483 }, { "epoch": 0.8665985699693565, "grad_norm": 1.5752788035590148, "learning_rate": 9.189838261766915e-07, "loss": 0.6252, "step": 8484 }, { "epoch": 0.8667007150153218, "grad_norm": 1.3854623916692594, "learning_rate": 9.175989593144208e-07, "loss": 0.5902, "step": 8485 }, { "epoch": 0.866802860061287, "grad_norm": 1.3718944613475241, "learning_rate": 9.162150865268127e-07, "loss": 0.5524, "step": 8486 }, { "epoch": 0.8669050051072523, "grad_norm": 1.3424377460264791, "learning_rate": 9.148322079653305e-07, "loss": 0.7074, "step": 8487 }, { "epoch": 0.8670071501532176, "grad_norm": 2.0298161820636755, "learning_rate": 9.134503237813341e-07, "loss": 0.7532, "step": 8488 }, { "epoch": 0.8671092951991829, "grad_norm": 1.535983650914348, "learning_rate": 9.120694341260694e-07, "loss": 0.7474, "step": 8489 }, { "epoch": 0.8672114402451481, "grad_norm": 1.5463542255447147, "learning_rate": 9.106895391506776e-07, "loss": 0.7745, "step": 8490 }, { "epoch": 0.8673135852911134, "grad_norm": 1.5308511467459294, "learning_rate": 9.09310639006189e-07, "loss": 0.7632, "step": 8491 }, { "epoch": 0.8674157303370786, "grad_norm": 1.3248789986041865, "learning_rate": 9.07932733843524e-07, "loss": 0.5745, "step": 8492 }, { "epoch": 0.8675178753830439, "grad_norm": 1.579869419678343, "learning_rate": 9.065558238134931e-07, "loss": 0.6576, "step": 8493 }, { "epoch": 0.8676200204290092, "grad_norm": 1.4664582813563989, "learning_rate": 9.051799090668045e-07, "loss": 0.7024, "step": 8494 }, { "epoch": 0.8677221654749745, "grad_norm": 1.6180061212139403, "learning_rate": 9.038049897540491e-07, "loss": 0.6749, "step": 8495 }, { "epoch": 0.8678243105209398, "grad_norm": 1.5216693703687854, "learning_rate": 9.024310660257163e-07, "loss": 0.7122, "step": 8496 }, { "epoch": 0.867926455566905, "grad_norm": 1.5076061159070737, "learning_rate": 9.010581380321814e-07, "loss": 0.6326, "step": 8497 }, { "epoch": 0.8680286006128702, "grad_norm": 1.4446104240930326, "learning_rate": 8.996862059237122e-07, "loss": 0.586, "step": 8498 }, { "epoch": 0.8681307456588355, "grad_norm": 1.3800535367407576, "learning_rate": 8.983152698504649e-07, "loss": 0.6844, "step": 8499 }, { "epoch": 0.8682328907048008, "grad_norm": 1.4492833756849497, "learning_rate": 8.969453299624942e-07, "loss": 0.6985, "step": 8500 }, { "epoch": 0.8683350357507661, "grad_norm": 1.413258900593109, "learning_rate": 8.955763864097377e-07, "loss": 0.6097, "step": 8501 }, { "epoch": 0.8684371807967314, "grad_norm": 1.436552167088429, "learning_rate": 8.94208439342028e-07, "loss": 0.7653, "step": 8502 }, { "epoch": 0.8685393258426967, "grad_norm": 1.6099135064221297, "learning_rate": 8.928414889090864e-07, "loss": 0.6972, "step": 8503 }, { "epoch": 0.868641470888662, "grad_norm": 1.4850636089653453, "learning_rate": 8.914755352605276e-07, "loss": 0.6342, "step": 8504 }, { "epoch": 0.8687436159346271, "grad_norm": 1.5850498689616555, "learning_rate": 8.901105785458586e-07, "loss": 0.723, "step": 8505 }, { "epoch": 0.8688457609805924, "grad_norm": 1.570473682958406, "learning_rate": 8.887466189144711e-07, "loss": 0.7417, "step": 8506 }, { "epoch": 0.8689479060265577, "grad_norm": 1.5041022695298432, "learning_rate": 8.873836565156546e-07, "loss": 0.7084, "step": 8507 }, { "epoch": 0.869050051072523, "grad_norm": 1.5227889398483903, "learning_rate": 8.860216914985808e-07, "loss": 0.6533, "step": 8508 }, { "epoch": 0.8691521961184883, "grad_norm": 1.550466174210768, "learning_rate": 8.846607240123239e-07, "loss": 0.6859, "step": 8509 }, { "epoch": 0.8692543411644535, "grad_norm": 1.6365362235019254, "learning_rate": 8.833007542058403e-07, "loss": 0.6777, "step": 8510 }, { "epoch": 0.8693564862104188, "grad_norm": 1.6537623078095758, "learning_rate": 8.819417822279775e-07, "loss": 0.6165, "step": 8511 }, { "epoch": 0.8694586312563841, "grad_norm": 1.4080152255879594, "learning_rate": 8.805838082274798e-07, "loss": 0.7291, "step": 8512 }, { "epoch": 0.8695607763023493, "grad_norm": 1.4767296260763223, "learning_rate": 8.792268323529729e-07, "loss": 0.7772, "step": 8513 }, { "epoch": 0.8696629213483146, "grad_norm": 1.4522399282233833, "learning_rate": 8.778708547529846e-07, "loss": 0.6439, "step": 8514 }, { "epoch": 0.8697650663942799, "grad_norm": 1.4477429832951676, "learning_rate": 8.765158755759251e-07, "loss": 0.7148, "step": 8515 }, { "epoch": 0.8698672114402451, "grad_norm": 1.4903990855022875, "learning_rate": 8.75161894970098e-07, "loss": 0.6084, "step": 8516 }, { "epoch": 0.8699693564862104, "grad_norm": 1.6231633371002947, "learning_rate": 8.738089130836958e-07, "loss": 0.6265, "step": 8517 }, { "epoch": 0.8700715015321757, "grad_norm": 1.5547413094045741, "learning_rate": 8.724569300648034e-07, "loss": 0.6714, "step": 8518 }, { "epoch": 0.870173646578141, "grad_norm": 1.5296719447081175, "learning_rate": 8.711059460613968e-07, "loss": 0.745, "step": 8519 }, { "epoch": 0.8702757916241062, "grad_norm": 1.4171461707966937, "learning_rate": 8.697559612213447e-07, "loss": 0.6468, "step": 8520 }, { "epoch": 0.8703779366700715, "grad_norm": 1.49882351915601, "learning_rate": 8.684069756924007e-07, "loss": 0.6771, "step": 8521 }, { "epoch": 0.8704800817160367, "grad_norm": 1.4817187953441053, "learning_rate": 8.670589896222125e-07, "loss": 0.8399, "step": 8522 }, { "epoch": 0.870582226762002, "grad_norm": 1.5056321366140883, "learning_rate": 8.657120031583177e-07, "loss": 0.6628, "step": 8523 }, { "epoch": 0.8706843718079673, "grad_norm": 1.4200805201599531, "learning_rate": 8.643660164481482e-07, "loss": 0.6307, "step": 8524 }, { "epoch": 0.8707865168539326, "grad_norm": 1.469030613918922, "learning_rate": 8.630210296390195e-07, "loss": 0.6536, "step": 8525 }, { "epoch": 0.8708886618998979, "grad_norm": 1.6352237736306625, "learning_rate": 8.616770428781418e-07, "loss": 0.7216, "step": 8526 }, { "epoch": 0.8709908069458632, "grad_norm": 1.517830019913351, "learning_rate": 8.60334056312615e-07, "loss": 0.6887, "step": 8527 }, { "epoch": 0.8710929519918283, "grad_norm": 1.4386319399131136, "learning_rate": 8.589920700894306e-07, "loss": 0.6466, "step": 8528 }, { "epoch": 0.8711950970377936, "grad_norm": 1.4655113351727105, "learning_rate": 8.57651084355472e-07, "loss": 0.6312, "step": 8529 }, { "epoch": 0.8712972420837589, "grad_norm": 1.3701969653052128, "learning_rate": 8.563110992575086e-07, "loss": 0.6472, "step": 8530 }, { "epoch": 0.8713993871297242, "grad_norm": 1.5973111259943744, "learning_rate": 8.54972114942203e-07, "loss": 0.7185, "step": 8531 }, { "epoch": 0.8715015321756895, "grad_norm": 1.3370096445072075, "learning_rate": 8.53634131556108e-07, "loss": 0.6134, "step": 8532 }, { "epoch": 0.8716036772216548, "grad_norm": 1.5934781974486247, "learning_rate": 8.522971492456644e-07, "loss": 0.6986, "step": 8533 }, { "epoch": 0.8717058222676201, "grad_norm": 1.499213510961216, "learning_rate": 8.509611681572105e-07, "loss": 0.6527, "step": 8534 }, { "epoch": 0.8718079673135853, "grad_norm": 1.4781737241868766, "learning_rate": 8.49626188436965e-07, "loss": 0.6654, "step": 8535 }, { "epoch": 0.8719101123595505, "grad_norm": 1.5502328614563192, "learning_rate": 8.482922102310476e-07, "loss": 0.7051, "step": 8536 }, { "epoch": 0.8720122574055158, "grad_norm": 1.4146162700283884, "learning_rate": 8.469592336854604e-07, "loss": 0.5991, "step": 8537 }, { "epoch": 0.8721144024514811, "grad_norm": 1.5085383524854312, "learning_rate": 8.456272589460967e-07, "loss": 0.6433, "step": 8538 }, { "epoch": 0.8722165474974464, "grad_norm": 1.401819210848315, "learning_rate": 8.442962861587456e-07, "loss": 0.6463, "step": 8539 }, { "epoch": 0.8723186925434117, "grad_norm": 1.3243917281224575, "learning_rate": 8.429663154690826e-07, "loss": 0.6301, "step": 8540 }, { "epoch": 0.8724208375893769, "grad_norm": 1.393181121312515, "learning_rate": 8.416373470226713e-07, "loss": 0.6428, "step": 8541 }, { "epoch": 0.8725229826353422, "grad_norm": 1.645955037967443, "learning_rate": 8.403093809649676e-07, "loss": 0.6834, "step": 8542 }, { "epoch": 0.8726251276813075, "grad_norm": 1.5255078880438202, "learning_rate": 8.389824174413208e-07, "loss": 0.755, "step": 8543 }, { "epoch": 0.8727272727272727, "grad_norm": 1.5112261408721048, "learning_rate": 8.37656456596968e-07, "loss": 0.7299, "step": 8544 }, { "epoch": 0.872829417773238, "grad_norm": 1.5631188394198194, "learning_rate": 8.363314985770366e-07, "loss": 0.6294, "step": 8545 }, { "epoch": 0.8729315628192033, "grad_norm": 1.455378411794105, "learning_rate": 8.350075435265426e-07, "loss": 0.6452, "step": 8546 }, { "epoch": 0.8730337078651685, "grad_norm": 1.6293134673459684, "learning_rate": 8.336845915903935e-07, "loss": 0.7396, "step": 8547 }, { "epoch": 0.8731358529111338, "grad_norm": 1.4200009434812784, "learning_rate": 8.323626429133891e-07, "loss": 0.7063, "step": 8548 }, { "epoch": 0.8732379979570991, "grad_norm": 1.3219686683466194, "learning_rate": 8.310416976402158e-07, "loss": 0.6687, "step": 8549 }, { "epoch": 0.8733401430030644, "grad_norm": 1.2910262500146137, "learning_rate": 8.297217559154535e-07, "loss": 0.6098, "step": 8550 }, { "epoch": 0.8734422880490296, "grad_norm": 1.4434635361449777, "learning_rate": 8.28402817883568e-07, "loss": 0.6031, "step": 8551 }, { "epoch": 0.8735444330949949, "grad_norm": 1.5066255518082399, "learning_rate": 8.270848836889211e-07, "loss": 0.7031, "step": 8552 }, { "epoch": 0.8736465781409601, "grad_norm": 1.558414976811602, "learning_rate": 8.25767953475759e-07, "loss": 0.7637, "step": 8553 }, { "epoch": 0.8737487231869254, "grad_norm": 1.546544348284617, "learning_rate": 8.244520273882229e-07, "loss": 0.8463, "step": 8554 }, { "epoch": 0.8738508682328907, "grad_norm": 1.4544597602875897, "learning_rate": 8.231371055703408e-07, "loss": 0.6661, "step": 8555 }, { "epoch": 0.873953013278856, "grad_norm": 1.4887006339989877, "learning_rate": 8.21823188166031e-07, "loss": 0.5966, "step": 8556 }, { "epoch": 0.8740551583248213, "grad_norm": 1.563724029688523, "learning_rate": 8.205102753191019e-07, "loss": 0.7758, "step": 8557 }, { "epoch": 0.8741573033707866, "grad_norm": 1.4562864404556435, "learning_rate": 8.191983671732551e-07, "loss": 0.6223, "step": 8558 }, { "epoch": 0.8742594484167517, "grad_norm": 1.591158316125687, "learning_rate": 8.178874638720768e-07, "loss": 0.5759, "step": 8559 }, { "epoch": 0.874361593462717, "grad_norm": 1.5358970391281241, "learning_rate": 8.165775655590502e-07, "loss": 0.7006, "step": 8560 }, { "epoch": 0.8744637385086823, "grad_norm": 1.704286548479193, "learning_rate": 8.152686723775427e-07, "loss": 0.727, "step": 8561 }, { "epoch": 0.8745658835546476, "grad_norm": 1.5723117544428094, "learning_rate": 8.139607844708109e-07, "loss": 0.6993, "step": 8562 }, { "epoch": 0.8746680286006129, "grad_norm": 1.5594317069123798, "learning_rate": 8.126539019820079e-07, "loss": 0.7268, "step": 8563 }, { "epoch": 0.8747701736465782, "grad_norm": 1.5080306381971469, "learning_rate": 8.113480250541705e-07, "loss": 0.5758, "step": 8564 }, { "epoch": 0.8748723186925434, "grad_norm": 1.4846989613531036, "learning_rate": 8.100431538302289e-07, "loss": 0.6905, "step": 8565 }, { "epoch": 0.8749744637385087, "grad_norm": 1.5576973511517211, "learning_rate": 8.087392884530009e-07, "loss": 0.7594, "step": 8566 }, { "epoch": 0.8750766087844739, "grad_norm": 1.344881798560366, "learning_rate": 8.074364290651949e-07, "loss": 0.6065, "step": 8567 }, { "epoch": 0.8751787538304392, "grad_norm": 1.595445155285488, "learning_rate": 8.06134575809413e-07, "loss": 0.6508, "step": 8568 }, { "epoch": 0.8752808988764045, "grad_norm": 1.4548985294557544, "learning_rate": 8.048337288281416e-07, "loss": 0.6876, "step": 8569 }, { "epoch": 0.8753830439223698, "grad_norm": 1.513626647396427, "learning_rate": 8.035338882637589e-07, "loss": 0.662, "step": 8570 }, { "epoch": 0.875485188968335, "grad_norm": 1.539678238607038, "learning_rate": 8.022350542585344e-07, "loss": 0.8012, "step": 8571 }, { "epoch": 0.8755873340143003, "grad_norm": 1.3978700844617418, "learning_rate": 8.009372269546245e-07, "loss": 0.685, "step": 8572 }, { "epoch": 0.8756894790602656, "grad_norm": 1.378556083824567, "learning_rate": 7.996404064940788e-07, "loss": 0.7131, "step": 8573 }, { "epoch": 0.8757916241062308, "grad_norm": 1.4237547319750954, "learning_rate": 7.98344593018836e-07, "loss": 0.6774, "step": 8574 }, { "epoch": 0.8758937691521961, "grad_norm": 1.5627295229232614, "learning_rate": 7.970497866707205e-07, "loss": 0.7602, "step": 8575 }, { "epoch": 0.8759959141981614, "grad_norm": 1.5554458475825865, "learning_rate": 7.957559875914533e-07, "loss": 0.6669, "step": 8576 }, { "epoch": 0.8760980592441266, "grad_norm": 1.5046849168837526, "learning_rate": 7.944631959226379e-07, "loss": 0.6598, "step": 8577 }, { "epoch": 0.8762002042900919, "grad_norm": 1.4453780392770423, "learning_rate": 7.931714118057754e-07, "loss": 0.6527, "step": 8578 }, { "epoch": 0.8763023493360572, "grad_norm": 1.4376876804527154, "learning_rate": 7.918806353822506e-07, "loss": 0.6477, "step": 8579 }, { "epoch": 0.8764044943820225, "grad_norm": 1.5410413321115763, "learning_rate": 7.905908667933393e-07, "loss": 0.6957, "step": 8580 }, { "epoch": 0.8765066394279878, "grad_norm": 1.5379146364678327, "learning_rate": 7.893021061802053e-07, "loss": 0.6992, "step": 8581 }, { "epoch": 0.876608784473953, "grad_norm": 1.3803955126138514, "learning_rate": 7.880143536839091e-07, "loss": 0.648, "step": 8582 }, { "epoch": 0.8767109295199182, "grad_norm": 1.703007625140265, "learning_rate": 7.867276094453913e-07, "loss": 0.7048, "step": 8583 }, { "epoch": 0.8768130745658835, "grad_norm": 1.50489645940922, "learning_rate": 7.854418736054914e-07, "loss": 0.7859, "step": 8584 }, { "epoch": 0.8769152196118488, "grad_norm": 1.7200827094772944, "learning_rate": 7.841571463049314e-07, "loss": 0.6499, "step": 8585 }, { "epoch": 0.8770173646578141, "grad_norm": 1.4787639787350402, "learning_rate": 7.828734276843264e-07, "loss": 0.6687, "step": 8586 }, { "epoch": 0.8771195097037794, "grad_norm": 1.5250972562043013, "learning_rate": 7.815907178841775e-07, "loss": 0.6621, "step": 8587 }, { "epoch": 0.8772216547497447, "grad_norm": 1.5510542928341962, "learning_rate": 7.803090170448823e-07, "loss": 0.6696, "step": 8588 }, { "epoch": 0.87732379979571, "grad_norm": 1.3750955065125678, "learning_rate": 7.790283253067221e-07, "loss": 0.61, "step": 8589 }, { "epoch": 0.8774259448416751, "grad_norm": 1.3658964969384344, "learning_rate": 7.777486428098691e-07, "loss": 0.6461, "step": 8590 }, { "epoch": 0.8775280898876404, "grad_norm": 1.5079996126879893, "learning_rate": 7.764699696943845e-07, "loss": 0.5781, "step": 8591 }, { "epoch": 0.8776302349336057, "grad_norm": 1.4553655232353502, "learning_rate": 7.751923061002198e-07, "loss": 0.6794, "step": 8592 }, { "epoch": 0.877732379979571, "grad_norm": 1.4261302535883218, "learning_rate": 7.739156521672186e-07, "loss": 0.7295, "step": 8593 }, { "epoch": 0.8778345250255363, "grad_norm": 1.319671868067928, "learning_rate": 7.726400080351115e-07, "loss": 0.6581, "step": 8594 }, { "epoch": 0.8779366700715016, "grad_norm": 1.5631979798417883, "learning_rate": 7.713653738435156e-07, "loss": 0.7805, "step": 8595 }, { "epoch": 0.8780388151174668, "grad_norm": 1.4995817288307924, "learning_rate": 7.700917497319416e-07, "loss": 0.7012, "step": 8596 }, { "epoch": 0.8781409601634321, "grad_norm": 1.5570208610314946, "learning_rate": 7.688191358397901e-07, "loss": 0.6141, "step": 8597 }, { "epoch": 0.8782431052093973, "grad_norm": 1.5561113273219938, "learning_rate": 7.675475323063475e-07, "loss": 0.7406, "step": 8598 }, { "epoch": 0.8783452502553626, "grad_norm": 1.5180081044218017, "learning_rate": 7.662769392707914e-07, "loss": 0.6762, "step": 8599 }, { "epoch": 0.8784473953013279, "grad_norm": 1.4476990391794289, "learning_rate": 7.650073568721916e-07, "loss": 0.7489, "step": 8600 }, { "epoch": 0.8785495403472932, "grad_norm": 1.5529012561065447, "learning_rate": 7.637387852495026e-07, "loss": 0.6991, "step": 8601 }, { "epoch": 0.8786516853932584, "grad_norm": 1.4925115067151076, "learning_rate": 7.624712245415711e-07, "loss": 0.5875, "step": 8602 }, { "epoch": 0.8787538304392237, "grad_norm": 1.4516140630070322, "learning_rate": 7.612046748871327e-07, "loss": 0.6711, "step": 8603 }, { "epoch": 0.878855975485189, "grad_norm": 1.5456186065851505, "learning_rate": 7.599391364248121e-07, "loss": 0.6028, "step": 8604 }, { "epoch": 0.8789581205311542, "grad_norm": 1.4831391151566042, "learning_rate": 7.58674609293123e-07, "loss": 0.6434, "step": 8605 }, { "epoch": 0.8790602655771195, "grad_norm": 1.435413704644167, "learning_rate": 7.574110936304657e-07, "loss": 0.7102, "step": 8606 }, { "epoch": 0.8791624106230848, "grad_norm": 1.5716776250982578, "learning_rate": 7.561485895751386e-07, "loss": 0.6534, "step": 8607 }, { "epoch": 0.87926455566905, "grad_norm": 1.555282539114663, "learning_rate": 7.548870972653177e-07, "loss": 0.7367, "step": 8608 }, { "epoch": 0.8793667007150153, "grad_norm": 1.5078565978433505, "learning_rate": 7.536266168390804e-07, "loss": 0.7184, "step": 8609 }, { "epoch": 0.8794688457609806, "grad_norm": 1.6246840028017153, "learning_rate": 7.523671484343831e-07, "loss": 0.6848, "step": 8610 }, { "epoch": 0.8795709908069459, "grad_norm": 1.440878072381795, "learning_rate": 7.511086921890742e-07, "loss": 0.7016, "step": 8611 }, { "epoch": 0.8796731358529112, "grad_norm": 1.419549109145025, "learning_rate": 7.498512482408959e-07, "loss": 0.613, "step": 8612 }, { "epoch": 0.8797752808988764, "grad_norm": 1.6237718899505955, "learning_rate": 7.485948167274759e-07, "loss": 0.5679, "step": 8613 }, { "epoch": 0.8798774259448416, "grad_norm": 1.6046564414394051, "learning_rate": 7.473393977863297e-07, "loss": 0.6502, "step": 8614 }, { "epoch": 0.8799795709908069, "grad_norm": 1.4121589985117955, "learning_rate": 7.460849915548618e-07, "loss": 0.602, "step": 8615 }, { "epoch": 0.8800817160367722, "grad_norm": 1.5770159944563202, "learning_rate": 7.448315981703714e-07, "loss": 0.637, "step": 8616 }, { "epoch": 0.8801838610827375, "grad_norm": 1.4234742562001277, "learning_rate": 7.435792177700441e-07, "loss": 0.7638, "step": 8617 }, { "epoch": 0.8802860061287028, "grad_norm": 1.6016935628822655, "learning_rate": 7.423278504909515e-07, "loss": 0.6768, "step": 8618 }, { "epoch": 0.8803881511746681, "grad_norm": 1.4335603327837425, "learning_rate": 7.410774964700573e-07, "loss": 0.6658, "step": 8619 }, { "epoch": 0.8804902962206334, "grad_norm": 1.4807743815972503, "learning_rate": 7.398281558442111e-07, "loss": 0.717, "step": 8620 }, { "epoch": 0.8805924412665985, "grad_norm": 1.2937354161016952, "learning_rate": 7.385798287501578e-07, "loss": 0.689, "step": 8621 }, { "epoch": 0.8806945863125638, "grad_norm": 1.5090528781857355, "learning_rate": 7.373325153245259e-07, "loss": 0.7532, "step": 8622 }, { "epoch": 0.8807967313585291, "grad_norm": 1.480857079914353, "learning_rate": 7.36086215703834e-07, "loss": 0.6672, "step": 8623 }, { "epoch": 0.8808988764044944, "grad_norm": 1.5581295927233445, "learning_rate": 7.348409300244896e-07, "loss": 0.7659, "step": 8624 }, { "epoch": 0.8810010214504597, "grad_norm": 1.469544334535651, "learning_rate": 7.335966584227939e-07, "loss": 0.7415, "step": 8625 }, { "epoch": 0.881103166496425, "grad_norm": 1.35035026634553, "learning_rate": 7.323534010349287e-07, "loss": 0.6791, "step": 8626 }, { "epoch": 0.8812053115423902, "grad_norm": 1.461905426783304, "learning_rate": 7.311111579969732e-07, "loss": 0.6837, "step": 8627 }, { "epoch": 0.8813074565883555, "grad_norm": 1.5759890752225938, "learning_rate": 7.298699294448896e-07, "loss": 0.71, "step": 8628 }, { "epoch": 0.8814096016343207, "grad_norm": 1.3705500129798684, "learning_rate": 7.286297155145317e-07, "loss": 0.6622, "step": 8629 }, { "epoch": 0.881511746680286, "grad_norm": 1.2688729796951292, "learning_rate": 7.273905163416394e-07, "loss": 0.5325, "step": 8630 }, { "epoch": 0.8816138917262513, "grad_norm": 1.588866617864123, "learning_rate": 7.26152332061848e-07, "loss": 0.6501, "step": 8631 }, { "epoch": 0.8817160367722165, "grad_norm": 1.5735612369424323, "learning_rate": 7.249151628106744e-07, "loss": 0.734, "step": 8632 }, { "epoch": 0.8818181818181818, "grad_norm": 1.6342804297516687, "learning_rate": 7.236790087235302e-07, "loss": 0.6721, "step": 8633 }, { "epoch": 0.8819203268641471, "grad_norm": 1.4727447012501826, "learning_rate": 7.224438699357117e-07, "loss": 0.5519, "step": 8634 }, { "epoch": 0.8820224719101124, "grad_norm": 1.4364877225452468, "learning_rate": 7.212097465824031e-07, "loss": 0.6183, "step": 8635 }, { "epoch": 0.8821246169560776, "grad_norm": 1.499651177544667, "learning_rate": 7.199766387986851e-07, "loss": 0.6802, "step": 8636 }, { "epoch": 0.8822267620020429, "grad_norm": 1.45956453680312, "learning_rate": 7.187445467195198e-07, "loss": 0.6052, "step": 8637 }, { "epoch": 0.8823289070480081, "grad_norm": 1.5373904737649855, "learning_rate": 7.175134704797593e-07, "loss": 0.686, "step": 8638 }, { "epoch": 0.8824310520939734, "grad_norm": 1.4597139797263583, "learning_rate": 7.16283410214148e-07, "loss": 0.7785, "step": 8639 }, { "epoch": 0.8825331971399387, "grad_norm": 1.7062323283156, "learning_rate": 7.150543660573128e-07, "loss": 0.7869, "step": 8640 }, { "epoch": 0.882635342185904, "grad_norm": 1.6382279328375466, "learning_rate": 7.138263381437772e-07, "loss": 0.7401, "step": 8641 }, { "epoch": 0.8827374872318693, "grad_norm": 1.4180042610010084, "learning_rate": 7.12599326607949e-07, "loss": 0.7308, "step": 8642 }, { "epoch": 0.8828396322778346, "grad_norm": 1.4980664226934188, "learning_rate": 7.113733315841254e-07, "loss": 0.6746, "step": 8643 }, { "epoch": 0.8829417773237997, "grad_norm": 1.4209841713634492, "learning_rate": 7.101483532064923e-07, "loss": 0.6074, "step": 8644 }, { "epoch": 0.883043922369765, "grad_norm": 1.5638245721451667, "learning_rate": 7.089243916091215e-07, "loss": 0.7248, "step": 8645 }, { "epoch": 0.8831460674157303, "grad_norm": 1.5769991322710115, "learning_rate": 7.077014469259813e-07, "loss": 0.7183, "step": 8646 }, { "epoch": 0.8832482124616956, "grad_norm": 1.3453241286517867, "learning_rate": 7.064795192909213e-07, "loss": 0.6837, "step": 8647 }, { "epoch": 0.8833503575076609, "grad_norm": 1.6658428242549277, "learning_rate": 7.0525860883768e-07, "loss": 0.7996, "step": 8648 }, { "epoch": 0.8834525025536262, "grad_norm": 1.521745775608374, "learning_rate": 7.040387156998918e-07, "loss": 0.6886, "step": 8649 }, { "epoch": 0.8835546475995915, "grad_norm": 1.4888262831018038, "learning_rate": 7.028198400110697e-07, "loss": 0.6798, "step": 8650 }, { "epoch": 0.8836567926455567, "grad_norm": 1.584722758899668, "learning_rate": 7.016019819046239e-07, "loss": 0.658, "step": 8651 }, { "epoch": 0.8837589376915219, "grad_norm": 1.5803465791097386, "learning_rate": 7.003851415138497e-07, "loss": 0.6851, "step": 8652 }, { "epoch": 0.8838610827374872, "grad_norm": 1.3770448516571323, "learning_rate": 6.991693189719295e-07, "loss": 0.5981, "step": 8653 }, { "epoch": 0.8839632277834525, "grad_norm": 1.436312580607404, "learning_rate": 6.979545144119349e-07, "loss": 0.6978, "step": 8654 }, { "epoch": 0.8840653728294178, "grad_norm": 1.4846882638915846, "learning_rate": 6.967407279668304e-07, "loss": 0.5626, "step": 8655 }, { "epoch": 0.8841675178753831, "grad_norm": 1.4510727865474848, "learning_rate": 6.955279597694631e-07, "loss": 0.5806, "step": 8656 }, { "epoch": 0.8842696629213483, "grad_norm": 1.4093221789973185, "learning_rate": 6.943162099525724e-07, "loss": 0.7264, "step": 8657 }, { "epoch": 0.8843718079673136, "grad_norm": 1.5174851374251133, "learning_rate": 6.931054786487857e-07, "loss": 0.6673, "step": 8658 }, { "epoch": 0.8844739530132788, "grad_norm": 1.458727415236385, "learning_rate": 6.918957659906167e-07, "loss": 0.6337, "step": 8659 }, { "epoch": 0.8845760980592441, "grad_norm": 1.6251352448654643, "learning_rate": 6.906870721104686e-07, "loss": 0.6788, "step": 8660 }, { "epoch": 0.8846782431052094, "grad_norm": 1.5164862326234092, "learning_rate": 6.894793971406366e-07, "loss": 0.6953, "step": 8661 }, { "epoch": 0.8847803881511747, "grad_norm": 1.4912789400617983, "learning_rate": 6.882727412132995e-07, "loss": 0.7717, "step": 8662 }, { "epoch": 0.8848825331971399, "grad_norm": 1.506825891482042, "learning_rate": 6.87067104460527e-07, "loss": 0.6252, "step": 8663 }, { "epoch": 0.8849846782431052, "grad_norm": 1.4723967965827243, "learning_rate": 6.85862487014276e-07, "loss": 0.6878, "step": 8664 }, { "epoch": 0.8850868232890705, "grad_norm": 1.487931872838047, "learning_rate": 6.846588890063932e-07, "loss": 0.71, "step": 8665 }, { "epoch": 0.8851889683350358, "grad_norm": 1.4847231257089475, "learning_rate": 6.834563105686154e-07, "loss": 0.6695, "step": 8666 }, { "epoch": 0.885291113381001, "grad_norm": 1.5790069673911127, "learning_rate": 6.822547518325629e-07, "loss": 0.692, "step": 8667 }, { "epoch": 0.8853932584269663, "grad_norm": 1.5943548472849036, "learning_rate": 6.810542129297482e-07, "loss": 0.7065, "step": 8668 }, { "epoch": 0.8854954034729315, "grad_norm": 1.588287073739239, "learning_rate": 6.798546939915696e-07, "loss": 0.7357, "step": 8669 }, { "epoch": 0.8855975485188968, "grad_norm": 1.4473952425390095, "learning_rate": 6.786561951493176e-07, "loss": 0.6641, "step": 8670 }, { "epoch": 0.8856996935648621, "grad_norm": 1.5246084510771867, "learning_rate": 6.774587165341673e-07, "loss": 0.6996, "step": 8671 }, { "epoch": 0.8858018386108274, "grad_norm": 1.4868082906999878, "learning_rate": 6.762622582771817e-07, "loss": 0.6862, "step": 8672 }, { "epoch": 0.8859039836567927, "grad_norm": 1.429528597475294, "learning_rate": 6.750668205093181e-07, "loss": 0.6598, "step": 8673 }, { "epoch": 0.886006128702758, "grad_norm": 1.4463597751889898, "learning_rate": 6.73872403361413e-07, "loss": 0.6459, "step": 8674 }, { "epoch": 0.8861082737487231, "grad_norm": 1.5230479077773138, "learning_rate": 6.72679006964202e-07, "loss": 0.76, "step": 8675 }, { "epoch": 0.8862104187946884, "grad_norm": 1.6639118685268157, "learning_rate": 6.714866314482982e-07, "loss": 0.7702, "step": 8676 }, { "epoch": 0.8863125638406537, "grad_norm": 1.5108821019019738, "learning_rate": 6.702952769442106e-07, "loss": 0.6618, "step": 8677 }, { "epoch": 0.886414708886619, "grad_norm": 1.8006103149687902, "learning_rate": 6.691049435823327e-07, "loss": 0.7642, "step": 8678 }, { "epoch": 0.8865168539325843, "grad_norm": 1.6806126075107652, "learning_rate": 6.679156314929458e-07, "loss": 0.7118, "step": 8679 }, { "epoch": 0.8866189989785496, "grad_norm": 1.479644258481133, "learning_rate": 6.667273408062214e-07, "loss": 0.7622, "step": 8680 }, { "epoch": 0.8867211440245149, "grad_norm": 1.439301912073313, "learning_rate": 6.65540071652222e-07, "loss": 0.6384, "step": 8681 }, { "epoch": 0.8868232890704801, "grad_norm": 1.63038246133096, "learning_rate": 6.643538241608927e-07, "loss": 0.7015, "step": 8682 }, { "epoch": 0.8869254341164453, "grad_norm": 1.4942964900160889, "learning_rate": 6.631685984620684e-07, "loss": 0.7854, "step": 8683 }, { "epoch": 0.8870275791624106, "grad_norm": 1.456855076226014, "learning_rate": 6.61984394685472e-07, "loss": 0.6834, "step": 8684 }, { "epoch": 0.8871297242083759, "grad_norm": 1.412265149192123, "learning_rate": 6.608012129607189e-07, "loss": 0.6113, "step": 8685 }, { "epoch": 0.8872318692543412, "grad_norm": 1.4055654947328389, "learning_rate": 6.596190534173063e-07, "loss": 0.5073, "step": 8686 }, { "epoch": 0.8873340143003065, "grad_norm": 1.6181563723524108, "learning_rate": 6.584379161846222e-07, "loss": 0.6768, "step": 8687 }, { "epoch": 0.8874361593462717, "grad_norm": 1.5563665141725986, "learning_rate": 6.572578013919429e-07, "loss": 0.6727, "step": 8688 }, { "epoch": 0.887538304392237, "grad_norm": 1.414526633878522, "learning_rate": 6.56078709168434e-07, "loss": 0.7156, "step": 8689 }, { "epoch": 0.8876404494382022, "grad_norm": 1.6200848607184277, "learning_rate": 6.549006396431478e-07, "loss": 0.662, "step": 8690 }, { "epoch": 0.8877425944841675, "grad_norm": 1.4931208456925913, "learning_rate": 6.537235929450247e-07, "loss": 0.7092, "step": 8691 }, { "epoch": 0.8878447395301328, "grad_norm": 1.5654866097105613, "learning_rate": 6.525475692028926e-07, "loss": 0.7607, "step": 8692 }, { "epoch": 0.887946884576098, "grad_norm": 1.4103081637117254, "learning_rate": 6.513725685454686e-07, "loss": 0.6721, "step": 8693 }, { "epoch": 0.8880490296220633, "grad_norm": 1.5687115692647875, "learning_rate": 6.501985911013564e-07, "loss": 0.6849, "step": 8694 }, { "epoch": 0.8881511746680286, "grad_norm": 1.493855756922009, "learning_rate": 6.490256369990489e-07, "loss": 0.7048, "step": 8695 }, { "epoch": 0.8882533197139939, "grad_norm": 1.6214278216873494, "learning_rate": 6.478537063669266e-07, "loss": 0.701, "step": 8696 }, { "epoch": 0.8883554647599592, "grad_norm": 1.3964087724680025, "learning_rate": 6.466827993332591e-07, "loss": 0.677, "step": 8697 }, { "epoch": 0.8884576098059244, "grad_norm": 1.7057732681849032, "learning_rate": 6.45512916026203e-07, "loss": 0.7572, "step": 8698 }, { "epoch": 0.8885597548518896, "grad_norm": 1.3175212125384081, "learning_rate": 6.443440565738002e-07, "loss": 0.6532, "step": 8699 }, { "epoch": 0.8886618998978549, "grad_norm": 1.4204988931620914, "learning_rate": 6.431762211039861e-07, "loss": 0.6882, "step": 8700 }, { "epoch": 0.8887640449438202, "grad_norm": 1.4205872154479953, "learning_rate": 6.420094097445806e-07, "loss": 0.603, "step": 8701 }, { "epoch": 0.8888661899897855, "grad_norm": 1.410815896572816, "learning_rate": 6.408436226232906e-07, "loss": 0.6039, "step": 8702 }, { "epoch": 0.8889683350357508, "grad_norm": 1.4526254084414725, "learning_rate": 6.396788598677117e-07, "loss": 0.6674, "step": 8703 }, { "epoch": 0.8890704800817161, "grad_norm": 1.5670316413323149, "learning_rate": 6.385151216053287e-07, "loss": 0.7221, "step": 8704 }, { "epoch": 0.8891726251276814, "grad_norm": 1.4049155533490467, "learning_rate": 6.373524079635152e-07, "loss": 0.6239, "step": 8705 }, { "epoch": 0.8892747701736465, "grad_norm": 1.4981341705373787, "learning_rate": 6.361907190695304e-07, "loss": 0.6514, "step": 8706 }, { "epoch": 0.8893769152196118, "grad_norm": 1.5586782853849446, "learning_rate": 6.350300550505217e-07, "loss": 0.7045, "step": 8707 }, { "epoch": 0.8894790602655771, "grad_norm": 1.582349511447863, "learning_rate": 6.338704160335219e-07, "loss": 0.6999, "step": 8708 }, { "epoch": 0.8895812053115424, "grad_norm": 1.6103633406372455, "learning_rate": 6.32711802145457e-07, "loss": 0.7195, "step": 8709 }, { "epoch": 0.8896833503575077, "grad_norm": 1.3783794694241736, "learning_rate": 6.31554213513138e-07, "loss": 0.6439, "step": 8710 }, { "epoch": 0.889785495403473, "grad_norm": 1.4483351183961986, "learning_rate": 6.303976502632636e-07, "loss": 0.677, "step": 8711 }, { "epoch": 0.8898876404494382, "grad_norm": 1.4527275625705662, "learning_rate": 6.29242112522418e-07, "loss": 0.5745, "step": 8712 }, { "epoch": 0.8899897854954034, "grad_norm": 1.3471301953537171, "learning_rate": 6.2808760041708e-07, "loss": 0.6692, "step": 8713 }, { "epoch": 0.8900919305413687, "grad_norm": 1.6188563636696014, "learning_rate": 6.269341140736063e-07, "loss": 0.7548, "step": 8714 }, { "epoch": 0.890194075587334, "grad_norm": 1.4118017899349982, "learning_rate": 6.257816536182515e-07, "loss": 0.618, "step": 8715 }, { "epoch": 0.8902962206332993, "grad_norm": 1.54730451626402, "learning_rate": 6.246302191771514e-07, "loss": 0.6351, "step": 8716 }, { "epoch": 0.8903983656792646, "grad_norm": 1.5254066788204255, "learning_rate": 6.234798108763307e-07, "loss": 0.6171, "step": 8717 }, { "epoch": 0.8905005107252298, "grad_norm": 1.4138694857381509, "learning_rate": 6.22330428841702e-07, "loss": 0.6666, "step": 8718 }, { "epoch": 0.8906026557711951, "grad_norm": 1.7104141662884873, "learning_rate": 6.21182073199067e-07, "loss": 0.6307, "step": 8719 }, { "epoch": 0.8907048008171604, "grad_norm": 1.5563856663735036, "learning_rate": 6.200347440741128e-07, "loss": 0.6301, "step": 8720 }, { "epoch": 0.8908069458631256, "grad_norm": 1.5502106425169953, "learning_rate": 6.188884415924179e-07, "loss": 0.7038, "step": 8721 }, { "epoch": 0.8909090909090909, "grad_norm": 1.4890063308524728, "learning_rate": 6.177431658794431e-07, "loss": 0.5337, "step": 8722 }, { "epoch": 0.8910112359550562, "grad_norm": 1.3575587133797773, "learning_rate": 6.165989170605391e-07, "loss": 0.6168, "step": 8723 }, { "epoch": 0.8911133810010214, "grad_norm": 1.457395567773895, "learning_rate": 6.154556952609481e-07, "loss": 0.7976, "step": 8724 }, { "epoch": 0.8912155260469867, "grad_norm": 1.5692735339494268, "learning_rate": 6.143135006057943e-07, "loss": 0.7453, "step": 8725 }, { "epoch": 0.891317671092952, "grad_norm": 1.6201777451445352, "learning_rate": 6.131723332200923e-07, "loss": 0.6862, "step": 8726 }, { "epoch": 0.8914198161389173, "grad_norm": 1.5329035520926653, "learning_rate": 6.120321932287421e-07, "loss": 0.6912, "step": 8727 }, { "epoch": 0.8915219611848826, "grad_norm": 1.730828106130853, "learning_rate": 6.108930807565316e-07, "loss": 0.7921, "step": 8728 }, { "epoch": 0.8916241062308478, "grad_norm": 1.774290956196776, "learning_rate": 6.097549959281424e-07, "loss": 0.7159, "step": 8729 }, { "epoch": 0.891726251276813, "grad_norm": 1.5384140050101336, "learning_rate": 6.086179388681357e-07, "loss": 0.6935, "step": 8730 }, { "epoch": 0.8918283963227783, "grad_norm": 1.735607376501701, "learning_rate": 6.074819097009632e-07, "loss": 0.8618, "step": 8731 }, { "epoch": 0.8919305413687436, "grad_norm": 1.4797005832341164, "learning_rate": 6.063469085509632e-07, "loss": 0.6054, "step": 8732 }, { "epoch": 0.8920326864147089, "grad_norm": 1.4603111138388694, "learning_rate": 6.052129355423631e-07, "loss": 0.5767, "step": 8733 }, { "epoch": 0.8921348314606742, "grad_norm": 1.4501860582164268, "learning_rate": 6.040799907992778e-07, "loss": 0.6662, "step": 8734 }, { "epoch": 0.8922369765066395, "grad_norm": 1.414462586374828, "learning_rate": 6.029480744457072e-07, "loss": 0.5826, "step": 8735 }, { "epoch": 0.8923391215526048, "grad_norm": 1.5616616987694585, "learning_rate": 6.018171866055411e-07, "loss": 0.7253, "step": 8736 }, { "epoch": 0.8924412665985699, "grad_norm": 1.6074297043281531, "learning_rate": 6.006873274025571e-07, "loss": 0.6803, "step": 8737 }, { "epoch": 0.8925434116445352, "grad_norm": 1.3547492001570465, "learning_rate": 5.995584969604151e-07, "loss": 0.6023, "step": 8738 }, { "epoch": 0.8926455566905005, "grad_norm": 1.492475947995031, "learning_rate": 5.98430695402672e-07, "loss": 0.6885, "step": 8739 }, { "epoch": 0.8927477017364658, "grad_norm": 1.4902546087565836, "learning_rate": 5.973039228527633e-07, "loss": 0.7466, "step": 8740 }, { "epoch": 0.8928498467824311, "grad_norm": 1.370045388179979, "learning_rate": 5.961781794340149e-07, "loss": 0.7086, "step": 8741 }, { "epoch": 0.8929519918283964, "grad_norm": 1.4779713181599967, "learning_rate": 5.950534652696382e-07, "loss": 0.7071, "step": 8742 }, { "epoch": 0.8930541368743616, "grad_norm": 1.421556033918497, "learning_rate": 5.939297804827382e-07, "loss": 0.7084, "step": 8743 }, { "epoch": 0.8931562819203268, "grad_norm": 1.4809840657192253, "learning_rate": 5.928071251962996e-07, "loss": 0.6921, "step": 8744 }, { "epoch": 0.8932584269662921, "grad_norm": 1.5064407244508502, "learning_rate": 5.916854995331999e-07, "loss": 0.6395, "step": 8745 }, { "epoch": 0.8933605720122574, "grad_norm": 1.5078182312516784, "learning_rate": 5.90564903616202e-07, "loss": 0.6768, "step": 8746 }, { "epoch": 0.8934627170582227, "grad_norm": 1.5119788148300612, "learning_rate": 5.894453375679532e-07, "loss": 0.6475, "step": 8747 }, { "epoch": 0.893564862104188, "grad_norm": 1.690172145745588, "learning_rate": 5.883268015109911e-07, "loss": 0.7778, "step": 8748 }, { "epoch": 0.8936670071501532, "grad_norm": 1.4843278237519608, "learning_rate": 5.872092955677433e-07, "loss": 0.7007, "step": 8749 }, { "epoch": 0.8937691521961185, "grad_norm": 1.6404035426750663, "learning_rate": 5.860928198605198e-07, "loss": 0.666, "step": 8750 }, { "epoch": 0.8938712972420838, "grad_norm": 1.508786425044505, "learning_rate": 5.849773745115183e-07, "loss": 0.6746, "step": 8751 }, { "epoch": 0.893973442288049, "grad_norm": 1.390417382923603, "learning_rate": 5.838629596428247e-07, "loss": 0.7283, "step": 8752 }, { "epoch": 0.8940755873340143, "grad_norm": 1.4988054935561899, "learning_rate": 5.827495753764146e-07, "loss": 0.6404, "step": 8753 }, { "epoch": 0.8941777323799796, "grad_norm": 1.6360246787634953, "learning_rate": 5.816372218341482e-07, "loss": 0.7224, "step": 8754 }, { "epoch": 0.8942798774259448, "grad_norm": 1.5684150933680907, "learning_rate": 5.805258991377737e-07, "loss": 0.6918, "step": 8755 }, { "epoch": 0.8943820224719101, "grad_norm": 1.5422055534878594, "learning_rate": 5.794156074089253e-07, "loss": 0.6114, "step": 8756 }, { "epoch": 0.8944841675178754, "grad_norm": 1.4407465232181196, "learning_rate": 5.783063467691241e-07, "loss": 0.6602, "step": 8757 }, { "epoch": 0.8945863125638407, "grad_norm": 1.543354244918916, "learning_rate": 5.771981173397811e-07, "loss": 0.6327, "step": 8758 }, { "epoch": 0.894688457609806, "grad_norm": 1.3958533856507473, "learning_rate": 5.760909192421916e-07, "loss": 0.7049, "step": 8759 }, { "epoch": 0.8947906026557712, "grad_norm": 1.5120161491899482, "learning_rate": 5.749847525975393e-07, "loss": 0.6633, "step": 8760 }, { "epoch": 0.8948927477017364, "grad_norm": 1.5223496484705068, "learning_rate": 5.738796175268957e-07, "loss": 0.6321, "step": 8761 }, { "epoch": 0.8949948927477017, "grad_norm": 1.500562677731446, "learning_rate": 5.72775514151217e-07, "loss": 0.7773, "step": 8762 }, { "epoch": 0.895097037793667, "grad_norm": 1.5271248767253236, "learning_rate": 5.716724425913511e-07, "loss": 0.6594, "step": 8763 }, { "epoch": 0.8951991828396323, "grad_norm": 1.6346004479995313, "learning_rate": 5.70570402968027e-07, "loss": 0.6791, "step": 8764 }, { "epoch": 0.8953013278855976, "grad_norm": 1.4670581736969028, "learning_rate": 5.694693954018649e-07, "loss": 0.6715, "step": 8765 }, { "epoch": 0.8954034729315629, "grad_norm": 1.5029790183836607, "learning_rate": 5.683694200133705e-07, "loss": 0.6901, "step": 8766 }, { "epoch": 0.895505617977528, "grad_norm": 1.49086114473795, "learning_rate": 5.672704769229342e-07, "loss": 0.7195, "step": 8767 }, { "epoch": 0.8956077630234933, "grad_norm": 1.5644298866233797, "learning_rate": 5.661725662508399e-07, "loss": 0.648, "step": 8768 }, { "epoch": 0.8957099080694586, "grad_norm": 1.306758995326433, "learning_rate": 5.650756881172536e-07, "loss": 0.6355, "step": 8769 }, { "epoch": 0.8958120531154239, "grad_norm": 1.3986012492971005, "learning_rate": 5.639798426422293e-07, "loss": 0.608, "step": 8770 }, { "epoch": 0.8959141981613892, "grad_norm": 1.4969125098704537, "learning_rate": 5.628850299457078e-07, "loss": 0.7368, "step": 8771 }, { "epoch": 0.8960163432073545, "grad_norm": 1.6760131782442813, "learning_rate": 5.617912501475153e-07, "loss": 0.743, "step": 8772 }, { "epoch": 0.8961184882533197, "grad_norm": 1.644853612849001, "learning_rate": 5.606985033673706e-07, "loss": 0.7298, "step": 8773 }, { "epoch": 0.896220633299285, "grad_norm": 1.5134450622041864, "learning_rate": 5.596067897248724e-07, "loss": 0.6919, "step": 8774 }, { "epoch": 0.8963227783452502, "grad_norm": 1.5393771271788381, "learning_rate": 5.585161093395108e-07, "loss": 0.727, "step": 8775 }, { "epoch": 0.8964249233912155, "grad_norm": 1.5742064328712582, "learning_rate": 5.574264623306591e-07, "loss": 0.6037, "step": 8776 }, { "epoch": 0.8965270684371808, "grad_norm": 1.4336160282242218, "learning_rate": 5.563378488175819e-07, "loss": 0.6172, "step": 8777 }, { "epoch": 0.8966292134831461, "grad_norm": 1.4574072788285337, "learning_rate": 5.552502689194306e-07, "loss": 0.7123, "step": 8778 }, { "epoch": 0.8967313585291113, "grad_norm": 1.394934402350585, "learning_rate": 5.541637227552388e-07, "loss": 0.6557, "step": 8779 }, { "epoch": 0.8968335035750766, "grad_norm": 1.5835279095223485, "learning_rate": 5.530782104439303e-07, "loss": 0.72, "step": 8780 }, { "epoch": 0.8969356486210419, "grad_norm": 1.423143014684887, "learning_rate": 5.519937321043156e-07, "loss": 0.6148, "step": 8781 }, { "epoch": 0.8970377936670072, "grad_norm": 1.5779165503335235, "learning_rate": 5.509102878550887e-07, "loss": 0.7151, "step": 8782 }, { "epoch": 0.8971399387129724, "grad_norm": 1.4210091568252716, "learning_rate": 5.49827877814838e-07, "loss": 0.6478, "step": 8783 }, { "epoch": 0.8972420837589377, "grad_norm": 1.5651173570784946, "learning_rate": 5.487465021020299e-07, "loss": 0.7763, "step": 8784 }, { "epoch": 0.8973442288049029, "grad_norm": 1.6675001045835647, "learning_rate": 5.476661608350253e-07, "loss": 0.7791, "step": 8785 }, { "epoch": 0.8974463738508682, "grad_norm": 1.5041781455862193, "learning_rate": 5.465868541320662e-07, "loss": 0.6549, "step": 8786 }, { "epoch": 0.8975485188968335, "grad_norm": 1.453548114823558, "learning_rate": 5.455085821112827e-07, "loss": 0.6595, "step": 8787 }, { "epoch": 0.8976506639427988, "grad_norm": 1.4291824752251765, "learning_rate": 5.444313448906935e-07, "loss": 0.6616, "step": 8788 }, { "epoch": 0.8977528089887641, "grad_norm": 1.598893225593028, "learning_rate": 5.433551425882034e-07, "loss": 0.6932, "step": 8789 }, { "epoch": 0.8978549540347294, "grad_norm": 1.575748405002786, "learning_rate": 5.422799753216024e-07, "loss": 0.7382, "step": 8790 }, { "epoch": 0.8979570990806945, "grad_norm": 1.5479408463337505, "learning_rate": 5.412058432085676e-07, "loss": 0.7069, "step": 8791 }, { "epoch": 0.8980592441266598, "grad_norm": 1.574614019741522, "learning_rate": 5.40132746366664e-07, "loss": 0.6086, "step": 8792 }, { "epoch": 0.8981613891726251, "grad_norm": 1.5355385238222923, "learning_rate": 5.390606849133451e-07, "loss": 0.6341, "step": 8793 }, { "epoch": 0.8982635342185904, "grad_norm": 1.487011354504039, "learning_rate": 5.379896589659461e-07, "loss": 0.7245, "step": 8794 }, { "epoch": 0.8983656792645557, "grad_norm": 1.5588263710421255, "learning_rate": 5.369196686416933e-07, "loss": 0.6725, "step": 8795 }, { "epoch": 0.898467824310521, "grad_norm": 1.420305935395289, "learning_rate": 5.35850714057694e-07, "loss": 0.6474, "step": 8796 }, { "epoch": 0.8985699693564863, "grad_norm": 1.6177450656437609, "learning_rate": 5.347827953309504e-07, "loss": 0.6756, "step": 8797 }, { "epoch": 0.8986721144024514, "grad_norm": 1.4348818450335272, "learning_rate": 5.337159125783453e-07, "loss": 0.6149, "step": 8798 }, { "epoch": 0.8987742594484167, "grad_norm": 1.4864854539613406, "learning_rate": 5.326500659166501e-07, "loss": 0.699, "step": 8799 }, { "epoch": 0.898876404494382, "grad_norm": 1.409088934312831, "learning_rate": 5.3158525546252e-07, "loss": 0.6517, "step": 8800 }, { "epoch": 0.8989785495403473, "grad_norm": 1.5342041067996426, "learning_rate": 5.305214813325022e-07, "loss": 0.6875, "step": 8801 }, { "epoch": 0.8990806945863126, "grad_norm": 1.539628398080335, "learning_rate": 5.294587436430254e-07, "loss": 0.6893, "step": 8802 }, { "epoch": 0.8991828396322779, "grad_norm": 1.475777966386795, "learning_rate": 5.28397042510409e-07, "loss": 0.7241, "step": 8803 }, { "epoch": 0.8992849846782431, "grad_norm": 1.5220809546181582, "learning_rate": 5.273363780508556e-07, "loss": 0.7061, "step": 8804 }, { "epoch": 0.8993871297242084, "grad_norm": 1.4163136934225535, "learning_rate": 5.262767503804567e-07, "loss": 0.6963, "step": 8805 }, { "epoch": 0.8994892747701736, "grad_norm": 1.5347825227662513, "learning_rate": 5.252181596151861e-07, "loss": 0.6965, "step": 8806 }, { "epoch": 0.8995914198161389, "grad_norm": 1.4310649855530533, "learning_rate": 5.241606058709103e-07, "loss": 0.6632, "step": 8807 }, { "epoch": 0.8996935648621042, "grad_norm": 1.5356958321531848, "learning_rate": 5.231040892633776e-07, "loss": 0.6314, "step": 8808 }, { "epoch": 0.8997957099080695, "grad_norm": 1.5196074454375148, "learning_rate": 5.220486099082267e-07, "loss": 0.6707, "step": 8809 }, { "epoch": 0.8998978549540347, "grad_norm": 1.5241583031604422, "learning_rate": 5.209941679209785e-07, "loss": 0.6205, "step": 8810 }, { "epoch": 0.9, "grad_norm": 1.4436174097359282, "learning_rate": 5.199407634170417e-07, "loss": 0.5825, "step": 8811 }, { "epoch": 0.9001021450459653, "grad_norm": 1.4146828192017114, "learning_rate": 5.188883965117153e-07, "loss": 0.6646, "step": 8812 }, { "epoch": 0.9002042900919306, "grad_norm": 1.6336488084321334, "learning_rate": 5.178370673201783e-07, "loss": 0.6799, "step": 8813 }, { "epoch": 0.9003064351378958, "grad_norm": 1.6624283393709594, "learning_rate": 5.167867759575007e-07, "loss": 0.8012, "step": 8814 }, { "epoch": 0.900408580183861, "grad_norm": 1.6241143770122437, "learning_rate": 5.157375225386385e-07, "loss": 0.8004, "step": 8815 }, { "epoch": 0.9005107252298263, "grad_norm": 1.4745925098009403, "learning_rate": 5.146893071784286e-07, "loss": 0.6357, "step": 8816 }, { "epoch": 0.9006128702757916, "grad_norm": 1.4582728572214099, "learning_rate": 5.13642129991605e-07, "loss": 0.7024, "step": 8817 }, { "epoch": 0.9007150153217569, "grad_norm": 1.2825594919898262, "learning_rate": 5.125959910927792e-07, "loss": 0.6054, "step": 8818 }, { "epoch": 0.9008171603677222, "grad_norm": 1.4710055364365755, "learning_rate": 5.115508905964516e-07, "loss": 0.6937, "step": 8819 }, { "epoch": 0.9009193054136875, "grad_norm": 1.6439372858716164, "learning_rate": 5.1050682861701e-07, "loss": 0.7467, "step": 8820 }, { "epoch": 0.9010214504596528, "grad_norm": 1.4396129176353436, "learning_rate": 5.094638052687251e-07, "loss": 0.7089, "step": 8821 }, { "epoch": 0.9011235955056179, "grad_norm": 1.5651785705913253, "learning_rate": 5.084218206657609e-07, "loss": 0.7155, "step": 8822 }, { "epoch": 0.9012257405515832, "grad_norm": 1.4532324331369857, "learning_rate": 5.073808749221598e-07, "loss": 0.6197, "step": 8823 }, { "epoch": 0.9013278855975485, "grad_norm": 1.5145427598096115, "learning_rate": 5.063409681518528e-07, "loss": 0.6089, "step": 8824 }, { "epoch": 0.9014300306435138, "grad_norm": 1.6188235718444264, "learning_rate": 5.053021004686632e-07, "loss": 0.6844, "step": 8825 }, { "epoch": 0.9015321756894791, "grad_norm": 1.3689676461966298, "learning_rate": 5.042642719862912e-07, "loss": 0.613, "step": 8826 }, { "epoch": 0.9016343207354444, "grad_norm": 1.5328725465042532, "learning_rate": 5.032274828183314e-07, "loss": 0.6993, "step": 8827 }, { "epoch": 0.9017364657814096, "grad_norm": 1.660657033781915, "learning_rate": 5.021917330782589e-07, "loss": 0.6724, "step": 8828 }, { "epoch": 0.9018386108273748, "grad_norm": 1.5128467033815287, "learning_rate": 5.011570228794372e-07, "loss": 0.6643, "step": 8829 }, { "epoch": 0.9019407558733401, "grad_norm": 1.4433606531391006, "learning_rate": 5.001233523351157e-07, "loss": 0.7066, "step": 8830 }, { "epoch": 0.9020429009193054, "grad_norm": 1.486976924216084, "learning_rate": 4.990907215584317e-07, "loss": 0.7014, "step": 8831 }, { "epoch": 0.9021450459652707, "grad_norm": 1.6314475262840287, "learning_rate": 4.98059130662406e-07, "loss": 0.7836, "step": 8832 }, { "epoch": 0.902247191011236, "grad_norm": 1.5625608206048425, "learning_rate": 4.970285797599483e-07, "loss": 0.673, "step": 8833 }, { "epoch": 0.9023493360572012, "grad_norm": 1.6122864410066604, "learning_rate": 4.959990689638528e-07, "loss": 0.7773, "step": 8834 }, { "epoch": 0.9024514811031665, "grad_norm": 1.6472006477752754, "learning_rate": 4.949705983867992e-07, "loss": 0.6599, "step": 8835 }, { "epoch": 0.9025536261491318, "grad_norm": 1.566099470906536, "learning_rate": 4.939431681413531e-07, "loss": 0.7413, "step": 8836 }, { "epoch": 0.902655771195097, "grad_norm": 1.681186175650415, "learning_rate": 4.929167783399713e-07, "loss": 0.7308, "step": 8837 }, { "epoch": 0.9027579162410623, "grad_norm": 1.5094732122319936, "learning_rate": 4.918914290949894e-07, "loss": 0.6701, "step": 8838 }, { "epoch": 0.9028600612870276, "grad_norm": 1.7087569463222336, "learning_rate": 4.908671205186343e-07, "loss": 0.7326, "step": 8839 }, { "epoch": 0.9029622063329928, "grad_norm": 1.6200327547552695, "learning_rate": 4.898438527230143e-07, "loss": 0.702, "step": 8840 }, { "epoch": 0.9030643513789581, "grad_norm": 1.6373827743873604, "learning_rate": 4.888216258201295e-07, "loss": 0.777, "step": 8841 }, { "epoch": 0.9031664964249234, "grad_norm": 1.4875238017807653, "learning_rate": 4.878004399218639e-07, "loss": 0.5915, "step": 8842 }, { "epoch": 0.9032686414708887, "grad_norm": 1.5220620056936245, "learning_rate": 4.867802951399869e-07, "loss": 0.6729, "step": 8843 }, { "epoch": 0.903370786516854, "grad_norm": 1.5419295099625132, "learning_rate": 4.857611915861516e-07, "loss": 0.6535, "step": 8844 }, { "epoch": 0.9034729315628192, "grad_norm": 1.6060366159741524, "learning_rate": 4.847431293718996e-07, "loss": 0.6881, "step": 8845 }, { "epoch": 0.9035750766087844, "grad_norm": 1.4587362804717088, "learning_rate": 4.83726108608662e-07, "loss": 0.6459, "step": 8846 }, { "epoch": 0.9036772216547497, "grad_norm": 1.4689542771656021, "learning_rate": 4.827101294077496e-07, "loss": 0.6796, "step": 8847 }, { "epoch": 0.903779366700715, "grad_norm": 1.5097404121516667, "learning_rate": 4.816951918803603e-07, "loss": 0.8471, "step": 8848 }, { "epoch": 0.9038815117466803, "grad_norm": 1.5583876563658305, "learning_rate": 4.80681296137584e-07, "loss": 0.7164, "step": 8849 }, { "epoch": 0.9039836567926456, "grad_norm": 1.6252539420577212, "learning_rate": 4.796684422903897e-07, "loss": 0.7481, "step": 8850 }, { "epoch": 0.9040858018386109, "grad_norm": 1.6158086217363263, "learning_rate": 4.786566304496353e-07, "loss": 0.641, "step": 8851 }, { "epoch": 0.904187946884576, "grad_norm": 1.5673687557610714, "learning_rate": 4.776458607260648e-07, "loss": 0.713, "step": 8852 }, { "epoch": 0.9042900919305413, "grad_norm": 1.5371481756123881, "learning_rate": 4.766361332303071e-07, "loss": 0.6791, "step": 8853 }, { "epoch": 0.9043922369765066, "grad_norm": 1.6834504456794845, "learning_rate": 4.756274480728773e-07, "loss": 0.747, "step": 8854 }, { "epoch": 0.9044943820224719, "grad_norm": 1.5297964187782123, "learning_rate": 4.7461980536417486e-07, "loss": 0.5999, "step": 8855 }, { "epoch": 0.9045965270684372, "grad_norm": 1.4783836823864092, "learning_rate": 4.736132052144904e-07, "loss": 0.6728, "step": 8856 }, { "epoch": 0.9046986721144025, "grad_norm": 1.5597510906512044, "learning_rate": 4.726076477339958e-07, "loss": 0.7888, "step": 8857 }, { "epoch": 0.9048008171603678, "grad_norm": 1.449844809243343, "learning_rate": 4.7160313303274973e-07, "loss": 0.7078, "step": 8858 }, { "epoch": 0.904902962206333, "grad_norm": 1.32386251011179, "learning_rate": 4.7059966122069754e-07, "loss": 0.5522, "step": 8859 }, { "epoch": 0.9050051072522982, "grad_norm": 1.522791631146952, "learning_rate": 4.69597232407667e-07, "loss": 0.6658, "step": 8860 }, { "epoch": 0.9051072522982635, "grad_norm": 1.521605977924698, "learning_rate": 4.68595846703378e-07, "loss": 0.6937, "step": 8861 }, { "epoch": 0.9052093973442288, "grad_norm": 1.3172199429916436, "learning_rate": 4.6759550421743296e-07, "loss": 0.6262, "step": 8862 }, { "epoch": 0.9053115423901941, "grad_norm": 1.538402520690178, "learning_rate": 4.6659620505931757e-07, "loss": 0.655, "step": 8863 }, { "epoch": 0.9054136874361594, "grad_norm": 1.497635360228963, "learning_rate": 4.655979493384055e-07, "loss": 0.6821, "step": 8864 }, { "epoch": 0.9055158324821246, "grad_norm": 1.4427324945180435, "learning_rate": 4.6460073716395825e-07, "loss": 0.6222, "step": 8865 }, { "epoch": 0.9056179775280899, "grad_norm": 1.4990579454148953, "learning_rate": 4.6360456864512295e-07, "loss": 0.7028, "step": 8866 }, { "epoch": 0.9057201225740552, "grad_norm": 2.4911457101981767, "learning_rate": 4.6260944389092786e-07, "loss": 0.7062, "step": 8867 }, { "epoch": 0.9058222676200204, "grad_norm": 1.5194161247013336, "learning_rate": 4.6161536301029154e-07, "loss": 0.5935, "step": 8868 }, { "epoch": 0.9059244126659857, "grad_norm": 1.6078486022444718, "learning_rate": 4.6062232611201574e-07, "loss": 0.7296, "step": 8869 }, { "epoch": 0.906026557711951, "grad_norm": 1.7627956417542066, "learning_rate": 4.596303333047891e-07, "loss": 0.6883, "step": 8870 }, { "epoch": 0.9061287027579162, "grad_norm": 1.4282856351735906, "learning_rate": 4.5863938469718707e-07, "loss": 0.5914, "step": 8871 }, { "epoch": 0.9062308478038815, "grad_norm": 1.4479624608326627, "learning_rate": 4.576494803976683e-07, "loss": 0.5816, "step": 8872 }, { "epoch": 0.9063329928498468, "grad_norm": 1.5661694944863442, "learning_rate": 4.566606205145796e-07, "loss": 0.7052, "step": 8873 }, { "epoch": 0.9064351378958121, "grad_norm": 1.6261112888759972, "learning_rate": 4.5567280515615207e-07, "loss": 0.657, "step": 8874 }, { "epoch": 0.9065372829417774, "grad_norm": 1.4886309421632047, "learning_rate": 4.5468603443050155e-07, "loss": 0.6774, "step": 8875 }, { "epoch": 0.9066394279877426, "grad_norm": 1.6816665938043953, "learning_rate": 4.537003084456326e-07, "loss": 0.7622, "step": 8876 }, { "epoch": 0.9067415730337078, "grad_norm": 1.4337879321128262, "learning_rate": 4.527156273094324e-07, "loss": 0.8093, "step": 8877 }, { "epoch": 0.9068437180796731, "grad_norm": 1.5024111251009986, "learning_rate": 4.517319911296747e-07, "loss": 0.6909, "step": 8878 }, { "epoch": 0.9069458631256384, "grad_norm": 1.6235565533581433, "learning_rate": 4.507494000140189e-07, "loss": 0.7169, "step": 8879 }, { "epoch": 0.9070480081716037, "grad_norm": 1.4667139193675733, "learning_rate": 4.497678540700112e-07, "loss": 0.6314, "step": 8880 }, { "epoch": 0.907150153217569, "grad_norm": 1.4872571843058904, "learning_rate": 4.487873534050824e-07, "loss": 0.6926, "step": 8881 }, { "epoch": 0.9072522982635343, "grad_norm": 1.5771218572274353, "learning_rate": 4.478078981265499e-07, "loss": 0.7935, "step": 8882 }, { "epoch": 0.9073544433094994, "grad_norm": 1.6204191962407675, "learning_rate": 4.4682948834161356e-07, "loss": 0.7742, "step": 8883 }, { "epoch": 0.9074565883554647, "grad_norm": 1.5220100839105495, "learning_rate": 4.45852124157361e-07, "loss": 0.7307, "step": 8884 }, { "epoch": 0.90755873340143, "grad_norm": 1.449600603937364, "learning_rate": 4.448758056807667e-07, "loss": 0.6134, "step": 8885 }, { "epoch": 0.9076608784473953, "grad_norm": 1.3948453707094013, "learning_rate": 4.4390053301868965e-07, "loss": 0.7771, "step": 8886 }, { "epoch": 0.9077630234933606, "grad_norm": 1.5395122998005635, "learning_rate": 4.4292630627787324e-07, "loss": 0.6437, "step": 8887 }, { "epoch": 0.9078651685393259, "grad_norm": 1.5717364706375552, "learning_rate": 4.4195312556494563e-07, "loss": 0.7226, "step": 8888 }, { "epoch": 0.9079673135852911, "grad_norm": 1.4697836624598823, "learning_rate": 4.409809909864249e-07, "loss": 0.6474, "step": 8889 }, { "epoch": 0.9080694586312564, "grad_norm": 1.5280786654544793, "learning_rate": 4.4000990264871034e-07, "loss": 0.7204, "step": 8890 }, { "epoch": 0.9081716036772216, "grad_norm": 1.547719631065917, "learning_rate": 4.3903986065808924e-07, "loss": 0.7004, "step": 8891 }, { "epoch": 0.9082737487231869, "grad_norm": 1.2808077147523595, "learning_rate": 4.380708651207322e-07, "loss": 0.5752, "step": 8892 }, { "epoch": 0.9083758937691522, "grad_norm": 1.5475178003205454, "learning_rate": 4.371029161426965e-07, "loss": 0.6314, "step": 8893 }, { "epoch": 0.9084780388151175, "grad_norm": 1.6588241776290253, "learning_rate": 4.3613601382992423e-07, "loss": 0.7101, "step": 8894 }, { "epoch": 0.9085801838610827, "grad_norm": 1.5043304201134553, "learning_rate": 4.3517015828824615e-07, "loss": 0.7909, "step": 8895 }, { "epoch": 0.908682328907048, "grad_norm": 1.5912515577254782, "learning_rate": 4.3420534962337224e-07, "loss": 0.6342, "step": 8896 }, { "epoch": 0.9087844739530133, "grad_norm": 1.589553048190197, "learning_rate": 4.3324158794090354e-07, "loss": 0.8424, "step": 8897 }, { "epoch": 0.9088866189989786, "grad_norm": 1.3684204397981246, "learning_rate": 4.3227887334632455e-07, "loss": 0.6637, "step": 8898 }, { "epoch": 0.9089887640449438, "grad_norm": 1.487395639379782, "learning_rate": 4.3131720594500327e-07, "loss": 0.7116, "step": 8899 }, { "epoch": 0.9090909090909091, "grad_norm": 1.4186199981235292, "learning_rate": 4.3035658584219766e-07, "loss": 0.6089, "step": 8900 }, { "epoch": 0.9091930541368743, "grad_norm": 1.4210128453738904, "learning_rate": 4.2939701314304585e-07, "loss": 0.5864, "step": 8901 }, { "epoch": 0.9092951991828396, "grad_norm": 1.4347755942967642, "learning_rate": 4.284384879525749e-07, "loss": 0.6895, "step": 8902 }, { "epoch": 0.9093973442288049, "grad_norm": 1.6397514582558435, "learning_rate": 4.2748101037569323e-07, "loss": 0.8036, "step": 8903 }, { "epoch": 0.9094994892747702, "grad_norm": 1.4487434065996465, "learning_rate": 4.2652458051720025e-07, "loss": 0.701, "step": 8904 }, { "epoch": 0.9096016343207355, "grad_norm": 1.562841860425281, "learning_rate": 4.255691984817789e-07, "loss": 0.6536, "step": 8905 }, { "epoch": 0.9097037793667007, "grad_norm": 1.4333114283968933, "learning_rate": 4.2461486437399337e-07, "loss": 0.6667, "step": 8906 }, { "epoch": 0.909805924412666, "grad_norm": 1.5156292093867363, "learning_rate": 4.236615782982989e-07, "loss": 0.7312, "step": 8907 }, { "epoch": 0.9099080694586312, "grad_norm": 1.4952419336088052, "learning_rate": 4.22709340359031e-07, "loss": 0.6769, "step": 8908 }, { "epoch": 0.9100102145045965, "grad_norm": 1.476352048654321, "learning_rate": 4.217581506604118e-07, "loss": 0.689, "step": 8909 }, { "epoch": 0.9101123595505618, "grad_norm": 1.665372064813896, "learning_rate": 4.208080093065536e-07, "loss": 0.6626, "step": 8910 }, { "epoch": 0.9102145045965271, "grad_norm": 1.4178443355769028, "learning_rate": 4.198589164014477e-07, "loss": 0.6425, "step": 8911 }, { "epoch": 0.9103166496424924, "grad_norm": 1.3778041250777988, "learning_rate": 4.1891087204897097e-07, "loss": 0.546, "step": 8912 }, { "epoch": 0.9104187946884577, "grad_norm": 1.3684193349668414, "learning_rate": 4.179638763528904e-07, "loss": 0.6439, "step": 8913 }, { "epoch": 0.9105209397344228, "grad_norm": 1.7000013242510281, "learning_rate": 4.1701792941685415e-07, "loss": 0.7153, "step": 8914 }, { "epoch": 0.9106230847803881, "grad_norm": 1.5623888865660085, "learning_rate": 4.160730313443984e-07, "loss": 0.7497, "step": 8915 }, { "epoch": 0.9107252298263534, "grad_norm": 1.5197447254062728, "learning_rate": 4.151291822389403e-07, "loss": 0.6749, "step": 8916 }, { "epoch": 0.9108273748723187, "grad_norm": 1.4751899391816226, "learning_rate": 4.1418638220378616e-07, "loss": 0.7406, "step": 8917 }, { "epoch": 0.910929519918284, "grad_norm": 1.5789250643287935, "learning_rate": 4.132446313421246e-07, "loss": 0.7679, "step": 8918 }, { "epoch": 0.9110316649642493, "grad_norm": 1.5286779169960332, "learning_rate": 4.12303929757033e-07, "loss": 0.7821, "step": 8919 }, { "epoch": 0.9111338100102145, "grad_norm": 1.6311795683051307, "learning_rate": 4.1136427755147033e-07, "loss": 0.74, "step": 8920 }, { "epoch": 0.9112359550561798, "grad_norm": 1.567505604719893, "learning_rate": 4.104256748282831e-07, "loss": 0.7326, "step": 8921 }, { "epoch": 0.911338100102145, "grad_norm": 1.609353847079402, "learning_rate": 4.0948812169020137e-07, "loss": 0.7554, "step": 8922 }, { "epoch": 0.9114402451481103, "grad_norm": 1.6608640415024978, "learning_rate": 4.085516182398408e-07, "loss": 0.7194, "step": 8923 }, { "epoch": 0.9115423901940756, "grad_norm": 1.4365131400989493, "learning_rate": 4.076161645797017e-07, "loss": 0.6184, "step": 8924 }, { "epoch": 0.9116445352400409, "grad_norm": 1.4151770622874968, "learning_rate": 4.0668176081217205e-07, "loss": 0.6957, "step": 8925 }, { "epoch": 0.9117466802860061, "grad_norm": 1.57935756562091, "learning_rate": 4.057484070395212e-07, "loss": 0.7328, "step": 8926 }, { "epoch": 0.9118488253319714, "grad_norm": 1.4330170151818198, "learning_rate": 4.048161033639064e-07, "loss": 0.699, "step": 8927 }, { "epoch": 0.9119509703779367, "grad_norm": 1.5575680494422013, "learning_rate": 4.0388484988736707e-07, "loss": 0.6907, "step": 8928 }, { "epoch": 0.912053115423902, "grad_norm": 1.4193865648928499, "learning_rate": 4.029546467118306e-07, "loss": 0.6865, "step": 8929 }, { "epoch": 0.9121552604698672, "grad_norm": 1.6414761832542606, "learning_rate": 4.0202549393910995e-07, "loss": 0.7248, "step": 8930 }, { "epoch": 0.9122574055158325, "grad_norm": 1.4368967877193175, "learning_rate": 4.0109739167090046e-07, "loss": 0.5447, "step": 8931 }, { "epoch": 0.9123595505617977, "grad_norm": 1.4622601607345622, "learning_rate": 4.001703400087831e-07, "loss": 0.5971, "step": 8932 }, { "epoch": 0.912461695607763, "grad_norm": 1.4639563141980465, "learning_rate": 3.992443390542222e-07, "loss": 0.6798, "step": 8933 }, { "epoch": 0.9125638406537283, "grad_norm": 1.573735662441195, "learning_rate": 3.983193889085735e-07, "loss": 0.6937, "step": 8934 }, { "epoch": 0.9126659856996936, "grad_norm": 1.558385481466482, "learning_rate": 3.9739548967307027e-07, "loss": 0.6434, "step": 8935 }, { "epoch": 0.9127681307456589, "grad_norm": 1.506114295379127, "learning_rate": 3.96472641448834e-07, "loss": 0.6526, "step": 8936 }, { "epoch": 0.912870275791624, "grad_norm": 1.4721811399874458, "learning_rate": 3.9555084433687274e-07, "loss": 0.6869, "step": 8937 }, { "epoch": 0.9129724208375893, "grad_norm": 1.4750075423228408, "learning_rate": 3.9463009843807577e-07, "loss": 0.5846, "step": 8938 }, { "epoch": 0.9130745658835546, "grad_norm": 1.3862736327119547, "learning_rate": 3.9371040385322246e-07, "loss": 0.6674, "step": 8939 }, { "epoch": 0.9131767109295199, "grad_norm": 1.5618504086079787, "learning_rate": 3.927917606829712e-07, "loss": 0.83, "step": 8940 }, { "epoch": 0.9132788559754852, "grad_norm": 1.7585550876551548, "learning_rate": 3.9187416902786824e-07, "loss": 0.6304, "step": 8941 }, { "epoch": 0.9133810010214505, "grad_norm": 1.4592329154070418, "learning_rate": 3.9095762898834544e-07, "loss": 0.6356, "step": 8942 }, { "epoch": 0.9134831460674158, "grad_norm": 1.487034609094372, "learning_rate": 3.90042140664717e-07, "loss": 0.6722, "step": 8943 }, { "epoch": 0.913585291113381, "grad_norm": 1.4583344845508903, "learning_rate": 3.8912770415718615e-07, "loss": 0.6023, "step": 8944 }, { "epoch": 0.9136874361593462, "grad_norm": 1.5079050218450298, "learning_rate": 3.8821431956583834e-07, "loss": 0.6638, "step": 8945 }, { "epoch": 0.9137895812053115, "grad_norm": 1.6505793938913262, "learning_rate": 3.873019869906425e-07, "loss": 0.7783, "step": 8946 }, { "epoch": 0.9138917262512768, "grad_norm": 1.5682612576443133, "learning_rate": 3.863907065314554e-07, "loss": 0.6539, "step": 8947 }, { "epoch": 0.9139938712972421, "grad_norm": 1.5684555290459337, "learning_rate": 3.8548047828801505e-07, "loss": 0.7136, "step": 8948 }, { "epoch": 0.9140960163432074, "grad_norm": 1.387603166140642, "learning_rate": 3.8457130235994953e-07, "loss": 0.6154, "step": 8949 }, { "epoch": 0.9141981613891726, "grad_norm": 1.529300529123299, "learning_rate": 3.8366317884676705e-07, "loss": 0.6348, "step": 8950 }, { "epoch": 0.9143003064351379, "grad_norm": 1.4628415845835792, "learning_rate": 3.8275610784786254e-07, "loss": 0.6571, "step": 8951 }, { "epoch": 0.9144024514811032, "grad_norm": 1.360386918936227, "learning_rate": 3.8185008946251546e-07, "loss": 0.6809, "step": 8952 }, { "epoch": 0.9145045965270684, "grad_norm": 1.4956389469836187, "learning_rate": 3.809451237898887e-07, "loss": 0.6223, "step": 8953 }, { "epoch": 0.9146067415730337, "grad_norm": 1.4499482335013432, "learning_rate": 3.800412109290352e-07, "loss": 0.7809, "step": 8954 }, { "epoch": 0.914708886618999, "grad_norm": 1.489386402906914, "learning_rate": 3.7913835097888595e-07, "loss": 0.6731, "step": 8955 }, { "epoch": 0.9148110316649642, "grad_norm": 1.3721374810099194, "learning_rate": 3.7823654403826073e-07, "loss": 0.8223, "step": 8956 }, { "epoch": 0.9149131767109295, "grad_norm": 1.494396635422137, "learning_rate": 3.7733579020586166e-07, "loss": 0.6489, "step": 8957 }, { "epoch": 0.9150153217568948, "grad_norm": 1.3087647018785062, "learning_rate": 3.7643608958027543e-07, "loss": 0.6809, "step": 8958 }, { "epoch": 0.9151174668028601, "grad_norm": 1.5842353638303468, "learning_rate": 3.755374422599789e-07, "loss": 0.6988, "step": 8959 }, { "epoch": 0.9152196118488254, "grad_norm": 1.3212088564942823, "learning_rate": 3.7463984834332665e-07, "loss": 0.6711, "step": 8960 }, { "epoch": 0.9153217568947906, "grad_norm": 1.4140368632953197, "learning_rate": 3.737433079285624e-07, "loss": 0.6392, "step": 8961 }, { "epoch": 0.9154239019407558, "grad_norm": 1.4435915079052284, "learning_rate": 3.72847821113812e-07, "loss": 0.6723, "step": 8962 }, { "epoch": 0.9155260469867211, "grad_norm": 1.459297830367921, "learning_rate": 3.7195338799708716e-07, "loss": 0.7009, "step": 8963 }, { "epoch": 0.9156281920326864, "grad_norm": 1.403540378978877, "learning_rate": 3.71060008676285e-07, "loss": 0.699, "step": 8964 }, { "epoch": 0.9157303370786517, "grad_norm": 1.3929585991068962, "learning_rate": 3.701676832491863e-07, "loss": 0.6038, "step": 8965 }, { "epoch": 0.915832482124617, "grad_norm": 1.7431304800071328, "learning_rate": 3.692764118134573e-07, "loss": 0.6006, "step": 8966 }, { "epoch": 0.9159346271705823, "grad_norm": 1.4805836386151792, "learning_rate": 3.6838619446664447e-07, "loss": 0.6454, "step": 8967 }, { "epoch": 0.9160367722165474, "grad_norm": 1.5950779454773634, "learning_rate": 3.6749703130618653e-07, "loss": 0.6659, "step": 8968 }, { "epoch": 0.9161389172625127, "grad_norm": 1.4926889591244192, "learning_rate": 3.6660892242940227e-07, "loss": 0.6786, "step": 8969 }, { "epoch": 0.916241062308478, "grad_norm": 1.395532426091007, "learning_rate": 3.657218679334962e-07, "loss": 0.7226, "step": 8970 }, { "epoch": 0.9163432073544433, "grad_norm": 1.6064225298030648, "learning_rate": 3.6483586791555613e-07, "loss": 0.7747, "step": 8971 }, { "epoch": 0.9164453524004086, "grad_norm": 1.470024895145512, "learning_rate": 3.6395092247255347e-07, "loss": 0.6803, "step": 8972 }, { "epoch": 0.9165474974463739, "grad_norm": 1.4802091917970328, "learning_rate": 3.630670317013507e-07, "loss": 0.7218, "step": 8973 }, { "epoch": 0.9166496424923392, "grad_norm": 1.569370055803383, "learning_rate": 3.6218419569868603e-07, "loss": 0.75, "step": 8974 }, { "epoch": 0.9167517875383044, "grad_norm": 1.3999677276900633, "learning_rate": 3.6130241456118877e-07, "loss": 0.6385, "step": 8975 }, { "epoch": 0.9168539325842696, "grad_norm": 1.587790441220409, "learning_rate": 3.604216883853684e-07, "loss": 0.6115, "step": 8976 }, { "epoch": 0.9169560776302349, "grad_norm": 1.5352739715010324, "learning_rate": 3.595420172676234e-07, "loss": 0.6429, "step": 8977 }, { "epoch": 0.9170582226762002, "grad_norm": 1.529110107926348, "learning_rate": 3.5866340130423117e-07, "loss": 0.6986, "step": 8978 }, { "epoch": 0.9171603677221655, "grad_norm": 1.5132314831678404, "learning_rate": 3.577858405913615e-07, "loss": 0.7166, "step": 8979 }, { "epoch": 0.9172625127681308, "grad_norm": 1.51041731313612, "learning_rate": 3.569093352250597e-07, "loss": 0.7019, "step": 8980 }, { "epoch": 0.917364657814096, "grad_norm": 1.6401537984693644, "learning_rate": 3.5603388530126236e-07, "loss": 0.6936, "step": 8981 }, { "epoch": 0.9174668028600613, "grad_norm": 1.446058674937161, "learning_rate": 3.5515949091578514e-07, "loss": 0.691, "step": 8982 }, { "epoch": 0.9175689479060266, "grad_norm": 1.4541144754992703, "learning_rate": 3.542861521643348e-07, "loss": 0.5959, "step": 8983 }, { "epoch": 0.9176710929519918, "grad_norm": 1.4078097272429528, "learning_rate": 3.53413869142496e-07, "loss": 0.5902, "step": 8984 }, { "epoch": 0.9177732379979571, "grad_norm": 1.5058883115713082, "learning_rate": 3.525426419457423e-07, "loss": 0.7123, "step": 8985 }, { "epoch": 0.9178753830439224, "grad_norm": 1.6624353704939734, "learning_rate": 3.5167247066942966e-07, "loss": 0.7015, "step": 8986 }, { "epoch": 0.9179775280898876, "grad_norm": 1.3749528303274887, "learning_rate": 3.508033554087975e-07, "loss": 0.5711, "step": 8987 }, { "epoch": 0.9180796731358529, "grad_norm": 1.5481589834530205, "learning_rate": 3.4993529625897413e-07, "loss": 0.6992, "step": 8988 }, { "epoch": 0.9181818181818182, "grad_norm": 1.4951327018881502, "learning_rate": 3.4906829331496697e-07, "loss": 0.6781, "step": 8989 }, { "epoch": 0.9182839632277835, "grad_norm": 1.5599976498262236, "learning_rate": 3.4820234667166996e-07, "loss": 0.704, "step": 8990 }, { "epoch": 0.9183861082737487, "grad_norm": 1.4075024476783786, "learning_rate": 3.4733745642386076e-07, "loss": 0.6781, "step": 8991 }, { "epoch": 0.918488253319714, "grad_norm": 1.5104951243649418, "learning_rate": 3.4647362266620355e-07, "loss": 0.6887, "step": 8992 }, { "epoch": 0.9185903983656792, "grad_norm": 1.4790634653600672, "learning_rate": 3.4561084549324717e-07, "loss": 0.6612, "step": 8993 }, { "epoch": 0.9186925434116445, "grad_norm": 1.6897891201292183, "learning_rate": 3.447491249994206e-07, "loss": 0.8149, "step": 8994 }, { "epoch": 0.9187946884576098, "grad_norm": 1.873893086432714, "learning_rate": 3.438884612790405e-07, "loss": 0.7027, "step": 8995 }, { "epoch": 0.9188968335035751, "grad_norm": 1.5777267798783916, "learning_rate": 3.430288544263072e-07, "loss": 0.7475, "step": 8996 }, { "epoch": 0.9189989785495404, "grad_norm": 1.5427509641976325, "learning_rate": 3.421703045353031e-07, "loss": 0.6166, "step": 8997 }, { "epoch": 0.9191011235955057, "grad_norm": 1.4699686590321384, "learning_rate": 3.4131281170000085e-07, "loss": 0.6753, "step": 8998 }, { "epoch": 0.9192032686414708, "grad_norm": 1.503154415395642, "learning_rate": 3.4045637601425096e-07, "loss": 0.5698, "step": 8999 }, { "epoch": 0.9193054136874361, "grad_norm": 1.5537988184157074, "learning_rate": 3.396009975717929e-07, "loss": 0.7485, "step": 9000 }, { "epoch": 0.9194075587334014, "grad_norm": 1.331848729677276, "learning_rate": 3.3874667646624505e-07, "loss": 0.6175, "step": 9001 }, { "epoch": 0.9195097037793667, "grad_norm": 1.5417778672892601, "learning_rate": 3.3789341279111486e-07, "loss": 0.7749, "step": 9002 }, { "epoch": 0.919611848825332, "grad_norm": 1.5842283699843849, "learning_rate": 3.3704120663979547e-07, "loss": 0.6419, "step": 9003 }, { "epoch": 0.9197139938712973, "grad_norm": 1.4693781773151633, "learning_rate": 3.361900581055577e-07, "loss": 0.6462, "step": 9004 }, { "epoch": 0.9198161389172625, "grad_norm": 1.415116754658429, "learning_rate": 3.353399672815627e-07, "loss": 0.6259, "step": 9005 }, { "epoch": 0.9199182839632278, "grad_norm": 1.3865094119635362, "learning_rate": 3.3449093426085155e-07, "loss": 0.7106, "step": 9006 }, { "epoch": 0.920020429009193, "grad_norm": 1.5547271607281539, "learning_rate": 3.3364295913635214e-07, "loss": 0.6798, "step": 9007 }, { "epoch": 0.9201225740551583, "grad_norm": 1.5176673064971642, "learning_rate": 3.32796042000878e-07, "loss": 0.691, "step": 9008 }, { "epoch": 0.9202247191011236, "grad_norm": 1.5373958318402452, "learning_rate": 3.3195018294712056e-07, "loss": 0.7349, "step": 9009 }, { "epoch": 0.9203268641470889, "grad_norm": 1.5739228959164528, "learning_rate": 3.311053820676635e-07, "loss": 0.669, "step": 9010 }, { "epoch": 0.9204290091930541, "grad_norm": 1.3062468111676893, "learning_rate": 3.3026163945496846e-07, "loss": 0.5868, "step": 9011 }, { "epoch": 0.9205311542390194, "grad_norm": 1.3196193344612024, "learning_rate": 3.294189552013838e-07, "loss": 0.6519, "step": 9012 }, { "epoch": 0.9206332992849847, "grad_norm": 1.5739461201771177, "learning_rate": 3.2857732939914346e-07, "loss": 0.6389, "step": 9013 }, { "epoch": 0.92073544433095, "grad_norm": 1.47890485584071, "learning_rate": 3.2773676214036374e-07, "loss": 0.6463, "step": 9014 }, { "epoch": 0.9208375893769152, "grad_norm": 1.3578734540994402, "learning_rate": 3.268972535170434e-07, "loss": 0.6199, "step": 9015 }, { "epoch": 0.9209397344228805, "grad_norm": 1.4556942347216502, "learning_rate": 3.260588036210677e-07, "loss": 0.5595, "step": 9016 }, { "epoch": 0.9210418794688457, "grad_norm": 1.5053998505890394, "learning_rate": 3.252214125442066e-07, "loss": 0.7182, "step": 9017 }, { "epoch": 0.921144024514811, "grad_norm": 1.6029171949584784, "learning_rate": 3.2438508037811344e-07, "loss": 0.6599, "step": 9018 }, { "epoch": 0.9212461695607763, "grad_norm": 1.4636311768504586, "learning_rate": 3.2354980721432395e-07, "loss": 0.7372, "step": 9019 }, { "epoch": 0.9213483146067416, "grad_norm": 1.4634205016863506, "learning_rate": 3.227155931442605e-07, "loss": 0.7861, "step": 9020 }, { "epoch": 0.9214504596527069, "grad_norm": 1.6722636293229056, "learning_rate": 3.2188243825922673e-07, "loss": 0.6376, "step": 9021 }, { "epoch": 0.9215526046986721, "grad_norm": 1.4227670606688423, "learning_rate": 3.21050342650413e-07, "loss": 0.553, "step": 9022 }, { "epoch": 0.9216547497446373, "grad_norm": 1.4291560141603417, "learning_rate": 3.202193064088932e-07, "loss": 0.6951, "step": 9023 }, { "epoch": 0.9217568947906026, "grad_norm": 1.6180456420779927, "learning_rate": 3.193893296256245e-07, "loss": 0.6593, "step": 9024 }, { "epoch": 0.9218590398365679, "grad_norm": 1.6444997157311811, "learning_rate": 3.185604123914465e-07, "loss": 0.8316, "step": 9025 }, { "epoch": 0.9219611848825332, "grad_norm": 1.2987822154289879, "learning_rate": 3.1773255479708667e-07, "loss": 0.6064, "step": 9026 }, { "epoch": 0.9220633299284985, "grad_norm": 1.5653991315885252, "learning_rate": 3.169057569331557e-07, "loss": 0.735, "step": 9027 }, { "epoch": 0.9221654749744638, "grad_norm": 1.411708294119295, "learning_rate": 3.1608001889014583e-07, "loss": 0.6723, "step": 9028 }, { "epoch": 0.9222676200204291, "grad_norm": 1.4586030537323549, "learning_rate": 3.152553407584336e-07, "loss": 0.6181, "step": 9029 }, { "epoch": 0.9223697650663942, "grad_norm": 1.4305684572405277, "learning_rate": 3.1443172262828223e-07, "loss": 0.6079, "step": 9030 }, { "epoch": 0.9224719101123595, "grad_norm": 1.6809329578860155, "learning_rate": 3.1360916458983536e-07, "loss": 0.7612, "step": 9031 }, { "epoch": 0.9225740551583248, "grad_norm": 1.4867660470944883, "learning_rate": 3.127876667331242e-07, "loss": 0.6439, "step": 9032 }, { "epoch": 0.9226762002042901, "grad_norm": 1.4168839301688856, "learning_rate": 3.119672291480613e-07, "loss": 0.6244, "step": 9033 }, { "epoch": 0.9227783452502554, "grad_norm": 1.477687097653395, "learning_rate": 3.1114785192444484e-07, "loss": 0.6339, "step": 9034 }, { "epoch": 0.9228804902962207, "grad_norm": 1.5067663275072332, "learning_rate": 3.103295351519564e-07, "loss": 0.6743, "step": 9035 }, { "epoch": 0.9229826353421859, "grad_norm": 1.5274048832959097, "learning_rate": 3.0951227892015876e-07, "loss": 0.6449, "step": 9036 }, { "epoch": 0.9230847803881512, "grad_norm": 1.4777313174468392, "learning_rate": 3.0869608331850486e-07, "loss": 0.6378, "step": 9037 }, { "epoch": 0.9231869254341164, "grad_norm": 1.438864450074164, "learning_rate": 3.0788094843632655e-07, "loss": 0.5977, "step": 9038 }, { "epoch": 0.9232890704800817, "grad_norm": 1.455246240118996, "learning_rate": 3.070668743628391e-07, "loss": 0.6984, "step": 9039 }, { "epoch": 0.923391215526047, "grad_norm": 1.5559062050081613, "learning_rate": 3.0625386118714463e-07, "loss": 0.7428, "step": 9040 }, { "epoch": 0.9234933605720123, "grad_norm": 1.506460833201786, "learning_rate": 3.0544190899822743e-07, "loss": 0.6558, "step": 9041 }, { "epoch": 0.9235955056179775, "grad_norm": 1.3640118646974595, "learning_rate": 3.0463101788495766e-07, "loss": 0.6507, "step": 9042 }, { "epoch": 0.9236976506639428, "grad_norm": 1.3886200154493422, "learning_rate": 3.038211879360875e-07, "loss": 0.6353, "step": 9043 }, { "epoch": 0.9237997957099081, "grad_norm": 1.4802388563961917, "learning_rate": 3.0301241924025174e-07, "loss": 0.7455, "step": 9044 }, { "epoch": 0.9239019407558733, "grad_norm": 1.5268293932488866, "learning_rate": 3.0220471188597167e-07, "loss": 0.6755, "step": 9045 }, { "epoch": 0.9240040858018386, "grad_norm": 1.5967341125122239, "learning_rate": 3.0139806596165335e-07, "loss": 0.6352, "step": 9046 }, { "epoch": 0.9241062308478039, "grad_norm": 1.5912764583827683, "learning_rate": 3.0059248155558164e-07, "loss": 0.7739, "step": 9047 }, { "epoch": 0.9242083758937691, "grad_norm": 1.283414034363248, "learning_rate": 2.997879587559294e-07, "loss": 0.5687, "step": 9048 }, { "epoch": 0.9243105209397344, "grad_norm": 1.605010899614883, "learning_rate": 2.9898449765075165e-07, "loss": 0.6611, "step": 9049 }, { "epoch": 0.9244126659856997, "grad_norm": 1.4240546772911074, "learning_rate": 2.981820983279893e-07, "loss": 0.6561, "step": 9050 }, { "epoch": 0.924514811031665, "grad_norm": 1.474986078995608, "learning_rate": 2.9738076087546306e-07, "loss": 0.6954, "step": 9051 }, { "epoch": 0.9246169560776303, "grad_norm": 1.698047494105655, "learning_rate": 2.965804853808818e-07, "loss": 0.7551, "step": 9052 }, { "epoch": 0.9247191011235955, "grad_norm": 1.4638386748394576, "learning_rate": 2.957812719318365e-07, "loss": 0.6655, "step": 9053 }, { "epoch": 0.9248212461695607, "grad_norm": 1.4059410997079163, "learning_rate": 2.9498312061580047e-07, "loss": 0.6918, "step": 9054 }, { "epoch": 0.924923391215526, "grad_norm": 1.660114259671006, "learning_rate": 2.941860315201317e-07, "loss": 0.7143, "step": 9055 }, { "epoch": 0.9250255362614913, "grad_norm": 1.4605432428247527, "learning_rate": 2.933900047320726e-07, "loss": 0.6553, "step": 9056 }, { "epoch": 0.9251276813074566, "grad_norm": 1.554556514259989, "learning_rate": 2.9259504033874787e-07, "loss": 0.699, "step": 9057 }, { "epoch": 0.9252298263534219, "grad_norm": 1.4752230167128741, "learning_rate": 2.9180113842716904e-07, "loss": 0.6703, "step": 9058 }, { "epoch": 0.9253319713993872, "grad_norm": 1.6679577352754222, "learning_rate": 2.9100829908422777e-07, "loss": 0.8089, "step": 9059 }, { "epoch": 0.9254341164453525, "grad_norm": 1.4411931186836937, "learning_rate": 2.9021652239670015e-07, "loss": 0.7455, "step": 9060 }, { "epoch": 0.9255362614913176, "grad_norm": 1.50904518415986, "learning_rate": 2.8942580845124913e-07, "loss": 0.6452, "step": 9061 }, { "epoch": 0.9256384065372829, "grad_norm": 1.392430782657852, "learning_rate": 2.886361573344165e-07, "loss": 0.6879, "step": 9062 }, { "epoch": 0.9257405515832482, "grad_norm": 1.4677125556861943, "learning_rate": 2.878475691326299e-07, "loss": 0.6369, "step": 9063 }, { "epoch": 0.9258426966292135, "grad_norm": 1.392628003431798, "learning_rate": 2.8706004393220357e-07, "loss": 0.6439, "step": 9064 }, { "epoch": 0.9259448416751788, "grad_norm": 1.4078379416764641, "learning_rate": 2.862735818193285e-07, "loss": 0.7396, "step": 9065 }, { "epoch": 0.926046986721144, "grad_norm": 1.6687131859910673, "learning_rate": 2.8548818288008594e-07, "loss": 0.725, "step": 9066 }, { "epoch": 0.9261491317671093, "grad_norm": 1.376749934669538, "learning_rate": 2.847038472004393e-07, "loss": 0.6054, "step": 9067 }, { "epoch": 0.9262512768130746, "grad_norm": 1.4272631722917932, "learning_rate": 2.8392057486623326e-07, "loss": 0.6908, "step": 9068 }, { "epoch": 0.9263534218590398, "grad_norm": 1.5680103160275558, "learning_rate": 2.831383659631981e-07, "loss": 0.6256, "step": 9069 }, { "epoch": 0.9264555669050051, "grad_norm": 1.4734374085034243, "learning_rate": 2.8235722057694534e-07, "loss": 0.6608, "step": 9070 }, { "epoch": 0.9265577119509704, "grad_norm": 1.3950797085421625, "learning_rate": 2.815771387929744e-07, "loss": 0.6494, "step": 9071 }, { "epoch": 0.9266598569969356, "grad_norm": 1.5619883049854058, "learning_rate": 2.807981206966648e-07, "loss": 0.6533, "step": 9072 }, { "epoch": 0.9267620020429009, "grad_norm": 1.4889887493473386, "learning_rate": 2.800201663732782e-07, "loss": 0.718, "step": 9073 }, { "epoch": 0.9268641470888662, "grad_norm": 1.4574939610320594, "learning_rate": 2.792432759079666e-07, "loss": 0.5701, "step": 9074 }, { "epoch": 0.9269662921348315, "grad_norm": 1.5346680627067606, "learning_rate": 2.7846744938575753e-07, "loss": 0.6977, "step": 9075 }, { "epoch": 0.9270684371807967, "grad_norm": 1.5077624502454983, "learning_rate": 2.776926868915675e-07, "loss": 0.6703, "step": 9076 }, { "epoch": 0.927170582226762, "grad_norm": 1.3994226998726766, "learning_rate": 2.7691898851019526e-07, "loss": 0.579, "step": 9077 }, { "epoch": 0.9272727272727272, "grad_norm": 1.4955842533883532, "learning_rate": 2.76146354326321e-07, "loss": 0.606, "step": 9078 }, { "epoch": 0.9273748723186925, "grad_norm": 1.5446520803579855, "learning_rate": 2.7537478442450914e-07, "loss": 0.8061, "step": 9079 }, { "epoch": 0.9274770173646578, "grad_norm": 1.4336480407366239, "learning_rate": 2.7460427888921116e-07, "loss": 0.7232, "step": 9080 }, { "epoch": 0.9275791624106231, "grad_norm": 1.4667826062828704, "learning_rate": 2.738348378047584e-07, "loss": 0.717, "step": 9081 }, { "epoch": 0.9276813074565884, "grad_norm": 1.5105158315964704, "learning_rate": 2.7306646125536685e-07, "loss": 0.7791, "step": 9082 }, { "epoch": 0.9277834525025537, "grad_norm": 1.3897424326179395, "learning_rate": 2.7229914932513477e-07, "loss": 0.6402, "step": 9083 }, { "epoch": 0.9278855975485188, "grad_norm": 1.4825048051652712, "learning_rate": 2.715329020980473e-07, "loss": 0.5874, "step": 9084 }, { "epoch": 0.9279877425944841, "grad_norm": 1.3676974167783438, "learning_rate": 2.707677196579672e-07, "loss": 0.6263, "step": 9085 }, { "epoch": 0.9280898876404494, "grad_norm": 1.4622831549654354, "learning_rate": 2.700036020886465e-07, "loss": 0.6529, "step": 9086 }, { "epoch": 0.9281920326864147, "grad_norm": 1.3961191062572362, "learning_rate": 2.6924054947371935e-07, "loss": 0.5848, "step": 9087 }, { "epoch": 0.92829417773238, "grad_norm": 1.5778766938303936, "learning_rate": 2.6847856189670005e-07, "loss": 0.6885, "step": 9088 }, { "epoch": 0.9283963227783453, "grad_norm": 1.5088768814175193, "learning_rate": 2.677176394409886e-07, "loss": 0.7792, "step": 9089 }, { "epoch": 0.9284984678243106, "grad_norm": 1.5375145524313174, "learning_rate": 2.6695778218986837e-07, "loss": 0.6525, "step": 9090 }, { "epoch": 0.9286006128702758, "grad_norm": 1.490609658669047, "learning_rate": 2.661989902265094e-07, "loss": 0.8083, "step": 9091 }, { "epoch": 0.928702757916241, "grad_norm": 1.5375816327136986, "learning_rate": 2.6544126363395985e-07, "loss": 0.7022, "step": 9092 }, { "epoch": 0.9288049029622063, "grad_norm": 1.5356260020596588, "learning_rate": 2.6468460249515326e-07, "loss": 0.6506, "step": 9093 }, { "epoch": 0.9289070480081716, "grad_norm": 1.5062816751353731, "learning_rate": 2.6392900689290566e-07, "loss": 0.67, "step": 9094 }, { "epoch": 0.9290091930541369, "grad_norm": 1.3357023287484329, "learning_rate": 2.6317447690991983e-07, "loss": 0.6828, "step": 9095 }, { "epoch": 0.9291113381001022, "grad_norm": 1.6362464523164053, "learning_rate": 2.624210126287774e-07, "loss": 0.7265, "step": 9096 }, { "epoch": 0.9292134831460674, "grad_norm": 1.4658188218373034, "learning_rate": 2.6166861413194575e-07, "loss": 0.5894, "step": 9097 }, { "epoch": 0.9293156281920327, "grad_norm": 1.3602847075444386, "learning_rate": 2.60917281501778e-07, "loss": 0.5892, "step": 9098 }, { "epoch": 0.9294177732379979, "grad_norm": 1.4237917067622923, "learning_rate": 2.6016701482050377e-07, "loss": 0.6038, "step": 9099 }, { "epoch": 0.9295199182839632, "grad_norm": 1.4231037093136365, "learning_rate": 2.5941781417024413e-07, "loss": 0.6602, "step": 9100 }, { "epoch": 0.9296220633299285, "grad_norm": 1.421210852654467, "learning_rate": 2.5866967963299797e-07, "loss": 0.4705, "step": 9101 }, { "epoch": 0.9297242083758938, "grad_norm": 1.3909707842132233, "learning_rate": 2.579226112906486e-07, "loss": 0.7635, "step": 9102 }, { "epoch": 0.929826353421859, "grad_norm": 1.4334227508034074, "learning_rate": 2.571766092249639e-07, "loss": 0.6757, "step": 9103 }, { "epoch": 0.9299284984678243, "grad_norm": 1.5094007575381898, "learning_rate": 2.5643167351759315e-07, "loss": 0.61, "step": 9104 }, { "epoch": 0.9300306435137896, "grad_norm": 1.6028771423213608, "learning_rate": 2.5568780425007103e-07, "loss": 0.6586, "step": 9105 }, { "epoch": 0.9301327885597549, "grad_norm": 1.456463712651084, "learning_rate": 2.5494500150381463e-07, "loss": 0.7057, "step": 9106 }, { "epoch": 0.9302349336057201, "grad_norm": 1.546546619626533, "learning_rate": 2.542032653601245e-07, "loss": 0.6972, "step": 9107 }, { "epoch": 0.9303370786516854, "grad_norm": 1.5600063354197171, "learning_rate": 2.534625959001835e-07, "loss": 0.7408, "step": 9108 }, { "epoch": 0.9304392236976506, "grad_norm": 1.720129882822969, "learning_rate": 2.527229932050579e-07, "loss": 0.6684, "step": 9109 }, { "epoch": 0.9305413687436159, "grad_norm": 1.553326871768726, "learning_rate": 2.5198445735569844e-07, "loss": 0.6111, "step": 9110 }, { "epoch": 0.9306435137895812, "grad_norm": 1.3900504869759245, "learning_rate": 2.5124698843293824e-07, "loss": 0.7346, "step": 9111 }, { "epoch": 0.9307456588355465, "grad_norm": 1.4525328659889858, "learning_rate": 2.505105865174939e-07, "loss": 0.7573, "step": 9112 }, { "epoch": 0.9308478038815118, "grad_norm": 1.564158941724999, "learning_rate": 2.4977525168996433e-07, "loss": 0.765, "step": 9113 }, { "epoch": 0.9309499489274771, "grad_norm": 1.5094116640678852, "learning_rate": 2.490409840308328e-07, "loss": 0.6589, "step": 9114 }, { "epoch": 0.9310520939734422, "grad_norm": 1.3633111354005205, "learning_rate": 2.483077836204661e-07, "loss": 0.5708, "step": 9115 }, { "epoch": 0.9311542390194075, "grad_norm": 1.7184992629284188, "learning_rate": 2.475756505391125e-07, "loss": 0.6931, "step": 9116 }, { "epoch": 0.9312563840653728, "grad_norm": 1.4929170250902024, "learning_rate": 2.468445848669054e-07, "loss": 0.586, "step": 9117 }, { "epoch": 0.9313585291113381, "grad_norm": 1.5889289366213262, "learning_rate": 2.461145866838599e-07, "loss": 0.7276, "step": 9118 }, { "epoch": 0.9314606741573034, "grad_norm": 1.3105678542476284, "learning_rate": 2.453856560698731e-07, "loss": 0.6261, "step": 9119 }, { "epoch": 0.9315628192032687, "grad_norm": 1.5753071289609075, "learning_rate": 2.4465779310473e-07, "loss": 0.6837, "step": 9120 }, { "epoch": 0.931664964249234, "grad_norm": 1.4367084370419283, "learning_rate": 2.439309978680926e-07, "loss": 0.6192, "step": 9121 }, { "epoch": 0.9317671092951992, "grad_norm": 1.4839780640277067, "learning_rate": 2.432052704395127e-07, "loss": 0.6672, "step": 9122 }, { "epoch": 0.9318692543411644, "grad_norm": 1.6337771619732901, "learning_rate": 2.42480610898419e-07, "loss": 0.7254, "step": 9123 }, { "epoch": 0.9319713993871297, "grad_norm": 1.4842092966190576, "learning_rate": 2.417570193241259e-07, "loss": 0.6788, "step": 9124 }, { "epoch": 0.932073544433095, "grad_norm": 1.5510836969257646, "learning_rate": 2.4103449579583217e-07, "loss": 0.6493, "step": 9125 }, { "epoch": 0.9321756894790603, "grad_norm": 1.4659656858443209, "learning_rate": 2.4031304039261907e-07, "loss": 0.6895, "step": 9126 }, { "epoch": 0.9322778345250256, "grad_norm": 1.524726494334057, "learning_rate": 2.395926531934478e-07, "loss": 0.8226, "step": 9127 }, { "epoch": 0.9323799795709908, "grad_norm": 1.6145203447749423, "learning_rate": 2.3887333427716654e-07, "loss": 0.6599, "step": 9128 }, { "epoch": 0.9324821246169561, "grad_norm": 1.5572245511718075, "learning_rate": 2.381550837225055e-07, "loss": 0.6889, "step": 9129 }, { "epoch": 0.9325842696629213, "grad_norm": 1.5983137366946285, "learning_rate": 2.3743790160807746e-07, "loss": 0.6297, "step": 9130 }, { "epoch": 0.9326864147088866, "grad_norm": 1.5457715938545957, "learning_rate": 2.3672178801237954e-07, "loss": 0.7167, "step": 9131 }, { "epoch": 0.9327885597548519, "grad_norm": 1.5201206155782399, "learning_rate": 2.3600674301379022e-07, "loss": 0.7646, "step": 9132 }, { "epoch": 0.9328907048008171, "grad_norm": 1.4822701675640622, "learning_rate": 2.3529276669056911e-07, "loss": 0.6677, "step": 9133 }, { "epoch": 0.9329928498467824, "grad_norm": 1.7607538306372794, "learning_rate": 2.345798591208648e-07, "loss": 0.6689, "step": 9134 }, { "epoch": 0.9330949948927477, "grad_norm": 1.3750748004042912, "learning_rate": 2.3386802038270484e-07, "loss": 0.5917, "step": 9135 }, { "epoch": 0.933197139938713, "grad_norm": 1.4940383946217, "learning_rate": 2.3315725055399918e-07, "loss": 0.6666, "step": 9136 }, { "epoch": 0.9332992849846783, "grad_norm": 1.5364699432899165, "learning_rate": 2.3244754971254113e-07, "loss": 0.7197, "step": 9137 }, { "epoch": 0.9334014300306435, "grad_norm": 1.7078066608739093, "learning_rate": 2.3173891793601077e-07, "loss": 0.8291, "step": 9138 }, { "epoch": 0.9335035750766087, "grad_norm": 1.6523859493235273, "learning_rate": 2.3103135530196607e-07, "loss": 0.7272, "step": 9139 }, { "epoch": 0.933605720122574, "grad_norm": 1.5748282987865894, "learning_rate": 2.3032486188785286e-07, "loss": 0.689, "step": 9140 }, { "epoch": 0.9337078651685393, "grad_norm": 1.386508424126899, "learning_rate": 2.2961943777099484e-07, "loss": 0.6643, "step": 9141 }, { "epoch": 0.9338100102145046, "grad_norm": 1.316166920623976, "learning_rate": 2.2891508302860133e-07, "loss": 0.5659, "step": 9142 }, { "epoch": 0.9339121552604699, "grad_norm": 1.515443110306196, "learning_rate": 2.2821179773776404e-07, "loss": 0.666, "step": 9143 }, { "epoch": 0.9340143003064352, "grad_norm": 1.4330229975422812, "learning_rate": 2.2750958197546026e-07, "loss": 0.7577, "step": 9144 }, { "epoch": 0.9341164453524005, "grad_norm": 1.4537149398720624, "learning_rate": 2.2680843581854627e-07, "loss": 0.68, "step": 9145 }, { "epoch": 0.9342185903983656, "grad_norm": 1.5610385085610248, "learning_rate": 2.2610835934376296e-07, "loss": 0.7819, "step": 9146 }, { "epoch": 0.9343207354443309, "grad_norm": 1.4532287269657076, "learning_rate": 2.2540935262773568e-07, "loss": 0.8448, "step": 9147 }, { "epoch": 0.9344228804902962, "grad_norm": 1.413809825646478, "learning_rate": 2.2471141574696765e-07, "loss": 0.6938, "step": 9148 }, { "epoch": 0.9345250255362615, "grad_norm": 1.578987626850694, "learning_rate": 2.2401454877785223e-07, "loss": 0.7897, "step": 9149 }, { "epoch": 0.9346271705822268, "grad_norm": 1.3978398053753782, "learning_rate": 2.2331875179666174e-07, "loss": 0.6519, "step": 9150 }, { "epoch": 0.9347293156281921, "grad_norm": 1.3893884288178022, "learning_rate": 2.2262402487954859e-07, "loss": 0.6452, "step": 9151 }, { "epoch": 0.9348314606741573, "grad_norm": 1.4805054804097963, "learning_rate": 2.2193036810255418e-07, "loss": 0.643, "step": 9152 }, { "epoch": 0.9349336057201226, "grad_norm": 1.609762265211534, "learning_rate": 2.2123778154159558e-07, "loss": 0.6297, "step": 9153 }, { "epoch": 0.9350357507660878, "grad_norm": 1.474466727440794, "learning_rate": 2.2054626527248214e-07, "loss": 0.6667, "step": 9154 }, { "epoch": 0.9351378958120531, "grad_norm": 1.5944723513603682, "learning_rate": 2.1985581937089883e-07, "loss": 0.6466, "step": 9155 }, { "epoch": 0.9352400408580184, "grad_norm": 1.4484222395771356, "learning_rate": 2.1916644391241416e-07, "loss": 0.6751, "step": 9156 }, { "epoch": 0.9353421859039837, "grad_norm": 1.5329706700149432, "learning_rate": 2.1847813897248104e-07, "loss": 0.691, "step": 9157 }, { "epoch": 0.9354443309499489, "grad_norm": 1.4262688920839575, "learning_rate": 2.1779090462643483e-07, "loss": 0.6776, "step": 9158 }, { "epoch": 0.9355464759959142, "grad_norm": 1.850008836317782, "learning_rate": 2.171047409494953e-07, "loss": 0.7105, "step": 9159 }, { "epoch": 0.9356486210418795, "grad_norm": 1.539621491780249, "learning_rate": 2.164196480167624e-07, "loss": 0.6479, "step": 9160 }, { "epoch": 0.9357507660878447, "grad_norm": 1.696711499564608, "learning_rate": 2.157356259032195e-07, "loss": 0.7222, "step": 9161 }, { "epoch": 0.93585291113381, "grad_norm": 1.5699839132225384, "learning_rate": 2.1505267468373447e-07, "loss": 0.7121, "step": 9162 }, { "epoch": 0.9359550561797753, "grad_norm": 1.4204585945720691, "learning_rate": 2.143707944330542e-07, "loss": 0.5993, "step": 9163 }, { "epoch": 0.9360572012257405, "grad_norm": 1.5097873272466122, "learning_rate": 2.136899852258145e-07, "loss": 0.621, "step": 9164 }, { "epoch": 0.9361593462717058, "grad_norm": 1.472967099270891, "learning_rate": 2.1301024713652919e-07, "loss": 0.7536, "step": 9165 }, { "epoch": 0.9362614913176711, "grad_norm": 1.4288746778226458, "learning_rate": 2.123315802395942e-07, "loss": 0.6153, "step": 9166 }, { "epoch": 0.9363636363636364, "grad_norm": 1.8105445982856454, "learning_rate": 2.1165398460929133e-07, "loss": 0.608, "step": 9167 }, { "epoch": 0.9364657814096017, "grad_norm": 1.5726058721852456, "learning_rate": 2.1097746031978561e-07, "loss": 0.731, "step": 9168 }, { "epoch": 0.9365679264555669, "grad_norm": 1.4543231329066133, "learning_rate": 2.1030200744511896e-07, "loss": 0.6115, "step": 9169 }, { "epoch": 0.9366700715015321, "grad_norm": 1.546267395817543, "learning_rate": 2.096276260592245e-07, "loss": 0.7379, "step": 9170 }, { "epoch": 0.9367722165474974, "grad_norm": 1.6224828727306777, "learning_rate": 2.089543162359109e-07, "loss": 0.6338, "step": 9171 }, { "epoch": 0.9368743615934627, "grad_norm": 1.521620274363574, "learning_rate": 2.0828207804887367e-07, "loss": 0.7493, "step": 9172 }, { "epoch": 0.936976506639428, "grad_norm": 1.7000889555108607, "learning_rate": 2.0761091157168844e-07, "loss": 0.7303, "step": 9173 }, { "epoch": 0.9370786516853933, "grad_norm": 1.5214493794892727, "learning_rate": 2.0694081687781642e-07, "loss": 0.6618, "step": 9174 }, { "epoch": 0.9371807967313586, "grad_norm": 1.467202499243737, "learning_rate": 2.0627179404060004e-07, "loss": 0.6026, "step": 9175 }, { "epoch": 0.9372829417773239, "grad_norm": 1.3654687437728887, "learning_rate": 2.0560384313326187e-07, "loss": 0.5816, "step": 9176 }, { "epoch": 0.937385086823289, "grad_norm": 1.4692067681497907, "learning_rate": 2.049369642289112e-07, "loss": 0.6527, "step": 9177 }, { "epoch": 0.9374872318692543, "grad_norm": 1.654689827699916, "learning_rate": 2.0427115740053737e-07, "loss": 0.6393, "step": 9178 }, { "epoch": 0.9375893769152196, "grad_norm": 1.4348862470190473, "learning_rate": 2.036064227210155e-07, "loss": 0.633, "step": 9179 }, { "epoch": 0.9376915219611849, "grad_norm": 1.4484687154579214, "learning_rate": 2.0294276026310066e-07, "loss": 0.5937, "step": 9180 }, { "epoch": 0.9377936670071502, "grad_norm": 1.4794775571299401, "learning_rate": 2.0228017009942924e-07, "loss": 0.7494, "step": 9181 }, { "epoch": 0.9378958120531155, "grad_norm": 1.4270010521828056, "learning_rate": 2.016186523025232e-07, "loss": 0.6545, "step": 9182 }, { "epoch": 0.9379979570990807, "grad_norm": 1.3911152080698819, "learning_rate": 2.0095820694478685e-07, "loss": 0.642, "step": 9183 }, { "epoch": 0.9381001021450459, "grad_norm": 1.4067780513913422, "learning_rate": 2.002988340985057e-07, "loss": 0.6205, "step": 9184 }, { "epoch": 0.9382022471910112, "grad_norm": 1.4203092045042007, "learning_rate": 1.996405338358476e-07, "loss": 0.727, "step": 9185 }, { "epoch": 0.9383043922369765, "grad_norm": 1.4313456146321495, "learning_rate": 1.9898330622886597e-07, "loss": 0.657, "step": 9186 }, { "epoch": 0.9384065372829418, "grad_norm": 1.5764090005522304, "learning_rate": 1.9832715134949333e-07, "loss": 0.7166, "step": 9187 }, { "epoch": 0.938508682328907, "grad_norm": 1.546219421158126, "learning_rate": 1.9767206926954662e-07, "loss": 0.7675, "step": 9188 }, { "epoch": 0.9386108273748723, "grad_norm": 1.5311481796682007, "learning_rate": 1.9701806006072522e-07, "loss": 0.6447, "step": 9189 }, { "epoch": 0.9387129724208376, "grad_norm": 1.5861926173047243, "learning_rate": 1.963651237946107e-07, "loss": 0.7269, "step": 9190 }, { "epoch": 0.9388151174668029, "grad_norm": 1.4093799510424223, "learning_rate": 1.9571326054266814e-07, "loss": 0.5907, "step": 9191 }, { "epoch": 0.9389172625127681, "grad_norm": 1.3379601530316063, "learning_rate": 1.950624703762416e-07, "loss": 0.6797, "step": 9192 }, { "epoch": 0.9390194075587334, "grad_norm": 1.5222326324431352, "learning_rate": 1.9441275336656294e-07, "loss": 0.5817, "step": 9193 }, { "epoch": 0.9391215526046987, "grad_norm": 1.509801818368835, "learning_rate": 1.9376410958474534e-07, "loss": 0.7005, "step": 9194 }, { "epoch": 0.9392236976506639, "grad_norm": 1.5110512363911448, "learning_rate": 1.9311653910178085e-07, "loss": 0.6657, "step": 9195 }, { "epoch": 0.9393258426966292, "grad_norm": 1.649271908127654, "learning_rate": 1.9247004198854724e-07, "loss": 0.7076, "step": 9196 }, { "epoch": 0.9394279877425945, "grad_norm": 1.4048627828985776, "learning_rate": 1.9182461831580234e-07, "loss": 0.6968, "step": 9197 }, { "epoch": 0.9395301327885598, "grad_norm": 1.5583964896608415, "learning_rate": 1.9118026815419188e-07, "loss": 0.689, "step": 9198 }, { "epoch": 0.9396322778345251, "grad_norm": 1.417301813944971, "learning_rate": 1.9053699157423834e-07, "loss": 0.6415, "step": 9199 }, { "epoch": 0.9397344228804902, "grad_norm": 1.51873963909895, "learning_rate": 1.898947886463487e-07, "loss": 0.7756, "step": 9200 }, { "epoch": 0.9398365679264555, "grad_norm": 1.589273558421054, "learning_rate": 1.892536594408112e-07, "loss": 0.654, "step": 9201 }, { "epoch": 0.9399387129724208, "grad_norm": 1.4869529434282553, "learning_rate": 1.8861360402779972e-07, "loss": 0.8251, "step": 9202 }, { "epoch": 0.9400408580183861, "grad_norm": 1.437537377279064, "learning_rate": 1.879746224773682e-07, "loss": 0.6675, "step": 9203 }, { "epoch": 0.9401430030643514, "grad_norm": 1.5107359945466787, "learning_rate": 1.8733671485945516e-07, "loss": 0.7291, "step": 9204 }, { "epoch": 0.9402451481103167, "grad_norm": 1.4939796767948221, "learning_rate": 1.8669988124387695e-07, "loss": 0.7402, "step": 9205 }, { "epoch": 0.940347293156282, "grad_norm": 1.5448036014027644, "learning_rate": 1.8606412170033783e-07, "loss": 0.6909, "step": 9206 }, { "epoch": 0.9404494382022472, "grad_norm": 1.6652358930281643, "learning_rate": 1.8542943629841993e-07, "loss": 0.7863, "step": 9207 }, { "epoch": 0.9405515832482124, "grad_norm": 1.67465360743183, "learning_rate": 1.847958251075921e-07, "loss": 0.6942, "step": 9208 }, { "epoch": 0.9406537282941777, "grad_norm": 1.3793255517744074, "learning_rate": 1.841632881972022e-07, "loss": 0.6659, "step": 9209 }, { "epoch": 0.940755873340143, "grad_norm": 1.5210946436378445, "learning_rate": 1.8353182563648264e-07, "loss": 0.7153, "step": 9210 }, { "epoch": 0.9408580183861083, "grad_norm": 1.4801130401798237, "learning_rate": 1.8290143749454813e-07, "loss": 0.6562, "step": 9211 }, { "epoch": 0.9409601634320736, "grad_norm": 1.487967576262061, "learning_rate": 1.822721238403924e-07, "loss": 0.616, "step": 9212 }, { "epoch": 0.9410623084780388, "grad_norm": 1.565554998305901, "learning_rate": 1.8164388474289584e-07, "loss": 0.7236, "step": 9213 }, { "epoch": 0.9411644535240041, "grad_norm": 1.5585291193941702, "learning_rate": 1.8101672027082018e-07, "loss": 0.6923, "step": 9214 }, { "epoch": 0.9412665985699693, "grad_norm": 1.4756991123897936, "learning_rate": 1.8039063049280824e-07, "loss": 0.6622, "step": 9215 }, { "epoch": 0.9413687436159346, "grad_norm": 1.6939074763150774, "learning_rate": 1.7976561547738526e-07, "loss": 0.721, "step": 9216 }, { "epoch": 0.9414708886618999, "grad_norm": 1.5277423455869896, "learning_rate": 1.791416752929609e-07, "loss": 0.6835, "step": 9217 }, { "epoch": 0.9415730337078652, "grad_norm": 1.498827750617463, "learning_rate": 1.7851881000782612e-07, "loss": 0.6832, "step": 9218 }, { "epoch": 0.9416751787538304, "grad_norm": 1.4716570593858127, "learning_rate": 1.7789701969015195e-07, "loss": 0.6967, "step": 9219 }, { "epoch": 0.9417773237997957, "grad_norm": 1.511468326703934, "learning_rate": 1.7727630440799615e-07, "loss": 0.693, "step": 9220 }, { "epoch": 0.941879468845761, "grad_norm": 1.3614925169175607, "learning_rate": 1.766566642292933e-07, "loss": 0.6452, "step": 9221 }, { "epoch": 0.9419816138917263, "grad_norm": 1.530722700688656, "learning_rate": 1.7603809922186688e-07, "loss": 0.7506, "step": 9222 }, { "epoch": 0.9420837589376915, "grad_norm": 1.6692969357417948, "learning_rate": 1.754206094534161e-07, "loss": 0.8624, "step": 9223 }, { "epoch": 0.9421859039836568, "grad_norm": 1.503319904228773, "learning_rate": 1.7480419499152912e-07, "loss": 0.6789, "step": 9224 }, { "epoch": 0.942288049029622, "grad_norm": 1.57641052138822, "learning_rate": 1.7418885590366864e-07, "loss": 0.7784, "step": 9225 }, { "epoch": 0.9423901940755873, "grad_norm": 1.5827297238864475, "learning_rate": 1.735745922571874e-07, "loss": 0.7343, "step": 9226 }, { "epoch": 0.9424923391215526, "grad_norm": 1.4827221172831844, "learning_rate": 1.7296140411931507e-07, "loss": 0.6493, "step": 9227 }, { "epoch": 0.9425944841675179, "grad_norm": 1.4520911571448938, "learning_rate": 1.7234929155716673e-07, "loss": 0.6955, "step": 9228 }, { "epoch": 0.9426966292134832, "grad_norm": 1.7208341226702184, "learning_rate": 1.7173825463773774e-07, "loss": 0.765, "step": 9229 }, { "epoch": 0.9427987742594485, "grad_norm": 1.4892207631146965, "learning_rate": 1.7112829342790683e-07, "loss": 0.7009, "step": 9230 }, { "epoch": 0.9429009193054136, "grad_norm": 1.332369164366498, "learning_rate": 1.705194079944339e-07, "loss": 0.5831, "step": 9231 }, { "epoch": 0.9430030643513789, "grad_norm": 1.4322658593830402, "learning_rate": 1.6991159840396342e-07, "loss": 0.6594, "step": 9232 }, { "epoch": 0.9431052093973442, "grad_norm": 1.4983619132286408, "learning_rate": 1.6930486472301888e-07, "loss": 0.6423, "step": 9233 }, { "epoch": 0.9432073544433095, "grad_norm": 1.5645139771311332, "learning_rate": 1.6869920701800824e-07, "loss": 0.7148, "step": 9234 }, { "epoch": 0.9433094994892748, "grad_norm": 1.4439804397427218, "learning_rate": 1.680946253552218e-07, "loss": 0.7543, "step": 9235 }, { "epoch": 0.9434116445352401, "grad_norm": 1.5507773309709343, "learning_rate": 1.6749111980082998e-07, "loss": 0.6626, "step": 9236 }, { "epoch": 0.9435137895812054, "grad_norm": 1.5404616595603968, "learning_rate": 1.6688869042088773e-07, "loss": 0.7204, "step": 9237 }, { "epoch": 0.9436159346271705, "grad_norm": 1.4132372839595242, "learning_rate": 1.662873372813323e-07, "loss": 0.654, "step": 9238 }, { "epoch": 0.9437180796731358, "grad_norm": 1.6012398614855041, "learning_rate": 1.65687060447981e-07, "loss": 0.753, "step": 9239 }, { "epoch": 0.9438202247191011, "grad_norm": 1.4663519001830385, "learning_rate": 1.6508785998653466e-07, "loss": 0.7042, "step": 9240 }, { "epoch": 0.9439223697650664, "grad_norm": 1.3698891442492873, "learning_rate": 1.6448973596257412e-07, "loss": 0.6125, "step": 9241 }, { "epoch": 0.9440245148110317, "grad_norm": 1.661359149604778, "learning_rate": 1.6389268844156924e-07, "loss": 0.7368, "step": 9242 }, { "epoch": 0.944126659856997, "grad_norm": 1.5785198621726113, "learning_rate": 1.6329671748886333e-07, "loss": 0.7571, "step": 9243 }, { "epoch": 0.9442288049029622, "grad_norm": 1.3595098553347953, "learning_rate": 1.6270182316968753e-07, "loss": 0.5922, "step": 9244 }, { "epoch": 0.9443309499489275, "grad_norm": 1.3770029103886756, "learning_rate": 1.6210800554915306e-07, "loss": 0.6736, "step": 9245 }, { "epoch": 0.9444330949948927, "grad_norm": 1.538482605999784, "learning_rate": 1.6151526469225243e-07, "loss": 0.7113, "step": 9246 }, { "epoch": 0.944535240040858, "grad_norm": 1.440233041973123, "learning_rate": 1.609236006638637e-07, "loss": 0.5824, "step": 9247 }, { "epoch": 0.9446373850868233, "grad_norm": 1.5608158008113064, "learning_rate": 1.6033301352874396e-07, "loss": 0.7081, "step": 9248 }, { "epoch": 0.9447395301327886, "grad_norm": 1.6535352740355067, "learning_rate": 1.5974350335153154e-07, "loss": 0.7602, "step": 9249 }, { "epoch": 0.9448416751787538, "grad_norm": 1.534514069717892, "learning_rate": 1.5915507019675147e-07, "loss": 0.6574, "step": 9250 }, { "epoch": 0.9449438202247191, "grad_norm": 1.6210579115975412, "learning_rate": 1.5856771412880556e-07, "loss": 0.771, "step": 9251 }, { "epoch": 0.9450459652706844, "grad_norm": 1.3875732925312887, "learning_rate": 1.579814352119835e-07, "loss": 0.5545, "step": 9252 }, { "epoch": 0.9451481103166497, "grad_norm": 1.5308048078834509, "learning_rate": 1.5739623351045174e-07, "loss": 0.6268, "step": 9253 }, { "epoch": 0.9452502553626149, "grad_norm": 1.5468445179103205, "learning_rate": 1.5681210908826127e-07, "loss": 0.7112, "step": 9254 }, { "epoch": 0.9453524004085802, "grad_norm": 1.4732141894788657, "learning_rate": 1.5622906200934428e-07, "loss": 0.7093, "step": 9255 }, { "epoch": 0.9454545454545454, "grad_norm": 1.618852444463458, "learning_rate": 1.5564709233751752e-07, "loss": 0.6604, "step": 9256 }, { "epoch": 0.9455566905005107, "grad_norm": 1.349056677169564, "learning_rate": 1.5506620013647554e-07, "loss": 0.6521, "step": 9257 }, { "epoch": 0.945658835546476, "grad_norm": 1.455499690417291, "learning_rate": 1.5448638546979866e-07, "loss": 0.6139, "step": 9258 }, { "epoch": 0.9457609805924413, "grad_norm": 1.4138197586894834, "learning_rate": 1.539076484009494e-07, "loss": 0.6648, "step": 9259 }, { "epoch": 0.9458631256384066, "grad_norm": 1.5234178311215272, "learning_rate": 1.5332998899326823e-07, "loss": 0.7855, "step": 9260 }, { "epoch": 0.9459652706843719, "grad_norm": 1.5324659588522216, "learning_rate": 1.5275340730998122e-07, "loss": 0.7188, "step": 9261 }, { "epoch": 0.946067415730337, "grad_norm": 1.5263772527270794, "learning_rate": 1.521779034141968e-07, "loss": 0.7074, "step": 9262 }, { "epoch": 0.9461695607763023, "grad_norm": 1.5186616472286383, "learning_rate": 1.5160347736890235e-07, "loss": 0.6143, "step": 9263 }, { "epoch": 0.9462717058222676, "grad_norm": 1.5763677979185726, "learning_rate": 1.5103012923697201e-07, "loss": 0.6978, "step": 9264 }, { "epoch": 0.9463738508682329, "grad_norm": 1.420818036830884, "learning_rate": 1.5045785908115563e-07, "loss": 0.5854, "step": 9265 }, { "epoch": 0.9464759959141982, "grad_norm": 1.722681001924757, "learning_rate": 1.498866669640897e-07, "loss": 0.7203, "step": 9266 }, { "epoch": 0.9465781409601635, "grad_norm": 1.5844118930790396, "learning_rate": 1.4931655294829317e-07, "loss": 0.6346, "step": 9267 }, { "epoch": 0.9466802860061287, "grad_norm": 1.5041325508194328, "learning_rate": 1.4874751709616386e-07, "loss": 0.6227, "step": 9268 }, { "epoch": 0.9467824310520939, "grad_norm": 1.516447761938868, "learning_rate": 1.4817955946998418e-07, "loss": 0.7494, "step": 9269 }, { "epoch": 0.9468845760980592, "grad_norm": 1.6041140912539658, "learning_rate": 1.4761268013191555e-07, "loss": 0.6639, "step": 9270 }, { "epoch": 0.9469867211440245, "grad_norm": 1.4189435857675738, "learning_rate": 1.4704687914400605e-07, "loss": 0.7255, "step": 9271 }, { "epoch": 0.9470888661899898, "grad_norm": 1.493706875093232, "learning_rate": 1.4648215656818066e-07, "loss": 0.6629, "step": 9272 }, { "epoch": 0.9471910112359551, "grad_norm": 1.391188872442206, "learning_rate": 1.4591851246624878e-07, "loss": 0.6944, "step": 9273 }, { "epoch": 0.9472931562819203, "grad_norm": 1.4587415904822167, "learning_rate": 1.453559468999033e-07, "loss": 0.7308, "step": 9274 }, { "epoch": 0.9473953013278856, "grad_norm": 1.5452435889215106, "learning_rate": 1.4479445993071606e-07, "loss": 0.708, "step": 9275 }, { "epoch": 0.9474974463738509, "grad_norm": 1.4266428324147273, "learning_rate": 1.4423405162014238e-07, "loss": 0.606, "step": 9276 }, { "epoch": 0.9475995914198161, "grad_norm": 1.5547600755760407, "learning_rate": 1.4367472202951983e-07, "loss": 0.7454, "step": 9277 }, { "epoch": 0.9477017364657814, "grad_norm": 1.5009573897420434, "learning_rate": 1.4311647122006722e-07, "loss": 0.634, "step": 9278 }, { "epoch": 0.9478038815117467, "grad_norm": 1.5893430883430075, "learning_rate": 1.425592992528846e-07, "loss": 0.6661, "step": 9279 }, { "epoch": 0.947906026557712, "grad_norm": 1.4437110861867624, "learning_rate": 1.4200320618895424e-07, "loss": 0.6271, "step": 9280 }, { "epoch": 0.9480081716036772, "grad_norm": 1.5054424654550063, "learning_rate": 1.4144819208914306e-07, "loss": 0.7966, "step": 9281 }, { "epoch": 0.9481103166496425, "grad_norm": 1.4468942107725755, "learning_rate": 1.408942570141969e-07, "loss": 0.7719, "step": 9282 }, { "epoch": 0.9482124616956078, "grad_norm": 1.538404982684656, "learning_rate": 1.4034140102474392e-07, "loss": 0.6634, "step": 9283 }, { "epoch": 0.9483146067415731, "grad_norm": 1.6256133473944119, "learning_rate": 1.3978962418129572e-07, "loss": 0.6593, "step": 9284 }, { "epoch": 0.9484167517875383, "grad_norm": 1.5959561520228596, "learning_rate": 1.3923892654424177e-07, "loss": 0.6736, "step": 9285 }, { "epoch": 0.9485188968335035, "grad_norm": 1.6079001448065717, "learning_rate": 1.386893081738594e-07, "loss": 0.6901, "step": 9286 }, { "epoch": 0.9486210418794688, "grad_norm": 1.6480143755403525, "learning_rate": 1.3814076913030382e-07, "loss": 0.7224, "step": 9287 }, { "epoch": 0.9487231869254341, "grad_norm": 1.6143665708272015, "learning_rate": 1.375933094736126e-07, "loss": 0.6146, "step": 9288 }, { "epoch": 0.9488253319713994, "grad_norm": 1.4757356779022706, "learning_rate": 1.3704692926370444e-07, "loss": 0.689, "step": 9289 }, { "epoch": 0.9489274770173647, "grad_norm": 1.4433858343052577, "learning_rate": 1.3650162856038153e-07, "loss": 0.5997, "step": 9290 }, { "epoch": 0.94902962206333, "grad_norm": 1.5715214428091502, "learning_rate": 1.359574074233294e-07, "loss": 0.7138, "step": 9291 }, { "epoch": 0.9491317671092953, "grad_norm": 1.6137140443060531, "learning_rate": 1.3541426591211272e-07, "loss": 0.6572, "step": 9292 }, { "epoch": 0.9492339121552604, "grad_norm": 1.546687893978786, "learning_rate": 1.3487220408617718e-07, "loss": 0.6914, "step": 9293 }, { "epoch": 0.9493360572012257, "grad_norm": 1.518421044470318, "learning_rate": 1.3433122200485315e-07, "loss": 0.7158, "step": 9294 }, { "epoch": 0.949438202247191, "grad_norm": 1.5543457163777104, "learning_rate": 1.3379131972734884e-07, "loss": 0.7023, "step": 9295 }, { "epoch": 0.9495403472931563, "grad_norm": 1.5975011108937367, "learning_rate": 1.3325249731276134e-07, "loss": 0.679, "step": 9296 }, { "epoch": 0.9496424923391216, "grad_norm": 1.6024613259367313, "learning_rate": 1.3271475482006134e-07, "loss": 0.6689, "step": 9297 }, { "epoch": 0.9497446373850869, "grad_norm": 1.4728127270988278, "learning_rate": 1.321780923081073e-07, "loss": 0.6868, "step": 9298 }, { "epoch": 0.9498467824310521, "grad_norm": 1.62832985516429, "learning_rate": 1.3164250983563665e-07, "loss": 0.7491, "step": 9299 }, { "epoch": 0.9499489274770173, "grad_norm": 1.5068663065776413, "learning_rate": 1.3110800746126805e-07, "loss": 0.7455, "step": 9300 }, { "epoch": 0.9500510725229826, "grad_norm": 1.512544023129672, "learning_rate": 1.3057458524350476e-07, "loss": 0.5987, "step": 9301 }, { "epoch": 0.9501532175689479, "grad_norm": 1.3087888161853105, "learning_rate": 1.3004224324073e-07, "loss": 0.6404, "step": 9302 }, { "epoch": 0.9502553626149132, "grad_norm": 1.4617228192995908, "learning_rate": 1.295109815112072e-07, "loss": 0.6679, "step": 9303 }, { "epoch": 0.9503575076608785, "grad_norm": 2.766873574774778, "learning_rate": 1.2898080011308543e-07, "loss": 0.669, "step": 9304 }, { "epoch": 0.9504596527068437, "grad_norm": 1.491754081125532, "learning_rate": 1.284516991043927e-07, "loss": 0.6872, "step": 9305 }, { "epoch": 0.950561797752809, "grad_norm": 1.4022827435182528, "learning_rate": 1.2792367854303933e-07, "loss": 0.6924, "step": 9306 }, { "epoch": 0.9506639427987743, "grad_norm": 1.513732646340184, "learning_rate": 1.2739673848681688e-07, "loss": 0.65, "step": 9307 }, { "epoch": 0.9507660878447395, "grad_norm": 1.5332202763447564, "learning_rate": 1.2687087899340144e-07, "loss": 0.6633, "step": 9308 }, { "epoch": 0.9508682328907048, "grad_norm": 1.4633387505028197, "learning_rate": 1.2634610012034586e-07, "loss": 0.6771, "step": 9309 }, { "epoch": 0.95097037793667, "grad_norm": 1.5941804218443754, "learning_rate": 1.2582240192508865e-07, "loss": 0.7348, "step": 9310 }, { "epoch": 0.9510725229826353, "grad_norm": 1.5389927026890677, "learning_rate": 1.2529978446495063e-07, "loss": 0.5975, "step": 9311 }, { "epoch": 0.9511746680286006, "grad_norm": 1.4698878710359864, "learning_rate": 1.2477824779712932e-07, "loss": 0.6688, "step": 9312 }, { "epoch": 0.9512768130745659, "grad_norm": 1.5144857667993492, "learning_rate": 1.2425779197871024e-07, "loss": 0.6762, "step": 9313 }, { "epoch": 0.9513789581205312, "grad_norm": 1.4004750619021893, "learning_rate": 1.2373841706665556e-07, "loss": 0.6904, "step": 9314 }, { "epoch": 0.9514811031664965, "grad_norm": 1.4403383846228797, "learning_rate": 1.2322012311781205e-07, "loss": 0.687, "step": 9315 }, { "epoch": 0.9515832482124617, "grad_norm": 1.6750578519595565, "learning_rate": 1.2270291018890767e-07, "loss": 0.731, "step": 9316 }, { "epoch": 0.9516853932584269, "grad_norm": 1.5818994239982531, "learning_rate": 1.2218677833655157e-07, "loss": 0.6856, "step": 9317 }, { "epoch": 0.9517875383043922, "grad_norm": 1.268700335400305, "learning_rate": 1.2167172761723412e-07, "loss": 0.6599, "step": 9318 }, { "epoch": 0.9518896833503575, "grad_norm": 1.3657047608950803, "learning_rate": 1.2115775808732799e-07, "loss": 0.657, "step": 9319 }, { "epoch": 0.9519918283963228, "grad_norm": 1.556957583595499, "learning_rate": 1.206448698030882e-07, "loss": 0.5482, "step": 9320 }, { "epoch": 0.9520939734422881, "grad_norm": 1.492298082963576, "learning_rate": 1.2013306282064985e-07, "loss": 0.6639, "step": 9321 }, { "epoch": 0.9521961184882534, "grad_norm": 1.4752585547574129, "learning_rate": 1.1962233719603144e-07, "loss": 0.6281, "step": 9322 }, { "epoch": 0.9522982635342185, "grad_norm": 1.5151434569571445, "learning_rate": 1.1911269298513162e-07, "loss": 0.6453, "step": 9323 }, { "epoch": 0.9524004085801838, "grad_norm": 1.5449130116422798, "learning_rate": 1.186041302437313e-07, "loss": 0.5954, "step": 9324 }, { "epoch": 0.9525025536261491, "grad_norm": 1.453585818555242, "learning_rate": 1.1809664902749262e-07, "loss": 0.7877, "step": 9325 }, { "epoch": 0.9526046986721144, "grad_norm": 1.5939350188551138, "learning_rate": 1.1759024939196117e-07, "loss": 0.6176, "step": 9326 }, { "epoch": 0.9527068437180797, "grad_norm": 1.4507339328571252, "learning_rate": 1.1708493139256149e-07, "loss": 0.5803, "step": 9327 }, { "epoch": 0.952808988764045, "grad_norm": 1.301136977892548, "learning_rate": 1.1658069508460157e-07, "loss": 0.555, "step": 9328 }, { "epoch": 0.9529111338100102, "grad_norm": 1.3785820368436714, "learning_rate": 1.1607754052326836e-07, "loss": 0.5953, "step": 9329 }, { "epoch": 0.9530132788559755, "grad_norm": 1.6277974290056627, "learning_rate": 1.155754677636367e-07, "loss": 0.7546, "step": 9330 }, { "epoch": 0.9531154239019407, "grad_norm": 1.3665797519326228, "learning_rate": 1.1507447686065487e-07, "loss": 0.5786, "step": 9331 }, { "epoch": 0.953217568947906, "grad_norm": 1.4373501304917398, "learning_rate": 1.1457456786915788e-07, "loss": 0.6574, "step": 9332 }, { "epoch": 0.9533197139938713, "grad_norm": 1.520732629464046, "learning_rate": 1.1407574084386197e-07, "loss": 0.6363, "step": 9333 }, { "epoch": 0.9534218590398366, "grad_norm": 1.4503950169192572, "learning_rate": 1.1357799583936236e-07, "loss": 0.6362, "step": 9334 }, { "epoch": 0.9535240040858018, "grad_norm": 1.6272060895125915, "learning_rate": 1.130813329101399e-07, "loss": 0.6273, "step": 9335 }, { "epoch": 0.9536261491317671, "grad_norm": 1.5154591964098143, "learning_rate": 1.1258575211055223e-07, "loss": 0.6523, "step": 9336 }, { "epoch": 0.9537282941777324, "grad_norm": 1.603956192689196, "learning_rate": 1.1209125349484263e-07, "loss": 0.7404, "step": 9337 }, { "epoch": 0.9538304392236977, "grad_norm": 1.5311618597428467, "learning_rate": 1.1159783711713335e-07, "loss": 0.6245, "step": 9338 }, { "epoch": 0.9539325842696629, "grad_norm": 1.593823869286098, "learning_rate": 1.1110550303143008e-07, "loss": 0.75, "step": 9339 }, { "epoch": 0.9540347293156282, "grad_norm": 1.3862848543822053, "learning_rate": 1.1061425129161752e-07, "loss": 0.578, "step": 9340 }, { "epoch": 0.9541368743615934, "grad_norm": 1.4522786173209419, "learning_rate": 1.1012408195146596e-07, "loss": 0.6946, "step": 9341 }, { "epoch": 0.9542390194075587, "grad_norm": 1.4420361895243479, "learning_rate": 1.096349950646225e-07, "loss": 0.6743, "step": 9342 }, { "epoch": 0.954341164453524, "grad_norm": 1.596877206868667, "learning_rate": 1.0914699068461876e-07, "loss": 0.6523, "step": 9343 }, { "epoch": 0.9544433094994893, "grad_norm": 1.3998708225106409, "learning_rate": 1.0866006886486757e-07, "loss": 0.6013, "step": 9344 }, { "epoch": 0.9545454545454546, "grad_norm": 1.50468343526662, "learning_rate": 1.0817422965866187e-07, "loss": 0.7695, "step": 9345 }, { "epoch": 0.9546475995914199, "grad_norm": 1.3692514906557283, "learning_rate": 1.0768947311917799e-07, "loss": 0.7239, "step": 9346 }, { "epoch": 0.954749744637385, "grad_norm": 1.6372260736670652, "learning_rate": 1.0720579929947239e-07, "loss": 0.7884, "step": 9347 }, { "epoch": 0.9548518896833503, "grad_norm": 1.353239302600394, "learning_rate": 1.0672320825248383e-07, "loss": 0.7376, "step": 9348 }, { "epoch": 0.9549540347293156, "grad_norm": 1.3684272137669975, "learning_rate": 1.0624170003103119e-07, "loss": 0.6095, "step": 9349 }, { "epoch": 0.9550561797752809, "grad_norm": 1.5666563533156415, "learning_rate": 1.0576127468781782e-07, "loss": 0.7431, "step": 9350 }, { "epoch": 0.9551583248212462, "grad_norm": 1.5055420510615423, "learning_rate": 1.0528193227542505e-07, "loss": 0.6715, "step": 9351 }, { "epoch": 0.9552604698672115, "grad_norm": 1.4846171725221482, "learning_rate": 1.0480367284631865e-07, "loss": 0.6499, "step": 9352 }, { "epoch": 0.9553626149131768, "grad_norm": 1.4326165507769513, "learning_rate": 1.0432649645284121e-07, "loss": 0.6252, "step": 9353 }, { "epoch": 0.9554647599591419, "grad_norm": 1.4765100035115832, "learning_rate": 1.0385040314722317e-07, "loss": 0.6469, "step": 9354 }, { "epoch": 0.9555669050051072, "grad_norm": 1.6171419773985063, "learning_rate": 1.0337539298157284e-07, "loss": 0.6463, "step": 9355 }, { "epoch": 0.9556690500510725, "grad_norm": 1.2663803841964771, "learning_rate": 1.0290146600787865e-07, "loss": 0.6441, "step": 9356 }, { "epoch": 0.9557711950970378, "grad_norm": 1.3253496491213006, "learning_rate": 1.0242862227801464e-07, "loss": 0.6188, "step": 9357 }, { "epoch": 0.9558733401430031, "grad_norm": 1.5306638179849967, "learning_rate": 1.0195686184373165e-07, "loss": 0.7339, "step": 9358 }, { "epoch": 0.9559754851889684, "grad_norm": 1.5703878828020956, "learning_rate": 1.0148618475666505e-07, "loss": 0.7261, "step": 9359 }, { "epoch": 0.9560776302349336, "grad_norm": 1.457665550747588, "learning_rate": 1.0101659106833139e-07, "loss": 0.632, "step": 9360 }, { "epoch": 0.9561797752808989, "grad_norm": 1.350885491983239, "learning_rate": 1.0054808083012624e-07, "loss": 0.6986, "step": 9361 }, { "epoch": 0.9562819203268641, "grad_norm": 1.4812234474606873, "learning_rate": 1.0008065409333079e-07, "loss": 0.669, "step": 9362 }, { "epoch": 0.9563840653728294, "grad_norm": 1.6030211409494997, "learning_rate": 9.961431090910301e-08, "loss": 0.6623, "step": 9363 }, { "epoch": 0.9564862104187947, "grad_norm": 1.5431489267831724, "learning_rate": 9.914905132848651e-08, "loss": 0.6128, "step": 9364 }, { "epoch": 0.95658835546476, "grad_norm": 1.5522403341024757, "learning_rate": 9.868487540240169e-08, "loss": 0.6819, "step": 9365 }, { "epoch": 0.9566905005107252, "grad_norm": 1.3750890110132559, "learning_rate": 9.822178318165565e-08, "loss": 0.6588, "step": 9366 }, { "epoch": 0.9567926455566905, "grad_norm": 1.4668446674202464, "learning_rate": 9.775977471693232e-08, "loss": 0.6428, "step": 9367 }, { "epoch": 0.9568947906026558, "grad_norm": 1.5977532017307068, "learning_rate": 9.729885005879902e-08, "loss": 0.7452, "step": 9368 }, { "epoch": 0.9569969356486211, "grad_norm": 1.4503373504467043, "learning_rate": 9.683900925770429e-08, "loss": 0.6075, "step": 9369 }, { "epoch": 0.9570990806945863, "grad_norm": 1.5850948464398236, "learning_rate": 9.638025236397897e-08, "loss": 0.6982, "step": 9370 }, { "epoch": 0.9572012257405516, "grad_norm": 1.715027277017169, "learning_rate": 9.59225794278329e-08, "loss": 0.7414, "step": 9371 }, { "epoch": 0.9573033707865168, "grad_norm": 1.4886562540181885, "learning_rate": 9.546599049936045e-08, "loss": 0.6563, "step": 9372 }, { "epoch": 0.9574055158324821, "grad_norm": 1.5013492329991132, "learning_rate": 9.501048562853277e-08, "loss": 0.646, "step": 9373 }, { "epoch": 0.9575076608784474, "grad_norm": 1.459513253038374, "learning_rate": 9.455606486520886e-08, "loss": 0.6805, "step": 9374 }, { "epoch": 0.9576098059244127, "grad_norm": 1.4566920212137846, "learning_rate": 9.410272825912225e-08, "loss": 0.709, "step": 9375 }, { "epoch": 0.957711950970378, "grad_norm": 1.5489703885160664, "learning_rate": 9.365047585989218e-08, "loss": 0.71, "step": 9376 }, { "epoch": 0.9578140960163432, "grad_norm": 1.568723791793979, "learning_rate": 9.31993077170179e-08, "loss": 0.5782, "step": 9377 }, { "epoch": 0.9579162410623084, "grad_norm": 1.6086113870166991, "learning_rate": 9.274922387987995e-08, "loss": 0.7567, "step": 9378 }, { "epoch": 0.9580183861082737, "grad_norm": 1.4776654009659025, "learning_rate": 9.230022439774e-08, "loss": 0.7302, "step": 9379 }, { "epoch": 0.958120531154239, "grad_norm": 1.3246881217050994, "learning_rate": 9.185230931974209e-08, "loss": 0.6887, "step": 9380 }, { "epoch": 0.9582226762002043, "grad_norm": 1.408572675121359, "learning_rate": 9.140547869491146e-08, "loss": 0.6767, "step": 9381 }, { "epoch": 0.9583248212461696, "grad_norm": 1.4158015990770736, "learning_rate": 9.095973257215118e-08, "loss": 0.6254, "step": 9382 }, { "epoch": 0.9584269662921349, "grad_norm": 1.5412811612403026, "learning_rate": 9.051507100025114e-08, "loss": 0.7331, "step": 9383 }, { "epoch": 0.9585291113381001, "grad_norm": 1.438132437890826, "learning_rate": 9.007149402787908e-08, "loss": 0.5668, "step": 9384 }, { "epoch": 0.9586312563840653, "grad_norm": 1.5154947069615163, "learning_rate": 8.962900170358391e-08, "loss": 0.7401, "step": 9385 }, { "epoch": 0.9587334014300306, "grad_norm": 1.5694325811432663, "learning_rate": 8.918759407579803e-08, "loss": 0.7009, "step": 9386 }, { "epoch": 0.9588355464759959, "grad_norm": 1.5618867561996117, "learning_rate": 8.874727119283278e-08, "loss": 0.7412, "step": 9387 }, { "epoch": 0.9589376915219612, "grad_norm": 1.5995842436155452, "learning_rate": 8.830803310288183e-08, "loss": 0.7114, "step": 9388 }, { "epoch": 0.9590398365679265, "grad_norm": 1.6121765845625322, "learning_rate": 8.786987985402118e-08, "loss": 0.759, "step": 9389 }, { "epoch": 0.9591419816138917, "grad_norm": 1.5512657837643504, "learning_rate": 8.743281149420691e-08, "loss": 0.7675, "step": 9390 }, { "epoch": 0.959244126659857, "grad_norm": 1.4321518660314434, "learning_rate": 8.699682807127518e-08, "loss": 0.7207, "step": 9391 }, { "epoch": 0.9593462717058223, "grad_norm": 1.4878019006342704, "learning_rate": 8.656192963294452e-08, "loss": 0.5416, "step": 9392 }, { "epoch": 0.9594484167517875, "grad_norm": 1.4556023525616115, "learning_rate": 8.612811622681572e-08, "loss": 0.6952, "step": 9393 }, { "epoch": 0.9595505617977528, "grad_norm": 1.6324140306475445, "learning_rate": 8.569538790036969e-08, "loss": 0.7132, "step": 9394 }, { "epoch": 0.9596527068437181, "grad_norm": 1.412991930622943, "learning_rate": 8.526374470096966e-08, "loss": 0.6558, "step": 9395 }, { "epoch": 0.9597548518896833, "grad_norm": 1.5396416009971017, "learning_rate": 8.483318667585782e-08, "loss": 0.6499, "step": 9396 }, { "epoch": 0.9598569969356486, "grad_norm": 1.4551567615479741, "learning_rate": 8.440371387215985e-08, "loss": 0.7292, "step": 9397 }, { "epoch": 0.9599591419816139, "grad_norm": 1.5899256961579473, "learning_rate": 8.397532633688254e-08, "loss": 0.6429, "step": 9398 }, { "epoch": 0.9600612870275792, "grad_norm": 1.5968969253704848, "learning_rate": 8.354802411691176e-08, "loss": 0.7615, "step": 9399 }, { "epoch": 0.9601634320735445, "grad_norm": 1.4311261822672456, "learning_rate": 8.312180725901676e-08, "loss": 0.6478, "step": 9400 }, { "epoch": 0.9602655771195097, "grad_norm": 1.5778032467725869, "learning_rate": 8.269667580984687e-08, "loss": 0.5741, "step": 9401 }, { "epoch": 0.960367722165475, "grad_norm": 1.5117445016673505, "learning_rate": 8.227262981593265e-08, "loss": 0.757, "step": 9402 }, { "epoch": 0.9604698672114402, "grad_norm": 1.650827259679051, "learning_rate": 8.184966932368698e-08, "loss": 0.6937, "step": 9403 }, { "epoch": 0.9605720122574055, "grad_norm": 1.4979105231437397, "learning_rate": 8.142779437940285e-08, "loss": 0.6729, "step": 9404 }, { "epoch": 0.9606741573033708, "grad_norm": 1.6875897906033999, "learning_rate": 8.100700502925551e-08, "loss": 0.697, "step": 9405 }, { "epoch": 0.9607763023493361, "grad_norm": 1.5737806282144426, "learning_rate": 8.058730131930037e-08, "loss": 0.7054, "step": 9406 }, { "epoch": 0.9608784473953014, "grad_norm": 1.7461437791338588, "learning_rate": 8.01686832954729e-08, "loss": 0.7345, "step": 9407 }, { "epoch": 0.9609805924412665, "grad_norm": 1.4058027721577275, "learning_rate": 7.975115100359199e-08, "loss": 0.7928, "step": 9408 }, { "epoch": 0.9610827374872318, "grad_norm": 1.4343287615359337, "learning_rate": 7.933470448935776e-08, "loss": 0.6059, "step": 9409 }, { "epoch": 0.9611848825331971, "grad_norm": 1.6369556621740613, "learning_rate": 7.89193437983482e-08, "loss": 0.7246, "step": 9410 }, { "epoch": 0.9612870275791624, "grad_norm": 1.6181778251997279, "learning_rate": 7.850506897602805e-08, "loss": 0.6423, "step": 9411 }, { "epoch": 0.9613891726251277, "grad_norm": 1.4868455943295904, "learning_rate": 7.80918800677366e-08, "loss": 0.7151, "step": 9412 }, { "epoch": 0.961491317671093, "grad_norm": 1.4084743714659225, "learning_rate": 7.767977711869989e-08, "loss": 0.6322, "step": 9413 }, { "epoch": 0.9615934627170583, "grad_norm": 1.4631699770694613, "learning_rate": 7.726876017402296e-08, "loss": 0.7022, "step": 9414 }, { "epoch": 0.9616956077630235, "grad_norm": 1.3218703429764336, "learning_rate": 7.685882927869093e-08, "loss": 0.7034, "step": 9415 }, { "epoch": 0.9617977528089887, "grad_norm": 1.580264219215216, "learning_rate": 7.644998447757013e-08, "loss": 0.6453, "step": 9416 }, { "epoch": 0.961899897854954, "grad_norm": 1.5839179877348373, "learning_rate": 7.604222581541143e-08, "loss": 0.7191, "step": 9417 }, { "epoch": 0.9620020429009193, "grad_norm": 1.5600931509003377, "learning_rate": 7.563555333684136e-08, "loss": 0.7548, "step": 9418 }, { "epoch": 0.9621041879468846, "grad_norm": 1.7209103932740446, "learning_rate": 7.52299670863732e-08, "loss": 0.6365, "step": 9419 }, { "epoch": 0.9622063329928499, "grad_norm": 1.3747184700942165, "learning_rate": 7.4825467108397e-08, "loss": 0.6227, "step": 9420 }, { "epoch": 0.9623084780388151, "grad_norm": 1.5857416349249942, "learning_rate": 7.442205344718511e-08, "loss": 0.7638, "step": 9421 }, { "epoch": 0.9624106230847804, "grad_norm": 1.6210903850081748, "learning_rate": 7.401972614689335e-08, "loss": 0.6529, "step": 9422 }, { "epoch": 0.9625127681307457, "grad_norm": 1.375792963072189, "learning_rate": 7.361848525155536e-08, "loss": 0.696, "step": 9423 }, { "epoch": 0.9626149131767109, "grad_norm": 1.581692218164805, "learning_rate": 7.321833080508711e-08, "loss": 0.758, "step": 9424 }, { "epoch": 0.9627170582226762, "grad_norm": 1.5117543085238077, "learning_rate": 7.281926285128582e-08, "loss": 0.7818, "step": 9425 }, { "epoch": 0.9628192032686415, "grad_norm": 1.6581299585325628, "learning_rate": 7.242128143382986e-08, "loss": 0.7561, "step": 9426 }, { "epoch": 0.9629213483146067, "grad_norm": 1.5356630420732904, "learning_rate": 7.202438659627886e-08, "loss": 0.7218, "step": 9427 }, { "epoch": 0.963023493360572, "grad_norm": 1.4384114802202057, "learning_rate": 7.16285783820736e-08, "loss": 0.5566, "step": 9428 }, { "epoch": 0.9631256384065373, "grad_norm": 1.5833799840745688, "learning_rate": 7.123385683453498e-08, "loss": 0.6545, "step": 9429 }, { "epoch": 0.9632277834525026, "grad_norm": 1.5651942844053226, "learning_rate": 7.084022199686513e-08, "loss": 0.6841, "step": 9430 }, { "epoch": 0.9633299284984678, "grad_norm": 1.5170535949495294, "learning_rate": 7.044767391214735e-08, "loss": 0.7431, "step": 9431 }, { "epoch": 0.963432073544433, "grad_norm": 1.5535938019735855, "learning_rate": 7.005621262334838e-08, "loss": 0.6592, "step": 9432 }, { "epoch": 0.9635342185903983, "grad_norm": 1.3597329078172913, "learning_rate": 6.966583817331173e-08, "loss": 0.5713, "step": 9433 }, { "epoch": 0.9636363636363636, "grad_norm": 1.4469071383086436, "learning_rate": 6.927655060476435e-08, "loss": 0.6549, "step": 9434 }, { "epoch": 0.9637385086823289, "grad_norm": 1.5355677754442751, "learning_rate": 6.888834996031546e-08, "loss": 0.7258, "step": 9435 }, { "epoch": 0.9638406537282942, "grad_norm": 1.6418660352232157, "learning_rate": 6.85012362824522e-08, "loss": 0.7007, "step": 9436 }, { "epoch": 0.9639427987742595, "grad_norm": 1.373550107994046, "learning_rate": 6.811520961354623e-08, "loss": 0.6179, "step": 9437 }, { "epoch": 0.9640449438202248, "grad_norm": 1.459613965201276, "learning_rate": 6.773026999584709e-08, "loss": 0.6966, "step": 9438 }, { "epoch": 0.9641470888661899, "grad_norm": 1.5327654069557626, "learning_rate": 6.734641747148663e-08, "loss": 0.7015, "step": 9439 }, { "epoch": 0.9642492339121552, "grad_norm": 1.3806572828581034, "learning_rate": 6.696365208247901e-08, "loss": 0.5251, "step": 9440 }, { "epoch": 0.9643513789581205, "grad_norm": 1.4115136410232567, "learning_rate": 6.65819738707163e-08, "loss": 0.5973, "step": 9441 }, { "epoch": 0.9644535240040858, "grad_norm": 1.5939776343000227, "learning_rate": 6.620138287797396e-08, "loss": 0.7133, "step": 9442 }, { "epoch": 0.9645556690500511, "grad_norm": 1.4992191517543352, "learning_rate": 6.582187914590865e-08, "loss": 0.6344, "step": 9443 }, { "epoch": 0.9646578140960164, "grad_norm": 1.6731812465418523, "learning_rate": 6.544346271605827e-08, "loss": 0.6989, "step": 9444 }, { "epoch": 0.9647599591419816, "grad_norm": 1.5289622286134268, "learning_rate": 6.50661336298386e-08, "loss": 0.6932, "step": 9445 }, { "epoch": 0.9648621041879469, "grad_norm": 1.5716535302409274, "learning_rate": 6.46898919285488e-08, "loss": 0.6442, "step": 9446 }, { "epoch": 0.9649642492339121, "grad_norm": 1.5283770706606583, "learning_rate": 6.431473765336927e-08, "loss": 0.746, "step": 9447 }, { "epoch": 0.9650663942798774, "grad_norm": 1.518666071142373, "learning_rate": 6.39406708453616e-08, "loss": 0.7097, "step": 9448 }, { "epoch": 0.9651685393258427, "grad_norm": 1.5901600603052117, "learning_rate": 6.35676915454675e-08, "loss": 0.7361, "step": 9449 }, { "epoch": 0.965270684371808, "grad_norm": 1.5543987184215513, "learning_rate": 6.319579979450763e-08, "loss": 0.6601, "step": 9450 }, { "epoch": 0.9653728294177732, "grad_norm": 1.4963180787331731, "learning_rate": 6.282499563318834e-08, "loss": 0.63, "step": 9451 }, { "epoch": 0.9654749744637385, "grad_norm": 1.4893446949582094, "learning_rate": 6.245527910209381e-08, "loss": 0.5909, "step": 9452 }, { "epoch": 0.9655771195097038, "grad_norm": 1.2956980760342613, "learning_rate": 6.208665024168948e-08, "loss": 0.6457, "step": 9453 }, { "epoch": 0.9656792645556691, "grad_norm": 1.3979771673912176, "learning_rate": 6.171910909232193e-08, "loss": 0.6424, "step": 9454 }, { "epoch": 0.9657814096016343, "grad_norm": 1.4049358845470743, "learning_rate": 6.1352655694219e-08, "loss": 0.5582, "step": 9455 }, { "epoch": 0.9658835546475996, "grad_norm": 1.593607483537053, "learning_rate": 6.09872900874886e-08, "loss": 0.7171, "step": 9456 }, { "epoch": 0.9659856996935648, "grad_norm": 1.5275074764633596, "learning_rate": 6.062301231212209e-08, "loss": 0.737, "step": 9457 }, { "epoch": 0.9660878447395301, "grad_norm": 1.3703185143529415, "learning_rate": 6.025982240798644e-08, "loss": 0.6366, "step": 9458 }, { "epoch": 0.9661899897854954, "grad_norm": 1.4913914237554737, "learning_rate": 5.989772041483654e-08, "loss": 0.7163, "step": 9459 }, { "epoch": 0.9662921348314607, "grad_norm": 1.3886987997205227, "learning_rate": 5.953670637230291e-08, "loss": 0.6704, "step": 9460 }, { "epoch": 0.966394279877426, "grad_norm": 1.3719624460438704, "learning_rate": 5.9176780319898374e-08, "loss": 0.555, "step": 9461 }, { "epoch": 0.9664964249233912, "grad_norm": 1.479999817934045, "learning_rate": 5.88179422970192e-08, "loss": 0.6494, "step": 9462 }, { "epoch": 0.9665985699693564, "grad_norm": 1.3924134684179825, "learning_rate": 5.8460192342938425e-08, "loss": 0.7124, "step": 9463 }, { "epoch": 0.9667007150153217, "grad_norm": 1.5376962057786092, "learning_rate": 5.81035304968125e-08, "loss": 0.6669, "step": 9464 }, { "epoch": 0.966802860061287, "grad_norm": 1.495720478037599, "learning_rate": 5.774795679767797e-08, "loss": 0.6706, "step": 9465 }, { "epoch": 0.9669050051072523, "grad_norm": 1.4199342944038444, "learning_rate": 5.739347128445372e-08, "loss": 0.6361, "step": 9466 }, { "epoch": 0.9670071501532176, "grad_norm": 1.3577487469193374, "learning_rate": 5.704007399593758e-08, "loss": 0.6541, "step": 9467 }, { "epoch": 0.9671092951991829, "grad_norm": 1.528714545671224, "learning_rate": 5.668776497080974e-08, "loss": 0.7498, "step": 9468 }, { "epoch": 0.9672114402451482, "grad_norm": 1.3503032769149808, "learning_rate": 5.633654424763046e-08, "loss": 0.6852, "step": 9469 }, { "epoch": 0.9673135852911133, "grad_norm": 1.610890383080021, "learning_rate": 5.5986411864840106e-08, "loss": 0.7749, "step": 9470 }, { "epoch": 0.9674157303370786, "grad_norm": 1.4660905683235932, "learning_rate": 5.5637367860762456e-08, "loss": 0.6813, "step": 9471 }, { "epoch": 0.9675178753830439, "grad_norm": 1.570312981471915, "learning_rate": 5.5289412273599184e-08, "loss": 0.7291, "step": 9472 }, { "epoch": 0.9676200204290092, "grad_norm": 1.5775111672502657, "learning_rate": 5.494254514143427e-08, "loss": 0.6656, "step": 9473 }, { "epoch": 0.9677221654749745, "grad_norm": 1.5348538479929605, "learning_rate": 5.4596766502234e-08, "loss": 0.6433, "step": 9474 }, { "epoch": 0.9678243105209398, "grad_norm": 1.5800024963794863, "learning_rate": 5.425207639384256e-08, "loss": 0.6607, "step": 9475 }, { "epoch": 0.967926455566905, "grad_norm": 1.5246106341589787, "learning_rate": 5.390847485398754e-08, "loss": 0.6499, "step": 9476 }, { "epoch": 0.9680286006128703, "grad_norm": 1.5107145837211426, "learning_rate": 5.3565961920275524e-08, "loss": 0.5696, "step": 9477 }, { "epoch": 0.9681307456588355, "grad_norm": 1.354845949199645, "learning_rate": 5.3224537630196526e-08, "loss": 0.6399, "step": 9478 }, { "epoch": 0.9682328907048008, "grad_norm": 1.5038465589660885, "learning_rate": 5.288420202111732e-08, "loss": 0.6815, "step": 9479 }, { "epoch": 0.9683350357507661, "grad_norm": 1.562126247919074, "learning_rate": 5.254495513028812e-08, "loss": 0.6831, "step": 9480 }, { "epoch": 0.9684371807967314, "grad_norm": 1.5940707311951237, "learning_rate": 5.2206796994841434e-08, "loss": 0.6663, "step": 9481 }, { "epoch": 0.9685393258426966, "grad_norm": 1.6050796668608505, "learning_rate": 5.186972765178766e-08, "loss": 0.7101, "step": 9482 }, { "epoch": 0.9686414708886619, "grad_norm": 1.519690025804949, "learning_rate": 5.1533747138019505e-08, "loss": 0.6581, "step": 9483 }, { "epoch": 0.9687436159346272, "grad_norm": 1.358258785941762, "learning_rate": 5.1198855490310895e-08, "loss": 0.6192, "step": 9484 }, { "epoch": 0.9688457609805925, "grad_norm": 1.5812310068873439, "learning_rate": 5.086505274531362e-08, "loss": 0.7611, "step": 9485 }, { "epoch": 0.9689479060265577, "grad_norm": 1.344306262110424, "learning_rate": 5.0532338939566215e-08, "loss": 0.6947, "step": 9486 }, { "epoch": 0.969050051072523, "grad_norm": 1.423471154175696, "learning_rate": 5.0200714109481797e-08, "loss": 0.6058, "step": 9487 }, { "epoch": 0.9691521961184882, "grad_norm": 1.457089415560458, "learning_rate": 4.9870178291356874e-08, "loss": 0.6875, "step": 9488 }, { "epoch": 0.9692543411644535, "grad_norm": 1.2908270849236776, "learning_rate": 4.954073152137029e-08, "loss": 0.524, "step": 9489 }, { "epoch": 0.9693564862104188, "grad_norm": 1.5405178132739534, "learning_rate": 4.9212373835579865e-08, "loss": 0.6823, "step": 9490 }, { "epoch": 0.9694586312563841, "grad_norm": 1.3594330511638362, "learning_rate": 4.888510526992241e-08, "loss": 0.6308, "step": 9491 }, { "epoch": 0.9695607763023494, "grad_norm": 1.44628820048114, "learning_rate": 4.8558925860221486e-08, "loss": 0.6934, "step": 9492 }, { "epoch": 0.9696629213483146, "grad_norm": 1.68256728969364, "learning_rate": 4.8233835642174096e-08, "loss": 0.6404, "step": 9493 }, { "epoch": 0.9697650663942798, "grad_norm": 1.4982268824456026, "learning_rate": 4.7909834651364006e-08, "loss": 0.6301, "step": 9494 }, { "epoch": 0.9698672114402451, "grad_norm": 1.4958152577436736, "learning_rate": 4.7586922923251734e-08, "loss": 0.6751, "step": 9495 }, { "epoch": 0.9699693564862104, "grad_norm": 1.3914487449837927, "learning_rate": 4.7265100493181227e-08, "loss": 0.5993, "step": 9496 }, { "epoch": 0.9700715015321757, "grad_norm": 1.5445884551693123, "learning_rate": 4.6944367396376533e-08, "loss": 0.685, "step": 9497 }, { "epoch": 0.970173646578141, "grad_norm": 1.6482077271455042, "learning_rate": 4.662472366793957e-08, "loss": 0.7283, "step": 9498 }, { "epoch": 0.9702757916241063, "grad_norm": 1.4736148144054146, "learning_rate": 4.6306169342859034e-08, "loss": 0.701, "step": 9499 }, { "epoch": 0.9703779366700716, "grad_norm": 1.397677181778376, "learning_rate": 4.598870445599812e-08, "loss": 0.5897, "step": 9500 }, { "epoch": 0.9704800817160367, "grad_norm": 1.6702363181601776, "learning_rate": 4.5672329042104614e-08, "loss": 0.6774, "step": 9501 }, { "epoch": 0.970582226762002, "grad_norm": 1.5402377376108611, "learning_rate": 4.535704313580636e-08, "loss": 0.7501, "step": 9502 }, { "epoch": 0.9706843718079673, "grad_norm": 1.613641986255052, "learning_rate": 4.5042846771611306e-08, "loss": 0.665, "step": 9503 }, { "epoch": 0.9707865168539326, "grad_norm": 1.6001981384326, "learning_rate": 4.4729739983907506e-08, "loss": 0.638, "step": 9504 }, { "epoch": 0.9708886618998979, "grad_norm": 1.5324990804193306, "learning_rate": 4.4417722806966434e-08, "loss": 0.7435, "step": 9505 }, { "epoch": 0.9709908069458631, "grad_norm": 1.4388552817664748, "learning_rate": 4.410679527493744e-08, "loss": 0.6339, "step": 9506 }, { "epoch": 0.9710929519918284, "grad_norm": 1.6178740944704002, "learning_rate": 4.3796957421852194e-08, "loss": 0.6464, "step": 9507 }, { "epoch": 0.9711950970377937, "grad_norm": 1.3683178759279988, "learning_rate": 4.348820928162356e-08, "loss": 0.6608, "step": 9508 }, { "epoch": 0.9712972420837589, "grad_norm": 1.563470099820946, "learning_rate": 4.3180550888041184e-08, "loss": 0.6824, "step": 9509 }, { "epoch": 0.9713993871297242, "grad_norm": 1.5718656058408509, "learning_rate": 4.287398227478146e-08, "loss": 0.6418, "step": 9510 }, { "epoch": 0.9715015321756895, "grad_norm": 1.34762984021176, "learning_rate": 4.256850347539754e-08, "loss": 0.6517, "step": 9511 }, { "epoch": 0.9716036772216547, "grad_norm": 1.6545414638671812, "learning_rate": 4.22641145233238e-08, "loss": 0.7761, "step": 9512 }, { "epoch": 0.97170582226762, "grad_norm": 1.4495666051927938, "learning_rate": 4.1960815451876915e-08, "loss": 0.7226, "step": 9513 }, { "epoch": 0.9718079673135853, "grad_norm": 1.548819577746712, "learning_rate": 4.165860629425256e-08, "loss": 0.6956, "step": 9514 }, { "epoch": 0.9719101123595506, "grad_norm": 1.6365714756909788, "learning_rate": 4.135748708352649e-08, "loss": 0.6986, "step": 9515 }, { "epoch": 0.9720122574055158, "grad_norm": 1.516481069875011, "learning_rate": 4.105745785265791e-08, "loss": 0.6947, "step": 9516 }, { "epoch": 0.9721144024514811, "grad_norm": 1.303587542824108, "learning_rate": 4.07585186344861e-08, "loss": 0.5145, "step": 9517 }, { "epoch": 0.9722165474974463, "grad_norm": 1.4942874676515192, "learning_rate": 4.046066946172822e-08, "loss": 0.6938, "step": 9518 }, { "epoch": 0.9723186925434116, "grad_norm": 1.511778461744693, "learning_rate": 4.016391036698375e-08, "loss": 0.6875, "step": 9519 }, { "epoch": 0.9724208375893769, "grad_norm": 1.5980375305737715, "learning_rate": 3.9868241382735593e-08, "loss": 0.7166, "step": 9520 }, { "epoch": 0.9725229826353422, "grad_norm": 1.6164000074195612, "learning_rate": 3.957366254134343e-08, "loss": 0.6107, "step": 9521 }, { "epoch": 0.9726251276813075, "grad_norm": 1.5137971229109781, "learning_rate": 3.928017387504812e-08, "loss": 0.7072, "step": 9522 }, { "epoch": 0.9727272727272728, "grad_norm": 1.5750236411039198, "learning_rate": 3.8987775415973985e-08, "loss": 0.7215, "step": 9523 }, { "epoch": 0.972829417773238, "grad_norm": 1.6236838475805129, "learning_rate": 3.869646719612208e-08, "loss": 0.6757, "step": 9524 }, { "epoch": 0.9729315628192032, "grad_norm": 1.481965595978336, "learning_rate": 3.8406249247379125e-08, "loss": 0.6168, "step": 9525 }, { "epoch": 0.9730337078651685, "grad_norm": 1.6073848592798912, "learning_rate": 3.81171216015086e-08, "loss": 0.779, "step": 9526 }, { "epoch": 0.9731358529111338, "grad_norm": 1.4613419756581134, "learning_rate": 3.782908429015408e-08, "loss": 0.75, "step": 9527 }, { "epoch": 0.9732379979570991, "grad_norm": 1.5015264697572668, "learning_rate": 3.754213734484369e-08, "loss": 0.6461, "step": 9528 }, { "epoch": 0.9733401430030644, "grad_norm": 1.4596954847177306, "learning_rate": 3.725628079698229e-08, "loss": 0.5707, "step": 9529 }, { "epoch": 0.9734422880490297, "grad_norm": 1.568039595285662, "learning_rate": 3.697151467785709e-08, "loss": 0.6853, "step": 9530 }, { "epoch": 0.9735444330949949, "grad_norm": 1.6446362610640897, "learning_rate": 3.668783901863759e-08, "loss": 0.6792, "step": 9531 }, { "epoch": 0.9736465781409601, "grad_norm": 1.4393981056455176, "learning_rate": 3.640525385037119e-08, "loss": 0.7216, "step": 9532 }, { "epoch": 0.9737487231869254, "grad_norm": 1.639716264514587, "learning_rate": 3.612375920398758e-08, "loss": 0.7035, "step": 9533 }, { "epoch": 0.9738508682328907, "grad_norm": 1.4739016254029207, "learning_rate": 3.584335511029435e-08, "loss": 0.536, "step": 9534 }, { "epoch": 0.973953013278856, "grad_norm": 1.5055429639859754, "learning_rate": 3.556404159998472e-08, "loss": 0.6992, "step": 9535 }, { "epoch": 0.9740551583248213, "grad_norm": 1.6432180643801644, "learning_rate": 3.5285818703628685e-08, "loss": 0.6703, "step": 9536 }, { "epoch": 0.9741573033707865, "grad_norm": 1.5587980090175149, "learning_rate": 3.500868645167743e-08, "loss": 0.6455, "step": 9537 }, { "epoch": 0.9742594484167518, "grad_norm": 1.600001722365177, "learning_rate": 3.473264487446337e-08, "loss": 0.6777, "step": 9538 }, { "epoch": 0.9743615934627171, "grad_norm": 1.6793618859511104, "learning_rate": 3.4457694002200113e-08, "loss": 0.6717, "step": 9539 }, { "epoch": 0.9744637385086823, "grad_norm": 1.5993122432909426, "learning_rate": 3.4183833864981364e-08, "loss": 0.6538, "step": 9540 }, { "epoch": 0.9745658835546476, "grad_norm": 1.5786056126647705, "learning_rate": 3.391106449277981e-08, "loss": 0.7136, "step": 9541 }, { "epoch": 0.9746680286006129, "grad_norm": 1.4091406895569083, "learning_rate": 3.363938591545157e-08, "loss": 0.6148, "step": 9542 }, { "epoch": 0.9747701736465781, "grad_norm": 1.601398670530783, "learning_rate": 3.336879816273175e-08, "loss": 0.6846, "step": 9543 }, { "epoch": 0.9748723186925434, "grad_norm": 1.5286110008342757, "learning_rate": 3.309930126423555e-08, "loss": 0.6699, "step": 9544 }, { "epoch": 0.9749744637385087, "grad_norm": 1.7036285960927844, "learning_rate": 3.283089524946159e-08, "loss": 0.6894, "step": 9545 }, { "epoch": 0.975076608784474, "grad_norm": 1.3957397900301574, "learning_rate": 3.256358014778416e-08, "loss": 0.6442, "step": 9546 }, { "epoch": 0.9751787538304392, "grad_norm": 1.4083808968544371, "learning_rate": 3.2297355988463175e-08, "loss": 0.6952, "step": 9547 }, { "epoch": 0.9752808988764045, "grad_norm": 1.5298540895386814, "learning_rate": 3.203222280063756e-08, "loss": 0.7081, "step": 9548 }, { "epoch": 0.9753830439223697, "grad_norm": 1.3842890520127713, "learning_rate": 3.17681806133241e-08, "loss": 0.6561, "step": 9549 }, { "epoch": 0.975485188968335, "grad_norm": 1.4401027857329183, "learning_rate": 3.150522945542411e-08, "loss": 0.6705, "step": 9550 }, { "epoch": 0.9755873340143003, "grad_norm": 1.495655116384697, "learning_rate": 3.1243369355717924e-08, "loss": 0.6697, "step": 9551 }, { "epoch": 0.9756894790602656, "grad_norm": 1.577291416180316, "learning_rate": 3.098260034286482e-08, "loss": 0.7457, "step": 9552 }, { "epoch": 0.9757916241062309, "grad_norm": 1.528082975056903, "learning_rate": 3.0722922445406424e-08, "loss": 0.7735, "step": 9553 }, { "epoch": 0.9758937691521962, "grad_norm": 1.4543632115296194, "learning_rate": 3.0464335691765546e-08, "loss": 0.7646, "step": 9554 }, { "epoch": 0.9759959141981613, "grad_norm": 1.3632637853783933, "learning_rate": 3.0206840110243994e-08, "loss": 0.6018, "step": 9555 }, { "epoch": 0.9760980592441266, "grad_norm": 1.5229510073776469, "learning_rate": 2.9950435729025895e-08, "loss": 0.7266, "step": 9556 }, { "epoch": 0.9762002042900919, "grad_norm": 1.9557100214958654, "learning_rate": 2.969512257617324e-08, "loss": 0.7012, "step": 9557 }, { "epoch": 0.9763023493360572, "grad_norm": 1.4273053973910352, "learning_rate": 2.9440900679631457e-08, "loss": 0.652, "step": 9558 }, { "epoch": 0.9764044943820225, "grad_norm": 1.518432184930031, "learning_rate": 2.918777006722495e-08, "loss": 0.6849, "step": 9559 }, { "epoch": 0.9765066394279878, "grad_norm": 1.4173453629070492, "learning_rate": 2.8935730766659343e-08, "loss": 0.6504, "step": 9560 }, { "epoch": 0.976608784473953, "grad_norm": 1.426963462560284, "learning_rate": 2.868478280552034e-08, "loss": 0.7194, "step": 9561 }, { "epoch": 0.9767109295199183, "grad_norm": 1.4719385756639292, "learning_rate": 2.843492621127264e-08, "loss": 0.6424, "step": 9562 }, { "epoch": 0.9768130745658835, "grad_norm": 1.6671012205609284, "learning_rate": 2.818616101126548e-08, "loss": 0.6343, "step": 9563 }, { "epoch": 0.9769152196118488, "grad_norm": 1.5201975232090417, "learning_rate": 2.7938487232725963e-08, "loss": 0.6276, "step": 9564 }, { "epoch": 0.9770173646578141, "grad_norm": 1.6435017878103428, "learning_rate": 2.7691904902761303e-08, "loss": 0.6551, "step": 9565 }, { "epoch": 0.9771195097037794, "grad_norm": 1.4201351051267848, "learning_rate": 2.7446414048361015e-08, "loss": 0.6415, "step": 9566 }, { "epoch": 0.9772216547497447, "grad_norm": 1.4925709761146007, "learning_rate": 2.720201469639361e-08, "loss": 0.7688, "step": 9567 }, { "epoch": 0.9773237997957099, "grad_norm": 1.344894689484712, "learning_rate": 2.6958706873608797e-08, "loss": 0.553, "step": 9568 }, { "epoch": 0.9774259448416752, "grad_norm": 1.5109545782915559, "learning_rate": 2.6716490606637502e-08, "loss": 0.6168, "step": 9569 }, { "epoch": 0.9775280898876404, "grad_norm": 1.4912209162707604, "learning_rate": 2.6475365921989627e-08, "loss": 0.7729, "step": 9570 }, { "epoch": 0.9776302349336057, "grad_norm": 1.4054086389856157, "learning_rate": 2.623533284605628e-08, "loss": 0.6214, "step": 9571 }, { "epoch": 0.977732379979571, "grad_norm": 1.4878537346610572, "learning_rate": 2.5996391405109788e-08, "loss": 0.6096, "step": 9572 }, { "epoch": 0.9778345250255362, "grad_norm": 1.5573787517484798, "learning_rate": 2.5758541625302557e-08, "loss": 0.6912, "step": 9573 }, { "epoch": 0.9779366700715015, "grad_norm": 1.5263052039024527, "learning_rate": 2.55217835326671e-08, "loss": 0.7665, "step": 9574 }, { "epoch": 0.9780388151174668, "grad_norm": 1.329772607509056, "learning_rate": 2.528611715311713e-08, "loss": 0.6562, "step": 9575 }, { "epoch": 0.9781409601634321, "grad_norm": 1.5646938821135543, "learning_rate": 2.505154251244535e-08, "loss": 0.7519, "step": 9576 }, { "epoch": 0.9782431052093974, "grad_norm": 1.508611110923295, "learning_rate": 2.4818059636327883e-08, "loss": 0.6628, "step": 9577 }, { "epoch": 0.9783452502553626, "grad_norm": 1.58646737498991, "learning_rate": 2.458566855031652e-08, "loss": 0.7416, "step": 9578 }, { "epoch": 0.9784473953013278, "grad_norm": 1.5252148721230188, "learning_rate": 2.435436927985091e-08, "loss": 0.6956, "step": 9579 }, { "epoch": 0.9785495403472931, "grad_norm": 1.4694159907337836, "learning_rate": 2.4124161850243023e-08, "loss": 0.6843, "step": 9580 }, { "epoch": 0.9786516853932584, "grad_norm": 1.5835426346202524, "learning_rate": 2.3895046286692702e-08, "loss": 0.663, "step": 9581 }, { "epoch": 0.9787538304392237, "grad_norm": 1.4425845089103262, "learning_rate": 2.3667022614273226e-08, "loss": 0.6395, "step": 9582 }, { "epoch": 0.978855975485189, "grad_norm": 1.2993039957199546, "learning_rate": 2.344009085794463e-08, "loss": 0.6411, "step": 9583 }, { "epoch": 0.9789581205311543, "grad_norm": 1.3944930652913556, "learning_rate": 2.321425104254371e-08, "loss": 0.6684, "step": 9584 }, { "epoch": 0.9790602655771196, "grad_norm": 1.3852702486705006, "learning_rate": 2.2989503192788477e-08, "loss": 0.5782, "step": 9585 }, { "epoch": 0.9791624106230847, "grad_norm": 1.4672234833582334, "learning_rate": 2.2765847333278134e-08, "loss": 0.6398, "step": 9586 }, { "epoch": 0.97926455566905, "grad_norm": 1.29680080741187, "learning_rate": 2.2543283488491997e-08, "loss": 0.6014, "step": 9587 }, { "epoch": 0.9793667007150153, "grad_norm": 1.5616374147123422, "learning_rate": 2.2321811682789463e-08, "loss": 0.7107, "step": 9588 }, { "epoch": 0.9794688457609806, "grad_norm": 1.518530184861213, "learning_rate": 2.2101431940411145e-08, "loss": 0.6046, "step": 9589 }, { "epoch": 0.9795709908069459, "grad_norm": 1.5752752798408691, "learning_rate": 2.1882144285477748e-08, "loss": 0.6313, "step": 9590 }, { "epoch": 0.9796731358529112, "grad_norm": 1.6287133253558443, "learning_rate": 2.1663948741991182e-08, "loss": 0.8074, "step": 9591 }, { "epoch": 0.9797752808988764, "grad_norm": 1.51243944349324, "learning_rate": 2.1446845333831236e-08, "loss": 0.7394, "step": 9592 }, { "epoch": 0.9798774259448417, "grad_norm": 1.6201216683377506, "learning_rate": 2.123083408476112e-08, "loss": 0.6371, "step": 9593 }, { "epoch": 0.9799795709908069, "grad_norm": 1.5693196093017254, "learning_rate": 2.101591501842304e-08, "loss": 0.616, "step": 9594 }, { "epoch": 0.9800817160367722, "grad_norm": 1.5863920479457674, "learning_rate": 2.0802088158341505e-08, "loss": 0.6765, "step": 9595 }, { "epoch": 0.9801838610827375, "grad_norm": 1.495760355170821, "learning_rate": 2.05893535279178e-08, "loss": 0.7358, "step": 9596 }, { "epoch": 0.9802860061287028, "grad_norm": 1.4806875076317487, "learning_rate": 2.037771115043774e-08, "loss": 0.6015, "step": 9597 }, { "epoch": 0.980388151174668, "grad_norm": 1.4893376918397716, "learning_rate": 2.016716104906391e-08, "loss": 0.7127, "step": 9598 }, { "epoch": 0.9804902962206333, "grad_norm": 1.719515823626762, "learning_rate": 1.995770324684232e-08, "loss": 0.6996, "step": 9599 }, { "epoch": 0.9805924412665986, "grad_norm": 1.389484967218732, "learning_rate": 1.9749337766697962e-08, "loss": 0.6825, "step": 9600 }, { "epoch": 0.9806945863125638, "grad_norm": 1.404600958605843, "learning_rate": 1.954206463143704e-08, "loss": 0.5668, "step": 9601 }, { "epoch": 0.9807967313585291, "grad_norm": 1.6048339290695426, "learning_rate": 1.933588386374585e-08, "loss": 0.6326, "step": 9602 }, { "epoch": 0.9808988764044944, "grad_norm": 1.7030109064952923, "learning_rate": 1.913079548618968e-08, "loss": 0.6714, "step": 9603 }, { "epoch": 0.9810010214504596, "grad_norm": 1.515482101094523, "learning_rate": 1.8926799521216123e-08, "loss": 0.7125, "step": 9604 }, { "epoch": 0.9811031664964249, "grad_norm": 1.538760306235086, "learning_rate": 1.8723895991153984e-08, "loss": 0.6417, "step": 9605 }, { "epoch": 0.9812053115423902, "grad_norm": 1.3813989569681897, "learning_rate": 1.8522084918208837e-08, "loss": 0.5928, "step": 9606 }, { "epoch": 0.9813074565883555, "grad_norm": 1.6335133251818597, "learning_rate": 1.8321366324471902e-08, "loss": 0.6499, "step": 9607 }, { "epoch": 0.9814096016343208, "grad_norm": 1.5145008896531882, "learning_rate": 1.8121740231908934e-08, "loss": 0.6612, "step": 9608 }, { "epoch": 0.981511746680286, "grad_norm": 1.590724230407954, "learning_rate": 1.792320666237135e-08, "loss": 0.8148, "step": 9609 }, { "epoch": 0.9816138917262512, "grad_norm": 1.3663934855250255, "learning_rate": 1.772576563758843e-08, "loss": 0.653, "step": 9610 }, { "epoch": 0.9817160367722165, "grad_norm": 1.5434207296504072, "learning_rate": 1.7529417179169562e-08, "loss": 0.7085, "step": 9611 }, { "epoch": 0.9818181818181818, "grad_norm": 1.6024033310838954, "learning_rate": 1.7334161308604215e-08, "loss": 0.695, "step": 9612 }, { "epoch": 0.9819203268641471, "grad_norm": 1.4052211264769003, "learning_rate": 1.7139998047266403e-08, "loss": 0.6054, "step": 9613 }, { "epoch": 0.9820224719101124, "grad_norm": 1.4506094367674345, "learning_rate": 1.6946927416404692e-08, "loss": 0.641, "step": 9614 }, { "epoch": 0.9821246169560777, "grad_norm": 1.433893746359972, "learning_rate": 1.675494943715217e-08, "loss": 0.5743, "step": 9615 }, { "epoch": 0.982226762002043, "grad_norm": 1.539141008389926, "learning_rate": 1.656406413051981e-08, "loss": 0.7094, "step": 9616 }, { "epoch": 0.9823289070480081, "grad_norm": 1.538186413240415, "learning_rate": 1.6374271517400896e-08, "loss": 0.6573, "step": 9617 }, { "epoch": 0.9824310520939734, "grad_norm": 1.439709734841971, "learning_rate": 1.618557161856771e-08, "loss": 0.6072, "step": 9618 }, { "epoch": 0.9825331971399387, "grad_norm": 1.7538173775842172, "learning_rate": 1.599796445467483e-08, "loss": 0.6658, "step": 9619 }, { "epoch": 0.982635342185904, "grad_norm": 1.5249896488665171, "learning_rate": 1.5811450046255837e-08, "loss": 0.6955, "step": 9620 }, { "epoch": 0.9827374872318693, "grad_norm": 1.7833892058365306, "learning_rate": 1.5626028413723293e-08, "loss": 0.7294, "step": 9621 }, { "epoch": 0.9828396322778346, "grad_norm": 1.4623360092080147, "learning_rate": 1.544169957737207e-08, "loss": 0.7016, "step": 9622 }, { "epoch": 0.9829417773237998, "grad_norm": 1.7034121791708219, "learning_rate": 1.5258463557379366e-08, "loss": 0.6878, "step": 9623 }, { "epoch": 0.9830439223697651, "grad_norm": 1.5068524145876203, "learning_rate": 1.5076320373796915e-08, "loss": 0.6909, "step": 9624 }, { "epoch": 0.9831460674157303, "grad_norm": 1.5741935951466728, "learning_rate": 1.4895270046564325e-08, "loss": 0.6425, "step": 9625 }, { "epoch": 0.9832482124616956, "grad_norm": 1.3829933190833323, "learning_rate": 1.4715312595493525e-08, "loss": 0.6514, "step": 9626 }, { "epoch": 0.9833503575076609, "grad_norm": 1.6045151679087422, "learning_rate": 1.4536448040284312e-08, "loss": 0.7654, "step": 9627 }, { "epoch": 0.9834525025536262, "grad_norm": 1.5521353335178172, "learning_rate": 1.435867640051214e-08, "loss": 0.7333, "step": 9628 }, { "epoch": 0.9835546475995914, "grad_norm": 1.493969088695074, "learning_rate": 1.4181997695634774e-08, "loss": 0.755, "step": 9629 }, { "epoch": 0.9836567926455567, "grad_norm": 1.5295369466667021, "learning_rate": 1.4006411944988974e-08, "loss": 0.7005, "step": 9630 }, { "epoch": 0.983758937691522, "grad_norm": 1.4928266772062624, "learning_rate": 1.3831919167792696e-08, "loss": 0.6877, "step": 9631 }, { "epoch": 0.9838610827374872, "grad_norm": 1.511075559829526, "learning_rate": 1.3658519383145107e-08, "loss": 0.6153, "step": 9632 }, { "epoch": 0.9839632277834525, "grad_norm": 1.524093725343304, "learning_rate": 1.3486212610025473e-08, "loss": 0.6323, "step": 9633 }, { "epoch": 0.9840653728294178, "grad_norm": 1.2840936666822316, "learning_rate": 1.331499886729093e-08, "loss": 0.5463, "step": 9634 }, { "epoch": 0.984167517875383, "grad_norm": 1.386038076316431, "learning_rate": 1.3144878173682042e-08, "loss": 0.5476, "step": 9635 }, { "epoch": 0.9842696629213483, "grad_norm": 1.4410513904890978, "learning_rate": 1.2975850547819469e-08, "loss": 0.718, "step": 9636 }, { "epoch": 0.9843718079673136, "grad_norm": 1.4002649937019942, "learning_rate": 1.2807916008201748e-08, "loss": 0.7207, "step": 9637 }, { "epoch": 0.9844739530132789, "grad_norm": 1.508212872791826, "learning_rate": 1.2641074573209732e-08, "loss": 0.6219, "step": 9638 }, { "epoch": 0.9845760980592442, "grad_norm": 1.3945012588945809, "learning_rate": 1.247532626110548e-08, "loss": 0.5929, "step": 9639 }, { "epoch": 0.9846782431052093, "grad_norm": 1.5964522655457851, "learning_rate": 1.2310671090028925e-08, "loss": 0.6758, "step": 9640 }, { "epoch": 0.9847803881511746, "grad_norm": 1.4155939957396753, "learning_rate": 1.2147109078003427e-08, "loss": 0.6987, "step": 9641 }, { "epoch": 0.9848825331971399, "grad_norm": 1.3846225169398596, "learning_rate": 1.1984640242928003e-08, "loss": 0.6224, "step": 9642 }, { "epoch": 0.9849846782431052, "grad_norm": 1.5419409063243765, "learning_rate": 1.1823264602588425e-08, "loss": 0.6709, "step": 9643 }, { "epoch": 0.9850868232890705, "grad_norm": 1.2232222990208017, "learning_rate": 1.166298217464501e-08, "loss": 0.5902, "step": 9644 }, { "epoch": 0.9851889683350358, "grad_norm": 1.4142089390947932, "learning_rate": 1.1503792976641503e-08, "loss": 0.5448, "step": 9645 }, { "epoch": 0.9852911133810011, "grad_norm": 1.452238411588004, "learning_rate": 1.1345697026001745e-08, "loss": 0.7287, "step": 9646 }, { "epoch": 0.9853932584269663, "grad_norm": 1.3323098026295412, "learning_rate": 1.118869434002856e-08, "loss": 0.6466, "step": 9647 }, { "epoch": 0.9854954034729315, "grad_norm": 1.5670632088011618, "learning_rate": 1.1032784935907093e-08, "loss": 0.7177, "step": 9648 }, { "epoch": 0.9855975485188968, "grad_norm": 1.3957745522448013, "learning_rate": 1.0877968830700358e-08, "loss": 0.6075, "step": 9649 }, { "epoch": 0.9856996935648621, "grad_norm": 1.5766434019194155, "learning_rate": 1.0724246041353692e-08, "loss": 0.7226, "step": 9650 }, { "epoch": 0.9858018386108274, "grad_norm": 1.4354718841725742, "learning_rate": 1.0571616584691414e-08, "loss": 0.6535, "step": 9651 }, { "epoch": 0.9859039836567927, "grad_norm": 1.4940211959896346, "learning_rate": 1.0420080477421269e-08, "loss": 0.7099, "step": 9652 }, { "epoch": 0.986006128702758, "grad_norm": 1.6028894237796012, "learning_rate": 1.0269637736126659e-08, "loss": 0.6312, "step": 9653 }, { "epoch": 0.9861082737487232, "grad_norm": 1.4450661106538019, "learning_rate": 1.0120288377274412e-08, "loss": 0.7073, "step": 9654 }, { "epoch": 0.9862104187946884, "grad_norm": 1.496838114799703, "learning_rate": 9.972032417210342e-09, "loss": 0.7378, "step": 9655 }, { "epoch": 0.9863125638406537, "grad_norm": 1.5035543882968891, "learning_rate": 9.824869872162578e-09, "loss": 0.702, "step": 9656 }, { "epoch": 0.986414708886619, "grad_norm": 1.4777435299532664, "learning_rate": 9.678800758237128e-09, "loss": 0.6985, "step": 9657 }, { "epoch": 0.9865168539325843, "grad_norm": 1.4408904193117742, "learning_rate": 9.5338250914212e-09, "loss": 0.6752, "step": 9658 }, { "epoch": 0.9866189989785495, "grad_norm": 1.515999849005689, "learning_rate": 9.389942887582105e-09, "loss": 0.6406, "step": 9659 }, { "epoch": 0.9867211440245148, "grad_norm": 1.5386956906805456, "learning_rate": 9.247154162469464e-09, "loss": 0.689, "step": 9660 }, { "epoch": 0.9868232890704801, "grad_norm": 1.4331736300054096, "learning_rate": 9.105458931710776e-09, "loss": 0.6807, "step": 9661 }, { "epoch": 0.9869254341164454, "grad_norm": 1.3808399906777045, "learning_rate": 8.964857210814748e-09, "loss": 0.6656, "step": 9662 }, { "epoch": 0.9870275791624106, "grad_norm": 1.4924563118890635, "learning_rate": 8.825349015169071e-09, "loss": 0.7078, "step": 9663 }, { "epoch": 0.9871297242083759, "grad_norm": 1.6857126354382221, "learning_rate": 8.686934360044863e-09, "loss": 0.7466, "step": 9664 }, { "epoch": 0.9872318692543411, "grad_norm": 1.4322930877257218, "learning_rate": 8.549613260591117e-09, "loss": 0.7173, "step": 9665 }, { "epoch": 0.9873340143003064, "grad_norm": 1.458555071455001, "learning_rate": 8.41338573183692e-09, "loss": 0.6366, "step": 9666 }, { "epoch": 0.9874361593462717, "grad_norm": 1.4427834316735153, "learning_rate": 8.27825178869257e-09, "loss": 0.6441, "step": 9667 }, { "epoch": 0.987538304392237, "grad_norm": 1.5531175409143838, "learning_rate": 8.144211445949568e-09, "loss": 0.6719, "step": 9668 }, { "epoch": 0.9876404494382023, "grad_norm": 1.4759623987391284, "learning_rate": 8.0112647182784e-09, "loss": 0.5701, "step": 9669 }, { "epoch": 0.9877425944841676, "grad_norm": 1.5225063886883665, "learning_rate": 7.87941162023076e-09, "loss": 0.7285, "step": 9670 }, { "epoch": 0.9878447395301327, "grad_norm": 1.8225258087700142, "learning_rate": 7.748652166236215e-09, "loss": 0.7687, "step": 9671 }, { "epoch": 0.987946884576098, "grad_norm": 1.6645775452305411, "learning_rate": 7.61898637060665e-09, "loss": 0.6574, "step": 9672 }, { "epoch": 0.9880490296220633, "grad_norm": 1.4877050788480064, "learning_rate": 7.49041424753627e-09, "loss": 0.6308, "step": 9673 }, { "epoch": 0.9881511746680286, "grad_norm": 1.4417494569901594, "learning_rate": 7.3629358110960395e-09, "loss": 0.6104, "step": 9674 }, { "epoch": 0.9882533197139939, "grad_norm": 1.397734290251388, "learning_rate": 7.23655107523813e-09, "loss": 0.681, "step": 9675 }, { "epoch": 0.9883554647599592, "grad_norm": 1.4225654302225892, "learning_rate": 7.111260053795921e-09, "loss": 0.6585, "step": 9676 }, { "epoch": 0.9884576098059245, "grad_norm": 1.5560650172386679, "learning_rate": 6.987062760482888e-09, "loss": 0.7472, "step": 9677 }, { "epoch": 0.9885597548518897, "grad_norm": 1.433487842387203, "learning_rate": 6.863959208891491e-09, "loss": 0.6792, "step": 9678 }, { "epoch": 0.9886618998978549, "grad_norm": 1.4770699024837601, "learning_rate": 6.741949412496507e-09, "loss": 0.6715, "step": 9679 }, { "epoch": 0.9887640449438202, "grad_norm": 1.5884365156230937, "learning_rate": 6.6210333846516986e-09, "loss": 0.7384, "step": 9680 }, { "epoch": 0.9888661899897855, "grad_norm": 1.4769738194409154, "learning_rate": 6.501211138590924e-09, "loss": 0.6984, "step": 9681 }, { "epoch": 0.9889683350357508, "grad_norm": 1.6284689701538175, "learning_rate": 6.382482687429247e-09, "loss": 0.7621, "step": 9682 }, { "epoch": 0.989070480081716, "grad_norm": 1.5473833828638142, "learning_rate": 6.264848044161831e-09, "loss": 0.6189, "step": 9683 }, { "epoch": 0.9891726251276813, "grad_norm": 1.6014523065484592, "learning_rate": 6.148307221663929e-09, "loss": 0.7433, "step": 9684 }, { "epoch": 0.9892747701736466, "grad_norm": 1.4128570645182545, "learning_rate": 6.032860232690896e-09, "loss": 0.7049, "step": 9685 }, { "epoch": 0.9893769152196118, "grad_norm": 1.6397437051187032, "learning_rate": 5.918507089877068e-09, "loss": 0.684, "step": 9686 }, { "epoch": 0.9894790602655771, "grad_norm": 1.5233757928283498, "learning_rate": 5.805247805740211e-09, "loss": 0.6894, "step": 9687 }, { "epoch": 0.9895812053115424, "grad_norm": 1.5277121616229623, "learning_rate": 5.693082392675964e-09, "loss": 0.6869, "step": 9688 }, { "epoch": 0.9896833503575077, "grad_norm": 1.5914027769467198, "learning_rate": 5.582010862961173e-09, "loss": 0.6807, "step": 9689 }, { "epoch": 0.9897854954034729, "grad_norm": 1.5737661534500027, "learning_rate": 5.472033228752782e-09, "loss": 0.7387, "step": 9690 }, { "epoch": 0.9898876404494382, "grad_norm": 1.5365617308726318, "learning_rate": 5.363149502086717e-09, "loss": 0.6527, "step": 9691 }, { "epoch": 0.9899897854954035, "grad_norm": 1.5828146635050324, "learning_rate": 5.255359694882334e-09, "loss": 0.6918, "step": 9692 }, { "epoch": 0.9900919305413688, "grad_norm": 1.5098778239805088, "learning_rate": 5.148663818935751e-09, "loss": 0.7175, "step": 9693 }, { "epoch": 0.990194075587334, "grad_norm": 1.5757670001733775, "learning_rate": 5.043061885925404e-09, "loss": 0.6487, "step": 9694 }, { "epoch": 0.9902962206332993, "grad_norm": 1.543442969364944, "learning_rate": 4.9385539074098265e-09, "loss": 0.6719, "step": 9695 }, { "epoch": 0.9903983656792645, "grad_norm": 1.5458135656241583, "learning_rate": 4.835139894826535e-09, "loss": 0.6054, "step": 9696 }, { "epoch": 0.9905005107252298, "grad_norm": 1.4603862657651896, "learning_rate": 4.732819859495363e-09, "loss": 0.6772, "step": 9697 }, { "epoch": 0.9906026557711951, "grad_norm": 1.4815428091732459, "learning_rate": 4.631593812614021e-09, "loss": 0.6821, "step": 9698 }, { "epoch": 0.9907048008171604, "grad_norm": 1.5146836796916139, "learning_rate": 4.531461765263645e-09, "loss": 0.6797, "step": 9699 }, { "epoch": 0.9908069458631257, "grad_norm": 1.4444515065787251, "learning_rate": 4.432423728402135e-09, "loss": 0.7226, "step": 9700 }, { "epoch": 0.990909090909091, "grad_norm": 1.3998400462277139, "learning_rate": 4.33447971286971e-09, "loss": 0.6329, "step": 9701 }, { "epoch": 0.9910112359550561, "grad_norm": 1.5773829833884847, "learning_rate": 4.237629729387793e-09, "loss": 0.6806, "step": 9702 }, { "epoch": 0.9911133810010214, "grad_norm": 1.5238353944196297, "learning_rate": 4.141873788553463e-09, "loss": 0.643, "step": 9703 }, { "epoch": 0.9912155260469867, "grad_norm": 1.5986699686960189, "learning_rate": 4.047211900850556e-09, "loss": 0.7402, "step": 9704 }, { "epoch": 0.991317671092952, "grad_norm": 1.4298476636782917, "learning_rate": 3.953644076638563e-09, "loss": 0.6601, "step": 9705 }, { "epoch": 0.9914198161389173, "grad_norm": 1.4038314891185384, "learning_rate": 3.861170326157071e-09, "loss": 0.6782, "step": 9706 }, { "epoch": 0.9915219611848826, "grad_norm": 1.590110716614218, "learning_rate": 3.769790659530204e-09, "loss": 0.6652, "step": 9707 }, { "epoch": 0.9916241062308478, "grad_norm": 1.6411131544942885, "learning_rate": 3.67950508675774e-09, "loss": 0.6967, "step": 9708 }, { "epoch": 0.991726251276813, "grad_norm": 1.4016242073432523, "learning_rate": 3.5903136177217744e-09, "loss": 0.7005, "step": 9709 }, { "epoch": 0.9918283963227783, "grad_norm": 1.4000700295605912, "learning_rate": 3.502216262184499e-09, "loss": 0.6595, "step": 9710 }, { "epoch": 0.9919305413687436, "grad_norm": 1.6845058129393669, "learning_rate": 3.4152130297882003e-09, "loss": 0.7433, "step": 9711 }, { "epoch": 0.9920326864147089, "grad_norm": 1.5610450899616208, "learning_rate": 3.329303930055261e-09, "loss": 0.7315, "step": 9712 }, { "epoch": 0.9921348314606742, "grad_norm": 1.629786125282944, "learning_rate": 3.2444889723892702e-09, "loss": 0.6837, "step": 9713 }, { "epoch": 0.9922369765066394, "grad_norm": 1.5353016555086143, "learning_rate": 3.160768166072803e-09, "loss": 0.6821, "step": 9714 }, { "epoch": 0.9923391215526047, "grad_norm": 1.6737665477959751, "learning_rate": 3.0781415202685293e-09, "loss": 0.7553, "step": 9715 }, { "epoch": 0.99244126659857, "grad_norm": 1.512155379472392, "learning_rate": 2.9966090440203264e-09, "loss": 0.6331, "step": 9716 }, { "epoch": 0.9925434116445352, "grad_norm": 1.4524805653500787, "learning_rate": 2.916170746252167e-09, "loss": 0.7318, "step": 9717 }, { "epoch": 0.9926455566905005, "grad_norm": 1.4286117923143586, "learning_rate": 2.8368266357681195e-09, "loss": 0.6967, "step": 9718 }, { "epoch": 0.9927477017364658, "grad_norm": 1.4520311352785522, "learning_rate": 2.7585767212534587e-09, "loss": 0.5924, "step": 9719 }, { "epoch": 0.992849846782431, "grad_norm": 1.4907033731896187, "learning_rate": 2.6814210112702245e-09, "loss": 0.6227, "step": 9720 }, { "epoch": 0.9929519918283963, "grad_norm": 1.440712116757037, "learning_rate": 2.6053595142649936e-09, "loss": 0.7134, "step": 9721 }, { "epoch": 0.9930541368743616, "grad_norm": 1.5190057869631894, "learning_rate": 2.5303922385622183e-09, "loss": 0.5884, "step": 9722 }, { "epoch": 0.9931562819203269, "grad_norm": 1.3575247305573648, "learning_rate": 2.4565191923675568e-09, "loss": 0.5936, "step": 9723 }, { "epoch": 0.9932584269662922, "grad_norm": 1.4595834482568206, "learning_rate": 2.3837403837656536e-09, "loss": 0.6344, "step": 9724 }, { "epoch": 0.9933605720122574, "grad_norm": 1.5763056876861297, "learning_rate": 2.312055820723469e-09, "loss": 0.8151, "step": 9725 }, { "epoch": 0.9934627170582226, "grad_norm": 1.577924075792768, "learning_rate": 2.2414655110858385e-09, "loss": 0.7893, "step": 9726 }, { "epoch": 0.9935648621041879, "grad_norm": 1.563689832023081, "learning_rate": 2.1719694625788046e-09, "loss": 0.6476, "step": 9727 }, { "epoch": 0.9936670071501532, "grad_norm": 1.427880016951051, "learning_rate": 2.103567682808505e-09, "loss": 0.7267, "step": 9728 }, { "epoch": 0.9937691521961185, "grad_norm": 1.4099044144074537, "learning_rate": 2.036260179263394e-09, "loss": 0.5795, "step": 9729 }, { "epoch": 0.9938712972420838, "grad_norm": 1.5308154578199733, "learning_rate": 1.970046959308691e-09, "loss": 0.6904, "step": 9730 }, { "epoch": 0.9939734422880491, "grad_norm": 1.5152319127819918, "learning_rate": 1.9049280301919324e-09, "loss": 0.6576, "step": 9731 }, { "epoch": 0.9940755873340144, "grad_norm": 1.6258738679917872, "learning_rate": 1.8409033990407498e-09, "loss": 0.7749, "step": 9732 }, { "epoch": 0.9941777323799795, "grad_norm": 1.5610616565163176, "learning_rate": 1.7779730728617606e-09, "loss": 0.6791, "step": 9733 }, { "epoch": 0.9942798774259448, "grad_norm": 1.544344074422544, "learning_rate": 1.7161370585427883e-09, "loss": 0.7594, "step": 9734 }, { "epoch": 0.9943820224719101, "grad_norm": 1.4595376190228746, "learning_rate": 1.655395362852863e-09, "loss": 0.6629, "step": 9735 }, { "epoch": 0.9944841675178754, "grad_norm": 1.4811938931076363, "learning_rate": 1.59574799244e-09, "loss": 0.7351, "step": 9736 }, { "epoch": 0.9945863125638407, "grad_norm": 1.5457695137180496, "learning_rate": 1.5371949538323105e-09, "loss": 0.5872, "step": 9737 }, { "epoch": 0.994688457609806, "grad_norm": 1.489779053503393, "learning_rate": 1.4797362534380022e-09, "loss": 0.6846, "step": 9738 }, { "epoch": 0.9947906026557712, "grad_norm": 1.6585808377352385, "learning_rate": 1.4233718975464882e-09, "loss": 0.6865, "step": 9739 }, { "epoch": 0.9948927477017364, "grad_norm": 1.519439407015822, "learning_rate": 1.3681018923272782e-09, "loss": 0.6736, "step": 9740 }, { "epoch": 0.9949948927477017, "grad_norm": 1.5999052402906233, "learning_rate": 1.3139262438288669e-09, "loss": 0.6347, "step": 9741 }, { "epoch": 0.995097037793667, "grad_norm": 1.3866816015103618, "learning_rate": 1.260844957982066e-09, "loss": 0.6333, "step": 9742 }, { "epoch": 0.9951991828396323, "grad_norm": 1.6134795770108707, "learning_rate": 1.2088580405944518e-09, "loss": 0.673, "step": 9743 }, { "epoch": 0.9953013278855976, "grad_norm": 1.5431427787906, "learning_rate": 1.1579654973581378e-09, "loss": 0.7002, "step": 9744 }, { "epoch": 0.9954034729315628, "grad_norm": 1.4031345199660101, "learning_rate": 1.1081673338431132e-09, "loss": 0.5915, "step": 9745 }, { "epoch": 0.9955056179775281, "grad_norm": 1.4622381357635421, "learning_rate": 1.0594635554983523e-09, "loss": 0.6514, "step": 9746 }, { "epoch": 0.9956077630234934, "grad_norm": 1.5920166256335029, "learning_rate": 1.0118541676551464e-09, "loss": 0.7037, "step": 9747 }, { "epoch": 0.9957099080694586, "grad_norm": 1.4236857423848537, "learning_rate": 9.653391755259922e-10, "loss": 0.628, "step": 9748 }, { "epoch": 0.9958120531154239, "grad_norm": 1.6001965627746497, "learning_rate": 9.199185841990421e-10, "loss": 0.7382, "step": 9749 }, { "epoch": 0.9959141981613892, "grad_norm": 1.5231220871086684, "learning_rate": 8.755923986480952e-10, "loss": 0.7116, "step": 9750 }, { "epoch": 0.9960163432073544, "grad_norm": 1.4768030596704838, "learning_rate": 8.32360623724826e-10, "loss": 0.6933, "step": 9751 }, { "epoch": 0.9961184882533197, "grad_norm": 1.4130833974663959, "learning_rate": 7.902232641587848e-10, "loss": 0.6501, "step": 9752 }, { "epoch": 0.996220633299285, "grad_norm": 1.6667250575018075, "learning_rate": 7.49180324562948e-10, "loss": 0.7086, "step": 9753 }, { "epoch": 0.9963227783452503, "grad_norm": 1.4475514906626687, "learning_rate": 7.092318094303885e-10, "loss": 0.6168, "step": 9754 }, { "epoch": 0.9964249233912156, "grad_norm": 1.3490862199882951, "learning_rate": 6.70377723132054e-10, "loss": 0.5767, "step": 9755 }, { "epoch": 0.9965270684371808, "grad_norm": 1.4844768059310436, "learning_rate": 6.326180699212092e-10, "loss": 0.6089, "step": 9756 }, { "epoch": 0.996629213483146, "grad_norm": 1.4230602273197972, "learning_rate": 5.959528539312143e-10, "loss": 0.702, "step": 9757 }, { "epoch": 0.9967313585291113, "grad_norm": 1.5348659223309693, "learning_rate": 5.603820791755255e-10, "loss": 0.792, "step": 9758 }, { "epoch": 0.9968335035750766, "grad_norm": 1.3595078039927926, "learning_rate": 5.259057495454745e-10, "loss": 0.5807, "step": 9759 }, { "epoch": 0.9969356486210419, "grad_norm": 1.4200480888989915, "learning_rate": 4.925238688147094e-10, "loss": 0.7224, "step": 9760 }, { "epoch": 0.9970377936670072, "grad_norm": 1.6254310117968056, "learning_rate": 4.602364406391946e-10, "loss": 0.6745, "step": 9761 }, { "epoch": 0.9971399387129725, "grad_norm": 1.6436805560680758, "learning_rate": 4.2904346855054956e-10, "loss": 0.7283, "step": 9762 }, { "epoch": 0.9972420837589376, "grad_norm": 1.405297572331721, "learning_rate": 3.989449559638203e-10, "loss": 0.7248, "step": 9763 }, { "epoch": 0.9973442288049029, "grad_norm": 1.5040018132234394, "learning_rate": 3.699409061730386e-10, "loss": 0.7067, "step": 9764 }, { "epoch": 0.9974463738508682, "grad_norm": 1.3729144049432551, "learning_rate": 3.4203132235344216e-10, "loss": 0.678, "step": 9765 }, { "epoch": 0.9975485188968335, "grad_norm": 1.5627317186419327, "learning_rate": 3.152162075581444e-10, "loss": 0.5959, "step": 9766 }, { "epoch": 0.9976506639427988, "grad_norm": 1.4802854560337235, "learning_rate": 2.8949556472368525e-10, "loss": 0.7342, "step": 9767 }, { "epoch": 0.9977528089887641, "grad_norm": 1.463311580761161, "learning_rate": 2.6486939666447995e-10, "loss": 0.6555, "step": 9768 }, { "epoch": 0.9978549540347293, "grad_norm": 1.4481895723207845, "learning_rate": 2.4133770607615017e-10, "loss": 0.675, "step": 9769 }, { "epoch": 0.9979570990806946, "grad_norm": 1.4608510300673103, "learning_rate": 2.1890049553330296e-10, "loss": 0.6292, "step": 9770 }, { "epoch": 0.9980592441266598, "grad_norm": 1.5097394213347115, "learning_rate": 1.9755776749286192e-10, "loss": 0.7008, "step": 9771 }, { "epoch": 0.9981613891726251, "grad_norm": 1.4701023532438253, "learning_rate": 1.7730952429073635e-10, "loss": 0.6532, "step": 9772 }, { "epoch": 0.9982635342185904, "grad_norm": 1.4968961650950559, "learning_rate": 1.5815576814293132e-10, "loss": 0.6848, "step": 9773 }, { "epoch": 0.9983656792645557, "grad_norm": 1.4542113865838224, "learning_rate": 1.4009650114554796e-10, "loss": 0.6673, "step": 9774 }, { "epoch": 0.998467824310521, "grad_norm": 1.6873313989121506, "learning_rate": 1.2313172527478322e-10, "loss": 0.7112, "step": 9775 }, { "epoch": 0.9985699693564862, "grad_norm": 1.3646728786806015, "learning_rate": 1.0726144238804027e-10, "loss": 0.6289, "step": 9776 }, { "epoch": 0.9986721144024515, "grad_norm": 1.5085952661021869, "learning_rate": 9.248565422281808e-11, "loss": 0.6718, "step": 9777 }, { "epoch": 0.9987742594484168, "grad_norm": 1.5267520961320835, "learning_rate": 7.88043623956014e-11, "loss": 0.6658, "step": 9778 }, { "epoch": 0.998876404494382, "grad_norm": 1.5776261395842917, "learning_rate": 6.621756840408111e-11, "loss": 0.6793, "step": 9779 }, { "epoch": 0.9989785495403473, "grad_norm": 1.4143593982621947, "learning_rate": 5.472527362604396e-11, "loss": 0.6749, "step": 9780 }, { "epoch": 0.9990806945863125, "grad_norm": 1.6317060793543992, "learning_rate": 4.4327479319372647e-11, "loss": 0.7876, "step": 9781 }, { "epoch": 0.9991828396322778, "grad_norm": 1.4425819779577098, "learning_rate": 3.502418662093554e-11, "loss": 0.7695, "step": 9782 }, { "epoch": 0.9992849846782431, "grad_norm": 1.4936037265260123, "learning_rate": 2.6815396549917383e-11, "loss": 0.6939, "step": 9783 }, { "epoch": 0.9993871297242084, "grad_norm": 1.5272704943858435, "learning_rate": 1.9701110005598822e-11, "loss": 0.638, "step": 9784 }, { "epoch": 0.9994892747701737, "grad_norm": 1.4363305195173535, "learning_rate": 1.3681327765135976e-11, "loss": 0.6003, "step": 9785 }, { "epoch": 0.999591419816139, "grad_norm": 1.528606615469795, "learning_rate": 8.756050489111546e-12, "loss": 0.6423, "step": 9786 }, { "epoch": 0.9996935648621041, "grad_norm": 1.5275074852477024, "learning_rate": 4.925278714873472e-12, "loss": 0.7543, "step": 9787 }, { "epoch": 0.9997957099080694, "grad_norm": 1.3559350401499568, "learning_rate": 2.1890128620860597e-12, "loss": 0.705, "step": 9788 }, { "epoch": 0.9998978549540347, "grad_norm": 1.6260612351761496, "learning_rate": 5.472532305095257e-13, "loss": 0.6681, "step": 9789 }, { "epoch": 1.0, "grad_norm": 1.502086650767526, "learning_rate": 0.0, "loss": 0.6916, "step": 9790 }, { "epoch": 1.0, "step": 9790, "total_flos": 1611723555823616.0, "train_loss": 0.7202048060142714, "train_runtime": 204003.8028, "train_samples_per_second": 6.142, "train_steps_per_second": 0.048 } ], "logging_steps": 1.0, "max_steps": 9790, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1611723555823616.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }