{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995100440960314, "eval_steps": 383, "global_step": 1530, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006532745386248571, "grad_norm": 6.782679973808308e+18, "learning_rate": 4.000000000000001e-06, "loss": 1.1939, "step": 1 }, { "epoch": 0.0006532745386248571, "eval_loss": 2.101844310760498, "eval_runtime": 159.9057, "eval_samples_per_second": 16.122, "eval_steps_per_second": 4.034, "step": 1 }, { "epoch": 0.0013065490772497142, "grad_norm": 1.0602286061923074e+19, "learning_rate": 8.000000000000001e-06, "loss": 1.3265, "step": 2 }, { "epoch": 0.0019598236158745713, "grad_norm": 9.040893238818374e+18, "learning_rate": 1.2e-05, "loss": 1.2696, "step": 3 }, { "epoch": 0.0026130981544994283, "grad_norm": 8.791663639615504e+18, "learning_rate": 1.6000000000000003e-05, "loss": 1.4, "step": 4 }, { "epoch": 0.0032663726931242854, "grad_norm": 1.0141305916373336e+19, "learning_rate": 2e-05, "loss": 1.4178, "step": 5 }, { "epoch": 0.0039196472317491425, "grad_norm": 1.5361737647030534e+19, "learning_rate": 2.4e-05, "loss": 1.4652, "step": 6 }, { "epoch": 0.004572921770374, "grad_norm": 1.329169150346317e+19, "learning_rate": 2.8000000000000003e-05, "loss": 1.548, "step": 7 }, { "epoch": 0.005226196308998857, "grad_norm": 1.5421850146744304e+19, "learning_rate": 3.2000000000000005e-05, "loss": 1.5716, "step": 8 }, { "epoch": 0.005879470847623714, "grad_norm": Infinity, "learning_rate": 3.6e-05, "loss": 1.5789, "step": 9 }, { "epoch": 0.006532745386248571, "grad_norm": Infinity, "learning_rate": 4e-05, "loss": 1.6679, "step": 10 }, { "epoch": 0.007186019924873428, "grad_norm": Infinity, "learning_rate": 4.4000000000000006e-05, "loss": 1.7226, "step": 11 }, { "epoch": 0.007839294463498285, "grad_norm": Infinity, "learning_rate": 4.8e-05, "loss": 1.7121, "step": 12 }, { "epoch": 0.008492569002123142, "grad_norm": Infinity, "learning_rate": 5.2000000000000004e-05, "loss": 1.7012, "step": 13 }, { "epoch": 0.009145843540748, "grad_norm": Infinity, "learning_rate": 5.6000000000000006e-05, "loss": 1.8244, "step": 14 }, { "epoch": 0.009799118079372856, "grad_norm": Infinity, "learning_rate": 6e-05, "loss": 1.9075, "step": 15 }, { "epoch": 0.010452392617997713, "grad_norm": Infinity, "learning_rate": 6.400000000000001e-05, "loss": 2.2215, "step": 16 }, { "epoch": 0.01110566715662257, "grad_norm": Infinity, "learning_rate": 6.800000000000001e-05, "loss": 2.3717, "step": 17 }, { "epoch": 0.011758941695247428, "grad_norm": Infinity, "learning_rate": 7.2e-05, "loss": 2.5005, "step": 18 }, { "epoch": 0.012412216233872285, "grad_norm": Infinity, "learning_rate": 7.6e-05, "loss": 2.5937, "step": 19 }, { "epoch": 0.013065490772497142, "grad_norm": Infinity, "learning_rate": 8e-05, "loss": 2.4281, "step": 20 }, { "epoch": 0.013718765311121999, "grad_norm": Infinity, "learning_rate": 8.4e-05, "loss": 2.8442, "step": 21 }, { "epoch": 0.014372039849746856, "grad_norm": Infinity, "learning_rate": 8.800000000000001e-05, "loss": 2.6257, "step": 22 }, { "epoch": 0.015025314388371713, "grad_norm": Infinity, "learning_rate": 9.200000000000001e-05, "loss": 3.0521, "step": 23 }, { "epoch": 0.01567858892699657, "grad_norm": Infinity, "learning_rate": 9.6e-05, "loss": 3.4209, "step": 24 }, { "epoch": 0.01633186346562143, "grad_norm": Infinity, "learning_rate": 0.0001, "loss": 4.3767, "step": 25 }, { "epoch": 0.016985138004246284, "grad_norm": 1.6052595987134284e+19, "learning_rate": 0.00010400000000000001, "loss": 1.2769, "step": 26 }, { "epoch": 0.017638412542871143, "grad_norm": 1.6565584132189454e+19, "learning_rate": 0.00010800000000000001, "loss": 1.3066, "step": 27 }, { "epoch": 0.018291687081496, "grad_norm": Infinity, "learning_rate": 0.00011200000000000001, "loss": 1.3591, "step": 28 }, { "epoch": 0.018944961620120857, "grad_norm": Infinity, "learning_rate": 0.000116, "loss": 1.4514, "step": 29 }, { "epoch": 0.019598236158745713, "grad_norm": Infinity, "learning_rate": 0.00012, "loss": 1.4569, "step": 30 }, { "epoch": 0.02025151069737057, "grad_norm": Infinity, "learning_rate": 0.000124, "loss": 1.4416, "step": 31 }, { "epoch": 0.020904785235995427, "grad_norm": Infinity, "learning_rate": 0.00012800000000000002, "loss": 1.5548, "step": 32 }, { "epoch": 0.021558059774620286, "grad_norm": Infinity, "learning_rate": 0.000132, "loss": 1.4983, "step": 33 }, { "epoch": 0.02221133431324514, "grad_norm": Infinity, "learning_rate": 0.00013600000000000003, "loss": 1.5397, "step": 34 }, { "epoch": 0.02286460885187, "grad_norm": Infinity, "learning_rate": 0.00014, "loss": 1.5866, "step": 35 }, { "epoch": 0.023517883390494855, "grad_norm": Infinity, "learning_rate": 0.000144, "loss": 1.7101, "step": 36 }, { "epoch": 0.024171157929119714, "grad_norm": Infinity, "learning_rate": 0.000148, "loss": 1.7309, "step": 37 }, { "epoch": 0.02482443246774457, "grad_norm": Infinity, "learning_rate": 0.000152, "loss": 1.8801, "step": 38 }, { "epoch": 0.025477707006369428, "grad_norm": Infinity, "learning_rate": 0.00015600000000000002, "loss": 1.9034, "step": 39 }, { "epoch": 0.026130981544994283, "grad_norm": Infinity, "learning_rate": 0.00016, "loss": 1.8048, "step": 40 }, { "epoch": 0.026784256083619142, "grad_norm": Infinity, "learning_rate": 0.000164, "loss": 2.1254, "step": 41 }, { "epoch": 0.027437530622243998, "grad_norm": Infinity, "learning_rate": 0.000168, "loss": 2.3971, "step": 42 }, { "epoch": 0.028090805160868856, "grad_norm": Infinity, "learning_rate": 0.000172, "loss": 2.7006, "step": 43 }, { "epoch": 0.02874407969949371, "grad_norm": Infinity, "learning_rate": 0.00017600000000000002, "loss": 2.474, "step": 44 }, { "epoch": 0.02939735423811857, "grad_norm": Infinity, "learning_rate": 0.00018, "loss": 2.9188, "step": 45 }, { "epoch": 0.030050628776743426, "grad_norm": Infinity, "learning_rate": 0.00018400000000000003, "loss": 3.0924, "step": 46 }, { "epoch": 0.030703903315368285, "grad_norm": Infinity, "learning_rate": 0.000188, "loss": 3.0522, "step": 47 }, { "epoch": 0.03135717785399314, "grad_norm": Infinity, "learning_rate": 0.000192, "loss": 3.1227, "step": 48 }, { "epoch": 0.032010452392617995, "grad_norm": Infinity, "learning_rate": 0.000196, "loss": 3.6011, "step": 49 }, { "epoch": 0.03266372693124286, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 4.8325, "step": 50 }, { "epoch": 0.03331700146986771, "grad_norm": Infinity, "learning_rate": 0.00019999977470780007, "loss": 1.2339, "step": 51 }, { "epoch": 0.03397027600849257, "grad_norm": Infinity, "learning_rate": 0.00019999909883221535, "loss": 1.3726, "step": 52 }, { "epoch": 0.034623550547117424, "grad_norm": Infinity, "learning_rate": 0.0001999979723762913, "loss": 1.4039, "step": 53 }, { "epoch": 0.035276825085742286, "grad_norm": Infinity, "learning_rate": 0.00019999639534510347, "loss": 1.3955, "step": 54 }, { "epoch": 0.03593009962436714, "grad_norm": Infinity, "learning_rate": 0.0001999943677457578, "loss": 1.4902, "step": 55 }, { "epoch": 0.036583374162992, "grad_norm": Infinity, "learning_rate": 0.00019999188958739027, "loss": 1.4463, "step": 56 }, { "epoch": 0.03723664870161685, "grad_norm": Infinity, "learning_rate": 0.00019998896088116715, "loss": 1.5482, "step": 57 }, { "epoch": 0.037889923240241714, "grad_norm": Infinity, "learning_rate": 0.00019998558164028465, "loss": 1.5274, "step": 58 }, { "epoch": 0.03854319777886657, "grad_norm": Infinity, "learning_rate": 0.00019998175187996916, "loss": 1.5272, "step": 59 }, { "epoch": 0.039196472317491425, "grad_norm": Infinity, "learning_rate": 0.00019997747161747695, "loss": 1.7031, "step": 60 }, { "epoch": 0.03984974685611628, "grad_norm": Infinity, "learning_rate": 0.00019997274087209423, "loss": 1.6404, "step": 61 }, { "epoch": 0.04050302139474114, "grad_norm": Infinity, "learning_rate": 0.000199967559665137, "loss": 1.8366, "step": 62 }, { "epoch": 0.041156295933366, "grad_norm": Infinity, "learning_rate": 0.00019996192801995097, "loss": 1.8651, "step": 63 }, { "epoch": 0.04180957047199085, "grad_norm": Infinity, "learning_rate": 0.00019995584596191145, "loss": 1.9868, "step": 64 }, { "epoch": 0.04246284501061571, "grad_norm": Infinity, "learning_rate": 0.00019994931351842327, "loss": 2.0719, "step": 65 }, { "epoch": 0.04311611954924057, "grad_norm": Infinity, "learning_rate": 0.00019994233071892056, "loss": 2.1893, "step": 66 }, { "epoch": 0.043769394087865426, "grad_norm": Infinity, "learning_rate": 0.00019993489759486673, "loss": 2.3754, "step": 67 }, { "epoch": 0.04442266862649028, "grad_norm": Infinity, "learning_rate": 0.00019992701417975427, "loss": 2.4019, "step": 68 }, { "epoch": 0.04507594316511514, "grad_norm": Infinity, "learning_rate": 0.0001999186805091047, "loss": 2.5356, "step": 69 }, { "epoch": 0.04572921770374, "grad_norm": Infinity, "learning_rate": 0.00019990989662046818, "loss": 2.8341, "step": 70 }, { "epoch": 0.046382492242364855, "grad_norm": Infinity, "learning_rate": 0.00019990066255342348, "loss": 3.1333, "step": 71 }, { "epoch": 0.04703576678098971, "grad_norm": Infinity, "learning_rate": 0.00019989097834957799, "loss": 2.9525, "step": 72 }, { "epoch": 0.047689041319614565, "grad_norm": Infinity, "learning_rate": 0.00019988084405256714, "loss": 3.3206, "step": 73 }, { "epoch": 0.04834231585823943, "grad_norm": Infinity, "learning_rate": 0.00019987025970805448, "loss": 3.5856, "step": 74 }, { "epoch": 0.04899559039686428, "grad_norm": Infinity, "learning_rate": 0.00019985922536373146, "loss": 3.9854, "step": 75 }, { "epoch": 0.04964886493548914, "grad_norm": Infinity, "learning_rate": 0.00019984774106931714, "loss": 1.2761, "step": 76 }, { "epoch": 0.050302139474113994, "grad_norm": Infinity, "learning_rate": 0.0001998358068765579, "loss": 1.426, "step": 77 }, { "epoch": 0.050955414012738856, "grad_norm": Infinity, "learning_rate": 0.00019982342283922738, "loss": 1.4982, "step": 78 }, { "epoch": 0.05160868855136371, "grad_norm": Infinity, "learning_rate": 0.00019981058901312606, "loss": 1.4369, "step": 79 }, { "epoch": 0.05226196308998857, "grad_norm": Infinity, "learning_rate": 0.00019979730545608126, "loss": 1.4131, "step": 80 }, { "epoch": 0.05291523762861342, "grad_norm": Infinity, "learning_rate": 0.00019978357222794654, "loss": 1.4518, "step": 81 }, { "epoch": 0.053568512167238284, "grad_norm": Infinity, "learning_rate": 0.00019976938939060172, "loss": 1.5077, "step": 82 }, { "epoch": 0.05422178670586314, "grad_norm": Infinity, "learning_rate": 0.00019975475700795246, "loss": 1.5611, "step": 83 }, { "epoch": 0.054875061244487995, "grad_norm": Infinity, "learning_rate": 0.00019973967514592996, "loss": 1.467, "step": 84 }, { "epoch": 0.05552833578311285, "grad_norm": Infinity, "learning_rate": 0.00019972414387249072, "loss": 1.5768, "step": 85 }, { "epoch": 0.05618161032173771, "grad_norm": Infinity, "learning_rate": 0.00019970816325761627, "loss": 1.6242, "step": 86 }, { "epoch": 0.05683488486036257, "grad_norm": Infinity, "learning_rate": 0.0001996917333733128, "loss": 1.7764, "step": 87 }, { "epoch": 0.05748815939898742, "grad_norm": Infinity, "learning_rate": 0.00019967485429361076, "loss": 1.8326, "step": 88 }, { "epoch": 0.05814143393761228, "grad_norm": Infinity, "learning_rate": 0.00019965752609456464, "loss": 1.8779, "step": 89 }, { "epoch": 0.05879470847623714, "grad_norm": Infinity, "learning_rate": 0.00019963974885425266, "loss": 2.1007, "step": 90 }, { "epoch": 0.059447983014861996, "grad_norm": Infinity, "learning_rate": 0.00019962152265277623, "loss": 2.2433, "step": 91 }, { "epoch": 0.06010125755348685, "grad_norm": Infinity, "learning_rate": 0.0001996028475722598, "loss": 2.2192, "step": 92 }, { "epoch": 0.06075453209211171, "grad_norm": Infinity, "learning_rate": 0.00019958372369685033, "loss": 2.1113, "step": 93 }, { "epoch": 0.06140780663073657, "grad_norm": Infinity, "learning_rate": 0.00019956415111271712, "loss": 2.6662, "step": 94 }, { "epoch": 0.062061081169361425, "grad_norm": Infinity, "learning_rate": 0.00019954412990805107, "loss": 2.7781, "step": 95 }, { "epoch": 0.06271435570798628, "grad_norm": Infinity, "learning_rate": 0.00019952366017306466, "loss": 2.7183, "step": 96 }, { "epoch": 0.06336763024661114, "grad_norm": Infinity, "learning_rate": 0.00019950274199999132, "loss": 2.7068, "step": 97 }, { "epoch": 0.06402090478523599, "grad_norm": Infinity, "learning_rate": 0.00019948137548308502, "loss": 3.1326, "step": 98 }, { "epoch": 0.06467417932386085, "grad_norm": Infinity, "learning_rate": 0.00019945956071862003, "loss": 3.1585, "step": 99 }, { "epoch": 0.06532745386248572, "grad_norm": Infinity, "learning_rate": 0.00019943729780489027, "loss": 4.3935, "step": 100 }, { "epoch": 0.06598072840111056, "grad_norm": Infinity, "learning_rate": 0.0001994145868422089, "loss": 1.3574, "step": 101 }, { "epoch": 0.06663400293973543, "grad_norm": Infinity, "learning_rate": 0.00019939142793290798, "loss": 1.3933, "step": 102 }, { "epoch": 0.06728727747836027, "grad_norm": Infinity, "learning_rate": 0.000199367821181338, "loss": 1.4518, "step": 103 }, { "epoch": 0.06794055201698514, "grad_norm": Infinity, "learning_rate": 0.00019934376669386727, "loss": 1.3938, "step": 104 }, { "epoch": 0.06859382655561, "grad_norm": Infinity, "learning_rate": 0.00019931926457888156, "loss": 1.4311, "step": 105 }, { "epoch": 0.06924710109423485, "grad_norm": Infinity, "learning_rate": 0.00019929431494678356, "loss": 1.4887, "step": 106 }, { "epoch": 0.06990037563285971, "grad_norm": Infinity, "learning_rate": 0.00019926891790999243, "loss": 1.5323, "step": 107 }, { "epoch": 0.07055365017148457, "grad_norm": Infinity, "learning_rate": 0.00019924307358294322, "loss": 1.5969, "step": 108 }, { "epoch": 0.07120692471010942, "grad_norm": Infinity, "learning_rate": 0.00019921678208208654, "loss": 1.5895, "step": 109 }, { "epoch": 0.07186019924873428, "grad_norm": Infinity, "learning_rate": 0.00019919004352588767, "loss": 1.5687, "step": 110 }, { "epoch": 0.07251347378735913, "grad_norm": Infinity, "learning_rate": 0.00019916285803482647, "loss": 1.7395, "step": 111 }, { "epoch": 0.073166748325984, "grad_norm": Infinity, "learning_rate": 0.0001991352257313965, "loss": 1.8653, "step": 112 }, { "epoch": 0.07382002286460886, "grad_norm": Infinity, "learning_rate": 0.00019910714674010454, "loss": 1.8805, "step": 113 }, { "epoch": 0.0744732974032337, "grad_norm": Infinity, "learning_rate": 0.00019907862118747022, "loss": 1.9816, "step": 114 }, { "epoch": 0.07512657194185857, "grad_norm": Infinity, "learning_rate": 0.0001990496492020252, "loss": 2.0509, "step": 115 }, { "epoch": 0.07577984648048343, "grad_norm": Infinity, "learning_rate": 0.0001990202309143127, "loss": 2.2874, "step": 116 }, { "epoch": 0.07643312101910828, "grad_norm": Infinity, "learning_rate": 0.000198990366456887, "loss": 2.2876, "step": 117 }, { "epoch": 0.07708639555773314, "grad_norm": Infinity, "learning_rate": 0.00019896005596431264, "loss": 2.5601, "step": 118 }, { "epoch": 0.07773967009635799, "grad_norm": Infinity, "learning_rate": 0.00019892929957316397, "loss": 2.3229, "step": 119 }, { "epoch": 0.07839294463498285, "grad_norm": Infinity, "learning_rate": 0.00019889809742202455, "loss": 2.4374, "step": 120 }, { "epoch": 0.07904621917360771, "grad_norm": Infinity, "learning_rate": 0.0001988664496514863, "loss": 2.6645, "step": 121 }, { "epoch": 0.07969949371223256, "grad_norm": Infinity, "learning_rate": 0.00019883435640414922, "loss": 2.954, "step": 122 }, { "epoch": 0.08035276825085742, "grad_norm": Infinity, "learning_rate": 0.0001988018178246205, "loss": 3.252, "step": 123 }, { "epoch": 0.08100604278948229, "grad_norm": Infinity, "learning_rate": 0.00019876883405951377, "loss": 3.2595, "step": 124 }, { "epoch": 0.08165931732810713, "grad_norm": Infinity, "learning_rate": 0.00019873540525744887, "loss": 4.0661, "step": 125 }, { "epoch": 0.082312591866732, "grad_norm": Infinity, "learning_rate": 0.00019870153156905068, "loss": 1.2808, "step": 126 }, { "epoch": 0.08296586640535684, "grad_norm": Infinity, "learning_rate": 0.00019866721314694882, "loss": 1.3348, "step": 127 }, { "epoch": 0.0836191409439817, "grad_norm": Infinity, "learning_rate": 0.00019863245014577668, "loss": 1.3889, "step": 128 }, { "epoch": 0.08427241548260657, "grad_norm": Infinity, "learning_rate": 0.00019859724272217099, "loss": 1.3553, "step": 129 }, { "epoch": 0.08492569002123142, "grad_norm": Infinity, "learning_rate": 0.00019856159103477086, "loss": 1.4116, "step": 130 }, { "epoch": 0.08557896455985628, "grad_norm": Infinity, "learning_rate": 0.00019852549524421723, "loss": 1.5371, "step": 131 }, { "epoch": 0.08623223909848114, "grad_norm": Infinity, "learning_rate": 0.0001984889555131521, "loss": 1.5529, "step": 132 }, { "epoch": 0.08688551363710599, "grad_norm": Infinity, "learning_rate": 0.00019845197200621785, "loss": 1.5542, "step": 133 }, { "epoch": 0.08753878817573085, "grad_norm": Infinity, "learning_rate": 0.00019841454489005636, "loss": 1.5355, "step": 134 }, { "epoch": 0.0881920627143557, "grad_norm": Infinity, "learning_rate": 0.00019837667433330838, "loss": 1.5569, "step": 135 }, { "epoch": 0.08884533725298056, "grad_norm": Infinity, "learning_rate": 0.0001983383605066127, "loss": 1.6497, "step": 136 }, { "epoch": 0.08949861179160543, "grad_norm": Infinity, "learning_rate": 0.00019829960358260545, "loss": 1.8195, "step": 137 }, { "epoch": 0.09015188633023027, "grad_norm": Infinity, "learning_rate": 0.00019826040373591933, "loss": 1.9526, "step": 138 }, { "epoch": 0.09080516086885514, "grad_norm": Infinity, "learning_rate": 0.0001982207611431827, "loss": 2.011, "step": 139 }, { "epoch": 0.09145843540748, "grad_norm": Infinity, "learning_rate": 0.0001981806759830189, "loss": 2.1913, "step": 140 }, { "epoch": 0.09211170994610485, "grad_norm": Infinity, "learning_rate": 0.00019814014843604543, "loss": 2.1102, "step": 141 }, { "epoch": 0.09276498448472971, "grad_norm": Infinity, "learning_rate": 0.00019809917868487308, "loss": 2.3621, "step": 142 }, { "epoch": 0.09341825902335456, "grad_norm": Infinity, "learning_rate": 0.00019805776691410516, "loss": 2.2622, "step": 143 }, { "epoch": 0.09407153356197942, "grad_norm": Infinity, "learning_rate": 0.00019801591331033663, "loss": 2.7022, "step": 144 }, { "epoch": 0.09472480810060428, "grad_norm": Infinity, "learning_rate": 0.00019797361806215332, "loss": 2.9089, "step": 145 }, { "epoch": 0.09537808263922913, "grad_norm": Infinity, "learning_rate": 0.000197930881360131, "loss": 2.8431, "step": 146 }, { "epoch": 0.096031357177854, "grad_norm": Infinity, "learning_rate": 0.00019788770339683462, "loss": 3.0716, "step": 147 }, { "epoch": 0.09668463171647886, "grad_norm": Infinity, "learning_rate": 0.00019784408436681732, "loss": 2.9731, "step": 148 }, { "epoch": 0.0973379062551037, "grad_norm": Infinity, "learning_rate": 0.00019780002446661966, "loss": 3.4067, "step": 149 }, { "epoch": 0.09799118079372857, "grad_norm": Infinity, "learning_rate": 0.00019775552389476864, "loss": 4.0305, "step": 150 }, { "epoch": 0.09864445533235343, "grad_norm": Infinity, "learning_rate": 0.0001977105828517769, "loss": 1.3048, "step": 151 }, { "epoch": 0.09929772987097828, "grad_norm": Infinity, "learning_rate": 0.00019766520154014183, "loss": 1.3672, "step": 152 }, { "epoch": 0.09995100440960314, "grad_norm": Infinity, "learning_rate": 0.00019761938016434448, "loss": 1.3763, "step": 153 }, { "epoch": 0.10060427894822799, "grad_norm": Infinity, "learning_rate": 0.00019757311893084885, "loss": 1.3836, "step": 154 }, { "epoch": 0.10125755348685285, "grad_norm": Infinity, "learning_rate": 0.00019752641804810084, "loss": 1.4765, "step": 155 }, { "epoch": 0.10191082802547771, "grad_norm": Infinity, "learning_rate": 0.0001974792777265273, "loss": 1.6151, "step": 156 }, { "epoch": 0.10256410256410256, "grad_norm": Infinity, "learning_rate": 0.00019743169817853525, "loss": 1.5203, "step": 157 }, { "epoch": 0.10321737710272742, "grad_norm": Infinity, "learning_rate": 0.00019738367961851064, "loss": 1.5259, "step": 158 }, { "epoch": 0.10387065164135229, "grad_norm": Infinity, "learning_rate": 0.0001973352222628176, "loss": 1.6711, "step": 159 }, { "epoch": 0.10452392617997713, "grad_norm": Infinity, "learning_rate": 0.00019728632632979746, "loss": 1.7773, "step": 160 }, { "epoch": 0.105177200718602, "grad_norm": Infinity, "learning_rate": 0.00019723699203976766, "loss": 1.7367, "step": 161 }, { "epoch": 0.10583047525722684, "grad_norm": Infinity, "learning_rate": 0.0001971872196150208, "loss": 1.8355, "step": 162 }, { "epoch": 0.1064837497958517, "grad_norm": Infinity, "learning_rate": 0.00019713700927982372, "loss": 1.8341, "step": 163 }, { "epoch": 0.10713702433447657, "grad_norm": Infinity, "learning_rate": 0.0001970863612604162, "loss": 1.8942, "step": 164 }, { "epoch": 0.10779029887310142, "grad_norm": Infinity, "learning_rate": 0.0001970352757850105, "loss": 2.0185, "step": 165 }, { "epoch": 0.10844357341172628, "grad_norm": Infinity, "learning_rate": 0.00019698375308378974, "loss": 2.1895, "step": 166 }, { "epoch": 0.10909684795035114, "grad_norm": Infinity, "learning_rate": 0.0001969317933889071, "loss": 2.3008, "step": 167 }, { "epoch": 0.10975012248897599, "grad_norm": Infinity, "learning_rate": 0.00019687939693448494, "loss": 2.5866, "step": 168 }, { "epoch": 0.11040339702760085, "grad_norm": Infinity, "learning_rate": 0.0001968265639566135, "loss": 2.3347, "step": 169 }, { "epoch": 0.1110566715662257, "grad_norm": Infinity, "learning_rate": 0.0001967732946933499, "loss": 2.5208, "step": 170 }, { "epoch": 0.11170994610485056, "grad_norm": Infinity, "learning_rate": 0.00019671958938471715, "loss": 2.7925, "step": 171 }, { "epoch": 0.11236322064347543, "grad_norm": Infinity, "learning_rate": 0.000196665448272703, "loss": 3.3472, "step": 172 }, { "epoch": 0.11301649518210027, "grad_norm": Infinity, "learning_rate": 0.00019661087160125886, "loss": 3.157, "step": 173 }, { "epoch": 0.11366976972072514, "grad_norm": Infinity, "learning_rate": 0.00019655585961629867, "loss": 3.5798, "step": 174 }, { "epoch": 0.11432304425935, "grad_norm": Infinity, "learning_rate": 0.00019650041256569792, "loss": 4.4552, "step": 175 }, { "epoch": 0.11497631879797485, "grad_norm": Infinity, "learning_rate": 0.00019644453069929228, "loss": 1.1694, "step": 176 }, { "epoch": 0.11562959333659971, "grad_norm": Infinity, "learning_rate": 0.00019638821426887673, "loss": 1.3668, "step": 177 }, { "epoch": 0.11628286787522456, "grad_norm": Infinity, "learning_rate": 0.0001963314635282044, "loss": 1.4632, "step": 178 }, { "epoch": 0.11693614241384942, "grad_norm": Infinity, "learning_rate": 0.0001962742787329852, "loss": 1.4713, "step": 179 }, { "epoch": 0.11758941695247428, "grad_norm": Infinity, "learning_rate": 0.00019621666014088494, "loss": 1.4127, "step": 180 }, { "epoch": 0.11824269149109913, "grad_norm": Infinity, "learning_rate": 0.00019615860801152398, "loss": 1.5774, "step": 181 }, { "epoch": 0.11889596602972399, "grad_norm": Infinity, "learning_rate": 0.00019610012260647618, "loss": 1.4787, "step": 182 }, { "epoch": 0.11954924056834886, "grad_norm": Infinity, "learning_rate": 0.00019604120418926764, "loss": 1.6504, "step": 183 }, { "epoch": 0.1202025151069737, "grad_norm": Infinity, "learning_rate": 0.00019598185302537556, "loss": 1.6627, "step": 184 }, { "epoch": 0.12085578964559857, "grad_norm": Infinity, "learning_rate": 0.00019592206938222703, "loss": 1.7108, "step": 185 }, { "epoch": 0.12150906418422341, "grad_norm": Infinity, "learning_rate": 0.0001958618535291978, "loss": 1.7505, "step": 186 }, { "epoch": 0.12216233872284828, "grad_norm": Infinity, "learning_rate": 0.00019580120573761112, "loss": 1.7437, "step": 187 }, { "epoch": 0.12281561326147314, "grad_norm": Infinity, "learning_rate": 0.00019574012628073648, "loss": 1.7792, "step": 188 }, { "epoch": 0.12346888780009799, "grad_norm": Infinity, "learning_rate": 0.00019567861543378837, "loss": 1.6856, "step": 189 }, { "epoch": 0.12412216233872285, "grad_norm": Infinity, "learning_rate": 0.00019561667347392508, "loss": 2.2671, "step": 190 }, { "epoch": 0.12477543687734771, "grad_norm": Infinity, "learning_rate": 0.00019555430068024748, "loss": 2.0929, "step": 191 }, { "epoch": 0.12542871141597256, "grad_norm": Infinity, "learning_rate": 0.00019549149733379755, "loss": 2.4075, "step": 192 }, { "epoch": 0.1260819859545974, "grad_norm": Infinity, "learning_rate": 0.00019542826371755743, "loss": 2.4512, "step": 193 }, { "epoch": 0.12673526049322228, "grad_norm": Infinity, "learning_rate": 0.0001953646001164479, "loss": 2.6113, "step": 194 }, { "epoch": 0.12738853503184713, "grad_norm": Infinity, "learning_rate": 0.0001953005068173272, "loss": 2.7185, "step": 195 }, { "epoch": 0.12804180957047198, "grad_norm": Infinity, "learning_rate": 0.0001952359841089898, "loss": 2.7951, "step": 196 }, { "epoch": 0.12869508410909686, "grad_norm": Infinity, "learning_rate": 0.00019517103228216493, "loss": 2.7111, "step": 197 }, { "epoch": 0.1293483586477217, "grad_norm": Infinity, "learning_rate": 0.00019510565162951537, "loss": 2.8432, "step": 198 }, { "epoch": 0.13000163318634655, "grad_norm": Infinity, "learning_rate": 0.00019503984244563616, "loss": 3.8419, "step": 199 }, { "epoch": 0.13065490772497143, "grad_norm": Infinity, "learning_rate": 0.0001949736050270532, "loss": 4.3049, "step": 200 }, { "epoch": 0.13130818226359628, "grad_norm": Infinity, "learning_rate": 0.00019490693967222199, "loss": 1.308, "step": 201 }, { "epoch": 0.13196145680222113, "grad_norm": Infinity, "learning_rate": 0.00019483984668152617, "loss": 1.2917, "step": 202 }, { "epoch": 0.132614731340846, "grad_norm": Infinity, "learning_rate": 0.00019477232635727637, "loss": 1.47, "step": 203 }, { "epoch": 0.13326800587947085, "grad_norm": Infinity, "learning_rate": 0.00019470437900370857, "loss": 1.3956, "step": 204 }, { "epoch": 0.1339212804180957, "grad_norm": Infinity, "learning_rate": 0.00019463600492698296, "loss": 1.4451, "step": 205 }, { "epoch": 0.13457455495672055, "grad_norm": Infinity, "learning_rate": 0.00019456720443518247, "loss": 1.45, "step": 206 }, { "epoch": 0.13522782949534543, "grad_norm": Infinity, "learning_rate": 0.0001944979778383114, "loss": 1.5642, "step": 207 }, { "epoch": 0.13588110403397027, "grad_norm": Infinity, "learning_rate": 0.00019442832544829398, "loss": 1.4935, "step": 208 }, { "epoch": 0.13653437857259512, "grad_norm": Infinity, "learning_rate": 0.000194358247578973, "loss": 1.6163, "step": 209 }, { "epoch": 0.13718765311122, "grad_norm": Infinity, "learning_rate": 0.00019428774454610843, "loss": 1.6751, "step": 210 }, { "epoch": 0.13784092764984485, "grad_norm": Infinity, "learning_rate": 0.00019421681666737594, "loss": 1.714, "step": 211 }, { "epoch": 0.1384942021884697, "grad_norm": Infinity, "learning_rate": 0.00019414546426236543, "loss": 1.8134, "step": 212 }, { "epoch": 0.13914747672709457, "grad_norm": Infinity, "learning_rate": 0.00019407368765257977, "loss": 1.7647, "step": 213 }, { "epoch": 0.13980075126571942, "grad_norm": Infinity, "learning_rate": 0.00019400148716143317, "loss": 1.8783, "step": 214 }, { "epoch": 0.14045402580434427, "grad_norm": Infinity, "learning_rate": 0.00019392886311424973, "loss": 1.9749, "step": 215 }, { "epoch": 0.14110730034296914, "grad_norm": Infinity, "learning_rate": 0.00019385581583826212, "loss": 2.1572, "step": 216 }, { "epoch": 0.141760574881594, "grad_norm": Infinity, "learning_rate": 0.00019378234566260995, "loss": 2.2505, "step": 217 }, { "epoch": 0.14241384942021884, "grad_norm": Infinity, "learning_rate": 0.00019370845291833837, "loss": 2.4754, "step": 218 }, { "epoch": 0.14306712395884372, "grad_norm": Infinity, "learning_rate": 0.00019363413793839658, "loss": 2.6163, "step": 219 }, { "epoch": 0.14372039849746857, "grad_norm": Infinity, "learning_rate": 0.0001935594010576362, "loss": 2.4796, "step": 220 }, { "epoch": 0.14437367303609341, "grad_norm": Infinity, "learning_rate": 0.0001934842426128101, "loss": 2.7531, "step": 221 }, { "epoch": 0.14502694757471826, "grad_norm": Infinity, "learning_rate": 0.00019340866294257042, "loss": 2.954, "step": 222 }, { "epoch": 0.14568022211334314, "grad_norm": Infinity, "learning_rate": 0.00019333266238746736, "loss": 2.8526, "step": 223 }, { "epoch": 0.146333496651968, "grad_norm": Infinity, "learning_rate": 0.0001932562412899476, "loss": 3.3364, "step": 224 }, { "epoch": 0.14698677119059284, "grad_norm": Infinity, "learning_rate": 0.0001931793999943526, "loss": 4.5349, "step": 225 }, { "epoch": 0.1476400457292177, "grad_norm": Infinity, "learning_rate": 0.0001931021388469174, "loss": 1.228, "step": 226 }, { "epoch": 0.14829332026784256, "grad_norm": Infinity, "learning_rate": 0.00019302445819576855, "loss": 1.3118, "step": 227 }, { "epoch": 0.1489465948064674, "grad_norm": Infinity, "learning_rate": 0.000192946358390923, "loss": 1.3919, "step": 228 }, { "epoch": 0.14959986934509228, "grad_norm": Infinity, "learning_rate": 0.00019286783978428624, "loss": 1.5169, "step": 229 }, { "epoch": 0.15025314388371713, "grad_norm": Infinity, "learning_rate": 0.00019278890272965096, "loss": 1.4527, "step": 230 }, { "epoch": 0.15090641842234198, "grad_norm": Infinity, "learning_rate": 0.00019270954758269512, "loss": 1.5709, "step": 231 }, { "epoch": 0.15155969296096686, "grad_norm": Infinity, "learning_rate": 0.00019262977470098065, "loss": 1.5521, "step": 232 }, { "epoch": 0.1522129674995917, "grad_norm": Infinity, "learning_rate": 0.00019254958444395173, "loss": 1.6832, "step": 233 }, { "epoch": 0.15286624203821655, "grad_norm": Infinity, "learning_rate": 0.00019246897717293315, "loss": 1.6709, "step": 234 }, { "epoch": 0.15351951657684143, "grad_norm": Infinity, "learning_rate": 0.0001923879532511287, "loss": 1.6955, "step": 235 }, { "epoch": 0.15417279111546628, "grad_norm": Infinity, "learning_rate": 0.0001923065130436195, "loss": 1.6892, "step": 236 }, { "epoch": 0.15482606565409113, "grad_norm": Infinity, "learning_rate": 0.00019222465691736247, "loss": 1.8645, "step": 237 }, { "epoch": 0.15547934019271598, "grad_norm": Infinity, "learning_rate": 0.0001921423852411885, "loss": 1.8915, "step": 238 }, { "epoch": 0.15613261473134085, "grad_norm": Infinity, "learning_rate": 0.00019205969838580094, "loss": 1.8416, "step": 239 }, { "epoch": 0.1567858892699657, "grad_norm": Infinity, "learning_rate": 0.0001919765967237739, "loss": 1.8057, "step": 240 }, { "epoch": 0.15743916380859055, "grad_norm": Infinity, "learning_rate": 0.00019189308062955043, "loss": 1.9947, "step": 241 }, { "epoch": 0.15809243834721542, "grad_norm": Infinity, "learning_rate": 0.00019180915047944112, "loss": 2.3449, "step": 242 }, { "epoch": 0.15874571288584027, "grad_norm": Infinity, "learning_rate": 0.000191724806651622, "loss": 2.4693, "step": 243 }, { "epoch": 0.15939898742446512, "grad_norm": Infinity, "learning_rate": 0.00019164004952613336, "loss": 2.3417, "step": 244 }, { "epoch": 0.16005226196309, "grad_norm": Infinity, "learning_rate": 0.00019155487948487748, "loss": 2.2969, "step": 245 }, { "epoch": 0.16070553650171485, "grad_norm": Infinity, "learning_rate": 0.00019146929691161727, "loss": 2.8115, "step": 246 }, { "epoch": 0.1613588110403397, "grad_norm": Infinity, "learning_rate": 0.0001913833021919745, "loss": 2.8346, "step": 247 }, { "epoch": 0.16201208557896457, "grad_norm": Infinity, "learning_rate": 0.00019129689571342797, "loss": 3.143, "step": 248 }, { "epoch": 0.16266536011758942, "grad_norm": Infinity, "learning_rate": 0.00019121007786531178, "loss": 3.4042, "step": 249 }, { "epoch": 0.16331863465621427, "grad_norm": Infinity, "learning_rate": 0.0001911228490388136, "loss": 3.7222, "step": 250 }, { "epoch": 0.16397190919483914, "grad_norm": Infinity, "learning_rate": 0.0001910352096269729, "loss": 1.2778, "step": 251 }, { "epoch": 0.164625183733464, "grad_norm": Infinity, "learning_rate": 0.00019094716002467918, "loss": 1.3826, "step": 252 }, { "epoch": 0.16527845827208884, "grad_norm": Infinity, "learning_rate": 0.0001908587006286703, "loss": 1.3929, "step": 253 }, { "epoch": 0.1659317328107137, "grad_norm": Infinity, "learning_rate": 0.00019076983183753045, "loss": 1.4139, "step": 254 }, { "epoch": 0.16658500734933857, "grad_norm": Infinity, "learning_rate": 0.0001906805540516885, "loss": 1.3002, "step": 255 }, { "epoch": 0.1672382818879634, "grad_norm": Infinity, "learning_rate": 0.00019059086767341627, "loss": 1.414, "step": 256 }, { "epoch": 0.16789155642658826, "grad_norm": Infinity, "learning_rate": 0.00019050077310682657, "loss": 1.6117, "step": 257 }, { "epoch": 0.16854483096521314, "grad_norm": Infinity, "learning_rate": 0.0001904102707578715, "loss": 1.5172, "step": 258 }, { "epoch": 0.169198105503838, "grad_norm": Infinity, "learning_rate": 0.00019031936103434044, "loss": 1.6757, "step": 259 }, { "epoch": 0.16985138004246284, "grad_norm": Infinity, "learning_rate": 0.00019022804434585852, "loss": 1.6502, "step": 260 }, { "epoch": 0.1705046545810877, "grad_norm": Infinity, "learning_rate": 0.00019013632110388446, "loss": 1.6107, "step": 261 }, { "epoch": 0.17115792911971256, "grad_norm": Infinity, "learning_rate": 0.00019004419172170887, "loss": 1.6386, "step": 262 }, { "epoch": 0.1718112036583374, "grad_norm": Infinity, "learning_rate": 0.00018995165661445234, "loss": 1.7794, "step": 263 }, { "epoch": 0.17246447819696228, "grad_norm": Infinity, "learning_rate": 0.0001898587161990637, "loss": 1.8321, "step": 264 }, { "epoch": 0.17311775273558713, "grad_norm": Infinity, "learning_rate": 0.0001897653708943179, "loss": 1.9069, "step": 265 }, { "epoch": 0.17377102727421198, "grad_norm": Infinity, "learning_rate": 0.00018967162112081438, "loss": 2.1029, "step": 266 }, { "epoch": 0.17442430181283686, "grad_norm": Infinity, "learning_rate": 0.00018957746730097495, "loss": 2.2505, "step": 267 }, { "epoch": 0.1750775763514617, "grad_norm": Infinity, "learning_rate": 0.00018948290985904204, "loss": 2.4312, "step": 268 }, { "epoch": 0.17573085089008655, "grad_norm": Infinity, "learning_rate": 0.00018938794922107675, "loss": 2.544, "step": 269 }, { "epoch": 0.1763841254287114, "grad_norm": Infinity, "learning_rate": 0.00018929258581495685, "loss": 2.8027, "step": 270 }, { "epoch": 0.17703739996733628, "grad_norm": Infinity, "learning_rate": 0.00018919682007037506, "loss": 2.7801, "step": 271 }, { "epoch": 0.17769067450596113, "grad_norm": Infinity, "learning_rate": 0.0001891006524188368, "loss": 3.1716, "step": 272 }, { "epoch": 0.17834394904458598, "grad_norm": Infinity, "learning_rate": 0.00018900408329365856, "loss": 2.8948, "step": 273 }, { "epoch": 0.17899722358321085, "grad_norm": Infinity, "learning_rate": 0.0001889071131299657, "loss": 3.2573, "step": 274 }, { "epoch": 0.1796504981218357, "grad_norm": Infinity, "learning_rate": 0.0001888097423646907, "loss": 4.1261, "step": 275 }, { "epoch": 0.18030377266046055, "grad_norm": Infinity, "learning_rate": 0.00018871197143657104, "loss": 1.2702, "step": 276 }, { "epoch": 0.18095704719908542, "grad_norm": Infinity, "learning_rate": 0.00018861380078614726, "loss": 1.3885, "step": 277 }, { "epoch": 0.18161032173771027, "grad_norm": Infinity, "learning_rate": 0.00018851523085576096, "loss": 1.3993, "step": 278 }, { "epoch": 0.18226359627633512, "grad_norm": Infinity, "learning_rate": 0.00018841626208955292, "loss": 1.3527, "step": 279 }, { "epoch": 0.18291687081496, "grad_norm": Infinity, "learning_rate": 0.00018831689493346095, "loss": 1.3832, "step": 280 }, { "epoch": 0.18357014535358485, "grad_norm": Infinity, "learning_rate": 0.0001882171298352179, "loss": 1.4736, "step": 281 }, { "epoch": 0.1842234198922097, "grad_norm": Infinity, "learning_rate": 0.00018811696724434983, "loss": 1.5195, "step": 282 }, { "epoch": 0.18487669443083457, "grad_norm": Infinity, "learning_rate": 0.00018801640761217367, "loss": 1.5279, "step": 283 }, { "epoch": 0.18552996896945942, "grad_norm": Infinity, "learning_rate": 0.00018791545139179548, "loss": 1.5788, "step": 284 }, { "epoch": 0.18618324350808427, "grad_norm": Infinity, "learning_rate": 0.00018781409903810821, "loss": 1.6494, "step": 285 }, { "epoch": 0.18683651804670912, "grad_norm": Infinity, "learning_rate": 0.0001877123510077898, "loss": 1.5504, "step": 286 }, { "epoch": 0.187489792585334, "grad_norm": Infinity, "learning_rate": 0.00018761020775930095, "loss": 1.7568, "step": 287 }, { "epoch": 0.18814306712395884, "grad_norm": Infinity, "learning_rate": 0.0001875076697528832, "loss": 1.8331, "step": 288 }, { "epoch": 0.1887963416625837, "grad_norm": Infinity, "learning_rate": 0.0001874047374505569, "loss": 1.8368, "step": 289 }, { "epoch": 0.18944961620120856, "grad_norm": Infinity, "learning_rate": 0.00018730141131611882, "loss": 1.9461, "step": 290 }, { "epoch": 0.1901028907398334, "grad_norm": Infinity, "learning_rate": 0.0001871976918151405, "loss": 2.0115, "step": 291 }, { "epoch": 0.19075616527845826, "grad_norm": Infinity, "learning_rate": 0.0001870935794149658, "loss": 2.0988, "step": 292 }, { "epoch": 0.19140943981708314, "grad_norm": Infinity, "learning_rate": 0.00018698907458470894, "loss": 2.4166, "step": 293 }, { "epoch": 0.192062714355708, "grad_norm": Infinity, "learning_rate": 0.0001868841777952524, "loss": 2.4735, "step": 294 }, { "epoch": 0.19271598889433283, "grad_norm": Infinity, "learning_rate": 0.00018677888951924474, "loss": 2.6137, "step": 295 }, { "epoch": 0.1933692634329577, "grad_norm": Infinity, "learning_rate": 0.0001866732102310985, "loss": 2.9701, "step": 296 }, { "epoch": 0.19402253797158256, "grad_norm": Infinity, "learning_rate": 0.0001865671404069881, "loss": 3.195, "step": 297 }, { "epoch": 0.1946758125102074, "grad_norm": Infinity, "learning_rate": 0.00018646068052484755, "loss": 3.4701, "step": 298 }, { "epoch": 0.19532908704883228, "grad_norm": Infinity, "learning_rate": 0.00018635383106436855, "loss": 3.0494, "step": 299 }, { "epoch": 0.19598236158745713, "grad_norm": Infinity, "learning_rate": 0.00018624659250699805, "loss": 3.8624, "step": 300 }, { "epoch": 0.19663563612608198, "grad_norm": Infinity, "learning_rate": 0.00018613896533593632, "loss": 1.318, "step": 301 }, { "epoch": 0.19728891066470686, "grad_norm": Infinity, "learning_rate": 0.0001860309500361345, "loss": 1.3523, "step": 302 }, { "epoch": 0.1979421852033317, "grad_norm": Infinity, "learning_rate": 0.0001859225470942928, "loss": 1.3658, "step": 303 }, { "epoch": 0.19859545974195655, "grad_norm": Infinity, "learning_rate": 0.00018581375699885786, "loss": 1.4371, "step": 304 }, { "epoch": 0.1992487342805814, "grad_norm": Infinity, "learning_rate": 0.00018570458024002093, "loss": 1.4728, "step": 305 }, { "epoch": 0.19990200881920628, "grad_norm": Infinity, "learning_rate": 0.00018559501730971544, "loss": 1.4962, "step": 306 }, { "epoch": 0.20055528335783113, "grad_norm": Infinity, "learning_rate": 0.00018548506870161492, "loss": 1.5558, "step": 307 }, { "epoch": 0.20120855789645598, "grad_norm": Infinity, "learning_rate": 0.00018537473491113054, "loss": 1.5558, "step": 308 }, { "epoch": 0.20186183243508085, "grad_norm": Infinity, "learning_rate": 0.00018526401643540922, "loss": 1.6514, "step": 309 }, { "epoch": 0.2025151069737057, "grad_norm": Infinity, "learning_rate": 0.00018515291377333112, "loss": 1.6654, "step": 310 }, { "epoch": 0.20316838151233055, "grad_norm": Infinity, "learning_rate": 0.00018504142742550755, "loss": 1.6443, "step": 311 }, { "epoch": 0.20382165605095542, "grad_norm": Infinity, "learning_rate": 0.0001849295578942785, "loss": 1.7981, "step": 312 }, { "epoch": 0.20447493058958027, "grad_norm": Infinity, "learning_rate": 0.00018481730568371073, "loss": 2.0421, "step": 313 }, { "epoch": 0.20512820512820512, "grad_norm": Infinity, "learning_rate": 0.0001847046712995951, "loss": 1.9165, "step": 314 }, { "epoch": 0.20578147966683, "grad_norm": Infinity, "learning_rate": 0.0001845916552494446, "loss": 2.0489, "step": 315 }, { "epoch": 0.20643475420545485, "grad_norm": Infinity, "learning_rate": 0.00018447825804249201, "loss": 2.2374, "step": 316 }, { "epoch": 0.2070880287440797, "grad_norm": Infinity, "learning_rate": 0.00018436448018968731, "loss": 2.232, "step": 317 }, { "epoch": 0.20774130328270457, "grad_norm": Infinity, "learning_rate": 0.00018425032220369589, "loss": 2.3227, "step": 318 }, { "epoch": 0.20839457782132942, "grad_norm": Infinity, "learning_rate": 0.0001841357845988957, "loss": 2.6041, "step": 319 }, { "epoch": 0.20904785235995427, "grad_norm": Infinity, "learning_rate": 0.00018402086789137546, "loss": 2.6858, "step": 320 }, { "epoch": 0.20970112689857912, "grad_norm": Infinity, "learning_rate": 0.00018390557259893178, "loss": 2.9323, "step": 321 }, { "epoch": 0.210354401437204, "grad_norm": Infinity, "learning_rate": 0.00018378989924106736, "loss": 2.9611, "step": 322 }, { "epoch": 0.21100767597582884, "grad_norm": Infinity, "learning_rate": 0.00018367384833898828, "loss": 3.4458, "step": 323 }, { "epoch": 0.2116609505144537, "grad_norm": Infinity, "learning_rate": 0.0001835574204156018, "loss": 3.5276, "step": 324 }, { "epoch": 0.21231422505307856, "grad_norm": Infinity, "learning_rate": 0.00018344061599551398, "loss": 4.3704, "step": 325 }, { "epoch": 0.2129674995917034, "grad_norm": Infinity, "learning_rate": 0.0001833234356050273, "loss": 1.3251, "step": 326 }, { "epoch": 0.21362077413032826, "grad_norm": Infinity, "learning_rate": 0.0001832058797721383, "loss": 1.3373, "step": 327 }, { "epoch": 0.21427404866895314, "grad_norm": Infinity, "learning_rate": 0.00018308794902653533, "loss": 1.3725, "step": 328 }, { "epoch": 0.21492732320757799, "grad_norm": Infinity, "learning_rate": 0.00018296964389959578, "loss": 1.4768, "step": 329 }, { "epoch": 0.21558059774620283, "grad_norm": Infinity, "learning_rate": 0.00018285096492438424, "loss": 1.4613, "step": 330 }, { "epoch": 0.2162338722848277, "grad_norm": Infinity, "learning_rate": 0.00018273191263564956, "loss": 1.4252, "step": 331 }, { "epoch": 0.21688714682345256, "grad_norm": Infinity, "learning_rate": 0.0001826124875698228, "loss": 1.5251, "step": 332 }, { "epoch": 0.2175404213620774, "grad_norm": Infinity, "learning_rate": 0.00018249269026501472, "loss": 1.6811, "step": 333 }, { "epoch": 0.21819369590070228, "grad_norm": Infinity, "learning_rate": 0.00018237252126101323, "loss": 1.6452, "step": 334 }, { "epoch": 0.21884697043932713, "grad_norm": Infinity, "learning_rate": 0.00018225198109928114, "loss": 1.6707, "step": 335 }, { "epoch": 0.21950024497795198, "grad_norm": Infinity, "learning_rate": 0.00018213107032295363, "loss": 1.6945, "step": 336 }, { "epoch": 0.22015351951657683, "grad_norm": Infinity, "learning_rate": 0.00018200978947683583, "loss": 1.8109, "step": 337 }, { "epoch": 0.2208067940552017, "grad_norm": Infinity, "learning_rate": 0.0001818881391074002, "loss": 2.0009, "step": 338 }, { "epoch": 0.22146006859382655, "grad_norm": Infinity, "learning_rate": 0.00018176611976278441, "loss": 2.0717, "step": 339 }, { "epoch": 0.2221133431324514, "grad_norm": Infinity, "learning_rate": 0.00018164373199278856, "loss": 2.1003, "step": 340 }, { "epoch": 0.22276661767107628, "grad_norm": Infinity, "learning_rate": 0.0001815209763488729, "loss": 2.1306, "step": 341 }, { "epoch": 0.22341989220970113, "grad_norm": Infinity, "learning_rate": 0.00018139785338415517, "loss": 2.2117, "step": 342 }, { "epoch": 0.22407316674832597, "grad_norm": Infinity, "learning_rate": 0.0001812743636534082, "loss": 2.5837, "step": 343 }, { "epoch": 0.22472644128695085, "grad_norm": Infinity, "learning_rate": 0.00018115050771305756, "loss": 2.4268, "step": 344 }, { "epoch": 0.2253797158255757, "grad_norm": Infinity, "learning_rate": 0.00018102628612117865, "loss": 2.4221, "step": 345 }, { "epoch": 0.22603299036420055, "grad_norm": Infinity, "learning_rate": 0.00018090169943749476, "loss": 2.8648, "step": 346 }, { "epoch": 0.22668626490282542, "grad_norm": Infinity, "learning_rate": 0.00018077674822337392, "loss": 2.9829, "step": 347 }, { "epoch": 0.22733953944145027, "grad_norm": Infinity, "learning_rate": 0.00018065143304182683, "loss": 3.1063, "step": 348 }, { "epoch": 0.22799281398007512, "grad_norm": Infinity, "learning_rate": 0.00018052575445750419, "loss": 3.2922, "step": 349 }, { "epoch": 0.2286460885187, "grad_norm": Infinity, "learning_rate": 0.00018039971303669407, "loss": 4.3134, "step": 350 }, { "epoch": 0.22929936305732485, "grad_norm": Infinity, "learning_rate": 0.00018027330934731946, "loss": 1.2538, "step": 351 }, { "epoch": 0.2299526375959497, "grad_norm": Infinity, "learning_rate": 0.00018014654395893563, "loss": 1.3942, "step": 352 }, { "epoch": 0.23060591213457454, "grad_norm": Infinity, "learning_rate": 0.00018001941744272767, "loss": 1.3873, "step": 353 }, { "epoch": 0.23125918667319942, "grad_norm": Infinity, "learning_rate": 0.00017989193037150784, "loss": 1.3868, "step": 354 }, { "epoch": 0.23191246121182427, "grad_norm": Infinity, "learning_rate": 0.00017976408331971298, "loss": 1.5054, "step": 355 }, { "epoch": 0.23256573575044912, "grad_norm": Infinity, "learning_rate": 0.00017963587686340197, "loss": 1.4309, "step": 356 }, { "epoch": 0.233219010289074, "grad_norm": Infinity, "learning_rate": 0.0001795073115802531, "loss": 1.5283, "step": 357 }, { "epoch": 0.23387228482769884, "grad_norm": Infinity, "learning_rate": 0.0001793783880495615, "loss": 1.6016, "step": 358 }, { "epoch": 0.2345255593663237, "grad_norm": Infinity, "learning_rate": 0.00017924910685223643, "loss": 1.508, "step": 359 }, { "epoch": 0.23517883390494856, "grad_norm": Infinity, "learning_rate": 0.00017911946857079888, "loss": 1.735, "step": 360 }, { "epoch": 0.2358321084435734, "grad_norm": Infinity, "learning_rate": 0.0001789894737893786, "loss": 1.6061, "step": 361 }, { "epoch": 0.23648538298219826, "grad_norm": Infinity, "learning_rate": 0.00017885912309371192, "loss": 1.6814, "step": 362 }, { "epoch": 0.23713865752082314, "grad_norm": Infinity, "learning_rate": 0.0001787284170711387, "loss": 1.7403, "step": 363 }, { "epoch": 0.23779193205944799, "grad_norm": Infinity, "learning_rate": 0.00017859735631059985, "loss": 1.8771, "step": 364 }, { "epoch": 0.23844520659807283, "grad_norm": Infinity, "learning_rate": 0.00017846594140263474, "loss": 1.954, "step": 365 }, { "epoch": 0.2390984811366977, "grad_norm": Infinity, "learning_rate": 0.00017833417293937847, "loss": 2.1301, "step": 366 }, { "epoch": 0.23975175567532256, "grad_norm": Infinity, "learning_rate": 0.00017820205151455914, "loss": 2.0567, "step": 367 }, { "epoch": 0.2404050302139474, "grad_norm": Infinity, "learning_rate": 0.00017806957772349526, "loss": 2.4691, "step": 368 }, { "epoch": 0.24105830475257226, "grad_norm": Infinity, "learning_rate": 0.0001779367521630931, "loss": 2.4948, "step": 369 }, { "epoch": 0.24171157929119713, "grad_norm": Infinity, "learning_rate": 0.00017780357543184397, "loss": 2.7374, "step": 370 }, { "epoch": 0.24236485382982198, "grad_norm": Infinity, "learning_rate": 0.00017767004812982133, "loss": 2.746, "step": 371 }, { "epoch": 0.24301812836844683, "grad_norm": Infinity, "learning_rate": 0.00017753617085867847, "loss": 2.6322, "step": 372 }, { "epoch": 0.2436714029070717, "grad_norm": Infinity, "learning_rate": 0.00017740194422164542, "loss": 2.9857, "step": 373 }, { "epoch": 0.24432467744569655, "grad_norm": Infinity, "learning_rate": 0.0001772673688235265, "loss": 3.2684, "step": 374 }, { "epoch": 0.2449779519843214, "grad_norm": Infinity, "learning_rate": 0.0001771324452706975, "loss": 4.7209, "step": 375 }, { "epoch": 0.24563122652294628, "grad_norm": Infinity, "learning_rate": 0.00017699717417110283, "loss": 1.3461, "step": 376 }, { "epoch": 0.24628450106157113, "grad_norm": Infinity, "learning_rate": 0.00017686155613425296, "loss": 1.4335, "step": 377 }, { "epoch": 0.24693777560019597, "grad_norm": Infinity, "learning_rate": 0.00017672559177122165, "loss": 1.4385, "step": 378 }, { "epoch": 0.24759105013882085, "grad_norm": Infinity, "learning_rate": 0.00017658928169464312, "loss": 1.3503, "step": 379 }, { "epoch": 0.2482443246774457, "grad_norm": Infinity, "learning_rate": 0.00017645262651870926, "loss": 1.4273, "step": 380 }, { "epoch": 0.24889759921607055, "grad_norm": Infinity, "learning_rate": 0.00017631562685916703, "loss": 1.3989, "step": 381 }, { "epoch": 0.24955087375469542, "grad_norm": Infinity, "learning_rate": 0.00017617828333331545, "loss": 1.5455, "step": 382 }, { "epoch": 0.25020414829332027, "grad_norm": Infinity, "learning_rate": 0.0001760405965600031, "loss": 1.5684, "step": 383 }, { "epoch": 0.25020414829332027, "eval_loss": 2.097975254058838, "eval_runtime": 160.3657, "eval_samples_per_second": 16.076, "eval_steps_per_second": 4.022, "step": 383 }, { "epoch": 0.2508574228319451, "grad_norm": Infinity, "learning_rate": 0.00017590256715962506, "loss": 1.4462, "step": 384 }, { "epoch": 0.25151069737056997, "grad_norm": Infinity, "learning_rate": 0.0001757641957541203, "loss": 1.5079, "step": 385 }, { "epoch": 0.2521639719091948, "grad_norm": Infinity, "learning_rate": 0.00017562548296696875, "loss": 1.536, "step": 386 }, { "epoch": 0.2528172464478197, "grad_norm": Infinity, "learning_rate": 0.0001754864294231886, "loss": 1.5538, "step": 387 }, { "epoch": 0.25347052098644457, "grad_norm": Infinity, "learning_rate": 0.00017534703574933347, "loss": 1.8808, "step": 388 }, { "epoch": 0.2541237955250694, "grad_norm": Infinity, "learning_rate": 0.00017520730257348946, "loss": 1.9979, "step": 389 }, { "epoch": 0.25477707006369427, "grad_norm": Infinity, "learning_rate": 0.00017506723052527242, "loss": 1.9746, "step": 390 }, { "epoch": 0.2554303446023191, "grad_norm": Infinity, "learning_rate": 0.00017492682023582528, "loss": 2.23, "step": 391 }, { "epoch": 0.25608361914094396, "grad_norm": Infinity, "learning_rate": 0.0001747860723378148, "loss": 2.2459, "step": 392 }, { "epoch": 0.2567368936795688, "grad_norm": Infinity, "learning_rate": 0.0001746449874654291, "loss": 2.1848, "step": 393 }, { "epoch": 0.2573901682181937, "grad_norm": Infinity, "learning_rate": 0.0001745035662543745, "loss": 2.5955, "step": 394 }, { "epoch": 0.25804344275681856, "grad_norm": Infinity, "learning_rate": 0.00017436180934187308, "loss": 2.6265, "step": 395 }, { "epoch": 0.2586967172954434, "grad_norm": Infinity, "learning_rate": 0.00017421971736665925, "loss": 2.9216, "step": 396 }, { "epoch": 0.25934999183406826, "grad_norm": Infinity, "learning_rate": 0.00017407729096897737, "loss": 2.6603, "step": 397 }, { "epoch": 0.2600032663726931, "grad_norm": Infinity, "learning_rate": 0.00017393453079057847, "loss": 3.1459, "step": 398 }, { "epoch": 0.26065654091131796, "grad_norm": Infinity, "learning_rate": 0.00017379143747471768, "loss": 3.2171, "step": 399 }, { "epoch": 0.26130981544994286, "grad_norm": Infinity, "learning_rate": 0.00017364801166615124, "loss": 4.4529, "step": 400 }, { "epoch": 0.2619630899885677, "grad_norm": Infinity, "learning_rate": 0.00017350425401113336, "loss": 1.3081, "step": 401 }, { "epoch": 0.26261636452719256, "grad_norm": Infinity, "learning_rate": 0.00017336016515741366, "loss": 1.4575, "step": 402 }, { "epoch": 0.2632696390658174, "grad_norm": Infinity, "learning_rate": 0.00017321574575423406, "loss": 1.4, "step": 403 }, { "epoch": 0.26392291360444226, "grad_norm": Infinity, "learning_rate": 0.00017307099645232578, "loss": 1.4742, "step": 404 }, { "epoch": 0.2645761881430671, "grad_norm": Infinity, "learning_rate": 0.00017292591790390665, "loss": 1.4765, "step": 405 }, { "epoch": 0.265229462681692, "grad_norm": Infinity, "learning_rate": 0.00017278051076267796, "loss": 1.5007, "step": 406 }, { "epoch": 0.26588273722031686, "grad_norm": Infinity, "learning_rate": 0.00017263477568382166, "loss": 1.5344, "step": 407 }, { "epoch": 0.2665360117589417, "grad_norm": Infinity, "learning_rate": 0.0001724887133239972, "loss": 1.5091, "step": 408 }, { "epoch": 0.26718928629756655, "grad_norm": Infinity, "learning_rate": 0.00017234232434133883, "loss": 1.6508, "step": 409 }, { "epoch": 0.2678425608361914, "grad_norm": Infinity, "learning_rate": 0.00017219560939545246, "loss": 1.7905, "step": 410 }, { "epoch": 0.26849583537481625, "grad_norm": Infinity, "learning_rate": 0.00017204856914741274, "loss": 1.686, "step": 411 }, { "epoch": 0.2691491099134411, "grad_norm": Infinity, "learning_rate": 0.0001719012042597601, "loss": 1.9091, "step": 412 }, { "epoch": 0.269802384452066, "grad_norm": Infinity, "learning_rate": 0.00017175351539649774, "loss": 1.9509, "step": 413 }, { "epoch": 0.27045565899069085, "grad_norm": Infinity, "learning_rate": 0.00017160550322308863, "loss": 1.9816, "step": 414 }, { "epoch": 0.2711089335293157, "grad_norm": Infinity, "learning_rate": 0.00017145716840645254, "loss": 2.3551, "step": 415 }, { "epoch": 0.27176220806794055, "grad_norm": Infinity, "learning_rate": 0.000171308511614963, "loss": 2.2993, "step": 416 }, { "epoch": 0.2724154826065654, "grad_norm": Infinity, "learning_rate": 0.00017115953351844434, "loss": 2.4097, "step": 417 }, { "epoch": 0.27306875714519024, "grad_norm": Infinity, "learning_rate": 0.00017101023478816857, "loss": 2.6394, "step": 418 }, { "epoch": 0.27372203168381515, "grad_norm": Infinity, "learning_rate": 0.00017086061609685257, "loss": 2.7541, "step": 419 }, { "epoch": 0.27437530622244, "grad_norm": Infinity, "learning_rate": 0.00017071067811865476, "loss": 2.6986, "step": 420 }, { "epoch": 0.27502858076106484, "grad_norm": Infinity, "learning_rate": 0.0001705604215291723, "loss": 2.8947, "step": 421 }, { "epoch": 0.2756818552996897, "grad_norm": Infinity, "learning_rate": 0.00017040984700543793, "loss": 2.5362, "step": 422 }, { "epoch": 0.27633512983831454, "grad_norm": Infinity, "learning_rate": 0.00017025895522591693, "loss": 2.8326, "step": 423 }, { "epoch": 0.2769884043769394, "grad_norm": Infinity, "learning_rate": 0.00017010774687050418, "loss": 3.813, "step": 424 }, { "epoch": 0.27764167891556424, "grad_norm": Infinity, "learning_rate": 0.00016995622262052092, "loss": 3.9088, "step": 425 }, { "epoch": 0.27829495345418914, "grad_norm": Infinity, "learning_rate": 0.00016980438315871178, "loss": 1.4111, "step": 426 }, { "epoch": 0.278948227992814, "grad_norm": Infinity, "learning_rate": 0.00016965222916924167, "loss": 1.3758, "step": 427 }, { "epoch": 0.27960150253143884, "grad_norm": Infinity, "learning_rate": 0.0001694997613376928, "loss": 1.4541, "step": 428 }, { "epoch": 0.2802547770700637, "grad_norm": Infinity, "learning_rate": 0.00016934698035106133, "loss": 1.3758, "step": 429 }, { "epoch": 0.28090805160868854, "grad_norm": Infinity, "learning_rate": 0.00016919388689775464, "loss": 1.491, "step": 430 }, { "epoch": 0.2815613261473134, "grad_norm": Infinity, "learning_rate": 0.0001690404816675879, "loss": 1.5145, "step": 431 }, { "epoch": 0.2822146006859383, "grad_norm": Infinity, "learning_rate": 0.00016888676535178115, "loss": 1.5918, "step": 432 }, { "epoch": 0.28286787522456314, "grad_norm": Infinity, "learning_rate": 0.00016873273864295612, "loss": 1.5333, "step": 433 }, { "epoch": 0.283521149763188, "grad_norm": Infinity, "learning_rate": 0.00016857840223513315, "loss": 1.7168, "step": 434 }, { "epoch": 0.28417442430181283, "grad_norm": Infinity, "learning_rate": 0.00016842375682372805, "loss": 1.5902, "step": 435 }, { "epoch": 0.2848276988404377, "grad_norm": Infinity, "learning_rate": 0.00016826880310554887, "loss": 1.6932, "step": 436 }, { "epoch": 0.28548097337906253, "grad_norm": Infinity, "learning_rate": 0.00016811354177879287, "loss": 1.8251, "step": 437 }, { "epoch": 0.28613424791768743, "grad_norm": Infinity, "learning_rate": 0.00016795797354304345, "loss": 1.7183, "step": 438 }, { "epoch": 0.2867875224563123, "grad_norm": Infinity, "learning_rate": 0.00016780209909926676, "loss": 1.836, "step": 439 }, { "epoch": 0.28744079699493713, "grad_norm": Infinity, "learning_rate": 0.0001676459191498087, "loss": 2.0649, "step": 440 }, { "epoch": 0.288094071533562, "grad_norm": Infinity, "learning_rate": 0.0001674894343983918, "loss": 2.058, "step": 441 }, { "epoch": 0.28874734607218683, "grad_norm": Infinity, "learning_rate": 0.00016733264555011195, "loss": 2.1594, "step": 442 }, { "epoch": 0.2894006206108117, "grad_norm": Infinity, "learning_rate": 0.0001671755533114352, "loss": 2.3795, "step": 443 }, { "epoch": 0.2900538951494365, "grad_norm": Infinity, "learning_rate": 0.00016701815839019468, "loss": 2.6133, "step": 444 }, { "epoch": 0.29070716968806143, "grad_norm": Infinity, "learning_rate": 0.00016686046149558736, "loss": 2.5835, "step": 445 }, { "epoch": 0.2913604442266863, "grad_norm": Infinity, "learning_rate": 0.00016670246333817088, "loss": 2.9254, "step": 446 }, { "epoch": 0.2920137187653111, "grad_norm": Infinity, "learning_rate": 0.00016654416462986022, "loss": 2.9367, "step": 447 }, { "epoch": 0.292666993303936, "grad_norm": Infinity, "learning_rate": 0.00016638556608392473, "loss": 3.2409, "step": 448 }, { "epoch": 0.2933202678425608, "grad_norm": Infinity, "learning_rate": 0.00016622666841498463, "loss": 3.7771, "step": 449 }, { "epoch": 0.29397354238118567, "grad_norm": Infinity, "learning_rate": 0.00016606747233900815, "loss": 4.364, "step": 450 }, { "epoch": 0.2946268169198106, "grad_norm": Infinity, "learning_rate": 0.00016590797857330789, "loss": 1.2995, "step": 451 }, { "epoch": 0.2952800914584354, "grad_norm": Infinity, "learning_rate": 0.0001657481878365379, "loss": 1.4345, "step": 452 }, { "epoch": 0.29593336599706027, "grad_norm": Infinity, "learning_rate": 0.0001655881008486903, "loss": 1.421, "step": 453 }, { "epoch": 0.2965866405356851, "grad_norm": Infinity, "learning_rate": 0.0001654277183310921, "loss": 1.4081, "step": 454 }, { "epoch": 0.29723991507430997, "grad_norm": Infinity, "learning_rate": 0.0001652670410064019, "loss": 1.4488, "step": 455 }, { "epoch": 0.2978931896129348, "grad_norm": Infinity, "learning_rate": 0.00016510606959860665, "loss": 1.4637, "step": 456 }, { "epoch": 0.2985464641515597, "grad_norm": Infinity, "learning_rate": 0.00016494480483301836, "loss": 1.5709, "step": 457 }, { "epoch": 0.29919973869018457, "grad_norm": Infinity, "learning_rate": 0.00016478324743627101, "loss": 1.4784, "step": 458 }, { "epoch": 0.2998530132288094, "grad_norm": Infinity, "learning_rate": 0.00016462139813631693, "loss": 1.5429, "step": 459 }, { "epoch": 0.30050628776743427, "grad_norm": Infinity, "learning_rate": 0.00016445925766242391, "loss": 1.6066, "step": 460 }, { "epoch": 0.3011595623060591, "grad_norm": Infinity, "learning_rate": 0.00016429682674517156, "loss": 1.8577, "step": 461 }, { "epoch": 0.30181283684468396, "grad_norm": Infinity, "learning_rate": 0.00016413410611644825, "loss": 1.7408, "step": 462 }, { "epoch": 0.3024661113833088, "grad_norm": Infinity, "learning_rate": 0.0001639710965094478, "loss": 1.8884, "step": 463 }, { "epoch": 0.3031193859219337, "grad_norm": Infinity, "learning_rate": 0.00016380779865866603, "loss": 2.052, "step": 464 }, { "epoch": 0.30377266046055856, "grad_norm": Infinity, "learning_rate": 0.00016364421329989755, "loss": 1.9189, "step": 465 }, { "epoch": 0.3044259349991834, "grad_norm": Infinity, "learning_rate": 0.00016348034117023258, "loss": 2.2974, "step": 466 }, { "epoch": 0.30507920953780826, "grad_norm": Infinity, "learning_rate": 0.00016331618300805326, "loss": 2.187, "step": 467 }, { "epoch": 0.3057324840764331, "grad_norm": Infinity, "learning_rate": 0.00016315173955303068, "loss": 2.4096, "step": 468 }, { "epoch": 0.30638575861505796, "grad_norm": Infinity, "learning_rate": 0.00016298701154612147, "loss": 2.3334, "step": 469 }, { "epoch": 0.30703903315368286, "grad_norm": Infinity, "learning_rate": 0.00016282199972956425, "loss": 2.7411, "step": 470 }, { "epoch": 0.3076923076923077, "grad_norm": Infinity, "learning_rate": 0.00016265670484687654, "loss": 2.7242, "step": 471 }, { "epoch": 0.30834558223093256, "grad_norm": Infinity, "learning_rate": 0.0001624911276428513, "loss": 3.1537, "step": 472 }, { "epoch": 0.3089988567695574, "grad_norm": Infinity, "learning_rate": 0.0001623252688635536, "loss": 3.5625, "step": 473 }, { "epoch": 0.30965213130818225, "grad_norm": Infinity, "learning_rate": 0.00016215912925631723, "loss": 3.6483, "step": 474 }, { "epoch": 0.3103054058468071, "grad_norm": Infinity, "learning_rate": 0.00016199270956974128, "loss": 4.2782, "step": 475 }, { "epoch": 0.31095868038543195, "grad_norm": Infinity, "learning_rate": 0.00016182601055368697, "loss": 1.3157, "step": 476 }, { "epoch": 0.31161195492405686, "grad_norm": Infinity, "learning_rate": 0.00016165903295927401, "loss": 1.4153, "step": 477 }, { "epoch": 0.3122652294626817, "grad_norm": Infinity, "learning_rate": 0.00016149177753887746, "loss": 1.4591, "step": 478 }, { "epoch": 0.31291850400130655, "grad_norm": Infinity, "learning_rate": 0.00016132424504612406, "loss": 1.4796, "step": 479 }, { "epoch": 0.3135717785399314, "grad_norm": Infinity, "learning_rate": 0.00016115643623588915, "loss": 1.5419, "step": 480 }, { "epoch": 0.31422505307855625, "grad_norm": Infinity, "learning_rate": 0.00016098835186429303, "loss": 1.4612, "step": 481 }, { "epoch": 0.3148783276171811, "grad_norm": Infinity, "learning_rate": 0.00016081999268869766, "loss": 1.6256, "step": 482 }, { "epoch": 0.315531602155806, "grad_norm": Infinity, "learning_rate": 0.00016065135946770323, "loss": 1.6535, "step": 483 }, { "epoch": 0.31618487669443085, "grad_norm": Infinity, "learning_rate": 0.00016048245296114468, "loss": 1.6035, "step": 484 }, { "epoch": 0.3168381512330557, "grad_norm": Infinity, "learning_rate": 0.00016031327393008845, "loss": 1.575, "step": 485 }, { "epoch": 0.31749142577168055, "grad_norm": Infinity, "learning_rate": 0.00016014382313682881, "loss": 1.792, "step": 486 }, { "epoch": 0.3181447003103054, "grad_norm": Infinity, "learning_rate": 0.00015997410134488464, "loss": 1.7314, "step": 487 }, { "epoch": 0.31879797484893024, "grad_norm": Infinity, "learning_rate": 0.00015980410931899582, "loss": 1.8793, "step": 488 }, { "epoch": 0.31945124938755515, "grad_norm": Infinity, "learning_rate": 0.00015963384782511993, "loss": 1.9586, "step": 489 }, { "epoch": 0.32010452392618, "grad_norm": Infinity, "learning_rate": 0.00015946331763042867, "loss": 1.9951, "step": 490 }, { "epoch": 0.32075779846480484, "grad_norm": Infinity, "learning_rate": 0.0001592925195033045, "loss": 2.1583, "step": 491 }, { "epoch": 0.3214110730034297, "grad_norm": Infinity, "learning_rate": 0.00015912145421333719, "loss": 2.3153, "step": 492 }, { "epoch": 0.32206434754205454, "grad_norm": Infinity, "learning_rate": 0.00015895012253132017, "loss": 2.3933, "step": 493 }, { "epoch": 0.3227176220806794, "grad_norm": Infinity, "learning_rate": 0.00015877852522924732, "loss": 2.3513, "step": 494 }, { "epoch": 0.32337089661930424, "grad_norm": Infinity, "learning_rate": 0.00015860666308030932, "loss": 2.6687, "step": 495 }, { "epoch": 0.32402417115792914, "grad_norm": Infinity, "learning_rate": 0.00015843453685889016, "loss": 2.8344, "step": 496 }, { "epoch": 0.324677445696554, "grad_norm": Infinity, "learning_rate": 0.00015826214734056374, "loss": 2.883, "step": 497 }, { "epoch": 0.32533072023517884, "grad_norm": Infinity, "learning_rate": 0.0001580894953020904, "loss": 3.1467, "step": 498 }, { "epoch": 0.3259839947738037, "grad_norm": Infinity, "learning_rate": 0.00015791658152141327, "loss": 3.3732, "step": 499 }, { "epoch": 0.32663726931242854, "grad_norm": Infinity, "learning_rate": 0.0001577434067776548, "loss": 4.2269, "step": 500 }, { "epoch": 0.3272905438510534, "grad_norm": Infinity, "learning_rate": 0.00015756997185111348, "loss": 1.2486, "step": 501 }, { "epoch": 0.3279438183896783, "grad_norm": Infinity, "learning_rate": 0.00015739627752325996, "loss": 1.3895, "step": 502 }, { "epoch": 0.32859709292830314, "grad_norm": Infinity, "learning_rate": 0.0001572223245767338, "loss": 1.422, "step": 503 }, { "epoch": 0.329250367466928, "grad_norm": Infinity, "learning_rate": 0.00015704811379533987, "loss": 1.4185, "step": 504 }, { "epoch": 0.32990364200555283, "grad_norm": Infinity, "learning_rate": 0.0001568736459640447, "loss": 1.4656, "step": 505 }, { "epoch": 0.3305569165441777, "grad_norm": Infinity, "learning_rate": 0.00015669892186897318, "loss": 1.4505, "step": 506 }, { "epoch": 0.33121019108280253, "grad_norm": Infinity, "learning_rate": 0.00015652394229740484, "loss": 1.6007, "step": 507 }, { "epoch": 0.3318634656214274, "grad_norm": Infinity, "learning_rate": 0.00015634870803777026, "loss": 1.6112, "step": 508 }, { "epoch": 0.3325167401600523, "grad_norm": Infinity, "learning_rate": 0.00015617321987964776, "loss": 1.7202, "step": 509 }, { "epoch": 0.33317001469867713, "grad_norm": Infinity, "learning_rate": 0.00015599747861375955, "loss": 1.6825, "step": 510 }, { "epoch": 0.333823289237302, "grad_norm": Infinity, "learning_rate": 0.0001558214850319684, "loss": 1.8399, "step": 511 }, { "epoch": 0.3344765637759268, "grad_norm": Infinity, "learning_rate": 0.00015564523992727387, "loss": 1.7425, "step": 512 }, { "epoch": 0.3351298383145517, "grad_norm": Infinity, "learning_rate": 0.00015546874409380897, "loss": 1.7817, "step": 513 }, { "epoch": 0.3357831128531765, "grad_norm": Infinity, "learning_rate": 0.00015529199832683635, "loss": 2.0647, "step": 514 }, { "epoch": 0.33643638739180143, "grad_norm": Infinity, "learning_rate": 0.0001551150034227449, "loss": 1.9344, "step": 515 }, { "epoch": 0.3370896619304263, "grad_norm": Infinity, "learning_rate": 0.00015493776017904602, "loss": 2.2739, "step": 516 }, { "epoch": 0.3377429364690511, "grad_norm": Infinity, "learning_rate": 0.00015476026939437014, "loss": 2.4646, "step": 517 }, { "epoch": 0.338396211007676, "grad_norm": Infinity, "learning_rate": 0.00015458253186846301, "loss": 2.5544, "step": 518 }, { "epoch": 0.3390494855463008, "grad_norm": Infinity, "learning_rate": 0.00015440454840218225, "loss": 2.4784, "step": 519 }, { "epoch": 0.33970276008492567, "grad_norm": Infinity, "learning_rate": 0.00015422631979749354, "loss": 2.8688, "step": 520 }, { "epoch": 0.3403560346235506, "grad_norm": Infinity, "learning_rate": 0.00015404784685746716, "loss": 2.8859, "step": 521 }, { "epoch": 0.3410093091621754, "grad_norm": Infinity, "learning_rate": 0.0001538691303862744, "loss": 3.362, "step": 522 }, { "epoch": 0.34166258370080027, "grad_norm": Infinity, "learning_rate": 0.00015369017118918373, "loss": 3.297, "step": 523 }, { "epoch": 0.3423158582394251, "grad_norm": Infinity, "learning_rate": 0.00015351097007255742, "loss": 3.6869, "step": 524 }, { "epoch": 0.34296913277804997, "grad_norm": Infinity, "learning_rate": 0.00015333152784384777, "loss": 4.2406, "step": 525 }, { "epoch": 0.3436224073166748, "grad_norm": Infinity, "learning_rate": 0.0001531518453115934, "loss": 1.3543, "step": 526 }, { "epoch": 0.34427568185529966, "grad_norm": Infinity, "learning_rate": 0.00015297192328541582, "loss": 1.4295, "step": 527 }, { "epoch": 0.34492895639392457, "grad_norm": Infinity, "learning_rate": 0.00015279176257601557, "loss": 1.4386, "step": 528 }, { "epoch": 0.3455822309325494, "grad_norm": Infinity, "learning_rate": 0.00015261136399516873, "loss": 1.4315, "step": 529 }, { "epoch": 0.34623550547117427, "grad_norm": Infinity, "learning_rate": 0.00015243072835572318, "loss": 1.4927, "step": 530 }, { "epoch": 0.3468887800097991, "grad_norm": Infinity, "learning_rate": 0.0001522498564715949, "loss": 1.4866, "step": 531 }, { "epoch": 0.34754205454842396, "grad_norm": Infinity, "learning_rate": 0.0001520687491577644, "loss": 1.5698, "step": 532 }, { "epoch": 0.3481953290870488, "grad_norm": Infinity, "learning_rate": 0.00015188740723027296, "loss": 1.5634, "step": 533 }, { "epoch": 0.3488486036256737, "grad_norm": Infinity, "learning_rate": 0.00015170583150621905, "loss": 1.6559, "step": 534 }, { "epoch": 0.34950187816429856, "grad_norm": Infinity, "learning_rate": 0.00015152402280375454, "loss": 1.726, "step": 535 }, { "epoch": 0.3501551527029234, "grad_norm": Infinity, "learning_rate": 0.00015134198194208107, "loss": 1.752, "step": 536 }, { "epoch": 0.35080842724154826, "grad_norm": Infinity, "learning_rate": 0.00015115970974144635, "loss": 1.7894, "step": 537 }, { "epoch": 0.3514617017801731, "grad_norm": Infinity, "learning_rate": 0.00015097720702314055, "loss": 1.8187, "step": 538 }, { "epoch": 0.35211497631879796, "grad_norm": Infinity, "learning_rate": 0.00015079447460949238, "loss": 1.8663, "step": 539 }, { "epoch": 0.3527682508574228, "grad_norm": Infinity, "learning_rate": 0.00015061151332386566, "loss": 1.8478, "step": 540 }, { "epoch": 0.3534215253960477, "grad_norm": Infinity, "learning_rate": 0.0001504283239906553, "loss": 1.8881, "step": 541 }, { "epoch": 0.35407479993467256, "grad_norm": Infinity, "learning_rate": 0.00015024490743528393, "loss": 2.1571, "step": 542 }, { "epoch": 0.3547280744732974, "grad_norm": Infinity, "learning_rate": 0.00015006126448419791, "loss": 2.4004, "step": 543 }, { "epoch": 0.35538134901192225, "grad_norm": Infinity, "learning_rate": 0.00014987739596486374, "loss": 2.4008, "step": 544 }, { "epoch": 0.3560346235505471, "grad_norm": Infinity, "learning_rate": 0.00014969330270576427, "loss": 2.7406, "step": 545 }, { "epoch": 0.35668789808917195, "grad_norm": Infinity, "learning_rate": 0.00014950898553639505, "loss": 2.6133, "step": 546 }, { "epoch": 0.35734117262779685, "grad_norm": Infinity, "learning_rate": 0.00014932444528726043, "loss": 2.9329, "step": 547 }, { "epoch": 0.3579944471664217, "grad_norm": Infinity, "learning_rate": 0.00014913968278987004, "loss": 3.1703, "step": 548 }, { "epoch": 0.35864772170504655, "grad_norm": Infinity, "learning_rate": 0.00014895469887673483, "loss": 2.9231, "step": 549 }, { "epoch": 0.3593009962436714, "grad_norm": Infinity, "learning_rate": 0.00014876949438136347, "loss": 4.0923, "step": 550 }, { "epoch": 0.35995427078229625, "grad_norm": Infinity, "learning_rate": 0.00014858407013825854, "loss": 1.3462, "step": 551 }, { "epoch": 0.3606075453209211, "grad_norm": Infinity, "learning_rate": 0.00014839842698291267, "loss": 1.4769, "step": 552 }, { "epoch": 0.361260819859546, "grad_norm": Infinity, "learning_rate": 0.00014821256575180507, "loss": 1.3811, "step": 553 }, { "epoch": 0.36191409439817085, "grad_norm": Infinity, "learning_rate": 0.00014802648728239742, "loss": 1.3628, "step": 554 }, { "epoch": 0.3625673689367957, "grad_norm": Infinity, "learning_rate": 0.00014784019241313026, "loss": 1.4687, "step": 555 }, { "epoch": 0.36322064347542055, "grad_norm": Infinity, "learning_rate": 0.00014765368198341918, "loss": 1.4745, "step": 556 }, { "epoch": 0.3638739180140454, "grad_norm": Infinity, "learning_rate": 0.00014746695683365112, "loss": 1.4466, "step": 557 }, { "epoch": 0.36452719255267024, "grad_norm": Infinity, "learning_rate": 0.0001472800178051805, "loss": 1.6015, "step": 558 }, { "epoch": 0.3651804670912951, "grad_norm": Infinity, "learning_rate": 0.00014709286574032536, "loss": 1.5199, "step": 559 }, { "epoch": 0.36583374162992, "grad_norm": Infinity, "learning_rate": 0.0001469055014823637, "loss": 1.5306, "step": 560 }, { "epoch": 0.36648701616854484, "grad_norm": Infinity, "learning_rate": 0.0001467179258755297, "loss": 1.745, "step": 561 }, { "epoch": 0.3671402907071697, "grad_norm": Infinity, "learning_rate": 0.00014653013976500975, "loss": 1.8516, "step": 562 }, { "epoch": 0.36779356524579454, "grad_norm": Infinity, "learning_rate": 0.0001463421439969388, "loss": 1.8714, "step": 563 }, { "epoch": 0.3684468397844194, "grad_norm": Infinity, "learning_rate": 0.00014615393941839637, "loss": 1.8113, "step": 564 }, { "epoch": 0.36910011432304424, "grad_norm": Infinity, "learning_rate": 0.00014596552687740302, "loss": 2.0245, "step": 565 }, { "epoch": 0.36975338886166914, "grad_norm": Infinity, "learning_rate": 0.00014577690722291622, "loss": 1.963, "step": 566 }, { "epoch": 0.370406663400294, "grad_norm": Infinity, "learning_rate": 0.00014558808130482674, "loss": 2.3101, "step": 567 }, { "epoch": 0.37105993793891884, "grad_norm": Infinity, "learning_rate": 0.00014539904997395468, "loss": 2.2081, "step": 568 }, { "epoch": 0.3717132124775437, "grad_norm": Infinity, "learning_rate": 0.00014520981408204574, "loss": 2.5591, "step": 569 }, { "epoch": 0.37236648701616853, "grad_norm": Infinity, "learning_rate": 0.00014502037448176734, "loss": 2.301, "step": 570 }, { "epoch": 0.3730197615547934, "grad_norm": Infinity, "learning_rate": 0.00014483073202670475, "loss": 2.8042, "step": 571 }, { "epoch": 0.37367303609341823, "grad_norm": Infinity, "learning_rate": 0.00014464088757135728, "loss": 2.9381, "step": 572 }, { "epoch": 0.37432631063204314, "grad_norm": Infinity, "learning_rate": 0.00014445084197113443, "loss": 2.9872, "step": 573 }, { "epoch": 0.374979585170668, "grad_norm": Infinity, "learning_rate": 0.00014426059608235208, "loss": 3.4324, "step": 574 }, { "epoch": 0.37563285970929283, "grad_norm": Infinity, "learning_rate": 0.00014407015076222846, "loss": 4.1841, "step": 575 }, { "epoch": 0.3762861342479177, "grad_norm": Infinity, "learning_rate": 0.00014387950686888047, "loss": 1.1931, "step": 576 }, { "epoch": 0.37693940878654253, "grad_norm": Infinity, "learning_rate": 0.0001436886652613198, "loss": 1.3665, "step": 577 }, { "epoch": 0.3775926833251674, "grad_norm": Infinity, "learning_rate": 0.00014349762679944896, "loss": 1.402, "step": 578 }, { "epoch": 0.3782459578637923, "grad_norm": Infinity, "learning_rate": 0.00014330639234405742, "loss": 1.4069, "step": 579 }, { "epoch": 0.37889923240241713, "grad_norm": Infinity, "learning_rate": 0.00014311496275681783, "loss": 1.4134, "step": 580 }, { "epoch": 0.379552506941042, "grad_norm": Infinity, "learning_rate": 0.00014292333890028204, "loss": 1.5248, "step": 581 }, { "epoch": 0.3802057814796668, "grad_norm": Infinity, "learning_rate": 0.00014273152163787726, "loss": 1.5282, "step": 582 }, { "epoch": 0.3808590560182917, "grad_norm": Infinity, "learning_rate": 0.00014253951183390215, "loss": 1.6725, "step": 583 }, { "epoch": 0.3815123305569165, "grad_norm": Infinity, "learning_rate": 0.0001423473103535229, "loss": 1.5775, "step": 584 }, { "epoch": 0.3821656050955414, "grad_norm": Infinity, "learning_rate": 0.00014215491806276944, "loss": 1.7086, "step": 585 }, { "epoch": 0.3828188796341663, "grad_norm": Infinity, "learning_rate": 0.0001419623358285314, "loss": 1.7671, "step": 586 }, { "epoch": 0.3834721541727911, "grad_norm": Infinity, "learning_rate": 0.0001417695645185543, "loss": 1.7763, "step": 587 }, { "epoch": 0.384125428711416, "grad_norm": Infinity, "learning_rate": 0.00014157660500143553, "loss": 1.9369, "step": 588 }, { "epoch": 0.3847787032500408, "grad_norm": Infinity, "learning_rate": 0.00014138345814662068, "loss": 1.8933, "step": 589 }, { "epoch": 0.38543197778866567, "grad_norm": Infinity, "learning_rate": 0.0001411901248243993, "loss": 1.9815, "step": 590 }, { "epoch": 0.3860852523272905, "grad_norm": Infinity, "learning_rate": 0.00014099660590590114, "loss": 2.2974, "step": 591 }, { "epoch": 0.3867385268659154, "grad_norm": Infinity, "learning_rate": 0.00014080290226309224, "loss": 2.2827, "step": 592 }, { "epoch": 0.38739180140454027, "grad_norm": Infinity, "learning_rate": 0.00014060901476877107, "loss": 2.8547, "step": 593 }, { "epoch": 0.3880450759431651, "grad_norm": Infinity, "learning_rate": 0.00014041494429656442, "loss": 2.4635, "step": 594 }, { "epoch": 0.38869835048178997, "grad_norm": Infinity, "learning_rate": 0.00014022069172092352, "loss": 2.9595, "step": 595 }, { "epoch": 0.3893516250204148, "grad_norm": Infinity, "learning_rate": 0.00014002625791712021, "loss": 2.7913, "step": 596 }, { "epoch": 0.39000489955903966, "grad_norm": Infinity, "learning_rate": 0.00013983164376124286, "loss": 2.8025, "step": 597 }, { "epoch": 0.39065817409766457, "grad_norm": Infinity, "learning_rate": 0.0001396368501301925, "loss": 3.4075, "step": 598 }, { "epoch": 0.3913114486362894, "grad_norm": Infinity, "learning_rate": 0.0001394418779016789, "loss": 3.6667, "step": 599 }, { "epoch": 0.39196472317491426, "grad_norm": Infinity, "learning_rate": 0.00013924672795421637, "loss": 4.7289, "step": 600 }, { "epoch": 0.3926179977135391, "grad_norm": Infinity, "learning_rate": 0.00013905140116712026, "loss": 1.3319, "step": 601 }, { "epoch": 0.39327127225216396, "grad_norm": Infinity, "learning_rate": 0.00013885589842050253, "loss": 1.4609, "step": 602 }, { "epoch": 0.3939245467907888, "grad_norm": Infinity, "learning_rate": 0.0001386602205952681, "loss": 1.4258, "step": 603 }, { "epoch": 0.3945778213294137, "grad_norm": Infinity, "learning_rate": 0.00013846436857311068, "loss": 1.4458, "step": 604 }, { "epoch": 0.39523109586803856, "grad_norm": Infinity, "learning_rate": 0.000138268343236509, "loss": 1.2812, "step": 605 }, { "epoch": 0.3958843704066634, "grad_norm": Infinity, "learning_rate": 0.00013807214546872256, "loss": 1.5763, "step": 606 }, { "epoch": 0.39653764494528826, "grad_norm": Infinity, "learning_rate": 0.00013787577615378792, "loss": 1.5288, "step": 607 }, { "epoch": 0.3971909194839131, "grad_norm": Infinity, "learning_rate": 0.00013767923617651463, "loss": 1.5984, "step": 608 }, { "epoch": 0.39784419402253796, "grad_norm": Infinity, "learning_rate": 0.00013748252642248115, "loss": 1.5448, "step": 609 }, { "epoch": 0.3984974685611628, "grad_norm": Infinity, "learning_rate": 0.00013728564777803088, "loss": 1.745, "step": 610 }, { "epoch": 0.3991507430997877, "grad_norm": Infinity, "learning_rate": 0.00013708860113026834, "loss": 1.7803, "step": 611 }, { "epoch": 0.39980401763841256, "grad_norm": Infinity, "learning_rate": 0.00013689138736705495, "loss": 1.7888, "step": 612 }, { "epoch": 0.4004572921770374, "grad_norm": Infinity, "learning_rate": 0.0001366940073770052, "loss": 1.8004, "step": 613 }, { "epoch": 0.40111056671566225, "grad_norm": Infinity, "learning_rate": 0.00013649646204948255, "loss": 1.7229, "step": 614 }, { "epoch": 0.4017638412542871, "grad_norm": Infinity, "learning_rate": 0.00013629875227459532, "loss": 2.0557, "step": 615 }, { "epoch": 0.40241711579291195, "grad_norm": Infinity, "learning_rate": 0.00013610087894319302, "loss": 1.9848, "step": 616 }, { "epoch": 0.40307039033153685, "grad_norm": Infinity, "learning_rate": 0.00013590284294686203, "loss": 2.3338, "step": 617 }, { "epoch": 0.4037236648701617, "grad_norm": Infinity, "learning_rate": 0.00013570464517792153, "loss": 2.2821, "step": 618 }, { "epoch": 0.40437693940878655, "grad_norm": Infinity, "learning_rate": 0.00013550628652941985, "loss": 2.5272, "step": 619 }, { "epoch": 0.4050302139474114, "grad_norm": Infinity, "learning_rate": 0.0001353077678951301, "loss": 2.847, "step": 620 }, { "epoch": 0.40568348848603625, "grad_norm": Infinity, "learning_rate": 0.00013510909016954624, "loss": 3.0231, "step": 621 }, { "epoch": 0.4063367630246611, "grad_norm": Infinity, "learning_rate": 0.00013491025424787915, "loss": 2.7372, "step": 622 }, { "epoch": 0.40699003756328594, "grad_norm": Infinity, "learning_rate": 0.00013471126102605245, "loss": 3.2824, "step": 623 }, { "epoch": 0.40764331210191085, "grad_norm": Infinity, "learning_rate": 0.00013451211140069858, "loss": 3.2836, "step": 624 }, { "epoch": 0.4082965866405357, "grad_norm": Infinity, "learning_rate": 0.00013431280626915467, "loss": 4.2905, "step": 625 }, { "epoch": 0.40894986117916055, "grad_norm": Infinity, "learning_rate": 0.0001341133465294585, "loss": 1.1817, "step": 626 }, { "epoch": 0.4096031357177854, "grad_norm": Infinity, "learning_rate": 0.0001339137330803446, "loss": 1.3825, "step": 627 }, { "epoch": 0.41025641025641024, "grad_norm": Infinity, "learning_rate": 0.00013371396682124005, "loss": 1.4202, "step": 628 }, { "epoch": 0.4109096847950351, "grad_norm": Infinity, "learning_rate": 0.0001335140486522604, "loss": 1.4288, "step": 629 }, { "epoch": 0.41156295933366, "grad_norm": Infinity, "learning_rate": 0.00013331397947420576, "loss": 1.4444, "step": 630 }, { "epoch": 0.41221623387228484, "grad_norm": Infinity, "learning_rate": 0.00013311376018855663, "loss": 1.5484, "step": 631 }, { "epoch": 0.4128695084109097, "grad_norm": Infinity, "learning_rate": 0.0001329133916974699, "loss": 1.5702, "step": 632 }, { "epoch": 0.41352278294953454, "grad_norm": Infinity, "learning_rate": 0.00013271287490377467, "loss": 1.6131, "step": 633 }, { "epoch": 0.4141760574881594, "grad_norm": Infinity, "learning_rate": 0.00013251221071096836, "loss": 1.6361, "step": 634 }, { "epoch": 0.41482933202678424, "grad_norm": Infinity, "learning_rate": 0.00013231140002321253, "loss": 1.6556, "step": 635 }, { "epoch": 0.41548260656540914, "grad_norm": Infinity, "learning_rate": 0.00013211044374532882, "loss": 1.785, "step": 636 }, { "epoch": 0.416135881104034, "grad_norm": Infinity, "learning_rate": 0.00013190934278279487, "loss": 1.7989, "step": 637 }, { "epoch": 0.41678915564265884, "grad_norm": Infinity, "learning_rate": 0.00013170809804174022, "loss": 1.8345, "step": 638 }, { "epoch": 0.4174424301812837, "grad_norm": Infinity, "learning_rate": 0.00013150671042894228, "loss": 1.8782, "step": 639 }, { "epoch": 0.41809570471990853, "grad_norm": Infinity, "learning_rate": 0.00013130518085182225, "loss": 1.9949, "step": 640 }, { "epoch": 0.4187489792585334, "grad_norm": Infinity, "learning_rate": 0.00013110351021844094, "loss": 2.1345, "step": 641 }, { "epoch": 0.41940225379715823, "grad_norm": Infinity, "learning_rate": 0.00013090169943749476, "loss": 2.5038, "step": 642 }, { "epoch": 0.42005552833578313, "grad_norm": Infinity, "learning_rate": 0.0001306997494183116, "loss": 2.7144, "step": 643 }, { "epoch": 0.420708802874408, "grad_norm": Infinity, "learning_rate": 0.00013049766107084678, "loss": 2.7682, "step": 644 }, { "epoch": 0.42136207741303283, "grad_norm": Infinity, "learning_rate": 0.00013029543530567884, "loss": 2.5713, "step": 645 }, { "epoch": 0.4220153519516577, "grad_norm": Infinity, "learning_rate": 0.00013009307303400556, "loss": 2.7619, "step": 646 }, { "epoch": 0.42266862649028253, "grad_norm": Infinity, "learning_rate": 0.0001298905751676397, "loss": 2.9728, "step": 647 }, { "epoch": 0.4233219010289074, "grad_norm": Infinity, "learning_rate": 0.0001296879426190051, "loss": 2.9242, "step": 648 }, { "epoch": 0.4239751755675323, "grad_norm": Infinity, "learning_rate": 0.00012948517630113245, "loss": 3.7497, "step": 649 }, { "epoch": 0.42462845010615713, "grad_norm": Infinity, "learning_rate": 0.00012928227712765504, "loss": 3.9534, "step": 650 }, { "epoch": 0.425281724644782, "grad_norm": Infinity, "learning_rate": 0.00012907924601280498, "loss": 1.3583, "step": 651 }, { "epoch": 0.4259349991834068, "grad_norm": Infinity, "learning_rate": 0.0001288760838714088, "loss": 1.3782, "step": 652 }, { "epoch": 0.4265882737220317, "grad_norm": Infinity, "learning_rate": 0.0001286727916188834, "loss": 1.3568, "step": 653 }, { "epoch": 0.4272415482606565, "grad_norm": Infinity, "learning_rate": 0.00012846937017123197, "loss": 1.3555, "step": 654 }, { "epoch": 0.42789482279928137, "grad_norm": Infinity, "learning_rate": 0.00012826582044503978, "loss": 1.5263, "step": 655 }, { "epoch": 0.4285480973379063, "grad_norm": Infinity, "learning_rate": 0.00012806214335747018, "loss": 1.502, "step": 656 }, { "epoch": 0.4292013718765311, "grad_norm": Infinity, "learning_rate": 0.00012785833982626032, "loss": 1.4846, "step": 657 }, { "epoch": 0.42985464641515597, "grad_norm": Infinity, "learning_rate": 0.00012765441076971712, "loss": 1.5687, "step": 658 }, { "epoch": 0.4305079209537808, "grad_norm": Infinity, "learning_rate": 0.0001274503571067131, "loss": 1.6626, "step": 659 }, { "epoch": 0.43116119549240567, "grad_norm": Infinity, "learning_rate": 0.0001272461797566823, "loss": 1.7197, "step": 660 }, { "epoch": 0.4318144700310305, "grad_norm": Infinity, "learning_rate": 0.0001270418796396159, "loss": 1.7319, "step": 661 }, { "epoch": 0.4324677445696554, "grad_norm": Infinity, "learning_rate": 0.00012683745767605846, "loss": 1.8091, "step": 662 }, { "epoch": 0.43312101910828027, "grad_norm": Infinity, "learning_rate": 0.00012663291478710336, "loss": 1.8593, "step": 663 }, { "epoch": 0.4337742936469051, "grad_norm": Infinity, "learning_rate": 0.00012642825189438902, "loss": 1.9488, "step": 664 }, { "epoch": 0.43442756818552997, "grad_norm": Infinity, "learning_rate": 0.00012622346992009447, "loss": 2.1199, "step": 665 }, { "epoch": 0.4350808427241548, "grad_norm": Infinity, "learning_rate": 0.0001260185697869353, "loss": 2.0434, "step": 666 }, { "epoch": 0.43573411726277966, "grad_norm": Infinity, "learning_rate": 0.00012581355241815965, "loss": 2.3479, "step": 667 }, { "epoch": 0.43638739180140457, "grad_norm": Infinity, "learning_rate": 0.00012560841873754375, "loss": 2.2125, "step": 668 }, { "epoch": 0.4370406663400294, "grad_norm": Infinity, "learning_rate": 0.00012540316966938795, "loss": 2.3785, "step": 669 }, { "epoch": 0.43769394087865426, "grad_norm": Infinity, "learning_rate": 0.00012519780613851254, "loss": 2.6029, "step": 670 }, { "epoch": 0.4383472154172791, "grad_norm": Infinity, "learning_rate": 0.00012499232907025353, "loss": 2.5433, "step": 671 }, { "epoch": 0.43900048995590396, "grad_norm": Infinity, "learning_rate": 0.00012478673939045863, "loss": 2.7926, "step": 672 }, { "epoch": 0.4396537644945288, "grad_norm": Infinity, "learning_rate": 0.00012458103802548274, "loss": 3.0005, "step": 673 }, { "epoch": 0.44030703903315366, "grad_norm": Infinity, "learning_rate": 0.00012437522590218417, "loss": 3.1366, "step": 674 }, { "epoch": 0.44096031357177856, "grad_norm": Infinity, "learning_rate": 0.00012416930394792026, "loss": 4.2293, "step": 675 }, { "epoch": 0.4416135881104034, "grad_norm": Infinity, "learning_rate": 0.0001239632730905432, "loss": 1.2318, "step": 676 }, { "epoch": 0.44226686264902826, "grad_norm": Infinity, "learning_rate": 0.0001237571342583959, "loss": 1.3872, "step": 677 }, { "epoch": 0.4429201371876531, "grad_norm": Infinity, "learning_rate": 0.00012355088838030776, "loss": 1.4916, "step": 678 }, { "epoch": 0.44357341172627796, "grad_norm": Infinity, "learning_rate": 0.00012334453638559057, "loss": 1.434, "step": 679 }, { "epoch": 0.4442266862649028, "grad_norm": Infinity, "learning_rate": 0.00012313807920403419, "loss": 1.457, "step": 680 }, { "epoch": 0.4448799608035277, "grad_norm": Infinity, "learning_rate": 0.00012293151776590245, "loss": 1.4525, "step": 681 }, { "epoch": 0.44553323534215256, "grad_norm": Infinity, "learning_rate": 0.00012272485300192902, "loss": 1.4693, "step": 682 }, { "epoch": 0.4461865098807774, "grad_norm": Infinity, "learning_rate": 0.00012251808584331304, "loss": 1.493, "step": 683 }, { "epoch": 0.44683978441940225, "grad_norm": Infinity, "learning_rate": 0.00012231121722171512, "loss": 1.5844, "step": 684 }, { "epoch": 0.4474930589580271, "grad_norm": Infinity, "learning_rate": 0.00012210424806925301, "loss": 1.6257, "step": 685 }, { "epoch": 0.44814633349665195, "grad_norm": Infinity, "learning_rate": 0.00012189717931849731, "loss": 1.6306, "step": 686 }, { "epoch": 0.4487996080352768, "grad_norm": Infinity, "learning_rate": 0.00012169001190246765, "loss": 1.7162, "step": 687 }, { "epoch": 0.4494528825739017, "grad_norm": Infinity, "learning_rate": 0.00012148274675462801, "loss": 1.8626, "step": 688 }, { "epoch": 0.45010615711252655, "grad_norm": Infinity, "learning_rate": 0.00012127538480888283, "loss": 1.857, "step": 689 }, { "epoch": 0.4507594316511514, "grad_norm": Infinity, "learning_rate": 0.00012106792699957263, "loss": 1.9262, "step": 690 }, { "epoch": 0.45141270618977625, "grad_norm": Infinity, "learning_rate": 0.00012086037426147003, "loss": 2.0395, "step": 691 }, { "epoch": 0.4520659807284011, "grad_norm": Infinity, "learning_rate": 0.00012065272752977526, "loss": 2.0832, "step": 692 }, { "epoch": 0.45271925526702594, "grad_norm": Infinity, "learning_rate": 0.00012044498774011209, "loss": 2.5049, "step": 693 }, { "epoch": 0.45337252980565085, "grad_norm": Infinity, "learning_rate": 0.00012023715582852357, "loss": 2.4738, "step": 694 }, { "epoch": 0.4540258043442757, "grad_norm": Infinity, "learning_rate": 0.00012002923273146794, "loss": 2.2563, "step": 695 }, { "epoch": 0.45467907888290054, "grad_norm": Infinity, "learning_rate": 0.0001198212193858142, "loss": 2.936, "step": 696 }, { "epoch": 0.4553323534215254, "grad_norm": Infinity, "learning_rate": 0.00011961311672883804, "loss": 3.3187, "step": 697 }, { "epoch": 0.45598562796015024, "grad_norm": Infinity, "learning_rate": 0.00011940492569821753, "loss": 3.1426, "step": 698 }, { "epoch": 0.4566389024987751, "grad_norm": Infinity, "learning_rate": 0.00011919664723202906, "loss": 3.4786, "step": 699 }, { "epoch": 0.4572921770374, "grad_norm": Infinity, "learning_rate": 0.00011898828226874284, "loss": 4.6187, "step": 700 }, { "epoch": 0.45794545157602484, "grad_norm": Infinity, "learning_rate": 0.00011877983174721892, "loss": 1.315, "step": 701 }, { "epoch": 0.4585987261146497, "grad_norm": Infinity, "learning_rate": 0.00011857129660670281, "loss": 1.3934, "step": 702 }, { "epoch": 0.45925200065327454, "grad_norm": Infinity, "learning_rate": 0.00011836267778682133, "loss": 1.4157, "step": 703 }, { "epoch": 0.4599052751918994, "grad_norm": Infinity, "learning_rate": 0.00011815397622757838, "loss": 1.3945, "step": 704 }, { "epoch": 0.46055854973052424, "grad_norm": Infinity, "learning_rate": 0.00011794519286935055, "loss": 1.4258, "step": 705 }, { "epoch": 0.4612118242691491, "grad_norm": Infinity, "learning_rate": 0.00011773632865288309, "loss": 1.4199, "step": 706 }, { "epoch": 0.461865098807774, "grad_norm": Infinity, "learning_rate": 0.00011752738451928566, "loss": 1.5205, "step": 707 }, { "epoch": 0.46251837334639884, "grad_norm": Infinity, "learning_rate": 0.00011731836141002787, "loss": 1.5761, "step": 708 }, { "epoch": 0.4631716478850237, "grad_norm": Infinity, "learning_rate": 0.00011710926026693525, "loss": 1.6457, "step": 709 }, { "epoch": 0.46382492242364853, "grad_norm": Infinity, "learning_rate": 0.00011690008203218493, "loss": 1.8223, "step": 710 }, { "epoch": 0.4644781969622734, "grad_norm": Infinity, "learning_rate": 0.0001166908276483014, "loss": 1.7146, "step": 711 }, { "epoch": 0.46513147150089823, "grad_norm": Infinity, "learning_rate": 0.00011648149805815227, "loss": 1.6361, "step": 712 }, { "epoch": 0.46578474603952313, "grad_norm": Infinity, "learning_rate": 0.000116272094204944, "loss": 1.7237, "step": 713 }, { "epoch": 0.466438020578148, "grad_norm": Infinity, "learning_rate": 0.00011606261703221772, "loss": 1.8705, "step": 714 }, { "epoch": 0.46709129511677283, "grad_norm": Infinity, "learning_rate": 0.0001158530674838449, "loss": 1.9701, "step": 715 }, { "epoch": 0.4677445696553977, "grad_norm": Infinity, "learning_rate": 0.0001156434465040231, "loss": 2.136, "step": 716 }, { "epoch": 0.46839784419402253, "grad_norm": Infinity, "learning_rate": 0.00011543375503727174, "loss": 2.0878, "step": 717 }, { "epoch": 0.4690511187326474, "grad_norm": Infinity, "learning_rate": 0.00011522399402842783, "loss": 2.4607, "step": 718 }, { "epoch": 0.4697043932712722, "grad_norm": Infinity, "learning_rate": 0.00011501416442264184, "loss": 2.4705, "step": 719 }, { "epoch": 0.47035766780989713, "grad_norm": Infinity, "learning_rate": 0.00011480426716537315, "loss": 2.6055, "step": 720 }, { "epoch": 0.471010942348522, "grad_norm": Infinity, "learning_rate": 0.00011459430320238611, "loss": 2.7481, "step": 721 }, { "epoch": 0.4716642168871468, "grad_norm": Infinity, "learning_rate": 0.00011438427347974554, "loss": 2.9075, "step": 722 }, { "epoch": 0.4723174914257717, "grad_norm": Infinity, "learning_rate": 0.00011417417894381268, "loss": 3.147, "step": 723 }, { "epoch": 0.4729707659643965, "grad_norm": Infinity, "learning_rate": 0.00011396402054124067, "loss": 3.0189, "step": 724 }, { "epoch": 0.47362404050302137, "grad_norm": Infinity, "learning_rate": 0.00011375379921897051, "loss": 4.2631, "step": 725 }, { "epoch": 0.4742773150416463, "grad_norm": Infinity, "learning_rate": 0.00011354351592422665, "loss": 1.4092, "step": 726 }, { "epoch": 0.4749305895802711, "grad_norm": Infinity, "learning_rate": 0.00011333317160451286, "loss": 1.4541, "step": 727 }, { "epoch": 0.47558386411889597, "grad_norm": Infinity, "learning_rate": 0.00011312276720760782, "loss": 1.4517, "step": 728 }, { "epoch": 0.4762371386575208, "grad_norm": Infinity, "learning_rate": 0.00011291230368156087, "loss": 1.3585, "step": 729 }, { "epoch": 0.47689041319614567, "grad_norm": Infinity, "learning_rate": 0.00011270178197468789, "loss": 1.4466, "step": 730 }, { "epoch": 0.4775436877347705, "grad_norm": Infinity, "learning_rate": 0.00011249120303556681, "loss": 1.6079, "step": 731 }, { "epoch": 0.4781969622733954, "grad_norm": Infinity, "learning_rate": 0.0001122805678130335, "loss": 1.5759, "step": 732 }, { "epoch": 0.47885023681202027, "grad_norm": Infinity, "learning_rate": 0.00011206987725617741, "loss": 1.4571, "step": 733 }, { "epoch": 0.4795035113506451, "grad_norm": Infinity, "learning_rate": 0.00011185913231433733, "loss": 1.6658, "step": 734 }, { "epoch": 0.48015678588926997, "grad_norm": Infinity, "learning_rate": 0.00011164833393709706, "loss": 1.7304, "step": 735 }, { "epoch": 0.4808100604278948, "grad_norm": Infinity, "learning_rate": 0.00011143748307428126, "loss": 1.6036, "step": 736 }, { "epoch": 0.48146333496651966, "grad_norm": Infinity, "learning_rate": 0.00011122658067595094, "loss": 1.7269, "step": 737 }, { "epoch": 0.4821166095051445, "grad_norm": Infinity, "learning_rate": 0.00011101562769239946, "loss": 1.7812, "step": 738 }, { "epoch": 0.4827698840437694, "grad_norm": Infinity, "learning_rate": 0.00011080462507414806, "loss": 1.9052, "step": 739 }, { "epoch": 0.48342315858239426, "grad_norm": Infinity, "learning_rate": 0.00011059357377194161, "loss": 2.0699, "step": 740 }, { "epoch": 0.4840764331210191, "grad_norm": Infinity, "learning_rate": 0.00011038247473674434, "loss": 2.2282, "step": 741 }, { "epoch": 0.48472970765964396, "grad_norm": Infinity, "learning_rate": 0.0001101713289197356, "loss": 2.5375, "step": 742 }, { "epoch": 0.4853829821982688, "grad_norm": Infinity, "learning_rate": 0.0001099601372723055, "loss": 2.5702, "step": 743 }, { "epoch": 0.48603625673689366, "grad_norm": Infinity, "learning_rate": 0.00010974890074605062, "loss": 2.7113, "step": 744 }, { "epoch": 0.48668953127551856, "grad_norm": Infinity, "learning_rate": 0.00010953762029276982, "loss": 3.0058, "step": 745 }, { "epoch": 0.4873428058141434, "grad_norm": Infinity, "learning_rate": 0.00010932629686445986, "loss": 2.9393, "step": 746 }, { "epoch": 0.48799608035276826, "grad_norm": Infinity, "learning_rate": 0.00010911493141331113, "loss": 3.0283, "step": 747 }, { "epoch": 0.4886493548913931, "grad_norm": Infinity, "learning_rate": 0.00010890352489170341, "loss": 3.3168, "step": 748 }, { "epoch": 0.48930262943001795, "grad_norm": Infinity, "learning_rate": 0.00010869207825220147, "loss": 3.5297, "step": 749 }, { "epoch": 0.4899559039686428, "grad_norm": Infinity, "learning_rate": 0.00010848059244755093, "loss": 4.1073, "step": 750 }, { "epoch": 0.4906091785072677, "grad_norm": Infinity, "learning_rate": 0.0001082690684306738, "loss": 1.3489, "step": 751 }, { "epoch": 0.49126245304589256, "grad_norm": Infinity, "learning_rate": 0.00010805750715466429, "loss": 1.3607, "step": 752 }, { "epoch": 0.4919157275845174, "grad_norm": Infinity, "learning_rate": 0.0001078459095727845, "loss": 1.4329, "step": 753 }, { "epoch": 0.49256900212314225, "grad_norm": Infinity, "learning_rate": 0.00010763427663846015, "loss": 1.4151, "step": 754 }, { "epoch": 0.4932222766617671, "grad_norm": Infinity, "learning_rate": 0.00010742260930527625, "loss": 1.3808, "step": 755 }, { "epoch": 0.49387555120039195, "grad_norm": Infinity, "learning_rate": 0.00010721090852697275, "loss": 1.5492, "step": 756 }, { "epoch": 0.4945288257390168, "grad_norm": Infinity, "learning_rate": 0.00010699917525744032, "loss": 1.4695, "step": 757 }, { "epoch": 0.4951821002776417, "grad_norm": Infinity, "learning_rate": 0.00010678741045071609, "loss": 1.5456, "step": 758 }, { "epoch": 0.49583537481626655, "grad_norm": Infinity, "learning_rate": 0.0001065756150609792, "loss": 1.5857, "step": 759 }, { "epoch": 0.4964886493548914, "grad_norm": Infinity, "learning_rate": 0.00010636379004254664, "loss": 1.4613, "step": 760 }, { "epoch": 0.49714192389351625, "grad_norm": Infinity, "learning_rate": 0.0001061519363498689, "loss": 1.704, "step": 761 }, { "epoch": 0.4977951984321411, "grad_norm": Infinity, "learning_rate": 0.00010594005493752568, "loss": 1.6429, "step": 762 }, { "epoch": 0.49844847297076594, "grad_norm": Infinity, "learning_rate": 0.00010572814676022158, "loss": 1.7032, "step": 763 }, { "epoch": 0.49910174750939085, "grad_norm": Infinity, "learning_rate": 0.00010551621277278176, "loss": 1.6818, "step": 764 }, { "epoch": 0.4997550220480157, "grad_norm": Infinity, "learning_rate": 0.00010530425393014774, "loss": 2.0321, "step": 765 }, { "epoch": 0.5004082965866405, "grad_norm": Infinity, "learning_rate": 0.00010509227118737298, "loss": 1.8955, "step": 766 }, { "epoch": 0.5004082965866405, "eval_loss": 2.0970346927642822, "eval_runtime": 161.1777, "eval_samples_per_second": 15.995, "eval_steps_per_second": 4.002, "step": 766 }, { "epoch": 0.5010615711252654, "grad_norm": Infinity, "learning_rate": 0.00010488026549961863, "loss": 2.3278, "step": 767 }, { "epoch": 0.5017148456638902, "grad_norm": Infinity, "learning_rate": 0.00010466823782214927, "loss": 2.3015, "step": 768 }, { "epoch": 0.5023681202025151, "grad_norm": Infinity, "learning_rate": 0.00010445618911032853, "loss": 2.6376, "step": 769 }, { "epoch": 0.5030213947411399, "grad_norm": Infinity, "learning_rate": 0.00010424412031961484, "loss": 2.5291, "step": 770 }, { "epoch": 0.5036746692797648, "grad_norm": Infinity, "learning_rate": 0.00010403203240555707, "loss": 2.7269, "step": 771 }, { "epoch": 0.5043279438183896, "grad_norm": Infinity, "learning_rate": 0.0001038199263237903, "loss": 2.5242, "step": 772 }, { "epoch": 0.5049812183570145, "grad_norm": Infinity, "learning_rate": 0.00010360780303003139, "loss": 3.0819, "step": 773 }, { "epoch": 0.5056344928956394, "grad_norm": Infinity, "learning_rate": 0.00010339566348007487, "loss": 3.4961, "step": 774 }, { "epoch": 0.5062877674342643, "grad_norm": Infinity, "learning_rate": 0.00010318350862978848, "loss": 4.0663, "step": 775 }, { "epoch": 0.5069410419728891, "grad_norm": Infinity, "learning_rate": 0.00010297133943510879, "loss": 1.2678, "step": 776 }, { "epoch": 0.507594316511514, "grad_norm": Infinity, "learning_rate": 0.00010275915685203712, "loss": 1.3923, "step": 777 }, { "epoch": 0.5082475910501388, "grad_norm": Infinity, "learning_rate": 0.00010254696183663511, "loss": 1.403, "step": 778 }, { "epoch": 0.5089008655887637, "grad_norm": Infinity, "learning_rate": 0.00010233475534502042, "loss": 1.4813, "step": 779 }, { "epoch": 0.5095541401273885, "grad_norm": Infinity, "learning_rate": 0.00010212253833336237, "loss": 1.454, "step": 780 }, { "epoch": 0.5102074146660134, "grad_norm": Infinity, "learning_rate": 0.00010191031175787768, "loss": 1.4327, "step": 781 }, { "epoch": 0.5108606892046382, "grad_norm": Infinity, "learning_rate": 0.00010169807657482623, "loss": 1.4784, "step": 782 }, { "epoch": 0.5115139637432631, "grad_norm": Infinity, "learning_rate": 0.00010148583374050667, "loss": 1.6763, "step": 783 }, { "epoch": 0.5121672382818879, "grad_norm": Infinity, "learning_rate": 0.00010127358421125204, "loss": 1.6503, "step": 784 }, { "epoch": 0.5128205128205128, "grad_norm": Infinity, "learning_rate": 0.00010106132894342564, "loss": 1.8225, "step": 785 }, { "epoch": 0.5134737873591376, "grad_norm": Infinity, "learning_rate": 0.00010084906889341656, "loss": 1.5835, "step": 786 }, { "epoch": 0.5141270618977626, "grad_norm": Infinity, "learning_rate": 0.00010063680501763552, "loss": 1.8056, "step": 787 }, { "epoch": 0.5147803364363874, "grad_norm": Infinity, "learning_rate": 0.00010042453827251044, "loss": 1.9522, "step": 788 }, { "epoch": 0.5154336109750123, "grad_norm": Infinity, "learning_rate": 0.00010021226961448209, "loss": 2.0843, "step": 789 }, { "epoch": 0.5160868855136371, "grad_norm": Infinity, "learning_rate": 0.0001, "loss": 2.0254, "step": 790 }, { "epoch": 0.516740160052262, "grad_norm": Infinity, "learning_rate": 9.97877303855179e-05, "loss": 2.1822, "step": 791 }, { "epoch": 0.5173934345908868, "grad_norm": Infinity, "learning_rate": 9.957546172748958e-05, "loss": 2.2943, "step": 792 }, { "epoch": 0.5180467091295117, "grad_norm": Infinity, "learning_rate": 9.936319498236446e-05, "loss": 2.3479, "step": 793 }, { "epoch": 0.5186999836681365, "grad_norm": Infinity, "learning_rate": 9.915093110658346e-05, "loss": 2.7068, "step": 794 }, { "epoch": 0.5193532582067614, "grad_norm": Infinity, "learning_rate": 9.89386710565744e-05, "loss": 2.4189, "step": 795 }, { "epoch": 0.5200065327453862, "grad_norm": Infinity, "learning_rate": 9.8726415788748e-05, "loss": 3.0649, "step": 796 }, { "epoch": 0.5206598072840111, "grad_norm": Infinity, "learning_rate": 9.851416625949334e-05, "loss": 3.1619, "step": 797 }, { "epoch": 0.5213130818226359, "grad_norm": Infinity, "learning_rate": 9.830192342517379e-05, "loss": 3.4088, "step": 798 }, { "epoch": 0.5219663563612609, "grad_norm": Infinity, "learning_rate": 9.808968824212234e-05, "loss": 3.5337, "step": 799 }, { "epoch": 0.5226196308998857, "grad_norm": Infinity, "learning_rate": 9.787746166663764e-05, "loss": 4.3343, "step": 800 }, { "epoch": 0.5232729054385106, "grad_norm": Infinity, "learning_rate": 9.76652446549796e-05, "loss": 1.4279, "step": 801 }, { "epoch": 0.5239261799771354, "grad_norm": Infinity, "learning_rate": 9.745303816336489e-05, "loss": 1.3824, "step": 802 }, { "epoch": 0.5245794545157603, "grad_norm": Infinity, "learning_rate": 9.724084314796292e-05, "loss": 1.3597, "step": 803 }, { "epoch": 0.5252327290543851, "grad_norm": Infinity, "learning_rate": 9.702866056489125e-05, "loss": 1.3498, "step": 804 }, { "epoch": 0.52588600359301, "grad_norm": Infinity, "learning_rate": 9.681649137021158e-05, "loss": 1.3747, "step": 805 }, { "epoch": 0.5265392781316348, "grad_norm": Infinity, "learning_rate": 9.660433651992514e-05, "loss": 1.3791, "step": 806 }, { "epoch": 0.5271925526702597, "grad_norm": Infinity, "learning_rate": 9.639219696996861e-05, "loss": 1.5428, "step": 807 }, { "epoch": 0.5278458272088845, "grad_norm": Infinity, "learning_rate": 9.618007367620972e-05, "loss": 1.586, "step": 808 }, { "epoch": 0.5284991017475094, "grad_norm": Infinity, "learning_rate": 9.596796759444293e-05, "loss": 1.5683, "step": 809 }, { "epoch": 0.5291523762861342, "grad_norm": Infinity, "learning_rate": 9.57558796803852e-05, "loss": 1.6566, "step": 810 }, { "epoch": 0.5298056508247591, "grad_norm": Infinity, "learning_rate": 9.554381088967148e-05, "loss": 1.7084, "step": 811 }, { "epoch": 0.530458925363384, "grad_norm": Infinity, "learning_rate": 9.533176217785075e-05, "loss": 1.9305, "step": 812 }, { "epoch": 0.5311121999020089, "grad_norm": Infinity, "learning_rate": 9.51197345003814e-05, "loss": 1.8239, "step": 813 }, { "epoch": 0.5317654744406337, "grad_norm": Infinity, "learning_rate": 9.490772881262709e-05, "loss": 1.7874, "step": 814 }, { "epoch": 0.5324187489792586, "grad_norm": Infinity, "learning_rate": 9.46957460698523e-05, "loss": 1.8129, "step": 815 }, { "epoch": 0.5330720235178834, "grad_norm": Infinity, "learning_rate": 9.448378722721825e-05, "loss": 2.0876, "step": 816 }, { "epoch": 0.5337252980565083, "grad_norm": Infinity, "learning_rate": 9.427185323977845e-05, "loss": 2.2503, "step": 817 }, { "epoch": 0.5343785725951331, "grad_norm": Infinity, "learning_rate": 9.405994506247432e-05, "loss": 2.2582, "step": 818 }, { "epoch": 0.535031847133758, "grad_norm": Infinity, "learning_rate": 9.384806365013113e-05, "loss": 2.4585, "step": 819 }, { "epoch": 0.5356851216723828, "grad_norm": Infinity, "learning_rate": 9.363620995745337e-05, "loss": 2.5637, "step": 820 }, { "epoch": 0.5363383962110077, "grad_norm": Infinity, "learning_rate": 9.342438493902085e-05, "loss": 2.8806, "step": 821 }, { "epoch": 0.5369916707496325, "grad_norm": Infinity, "learning_rate": 9.321258954928393e-05, "loss": 2.8588, "step": 822 }, { "epoch": 0.5376449452882573, "grad_norm": Infinity, "learning_rate": 9.300082474255967e-05, "loss": 3.0883, "step": 823 }, { "epoch": 0.5382982198268822, "grad_norm": Infinity, "learning_rate": 9.278909147302727e-05, "loss": 3.3387, "step": 824 }, { "epoch": 0.5389514943655072, "grad_norm": Infinity, "learning_rate": 9.257739069472374e-05, "loss": 4.0844, "step": 825 }, { "epoch": 0.539604768904132, "grad_norm": Infinity, "learning_rate": 9.236572336153986e-05, "loss": 1.3864, "step": 826 }, { "epoch": 0.5402580434427569, "grad_norm": Infinity, "learning_rate": 9.215409042721552e-05, "loss": 1.3945, "step": 827 }, { "epoch": 0.5409113179813817, "grad_norm": Infinity, "learning_rate": 9.194249284533576e-05, "loss": 1.4255, "step": 828 }, { "epoch": 0.5415645925200065, "grad_norm": Infinity, "learning_rate": 9.173093156932623e-05, "loss": 1.3899, "step": 829 }, { "epoch": 0.5422178670586314, "grad_norm": Infinity, "learning_rate": 9.151940755244912e-05, "loss": 1.4801, "step": 830 }, { "epoch": 0.5428711415972562, "grad_norm": Infinity, "learning_rate": 9.130792174779854e-05, "loss": 1.5015, "step": 831 }, { "epoch": 0.5435244161358811, "grad_norm": Infinity, "learning_rate": 9.109647510829657e-05, "loss": 1.5265, "step": 832 }, { "epoch": 0.5441776906745059, "grad_norm": Infinity, "learning_rate": 9.088506858668888e-05, "loss": 1.5976, "step": 833 }, { "epoch": 0.5448309652131308, "grad_norm": Infinity, "learning_rate": 9.067370313554015e-05, "loss": 1.5211, "step": 834 }, { "epoch": 0.5454842397517556, "grad_norm": Infinity, "learning_rate": 9.04623797072302e-05, "loss": 1.7078, "step": 835 }, { "epoch": 0.5461375142903805, "grad_norm": Infinity, "learning_rate": 9.025109925394939e-05, "loss": 1.6801, "step": 836 }, { "epoch": 0.5467907888290053, "grad_norm": Infinity, "learning_rate": 9.003986272769455e-05, "loss": 1.755, "step": 837 }, { "epoch": 0.5474440633676303, "grad_norm": Infinity, "learning_rate": 8.982867108026442e-05, "loss": 1.796, "step": 838 }, { "epoch": 0.5480973379062551, "grad_norm": Infinity, "learning_rate": 8.961752526325565e-05, "loss": 1.9277, "step": 839 }, { "epoch": 0.54875061244488, "grad_norm": Infinity, "learning_rate": 8.94064262280584e-05, "loss": 2.0079, "step": 840 }, { "epoch": 0.5494038869835048, "grad_norm": Infinity, "learning_rate": 8.919537492585194e-05, "loss": 2.1747, "step": 841 }, { "epoch": 0.5500571615221297, "grad_norm": Infinity, "learning_rate": 8.898437230760058e-05, "loss": 2.2313, "step": 842 }, { "epoch": 0.5507104360607545, "grad_norm": Infinity, "learning_rate": 8.877341932404909e-05, "loss": 2.6037, "step": 843 }, { "epoch": 0.5513637105993794, "grad_norm": Infinity, "learning_rate": 8.856251692571879e-05, "loss": 2.5185, "step": 844 }, { "epoch": 0.5520169851380042, "grad_norm": Infinity, "learning_rate": 8.835166606290295e-05, "loss": 2.5605, "step": 845 }, { "epoch": 0.5526702596766291, "grad_norm": Infinity, "learning_rate": 8.814086768566272e-05, "loss": 3.0754, "step": 846 }, { "epoch": 0.5533235342152539, "grad_norm": Infinity, "learning_rate": 8.793012274382261e-05, "loss": 3.1223, "step": 847 }, { "epoch": 0.5539768087538788, "grad_norm": Infinity, "learning_rate": 8.771943218696649e-05, "loss": 3.3635, "step": 848 }, { "epoch": 0.5546300832925036, "grad_norm": Infinity, "learning_rate": 8.750879696443321e-05, "loss": 3.5695, "step": 849 }, { "epoch": 0.5552833578311285, "grad_norm": Infinity, "learning_rate": 8.729821802531212e-05, "loss": 4.3607, "step": 850 }, { "epoch": 0.5559366323697534, "grad_norm": Infinity, "learning_rate": 8.708769631843916e-05, "loss": 1.1649, "step": 851 }, { "epoch": 0.5565899069083783, "grad_norm": Infinity, "learning_rate": 8.687723279239222e-05, "loss": 1.412, "step": 852 }, { "epoch": 0.5572431814470031, "grad_norm": Infinity, "learning_rate": 8.666682839548719e-05, "loss": 1.341, "step": 853 }, { "epoch": 0.557896455985628, "grad_norm": Infinity, "learning_rate": 8.645648407577338e-05, "loss": 1.3507, "step": 854 }, { "epoch": 0.5585497305242528, "grad_norm": Infinity, "learning_rate": 8.624620078102951e-05, "loss": 1.4897, "step": 855 }, { "epoch": 0.5592030050628777, "grad_norm": Infinity, "learning_rate": 8.603597945875935e-05, "loss": 1.4492, "step": 856 }, { "epoch": 0.5598562796015025, "grad_norm": Infinity, "learning_rate": 8.582582105618733e-05, "loss": 1.4236, "step": 857 }, { "epoch": 0.5605095541401274, "grad_norm": Infinity, "learning_rate": 8.561572652025447e-05, "loss": 1.5401, "step": 858 }, { "epoch": 0.5611628286787522, "grad_norm": Infinity, "learning_rate": 8.540569679761391e-05, "loss": 1.6108, "step": 859 }, { "epoch": 0.5618161032173771, "grad_norm": Infinity, "learning_rate": 8.519573283462687e-05, "loss": 1.582, "step": 860 }, { "epoch": 0.5624693777560019, "grad_norm": Infinity, "learning_rate": 8.498583557735819e-05, "loss": 1.6815, "step": 861 }, { "epoch": 0.5631226522946268, "grad_norm": Infinity, "learning_rate": 8.47760059715722e-05, "loss": 1.5396, "step": 862 }, { "epoch": 0.5637759268332517, "grad_norm": Infinity, "learning_rate": 8.456624496272829e-05, "loss": 1.7314, "step": 863 }, { "epoch": 0.5644292013718766, "grad_norm": Infinity, "learning_rate": 8.435655349597689e-05, "loss": 1.9425, "step": 864 }, { "epoch": 0.5650824759105014, "grad_norm": Infinity, "learning_rate": 8.414693251615512e-05, "loss": 1.8167, "step": 865 }, { "epoch": 0.5657357504491263, "grad_norm": Infinity, "learning_rate": 8.393738296778228e-05, "loss": 2.0253, "step": 866 }, { "epoch": 0.5663890249877511, "grad_norm": Infinity, "learning_rate": 8.3727905795056e-05, "loss": 2.0881, "step": 867 }, { "epoch": 0.567042299526376, "grad_norm": Infinity, "learning_rate": 8.351850194184775e-05, "loss": 2.3032, "step": 868 }, { "epoch": 0.5676955740650008, "grad_norm": Infinity, "learning_rate": 8.330917235169867e-05, "loss": 2.4988, "step": 869 }, { "epoch": 0.5683488486036257, "grad_norm": Infinity, "learning_rate": 8.309991796781511e-05, "loss": 2.4876, "step": 870 }, { "epoch": 0.5690021231422505, "grad_norm": Infinity, "learning_rate": 8.289073973306478e-05, "loss": 2.951, "step": 871 }, { "epoch": 0.5696553976808754, "grad_norm": Infinity, "learning_rate": 8.268163858997215e-05, "loss": 2.8837, "step": 872 }, { "epoch": 0.5703086722195002, "grad_norm": Infinity, "learning_rate": 8.247261548071435e-05, "loss": 3.1574, "step": 873 }, { "epoch": 0.5709619467581251, "grad_norm": Infinity, "learning_rate": 8.226367134711692e-05, "loss": 3.2576, "step": 874 }, { "epoch": 0.5716152212967499, "grad_norm": Infinity, "learning_rate": 8.205480713064946e-05, "loss": 4.0353, "step": 875 }, { "epoch": 0.5722684958353749, "grad_norm": Infinity, "learning_rate": 8.184602377242168e-05, "loss": 1.2324, "step": 876 }, { "epoch": 0.5729217703739997, "grad_norm": Infinity, "learning_rate": 8.163732221317868e-05, "loss": 1.3859, "step": 877 }, { "epoch": 0.5735750449126246, "grad_norm": Infinity, "learning_rate": 8.142870339329723e-05, "loss": 1.4323, "step": 878 }, { "epoch": 0.5742283194512494, "grad_norm": Infinity, "learning_rate": 8.12201682527811e-05, "loss": 1.4522, "step": 879 }, { "epoch": 0.5748815939898743, "grad_norm": Infinity, "learning_rate": 8.101171773125716e-05, "loss": 1.5134, "step": 880 }, { "epoch": 0.5755348685284991, "grad_norm": Infinity, "learning_rate": 8.080335276797097e-05, "loss": 1.5799, "step": 881 }, { "epoch": 0.576188143067124, "grad_norm": Infinity, "learning_rate": 8.059507430178247e-05, "loss": 1.4346, "step": 882 }, { "epoch": 0.5768414176057488, "grad_norm": Infinity, "learning_rate": 8.0386883271162e-05, "loss": 1.5796, "step": 883 }, { "epoch": 0.5774946921443737, "grad_norm": Infinity, "learning_rate": 8.017878061418582e-05, "loss": 1.6774, "step": 884 }, { "epoch": 0.5781479666829985, "grad_norm": Infinity, "learning_rate": 7.99707672685321e-05, "loss": 1.6591, "step": 885 }, { "epoch": 0.5788012412216234, "grad_norm": Infinity, "learning_rate": 7.976284417147644e-05, "loss": 1.7876, "step": 886 }, { "epoch": 0.5794545157602482, "grad_norm": Infinity, "learning_rate": 7.955501225988792e-05, "loss": 1.7762, "step": 887 }, { "epoch": 0.580107790298873, "grad_norm": Infinity, "learning_rate": 7.934727247022475e-05, "loss": 1.8609, "step": 888 }, { "epoch": 0.580761064837498, "grad_norm": Infinity, "learning_rate": 7.913962573852996e-05, "loss": 1.9347, "step": 889 }, { "epoch": 0.5814143393761229, "grad_norm": Infinity, "learning_rate": 7.89320730004274e-05, "loss": 2.2418, "step": 890 }, { "epoch": 0.5820676139147477, "grad_norm": Infinity, "learning_rate": 7.87246151911172e-05, "loss": 2.1152, "step": 891 }, { "epoch": 0.5827208884533726, "grad_norm": Infinity, "learning_rate": 7.851725324537204e-05, "loss": 2.2936, "step": 892 }, { "epoch": 0.5833741629919974, "grad_norm": Infinity, "learning_rate": 7.830998809753237e-05, "loss": 2.5237, "step": 893 }, { "epoch": 0.5840274375306223, "grad_norm": Infinity, "learning_rate": 7.81028206815027e-05, "loss": 2.33, "step": 894 }, { "epoch": 0.5846807120692471, "grad_norm": Infinity, "learning_rate": 7.789575193074704e-05, "loss": 2.8523, "step": 895 }, { "epoch": 0.585333986607872, "grad_norm": Infinity, "learning_rate": 7.768878277828486e-05, "loss": 2.9459, "step": 896 }, { "epoch": 0.5859872611464968, "grad_norm": Infinity, "learning_rate": 7.748191415668698e-05, "loss": 3.0787, "step": 897 }, { "epoch": 0.5866405356851216, "grad_norm": Infinity, "learning_rate": 7.727514699807102e-05, "loss": 2.9497, "step": 898 }, { "epoch": 0.5872938102237465, "grad_norm": Infinity, "learning_rate": 7.706848223409759e-05, "loss": 3.8379, "step": 899 }, { "epoch": 0.5879470847623713, "grad_norm": Infinity, "learning_rate": 7.686192079596586e-05, "loss": 4.5886, "step": 900 }, { "epoch": 0.5886003593009962, "grad_norm": Infinity, "learning_rate": 7.66554636144095e-05, "loss": 1.2221, "step": 901 }, { "epoch": 0.5892536338396211, "grad_norm": Infinity, "learning_rate": 7.644911161969226e-05, "loss": 1.3855, "step": 902 }, { "epoch": 0.589906908378246, "grad_norm": Infinity, "learning_rate": 7.624286574160409e-05, "loss": 1.3057, "step": 903 }, { "epoch": 0.5905601829168708, "grad_norm": Infinity, "learning_rate": 7.603672690945682e-05, "loss": 1.4719, "step": 904 }, { "epoch": 0.5912134574554957, "grad_norm": Infinity, "learning_rate": 7.583069605207975e-05, "loss": 1.3908, "step": 905 }, { "epoch": 0.5918667319941205, "grad_norm": Infinity, "learning_rate": 7.562477409781586e-05, "loss": 1.5683, "step": 906 }, { "epoch": 0.5925200065327454, "grad_norm": Infinity, "learning_rate": 7.541896197451727e-05, "loss": 1.4981, "step": 907 }, { "epoch": 0.5931732810713702, "grad_norm": Infinity, "learning_rate": 7.521326060954143e-05, "loss": 1.5204, "step": 908 }, { "epoch": 0.5938265556099951, "grad_norm": Infinity, "learning_rate": 7.500767092974647e-05, "loss": 1.6046, "step": 909 }, { "epoch": 0.5944798301486199, "grad_norm": Infinity, "learning_rate": 7.48021938614875e-05, "loss": 1.4955, "step": 910 }, { "epoch": 0.5951331046872448, "grad_norm": Infinity, "learning_rate": 7.459683033061208e-05, "loss": 1.6148, "step": 911 }, { "epoch": 0.5957863792258696, "grad_norm": Infinity, "learning_rate": 7.439158126245627e-05, "loss": 1.6611, "step": 912 }, { "epoch": 0.5964396537644945, "grad_norm": Infinity, "learning_rate": 7.418644758184038e-05, "loss": 1.9497, "step": 913 }, { "epoch": 0.5970929283031194, "grad_norm": Infinity, "learning_rate": 7.398143021306471e-05, "loss": 1.964, "step": 914 }, { "epoch": 0.5977462028417443, "grad_norm": Infinity, "learning_rate": 7.37765300799056e-05, "loss": 2.007, "step": 915 }, { "epoch": 0.5983994773803691, "grad_norm": Infinity, "learning_rate": 7.357174810561103e-05, "loss": 2.3996, "step": 916 }, { "epoch": 0.599052751918994, "grad_norm": Infinity, "learning_rate": 7.336708521289669e-05, "loss": 2.3556, "step": 917 }, { "epoch": 0.5997060264576188, "grad_norm": Infinity, "learning_rate": 7.316254232394158e-05, "loss": 2.3301, "step": 918 }, { "epoch": 0.6003593009962437, "grad_norm": Infinity, "learning_rate": 7.295812036038407e-05, "loss": 2.5678, "step": 919 }, { "epoch": 0.6010125755348685, "grad_norm": Infinity, "learning_rate": 7.275382024331772e-05, "loss": 2.7143, "step": 920 }, { "epoch": 0.6016658500734934, "grad_norm": Infinity, "learning_rate": 7.254964289328688e-05, "loss": 2.7722, "step": 921 }, { "epoch": 0.6023191246121182, "grad_norm": Infinity, "learning_rate": 7.234558923028289e-05, "loss": 3.4389, "step": 922 }, { "epoch": 0.6029723991507431, "grad_norm": Infinity, "learning_rate": 7.21416601737397e-05, "loss": 3.2806, "step": 923 }, { "epoch": 0.6036256736893679, "grad_norm": Infinity, "learning_rate": 7.193785664252987e-05, "loss": 3.4732, "step": 924 }, { "epoch": 0.6042789482279928, "grad_norm": Infinity, "learning_rate": 7.173417955496024e-05, "loss": 4.3247, "step": 925 }, { "epoch": 0.6049322227666176, "grad_norm": Infinity, "learning_rate": 7.153062982876805e-05, "loss": 1.3828, "step": 926 }, { "epoch": 0.6055854973052426, "grad_norm": Infinity, "learning_rate": 7.132720838111659e-05, "loss": 1.3161, "step": 927 }, { "epoch": 0.6062387718438674, "grad_norm": Infinity, "learning_rate": 7.112391612859118e-05, "loss": 1.4399, "step": 928 }, { "epoch": 0.6068920463824923, "grad_norm": Infinity, "learning_rate": 7.092075398719502e-05, "loss": 1.3693, "step": 929 }, { "epoch": 0.6075453209211171, "grad_norm": Infinity, "learning_rate": 7.071772287234497e-05, "loss": 1.453, "step": 930 }, { "epoch": 0.608198595459742, "grad_norm": Infinity, "learning_rate": 7.051482369886761e-05, "loss": 1.4425, "step": 931 }, { "epoch": 0.6088518699983668, "grad_norm": Infinity, "learning_rate": 7.031205738099491e-05, "loss": 1.5857, "step": 932 }, { "epoch": 0.6095051445369917, "grad_norm": Infinity, "learning_rate": 7.010942483236036e-05, "loss": 1.6388, "step": 933 }, { "epoch": 0.6101584190756165, "grad_norm": Infinity, "learning_rate": 6.990692696599447e-05, "loss": 1.6966, "step": 934 }, { "epoch": 0.6108116936142414, "grad_norm": Infinity, "learning_rate": 6.970456469432117e-05, "loss": 1.7996, "step": 935 }, { "epoch": 0.6114649681528662, "grad_norm": Infinity, "learning_rate": 6.950233892915323e-05, "loss": 1.6195, "step": 936 }, { "epoch": 0.6121182426914911, "grad_norm": Infinity, "learning_rate": 6.93002505816884e-05, "loss": 1.6846, "step": 937 }, { "epoch": 0.6127715172301159, "grad_norm": Infinity, "learning_rate": 6.909830056250527e-05, "loss": 1.9402, "step": 938 }, { "epoch": 0.6134247917687408, "grad_norm": Infinity, "learning_rate": 6.889648978155909e-05, "loss": 1.8907, "step": 939 }, { "epoch": 0.6140780663073657, "grad_norm": Infinity, "learning_rate": 6.869481914817779e-05, "loss": 1.855, "step": 940 }, { "epoch": 0.6147313408459906, "grad_norm": Infinity, "learning_rate": 6.849328957105773e-05, "loss": 2.1814, "step": 941 }, { "epoch": 0.6153846153846154, "grad_norm": Infinity, "learning_rate": 6.829190195825983e-05, "loss": 2.4298, "step": 942 }, { "epoch": 0.6160378899232403, "grad_norm": Infinity, "learning_rate": 6.809065721720516e-05, "loss": 2.5652, "step": 943 }, { "epoch": 0.6166911644618651, "grad_norm": Infinity, "learning_rate": 6.788955625467119e-05, "loss": 2.3691, "step": 944 }, { "epoch": 0.61734443900049, "grad_norm": Infinity, "learning_rate": 6.768859997678751e-05, "loss": 2.7052, "step": 945 }, { "epoch": 0.6179977135391148, "grad_norm": Infinity, "learning_rate": 6.748778928903166e-05, "loss": 3.2163, "step": 946 }, { "epoch": 0.6186509880777397, "grad_norm": Infinity, "learning_rate": 6.728712509622539e-05, "loss": 2.8301, "step": 947 }, { "epoch": 0.6193042626163645, "grad_norm": Infinity, "learning_rate": 6.708660830253015e-05, "loss": 3.2148, "step": 948 }, { "epoch": 0.6199575371549894, "grad_norm": Infinity, "learning_rate": 6.688623981144339e-05, "loss": 3.767, "step": 949 }, { "epoch": 0.6206108116936142, "grad_norm": Infinity, "learning_rate": 6.668602052579424e-05, "loss": 4.4411, "step": 950 }, { "epoch": 0.621264086232239, "grad_norm": Infinity, "learning_rate": 6.648595134773958e-05, "loss": 1.2227, "step": 951 }, { "epoch": 0.6219173607708639, "grad_norm": Infinity, "learning_rate": 6.628603317875996e-05, "loss": 1.466, "step": 952 }, { "epoch": 0.6225706353094889, "grad_norm": Infinity, "learning_rate": 6.608626691965541e-05, "loss": 1.3718, "step": 953 }, { "epoch": 0.6232239098481137, "grad_norm": Infinity, "learning_rate": 6.588665347054153e-05, "loss": 1.4628, "step": 954 }, { "epoch": 0.6238771843867386, "grad_norm": Infinity, "learning_rate": 6.568719373084538e-05, "loss": 1.4955, "step": 955 }, { "epoch": 0.6245304589253634, "grad_norm": Infinity, "learning_rate": 6.548788859930146e-05, "loss": 1.5999, "step": 956 }, { "epoch": 0.6251837334639883, "grad_norm": Infinity, "learning_rate": 6.528873897394757e-05, "loss": 1.5084, "step": 957 }, { "epoch": 0.6258370080026131, "grad_norm": Infinity, "learning_rate": 6.508974575212088e-05, "loss": 1.4152, "step": 958 }, { "epoch": 0.626490282541238, "grad_norm": Infinity, "learning_rate": 6.489090983045379e-05, "loss": 1.6017, "step": 959 }, { "epoch": 0.6271435570798628, "grad_norm": Infinity, "learning_rate": 6.469223210486992e-05, "loss": 1.642, "step": 960 }, { "epoch": 0.6277968316184876, "grad_norm": Infinity, "learning_rate": 6.449371347058019e-05, "loss": 1.6396, "step": 961 }, { "epoch": 0.6284501061571125, "grad_norm": Infinity, "learning_rate": 6.429535482207846e-05, "loss": 1.8072, "step": 962 }, { "epoch": 0.6291033806957373, "grad_norm": Infinity, "learning_rate": 6.409715705313804e-05, "loss": 1.7597, "step": 963 }, { "epoch": 0.6297566552343622, "grad_norm": Infinity, "learning_rate": 6.389912105680697e-05, "loss": 1.8747, "step": 964 }, { "epoch": 0.630409929772987, "grad_norm": Infinity, "learning_rate": 6.370124772540469e-05, "loss": 2.0892, "step": 965 }, { "epoch": 0.631063204311612, "grad_norm": Infinity, "learning_rate": 6.350353795051748e-05, "loss": 2.1664, "step": 966 }, { "epoch": 0.6317164788502369, "grad_norm": Infinity, "learning_rate": 6.330599262299479e-05, "loss": 2.094, "step": 967 }, { "epoch": 0.6323697533888617, "grad_norm": Infinity, "learning_rate": 6.310861263294507e-05, "loss": 2.3747, "step": 968 }, { "epoch": 0.6330230279274865, "grad_norm": Infinity, "learning_rate": 6.291139886973169e-05, "loss": 2.2346, "step": 969 }, { "epoch": 0.6336763024661114, "grad_norm": Infinity, "learning_rate": 6.271435222196916e-05, "loss": 2.6554, "step": 970 }, { "epoch": 0.6343295770047362, "grad_norm": Infinity, "learning_rate": 6.251747357751889e-05, "loss": 2.7962, "step": 971 }, { "epoch": 0.6349828515433611, "grad_norm": Infinity, "learning_rate": 6.232076382348539e-05, "loss": 3.1112, "step": 972 }, { "epoch": 0.6356361260819859, "grad_norm": Infinity, "learning_rate": 6.212422384621208e-05, "loss": 3.0868, "step": 973 }, { "epoch": 0.6362894006206108, "grad_norm": Infinity, "learning_rate": 6.192785453127748e-05, "loss": 3.2682, "step": 974 }, { "epoch": 0.6369426751592356, "grad_norm": Infinity, "learning_rate": 6.173165676349103e-05, "loss": 4.1133, "step": 975 }, { "epoch": 0.6375959496978605, "grad_norm": Infinity, "learning_rate": 6.15356314268893e-05, "loss": 1.2235, "step": 976 }, { "epoch": 0.6382492242364853, "grad_norm": Infinity, "learning_rate": 6.13397794047319e-05, "loss": 1.3234, "step": 977 }, { "epoch": 0.6389024987751103, "grad_norm": Infinity, "learning_rate": 6.114410157949745e-05, "loss": 1.4325, "step": 978 }, { "epoch": 0.6395557733137351, "grad_norm": Infinity, "learning_rate": 6.094859883287977e-05, "loss": 1.3914, "step": 979 }, { "epoch": 0.64020904785236, "grad_norm": Infinity, "learning_rate": 6.0753272045783625e-05, "loss": 1.4688, "step": 980 }, { "epoch": 0.6408623223909848, "grad_norm": Infinity, "learning_rate": 6.0558122098321144e-05, "loss": 1.4596, "step": 981 }, { "epoch": 0.6415155969296097, "grad_norm": Infinity, "learning_rate": 6.036314986980749e-05, "loss": 1.5476, "step": 982 }, { "epoch": 0.6421688714682345, "grad_norm": Infinity, "learning_rate": 6.016835623875716e-05, "loss": 1.5854, "step": 983 }, { "epoch": 0.6428221460068594, "grad_norm": Infinity, "learning_rate": 5.9973742082879815e-05, "loss": 1.7013, "step": 984 }, { "epoch": 0.6434754205454842, "grad_norm": Infinity, "learning_rate": 5.977930827907649e-05, "loss": 1.7004, "step": 985 }, { "epoch": 0.6441286950841091, "grad_norm": Infinity, "learning_rate": 5.958505570343561e-05, "loss": 1.7823, "step": 986 }, { "epoch": 0.6447819696227339, "grad_norm": Infinity, "learning_rate": 5.9390985231228936e-05, "loss": 1.7044, "step": 987 }, { "epoch": 0.6454352441613588, "grad_norm": Infinity, "learning_rate": 5.91970977369078e-05, "loss": 1.9262, "step": 988 }, { "epoch": 0.6460885186999836, "grad_norm": Infinity, "learning_rate": 5.90033940940989e-05, "loss": 1.9572, "step": 989 }, { "epoch": 0.6467417932386085, "grad_norm": Infinity, "learning_rate": 5.880987517560075e-05, "loss": 1.9857, "step": 990 }, { "epoch": 0.6473950677772334, "grad_norm": Infinity, "learning_rate": 5.861654185337933e-05, "loss": 2.3491, "step": 991 }, { "epoch": 0.6480483423158583, "grad_norm": Infinity, "learning_rate": 5.842339499856444e-05, "loss": 2.3709, "step": 992 }, { "epoch": 0.6487016168544831, "grad_norm": Infinity, "learning_rate": 5.8230435481445734e-05, "loss": 2.3352, "step": 993 }, { "epoch": 0.649354891393108, "grad_norm": Infinity, "learning_rate": 5.8037664171468596e-05, "loss": 2.7182, "step": 994 }, { "epoch": 0.6500081659317328, "grad_norm": Infinity, "learning_rate": 5.784508193723057e-05, "loss": 2.823, "step": 995 }, { "epoch": 0.6506614404703577, "grad_norm": Infinity, "learning_rate": 5.765268964647712e-05, "loss": 2.8539, "step": 996 }, { "epoch": 0.6513147150089825, "grad_norm": Infinity, "learning_rate": 5.746048816609788e-05, "loss": 2.871, "step": 997 }, { "epoch": 0.6519679895476074, "grad_norm": Infinity, "learning_rate": 5.726847836212277e-05, "loss": 3.035, "step": 998 }, { "epoch": 0.6526212640862322, "grad_norm": Infinity, "learning_rate": 5.7076661099717986e-05, "loss": 4.0633, "step": 999 }, { "epoch": 0.6532745386248571, "grad_norm": Infinity, "learning_rate": 5.688503724318217e-05, "loss": 3.8549, "step": 1000 }, { "epoch": 0.6539278131634819, "grad_norm": Infinity, "learning_rate": 5.669360765594258e-05, "loss": 1.2725, "step": 1001 }, { "epoch": 0.6545810877021068, "grad_norm": Infinity, "learning_rate": 5.6502373200551065e-05, "loss": 1.3742, "step": 1002 }, { "epoch": 0.6552343622407316, "grad_norm": Infinity, "learning_rate": 5.631133473868018e-05, "loss": 1.3706, "step": 1003 }, { "epoch": 0.6558876367793566, "grad_norm": Infinity, "learning_rate": 5.6120493131119555e-05, "loss": 1.4821, "step": 1004 }, { "epoch": 0.6565409113179814, "grad_norm": Infinity, "learning_rate": 5.5929849237771556e-05, "loss": 1.4066, "step": 1005 }, { "epoch": 0.6571941858566063, "grad_norm": Infinity, "learning_rate": 5.573940391764796e-05, "loss": 1.4638, "step": 1006 }, { "epoch": 0.6578474603952311, "grad_norm": Infinity, "learning_rate": 5.554915802886558e-05, "loss": 1.4518, "step": 1007 }, { "epoch": 0.658500734933856, "grad_norm": Infinity, "learning_rate": 5.535911242864271e-05, "loss": 1.515, "step": 1008 }, { "epoch": 0.6591540094724808, "grad_norm": Infinity, "learning_rate": 5.5169267973295294e-05, "loss": 1.5506, "step": 1009 }, { "epoch": 0.6598072840111057, "grad_norm": Infinity, "learning_rate": 5.497962551823266e-05, "loss": 1.6251, "step": 1010 }, { "epoch": 0.6604605585497305, "grad_norm": Infinity, "learning_rate": 5.479018591795426e-05, "loss": 1.5996, "step": 1011 }, { "epoch": 0.6611138330883554, "grad_norm": Infinity, "learning_rate": 5.4600950026045326e-05, "loss": 1.6873, "step": 1012 }, { "epoch": 0.6617671076269802, "grad_norm": Infinity, "learning_rate": 5.441191869517328e-05, "loss": 1.7156, "step": 1013 }, { "epoch": 0.6624203821656051, "grad_norm": Infinity, "learning_rate": 5.422309277708379e-05, "loss": 1.8523, "step": 1014 }, { "epoch": 0.6630736567042299, "grad_norm": Infinity, "learning_rate": 5.403447312259702e-05, "loss": 2.0574, "step": 1015 }, { "epoch": 0.6637269312428548, "grad_norm": Infinity, "learning_rate": 5.384606058160363e-05, "loss": 1.8924, "step": 1016 }, { "epoch": 0.6643802057814797, "grad_norm": Infinity, "learning_rate": 5.365785600306124e-05, "loss": 2.3307, "step": 1017 }, { "epoch": 0.6650334803201046, "grad_norm": Infinity, "learning_rate": 5.346986023499027e-05, "loss": 2.5785, "step": 1018 }, { "epoch": 0.6656867548587294, "grad_norm": Infinity, "learning_rate": 5.3282074124470284e-05, "loss": 2.4408, "step": 1019 }, { "epoch": 0.6663400293973543, "grad_norm": Infinity, "learning_rate": 5.309449851763633e-05, "loss": 2.4936, "step": 1020 }, { "epoch": 0.6669933039359791, "grad_norm": Infinity, "learning_rate": 5.290713425967466e-05, "loss": 2.7817, "step": 1021 }, { "epoch": 0.667646578474604, "grad_norm": Infinity, "learning_rate": 5.271998219481953e-05, "loss": 3.0024, "step": 1022 }, { "epoch": 0.6682998530132288, "grad_norm": Infinity, "learning_rate": 5.2533043166348886e-05, "loss": 2.867, "step": 1023 }, { "epoch": 0.6689531275518537, "grad_norm": Infinity, "learning_rate": 5.234631801658081e-05, "loss": 3.1437, "step": 1024 }, { "epoch": 0.6696064020904785, "grad_norm": Infinity, "learning_rate": 5.215980758686978e-05, "loss": 3.8861, "step": 1025 }, { "epoch": 0.6702596766291034, "grad_norm": Infinity, "learning_rate": 5.197351271760258e-05, "loss": 1.2455, "step": 1026 }, { "epoch": 0.6709129511677282, "grad_norm": Infinity, "learning_rate": 5.178743424819492e-05, "loss": 1.4044, "step": 1027 }, { "epoch": 0.671566225706353, "grad_norm": Infinity, "learning_rate": 5.160157301708732e-05, "loss": 1.4414, "step": 1028 }, { "epoch": 0.6722195002449779, "grad_norm": Infinity, "learning_rate": 5.141592986174151e-05, "loss": 1.4843, "step": 1029 }, { "epoch": 0.6728727747836029, "grad_norm": Infinity, "learning_rate": 5.123050561863657e-05, "loss": 1.473, "step": 1030 }, { "epoch": 0.6735260493222277, "grad_norm": Infinity, "learning_rate": 5.104530112326522e-05, "loss": 1.5881, "step": 1031 }, { "epoch": 0.6741793238608526, "grad_norm": Infinity, "learning_rate": 5.086031721012998e-05, "loss": 1.5053, "step": 1032 }, { "epoch": 0.6748325983994774, "grad_norm": Infinity, "learning_rate": 5.067555471273957e-05, "loss": 1.5942, "step": 1033 }, { "epoch": 0.6754858729381022, "grad_norm": Infinity, "learning_rate": 5.049101446360498e-05, "loss": 1.5184, "step": 1034 }, { "epoch": 0.6761391474767271, "grad_norm": Infinity, "learning_rate": 5.0306697294235714e-05, "loss": 1.6699, "step": 1035 }, { "epoch": 0.676792422015352, "grad_norm": Infinity, "learning_rate": 5.01226040351363e-05, "loss": 1.7125, "step": 1036 }, { "epoch": 0.6774456965539768, "grad_norm": Infinity, "learning_rate": 4.99387355158021e-05, "loss": 1.8265, "step": 1037 }, { "epoch": 0.6780989710926016, "grad_norm": Infinity, "learning_rate": 4.97550925647161e-05, "loss": 1.8673, "step": 1038 }, { "epoch": 0.6787522456312265, "grad_norm": Infinity, "learning_rate": 4.957167600934474e-05, "loss": 1.935, "step": 1039 }, { "epoch": 0.6794055201698513, "grad_norm": Infinity, "learning_rate": 4.938848667613436e-05, "loss": 2.0304, "step": 1040 }, { "epoch": 0.6800587947084762, "grad_norm": Infinity, "learning_rate": 4.9205525390507644e-05, "loss": 1.9897, "step": 1041 }, { "epoch": 0.6807120692471011, "grad_norm": Infinity, "learning_rate": 4.9022792976859455e-05, "loss": 2.2204, "step": 1042 }, { "epoch": 0.681365343785726, "grad_norm": Infinity, "learning_rate": 4.884029025855364e-05, "loss": 2.268, "step": 1043 }, { "epoch": 0.6820186183243508, "grad_norm": Infinity, "learning_rate": 4.8658018057918964e-05, "loss": 2.5273, "step": 1044 }, { "epoch": 0.6826718928629757, "grad_norm": Infinity, "learning_rate": 4.8475977196245504e-05, "loss": 2.3542, "step": 1045 }, { "epoch": 0.6833251674016005, "grad_norm": Infinity, "learning_rate": 4.8294168493780955e-05, "loss": 2.7903, "step": 1046 }, { "epoch": 0.6839784419402254, "grad_norm": Infinity, "learning_rate": 4.8112592769727085e-05, "loss": 2.9665, "step": 1047 }, { "epoch": 0.6846317164788502, "grad_norm": Infinity, "learning_rate": 4.793125084223562e-05, "loss": 3.3895, "step": 1048 }, { "epoch": 0.6852849910174751, "grad_norm": Infinity, "learning_rate": 4.7750143528405126e-05, "loss": 3.2519, "step": 1049 }, { "epoch": 0.6859382655560999, "grad_norm": Infinity, "learning_rate": 4.756927164427685e-05, "loss": 4.7098, "step": 1050 }, { "epoch": 0.6865915400947248, "grad_norm": Infinity, "learning_rate": 4.738863600483125e-05, "loss": 1.2615, "step": 1051 }, { "epoch": 0.6872448146333496, "grad_norm": Infinity, "learning_rate": 4.720823742398447e-05, "loss": 1.3003, "step": 1052 }, { "epoch": 0.6878980891719745, "grad_norm": Infinity, "learning_rate": 4.70280767145842e-05, "loss": 1.46, "step": 1053 }, { "epoch": 0.6885513637105993, "grad_norm": Infinity, "learning_rate": 4.684815468840662e-05, "loss": 1.4526, "step": 1054 }, { "epoch": 0.6892046382492243, "grad_norm": Infinity, "learning_rate": 4.666847215615226e-05, "loss": 1.4448, "step": 1055 }, { "epoch": 0.6898579127878491, "grad_norm": Infinity, "learning_rate": 4.648902992744255e-05, "loss": 1.4982, "step": 1056 }, { "epoch": 0.690511187326474, "grad_norm": Infinity, "learning_rate": 4.630982881081629e-05, "loss": 1.5638, "step": 1057 }, { "epoch": 0.6911644618650988, "grad_norm": Infinity, "learning_rate": 4.6130869613725614e-05, "loss": 1.5881, "step": 1058 }, { "epoch": 0.6918177364037237, "grad_norm": Infinity, "learning_rate": 4.595215314253285e-05, "loss": 1.6855, "step": 1059 }, { "epoch": 0.6924710109423485, "grad_norm": Infinity, "learning_rate": 4.57736802025065e-05, "loss": 1.7135, "step": 1060 }, { "epoch": 0.6931242854809734, "grad_norm": Infinity, "learning_rate": 4.5595451597817795e-05, "loss": 1.6494, "step": 1061 }, { "epoch": 0.6937775600195982, "grad_norm": Infinity, "learning_rate": 4.5417468131536975e-05, "loss": 1.7395, "step": 1062 }, { "epoch": 0.6944308345582231, "grad_norm": Infinity, "learning_rate": 4.52397306056299e-05, "loss": 1.8408, "step": 1063 }, { "epoch": 0.6950841090968479, "grad_norm": Infinity, "learning_rate": 4.5062239820953986e-05, "loss": 1.9947, "step": 1064 }, { "epoch": 0.6957373836354728, "grad_norm": Infinity, "learning_rate": 4.488499657725511e-05, "loss": 2.0566, "step": 1065 }, { "epoch": 0.6963906581740976, "grad_norm": Infinity, "learning_rate": 4.470800167316367e-05, "loss": 2.0069, "step": 1066 }, { "epoch": 0.6970439327127225, "grad_norm": Infinity, "learning_rate": 4.453125590619104e-05, "loss": 2.2135, "step": 1067 }, { "epoch": 0.6976972072513474, "grad_norm": Infinity, "learning_rate": 4.4354760072726176e-05, "loss": 2.2858, "step": 1068 }, { "epoch": 0.6983504817899723, "grad_norm": Infinity, "learning_rate": 4.417851496803164e-05, "loss": 2.7049, "step": 1069 }, { "epoch": 0.6990037563285971, "grad_norm": Infinity, "learning_rate": 4.4002521386240466e-05, "loss": 2.6026, "step": 1070 }, { "epoch": 0.699657030867222, "grad_norm": Infinity, "learning_rate": 4.382678012035227e-05, "loss": 2.8106, "step": 1071 }, { "epoch": 0.7003103054058468, "grad_norm": Infinity, "learning_rate": 4.365129196222972e-05, "loss": 2.8605, "step": 1072 }, { "epoch": 0.7009635799444717, "grad_norm": Infinity, "learning_rate": 4.347605770259522e-05, "loss": 3.2817, "step": 1073 }, { "epoch": 0.7016168544830965, "grad_norm": Infinity, "learning_rate": 4.3301078131026826e-05, "loss": 3.3869, "step": 1074 }, { "epoch": 0.7022701290217214, "grad_norm": Infinity, "learning_rate": 4.312635403595532e-05, "loss": 4.4504, "step": 1075 }, { "epoch": 0.7029234035603462, "grad_norm": Infinity, "learning_rate": 4.2951886204660175e-05, "loss": 1.2931, "step": 1076 }, { "epoch": 0.7035766780989711, "grad_norm": Infinity, "learning_rate": 4.2777675423266227e-05, "loss": 1.3579, "step": 1077 }, { "epoch": 0.7042299526375959, "grad_norm": Infinity, "learning_rate": 4.260372247674004e-05, "loss": 1.4018, "step": 1078 }, { "epoch": 0.7048832271762208, "grad_norm": Infinity, "learning_rate": 4.243002814888656e-05, "loss": 1.415, "step": 1079 }, { "epoch": 0.7055365017148456, "grad_norm": Infinity, "learning_rate": 4.2256593222345185e-05, "loss": 1.4629, "step": 1080 }, { "epoch": 0.7061897762534706, "grad_norm": Infinity, "learning_rate": 4.2083418478586755e-05, "loss": 1.478, "step": 1081 }, { "epoch": 0.7068430507920954, "grad_norm": Infinity, "learning_rate": 4.1910504697909614e-05, "loss": 1.5336, "step": 1082 }, { "epoch": 0.7074963253307203, "grad_norm": Infinity, "learning_rate": 4.173785265943624e-05, "loss": 1.5161, "step": 1083 }, { "epoch": 0.7081495998693451, "grad_norm": Infinity, "learning_rate": 4.1565463141109894e-05, "loss": 1.5435, "step": 1084 }, { "epoch": 0.70880287440797, "grad_norm": Infinity, "learning_rate": 4.139333691969071e-05, "loss": 1.7314, "step": 1085 }, { "epoch": 0.7094561489465948, "grad_norm": Infinity, "learning_rate": 4.12214747707527e-05, "loss": 1.6485, "step": 1086 }, { "epoch": 0.7101094234852197, "grad_norm": Infinity, "learning_rate": 4.1049877468679856e-05, "loss": 1.8288, "step": 1087 }, { "epoch": 0.7107626980238445, "grad_norm": Infinity, "learning_rate": 4.087854578666282e-05, "loss": 1.945, "step": 1088 }, { "epoch": 0.7114159725624694, "grad_norm": Infinity, "learning_rate": 4.0707480496695514e-05, "loss": 2.0181, "step": 1089 }, { "epoch": 0.7120692471010942, "grad_norm": Infinity, "learning_rate": 4.053668236957134e-05, "loss": 2.1645, "step": 1090 }, { "epoch": 0.712722521639719, "grad_norm": Infinity, "learning_rate": 4.036615217488009e-05, "loss": 2.2705, "step": 1091 }, { "epoch": 0.7133757961783439, "grad_norm": Infinity, "learning_rate": 4.01958906810042e-05, "loss": 2.2283, "step": 1092 }, { "epoch": 0.7140290707169689, "grad_norm": Infinity, "learning_rate": 4.0025898655115394e-05, "loss": 2.4277, "step": 1093 }, { "epoch": 0.7146823452555937, "grad_norm": Infinity, "learning_rate": 3.985617686317118e-05, "loss": 2.6549, "step": 1094 }, { "epoch": 0.7153356197942186, "grad_norm": Infinity, "learning_rate": 3.96867260699116e-05, "loss": 2.3398, "step": 1095 }, { "epoch": 0.7159888943328434, "grad_norm": Infinity, "learning_rate": 3.951754703885533e-05, "loss": 2.6731, "step": 1096 }, { "epoch": 0.7166421688714683, "grad_norm": Infinity, "learning_rate": 3.934864053229681e-05, "loss": 2.6785, "step": 1097 }, { "epoch": 0.7172954434100931, "grad_norm": Infinity, "learning_rate": 3.918000731130238e-05, "loss": 3.2614, "step": 1098 }, { "epoch": 0.717948717948718, "grad_norm": Infinity, "learning_rate": 3.9011648135706966e-05, "loss": 3.9284, "step": 1099 }, { "epoch": 0.7186019924873428, "grad_norm": Infinity, "learning_rate": 3.884356376411089e-05, "loss": 4.2049, "step": 1100 }, { "epoch": 0.7192552670259676, "grad_norm": Infinity, "learning_rate": 3.8675754953875956e-05, "loss": 1.3125, "step": 1101 }, { "epoch": 0.7199085415645925, "grad_norm": Infinity, "learning_rate": 3.8508222461122567e-05, "loss": 1.3905, "step": 1102 }, { "epoch": 0.7205618161032173, "grad_norm": Infinity, "learning_rate": 3.8340967040725995e-05, "loss": 1.4399, "step": 1103 }, { "epoch": 0.7212150906418422, "grad_norm": Infinity, "learning_rate": 3.817398944631302e-05, "loss": 1.4085, "step": 1104 }, { "epoch": 0.721868365180467, "grad_norm": Infinity, "learning_rate": 3.800729043025871e-05, "loss": 1.463, "step": 1105 }, { "epoch": 0.722521639719092, "grad_norm": Infinity, "learning_rate": 3.784087074368279e-05, "loss": 1.4755, "step": 1106 }, { "epoch": 0.7231749142577168, "grad_norm": Infinity, "learning_rate": 3.767473113644641e-05, "loss": 1.5932, "step": 1107 }, { "epoch": 0.7238281887963417, "grad_norm": Infinity, "learning_rate": 3.750887235714872e-05, "loss": 1.6179, "step": 1108 }, { "epoch": 0.7244814633349665, "grad_norm": Infinity, "learning_rate": 3.734329515312349e-05, "loss": 1.5271, "step": 1109 }, { "epoch": 0.7251347378735914, "grad_norm": Infinity, "learning_rate": 3.717800027043576e-05, "loss": 1.5595, "step": 1110 }, { "epoch": 0.7257880124122162, "grad_norm": Infinity, "learning_rate": 3.7012988453878586e-05, "loss": 1.6898, "step": 1111 }, { "epoch": 0.7264412869508411, "grad_norm": Infinity, "learning_rate": 3.6848260446969306e-05, "loss": 1.7962, "step": 1112 }, { "epoch": 0.7270945614894659, "grad_norm": Infinity, "learning_rate": 3.668381699194676e-05, "loss": 1.7829, "step": 1113 }, { "epoch": 0.7277478360280908, "grad_norm": Infinity, "learning_rate": 3.6519658829767455e-05, "loss": 1.874, "step": 1114 }, { "epoch": 0.7284011105667156, "grad_norm": Infinity, "learning_rate": 3.635578670010242e-05, "loss": 1.9904, "step": 1115 }, { "epoch": 0.7290543851053405, "grad_norm": Infinity, "learning_rate": 3.619220134133402e-05, "loss": 1.9767, "step": 1116 }, { "epoch": 0.7297076596439653, "grad_norm": Infinity, "learning_rate": 3.602890349055221e-05, "loss": 2.2286, "step": 1117 }, { "epoch": 0.7303609341825902, "grad_norm": Infinity, "learning_rate": 3.586589388355176e-05, "loss": 2.4116, "step": 1118 }, { "epoch": 0.7310142087212151, "grad_norm": Infinity, "learning_rate": 3.570317325482847e-05, "loss": 2.5585, "step": 1119 }, { "epoch": 0.73166748325984, "grad_norm": Infinity, "learning_rate": 3.554074233757608e-05, "loss": 2.3875, "step": 1120 }, { "epoch": 0.7323207577984648, "grad_norm": Infinity, "learning_rate": 3.537860186368305e-05, "loss": 2.9333, "step": 1121 }, { "epoch": 0.7329740323370897, "grad_norm": Infinity, "learning_rate": 3.5216752563729e-05, "loss": 3.1705, "step": 1122 }, { "epoch": 0.7336273068757145, "grad_norm": Infinity, "learning_rate": 3.5055195166981645e-05, "loss": 3.3577, "step": 1123 }, { "epoch": 0.7342805814143394, "grad_norm": Infinity, "learning_rate": 3.4893930401393406e-05, "loss": 3.4905, "step": 1124 }, { "epoch": 0.7349338559529642, "grad_norm": Infinity, "learning_rate": 3.4732958993598154e-05, "loss": 4.1618, "step": 1125 }, { "epoch": 0.7355871304915891, "grad_norm": Infinity, "learning_rate": 3.457228166890791e-05, "loss": 1.2767, "step": 1126 }, { "epoch": 0.7362404050302139, "grad_norm": Infinity, "learning_rate": 3.441189915130974e-05, "loss": 1.3949, "step": 1127 }, { "epoch": 0.7368936795688388, "grad_norm": Infinity, "learning_rate": 3.425181216346213e-05, "loss": 1.4078, "step": 1128 }, { "epoch": 0.7375469541074636, "grad_norm": Infinity, "learning_rate": 3.409202142669213e-05, "loss": 1.4977, "step": 1129 }, { "epoch": 0.7382002286460885, "grad_norm": Infinity, "learning_rate": 3.393252766099187e-05, "loss": 1.4244, "step": 1130 }, { "epoch": 0.7388535031847133, "grad_norm": Infinity, "learning_rate": 3.377333158501534e-05, "loss": 1.6322, "step": 1131 }, { "epoch": 0.7395067777233383, "grad_norm": Infinity, "learning_rate": 3.3614433916075326e-05, "loss": 1.5867, "step": 1132 }, { "epoch": 0.7401600522619631, "grad_norm": Infinity, "learning_rate": 3.34558353701398e-05, "loss": 1.539, "step": 1133 }, { "epoch": 0.740813326800588, "grad_norm": Infinity, "learning_rate": 3.3297536661829155e-05, "loss": 1.5412, "step": 1134 }, { "epoch": 0.7414666013392128, "grad_norm": Infinity, "learning_rate": 3.313953850441266e-05, "loss": 1.6732, "step": 1135 }, { "epoch": 0.7421198758778377, "grad_norm": Infinity, "learning_rate": 3.298184160980532e-05, "loss": 1.6536, "step": 1136 }, { "epoch": 0.7427731504164625, "grad_norm": Infinity, "learning_rate": 3.2824446688564815e-05, "loss": 1.7995, "step": 1137 }, { "epoch": 0.7434264249550874, "grad_norm": Infinity, "learning_rate": 3.266735444988808e-05, "loss": 1.7265, "step": 1138 }, { "epoch": 0.7440796994937122, "grad_norm": Infinity, "learning_rate": 3.251056560160821e-05, "loss": 1.9421, "step": 1139 }, { "epoch": 0.7447329740323371, "grad_norm": Infinity, "learning_rate": 3.2354080850191324e-05, "loss": 2.1221, "step": 1140 }, { "epoch": 0.7453862485709619, "grad_norm": Infinity, "learning_rate": 3.219790090073329e-05, "loss": 2.1755, "step": 1141 }, { "epoch": 0.7460395231095868, "grad_norm": Infinity, "learning_rate": 3.2042026456956555e-05, "loss": 2.3494, "step": 1142 }, { "epoch": 0.7466927976482116, "grad_norm": Infinity, "learning_rate": 3.188645822120716e-05, "loss": 2.5545, "step": 1143 }, { "epoch": 0.7473460721868365, "grad_norm": Infinity, "learning_rate": 3.1731196894451154e-05, "loss": 2.7081, "step": 1144 }, { "epoch": 0.7479993467254614, "grad_norm": Infinity, "learning_rate": 3.157624317627195e-05, "loss": 2.7034, "step": 1145 }, { "epoch": 0.7486526212640863, "grad_norm": Infinity, "learning_rate": 3.1421597764866864e-05, "loss": 2.9047, "step": 1146 }, { "epoch": 0.7493058958027111, "grad_norm": Infinity, "learning_rate": 3.126726135704389e-05, "loss": 2.915, "step": 1147 }, { "epoch": 0.749959170341336, "grad_norm": Infinity, "learning_rate": 3.1113234648218883e-05, "loss": 3.1268, "step": 1148 }, { "epoch": 0.7506124448799608, "grad_norm": Infinity, "learning_rate": 3.095951833241213e-05, "loss": 3.4791, "step": 1149 }, { "epoch": 0.7506124448799608, "eval_loss": 2.0990819931030273, "eval_runtime": 161.0223, "eval_samples_per_second": 16.01, "eval_steps_per_second": 4.006, "step": 1149 }, { "epoch": 0.7512657194185857, "grad_norm": Infinity, "learning_rate": 3.080611310224539e-05, "loss": 4.3268, "step": 1150 }, { "epoch": 0.7519189939572105, "grad_norm": Infinity, "learning_rate": 3.0653019648938685e-05, "loss": 1.2606, "step": 1151 }, { "epoch": 0.7525722684958354, "grad_norm": Infinity, "learning_rate": 3.0500238662307212e-05, "loss": 1.3563, "step": 1152 }, { "epoch": 0.7532255430344602, "grad_norm": Infinity, "learning_rate": 3.0347770830758316e-05, "loss": 1.4777, "step": 1153 }, { "epoch": 0.7538788175730851, "grad_norm": Infinity, "learning_rate": 3.019561684128823e-05, "loss": 1.373, "step": 1154 }, { "epoch": 0.7545320921117099, "grad_norm": Infinity, "learning_rate": 3.0043777379479098e-05, "loss": 1.4508, "step": 1155 }, { "epoch": 0.7551853666503348, "grad_norm": Infinity, "learning_rate": 2.989225312949584e-05, "loss": 1.4653, "step": 1156 }, { "epoch": 0.7558386411889597, "grad_norm": Infinity, "learning_rate": 2.9741044774083094e-05, "loss": 1.5171, "step": 1157 }, { "epoch": 0.7564919157275846, "grad_norm": Infinity, "learning_rate": 2.9590152994562104e-05, "loss": 1.6195, "step": 1158 }, { "epoch": 0.7571451902662094, "grad_norm": Infinity, "learning_rate": 2.9439578470827755e-05, "loss": 1.5933, "step": 1159 }, { "epoch": 0.7577984648048343, "grad_norm": Infinity, "learning_rate": 2.9289321881345254e-05, "loss": 1.6688, "step": 1160 }, { "epoch": 0.7584517393434591, "grad_norm": Infinity, "learning_rate": 2.913938390314741e-05, "loss": 1.7732, "step": 1161 }, { "epoch": 0.759105013882084, "grad_norm": Infinity, "learning_rate": 2.8989765211831433e-05, "loss": 1.6398, "step": 1162 }, { "epoch": 0.7597582884207088, "grad_norm": Infinity, "learning_rate": 2.8840466481555672e-05, "loss": 1.8894, "step": 1163 }, { "epoch": 0.7604115629593337, "grad_norm": Infinity, "learning_rate": 2.8691488385037012e-05, "loss": 1.773, "step": 1164 }, { "epoch": 0.7610648374979585, "grad_norm": Infinity, "learning_rate": 2.854283159354748e-05, "loss": 1.9317, "step": 1165 }, { "epoch": 0.7617181120365834, "grad_norm": Infinity, "learning_rate": 2.8394496776911382e-05, "loss": 2.2086, "step": 1166 }, { "epoch": 0.7623713865752082, "grad_norm": Infinity, "learning_rate": 2.8246484603502275e-05, "loss": 2.3171, "step": 1167 }, { "epoch": 0.763024661113833, "grad_norm": Infinity, "learning_rate": 2.8098795740239923e-05, "loss": 2.2845, "step": 1168 }, { "epoch": 0.7636779356524579, "grad_norm": Infinity, "learning_rate": 2.7951430852587268e-05, "loss": 2.4874, "step": 1169 }, { "epoch": 0.7643312101910829, "grad_norm": Infinity, "learning_rate": 2.7804390604547557e-05, "loss": 2.7623, "step": 1170 }, { "epoch": 0.7649844847297077, "grad_norm": Infinity, "learning_rate": 2.7657675658661198e-05, "loss": 2.576, "step": 1171 }, { "epoch": 0.7656377592683326, "grad_norm": Infinity, "learning_rate": 2.7511286676002823e-05, "loss": 2.7432, "step": 1172 }, { "epoch": 0.7662910338069574, "grad_norm": Infinity, "learning_rate": 2.7365224316178384e-05, "loss": 2.7977, "step": 1173 }, { "epoch": 0.7669443083455822, "grad_norm": Infinity, "learning_rate": 2.7219489237322026e-05, "loss": 3.5219, "step": 1174 }, { "epoch": 0.7675975828842071, "grad_norm": Infinity, "learning_rate": 2.707408209609339e-05, "loss": 3.8416, "step": 1175 }, { "epoch": 0.768250857422832, "grad_norm": Infinity, "learning_rate": 2.692900354767425e-05, "loss": 1.4184, "step": 1176 }, { "epoch": 0.7689041319614568, "grad_norm": Infinity, "learning_rate": 2.678425424576596e-05, "loss": 1.3855, "step": 1177 }, { "epoch": 0.7695574065000816, "grad_norm": Infinity, "learning_rate": 2.6639834842586365e-05, "loss": 1.4003, "step": 1178 }, { "epoch": 0.7702106810387065, "grad_norm": Infinity, "learning_rate": 2.649574598886665e-05, "loss": 1.5033, "step": 1179 }, { "epoch": 0.7708639555773313, "grad_norm": Infinity, "learning_rate": 2.6351988333848788e-05, "loss": 1.4147, "step": 1180 }, { "epoch": 0.7715172301159562, "grad_norm": Infinity, "learning_rate": 2.620856252528232e-05, "loss": 1.4997, "step": 1181 }, { "epoch": 0.772170504654581, "grad_norm": Infinity, "learning_rate": 2.6065469209421566e-05, "loss": 1.5917, "step": 1182 }, { "epoch": 0.772823779193206, "grad_norm": Infinity, "learning_rate": 2.5922709031022686e-05, "loss": 1.5591, "step": 1183 }, { "epoch": 0.7734770537318308, "grad_norm": Infinity, "learning_rate": 2.578028263334078e-05, "loss": 1.5877, "step": 1184 }, { "epoch": 0.7741303282704557, "grad_norm": Infinity, "learning_rate": 2.5638190658126938e-05, "loss": 1.6238, "step": 1185 }, { "epoch": 0.7747836028090805, "grad_norm": Infinity, "learning_rate": 2.549643374562549e-05, "loss": 1.8098, "step": 1186 }, { "epoch": 0.7754368773477054, "grad_norm": Infinity, "learning_rate": 2.5355012534570953e-05, "loss": 1.8082, "step": 1187 }, { "epoch": 0.7760901518863302, "grad_norm": Infinity, "learning_rate": 2.52139276621852e-05, "loss": 1.8164, "step": 1188 }, { "epoch": 0.7767434264249551, "grad_norm": Infinity, "learning_rate": 2.507317976417475e-05, "loss": 2.0629, "step": 1189 }, { "epoch": 0.7773967009635799, "grad_norm": Infinity, "learning_rate": 2.493276947472756e-05, "loss": 2.1745, "step": 1190 }, { "epoch": 0.7780499755022048, "grad_norm": Infinity, "learning_rate": 2.4792697426510582e-05, "loss": 2.1568, "step": 1191 }, { "epoch": 0.7787032500408296, "grad_norm": Infinity, "learning_rate": 2.4652964250666567e-05, "loss": 2.3944, "step": 1192 }, { "epoch": 0.7793565245794545, "grad_norm": Infinity, "learning_rate": 2.4513570576811395e-05, "loss": 2.4762, "step": 1193 }, { "epoch": 0.7800097991180793, "grad_norm": Infinity, "learning_rate": 2.4374517033031285e-05, "loss": 2.3231, "step": 1194 }, { "epoch": 0.7806630736567042, "grad_norm": Infinity, "learning_rate": 2.4235804245879723e-05, "loss": 2.4375, "step": 1195 }, { "epoch": 0.7813163481953291, "grad_norm": Infinity, "learning_rate": 2.4097432840374945e-05, "loss": 2.8067, "step": 1196 }, { "epoch": 0.781969622733954, "grad_norm": Infinity, "learning_rate": 2.3959403439996907e-05, "loss": 2.957, "step": 1197 }, { "epoch": 0.7826228972725788, "grad_norm": Infinity, "learning_rate": 2.382171666668456e-05, "loss": 3.3291, "step": 1198 }, { "epoch": 0.7832761718112037, "grad_norm": Infinity, "learning_rate": 2.3684373140833016e-05, "loss": 3.5427, "step": 1199 }, { "epoch": 0.7839294463498285, "grad_norm": Infinity, "learning_rate": 2.354737348129077e-05, "loss": 3.9099, "step": 1200 }, { "epoch": 0.7845827208884534, "grad_norm": Infinity, "learning_rate": 2.3410718305356894e-05, "loss": 1.3826, "step": 1201 }, { "epoch": 0.7852359954270782, "grad_norm": Infinity, "learning_rate": 2.3274408228778355e-05, "loss": 1.4272, "step": 1202 }, { "epoch": 0.7858892699657031, "grad_norm": Infinity, "learning_rate": 2.3138443865747062e-05, "loss": 1.5302, "step": 1203 }, { "epoch": 0.7865425445043279, "grad_norm": Infinity, "learning_rate": 2.300282582889719e-05, "loss": 1.4341, "step": 1204 }, { "epoch": 0.7871958190429528, "grad_norm": Infinity, "learning_rate": 2.2867554729302542e-05, "loss": 1.5308, "step": 1205 }, { "epoch": 0.7878490935815776, "grad_norm": Infinity, "learning_rate": 2.2732631176473485e-05, "loss": 1.4427, "step": 1206 }, { "epoch": 0.7885023681202025, "grad_norm": Infinity, "learning_rate": 2.2598055778354587e-05, "loss": 1.4804, "step": 1207 }, { "epoch": 0.7891556426588274, "grad_norm": Infinity, "learning_rate": 2.2463829141321548e-05, "loss": 1.6638, "step": 1208 }, { "epoch": 0.7898089171974523, "grad_norm": Infinity, "learning_rate": 2.2329951870178655e-05, "loss": 1.5226, "step": 1209 }, { "epoch": 0.7904621917360771, "grad_norm": Infinity, "learning_rate": 2.2196424568156073e-05, "loss": 1.7148, "step": 1210 }, { "epoch": 0.791115466274702, "grad_norm": Infinity, "learning_rate": 2.2063247836906886e-05, "loss": 1.6323, "step": 1211 }, { "epoch": 0.7917687408133268, "grad_norm": Infinity, "learning_rate": 2.1930422276504747e-05, "loss": 1.7264, "step": 1212 }, { "epoch": 0.7924220153519517, "grad_norm": Infinity, "learning_rate": 2.17979484854409e-05, "loss": 1.8764, "step": 1213 }, { "epoch": 0.7930752898905765, "grad_norm": Infinity, "learning_rate": 2.1665827060621568e-05, "loss": 1.8152, "step": 1214 }, { "epoch": 0.7937285644292014, "grad_norm": Infinity, "learning_rate": 2.153405859736528e-05, "loss": 2.0223, "step": 1215 }, { "epoch": 0.7943818389678262, "grad_norm": Infinity, "learning_rate": 2.1402643689400192e-05, "loss": 1.9798, "step": 1216 }, { "epoch": 0.7950351135064511, "grad_norm": Infinity, "learning_rate": 2.1271582928861323e-05, "loss": 2.2652, "step": 1217 }, { "epoch": 0.7956883880450759, "grad_norm": Infinity, "learning_rate": 2.1140876906288086e-05, "loss": 2.5652, "step": 1218 }, { "epoch": 0.7963416625837008, "grad_norm": Infinity, "learning_rate": 2.1010526210621406e-05, "loss": 2.6046, "step": 1219 }, { "epoch": 0.7969949371223256, "grad_norm": Infinity, "learning_rate": 2.0880531429201145e-05, "loss": 2.4434, "step": 1220 }, { "epoch": 0.7976482116609506, "grad_norm": Infinity, "learning_rate": 2.0750893147763596e-05, "loss": 2.7545, "step": 1221 }, { "epoch": 0.7983014861995754, "grad_norm": Infinity, "learning_rate": 2.0621611950438512e-05, "loss": 3.1435, "step": 1222 }, { "epoch": 0.7989547607382003, "grad_norm": Infinity, "learning_rate": 2.0492688419746897e-05, "loss": 3.2472, "step": 1223 }, { "epoch": 0.7996080352768251, "grad_norm": Infinity, "learning_rate": 2.0364123136598035e-05, "loss": 3.4464, "step": 1224 }, { "epoch": 0.80026130981545, "grad_norm": Infinity, "learning_rate": 2.0235916680287015e-05, "loss": 3.9101, "step": 1225 }, { "epoch": 0.8009145843540748, "grad_norm": Infinity, "learning_rate": 2.010806962849219e-05, "loss": 1.3025, "step": 1226 }, { "epoch": 0.8015678588926997, "grad_norm": Infinity, "learning_rate": 1.998058255727234e-05, "loss": 1.2785, "step": 1227 }, { "epoch": 0.8022211334313245, "grad_norm": Infinity, "learning_rate": 1.985345604106439e-05, "loss": 1.397, "step": 1228 }, { "epoch": 0.8028744079699494, "grad_norm": Infinity, "learning_rate": 1.9726690652680578e-05, "loss": 1.4421, "step": 1229 }, { "epoch": 0.8035276825085742, "grad_norm": Infinity, "learning_rate": 1.9600286963305957e-05, "loss": 1.5002, "step": 1230 }, { "epoch": 0.804180957047199, "grad_norm": Infinity, "learning_rate": 1.9474245542495807e-05, "loss": 1.5233, "step": 1231 }, { "epoch": 0.8048342315858239, "grad_norm": Infinity, "learning_rate": 1.9348566958173197e-05, "loss": 1.5117, "step": 1232 }, { "epoch": 0.8054875061244487, "grad_norm": Infinity, "learning_rate": 1.9223251776626107e-05, "loss": 1.597, "step": 1233 }, { "epoch": 0.8061407806630737, "grad_norm": Infinity, "learning_rate": 1.9098300562505266e-05, "loss": 1.5572, "step": 1234 }, { "epoch": 0.8067940552016986, "grad_norm": Infinity, "learning_rate": 1.897371387882134e-05, "loss": 1.7387, "step": 1235 }, { "epoch": 0.8074473297403234, "grad_norm": Infinity, "learning_rate": 1.884949228694246e-05, "loss": 1.738, "step": 1236 }, { "epoch": 0.8081006042789483, "grad_norm": Infinity, "learning_rate": 1.8725636346591822e-05, "loss": 1.7022, "step": 1237 }, { "epoch": 0.8087538788175731, "grad_norm": Infinity, "learning_rate": 1.860214661584486e-05, "loss": 1.8925, "step": 1238 }, { "epoch": 0.809407153356198, "grad_norm": Infinity, "learning_rate": 1.8479023651127115e-05, "loss": 2.0958, "step": 1239 }, { "epoch": 0.8100604278948228, "grad_norm": Infinity, "learning_rate": 1.835626800721144e-05, "loss": 1.893, "step": 1240 }, { "epoch": 0.8107137024334476, "grad_norm": Infinity, "learning_rate": 1.8233880237215585e-05, "loss": 2.0566, "step": 1241 }, { "epoch": 0.8113669769720725, "grad_norm": Infinity, "learning_rate": 1.811186089259983e-05, "loss": 2.0772, "step": 1242 }, { "epoch": 0.8120202515106973, "grad_norm": Infinity, "learning_rate": 1.7990210523164198e-05, "loss": 2.3327, "step": 1243 }, { "epoch": 0.8126735260493222, "grad_norm": Infinity, "learning_rate": 1.7868929677046364e-05, "loss": 2.2319, "step": 1244 }, { "epoch": 0.813326800587947, "grad_norm": Infinity, "learning_rate": 1.7748018900718854e-05, "loss": 2.6381, "step": 1245 }, { "epoch": 0.8139800751265719, "grad_norm": Infinity, "learning_rate": 1.762747873898679e-05, "loss": 2.5717, "step": 1246 }, { "epoch": 0.8146333496651968, "grad_norm": Infinity, "learning_rate": 1.750730973498529e-05, "loss": 3.0298, "step": 1247 }, { "epoch": 0.8152866242038217, "grad_norm": Infinity, "learning_rate": 1.7387512430177234e-05, "loss": 3.1643, "step": 1248 }, { "epoch": 0.8159398987424465, "grad_norm": Infinity, "learning_rate": 1.726808736435046e-05, "loss": 3.1612, "step": 1249 }, { "epoch": 0.8165931732810714, "grad_norm": Infinity, "learning_rate": 1.7149035075615794e-05, "loss": 3.9105, "step": 1250 }, { "epoch": 0.8172464478196962, "grad_norm": Infinity, "learning_rate": 1.703035610040423e-05, "loss": 1.1581, "step": 1251 }, { "epoch": 0.8178997223583211, "grad_norm": Infinity, "learning_rate": 1.69120509734647e-05, "loss": 1.3175, "step": 1252 }, { "epoch": 0.8185529968969459, "grad_norm": Infinity, "learning_rate": 1.679412022786172e-05, "loss": 1.3558, "step": 1253 }, { "epoch": 0.8192062714355708, "grad_norm": Infinity, "learning_rate": 1.6676564394972727e-05, "loss": 1.48, "step": 1254 }, { "epoch": 0.8198595459741956, "grad_norm": Infinity, "learning_rate": 1.6559384004486055e-05, "loss": 1.5116, "step": 1255 }, { "epoch": 0.8205128205128205, "grad_norm": Infinity, "learning_rate": 1.6442579584398232e-05, "loss": 1.4282, "step": 1256 }, { "epoch": 0.8211660950514453, "grad_norm": Infinity, "learning_rate": 1.6326151661011724e-05, "loss": 1.4939, "step": 1257 }, { "epoch": 0.8218193695900702, "grad_norm": Infinity, "learning_rate": 1.6210100758932667e-05, "loss": 1.4974, "step": 1258 }, { "epoch": 0.822472644128695, "grad_norm": Infinity, "learning_rate": 1.6094427401068224e-05, "loss": 1.4659, "step": 1259 }, { "epoch": 0.82312591866732, "grad_norm": Infinity, "learning_rate": 1.5979132108624574e-05, "loss": 1.5381, "step": 1260 }, { "epoch": 0.8237791932059448, "grad_norm": Infinity, "learning_rate": 1.5864215401104287e-05, "loss": 1.7296, "step": 1261 }, { "epoch": 0.8244324677445697, "grad_norm": Infinity, "learning_rate": 1.574967779630414e-05, "loss": 1.7319, "step": 1262 }, { "epoch": 0.8250857422831945, "grad_norm": Infinity, "learning_rate": 1.563551981031267e-05, "loss": 1.7433, "step": 1263 }, { "epoch": 0.8257390168218194, "grad_norm": Infinity, "learning_rate": 1.552174195750803e-05, "loss": 1.8011, "step": 1264 }, { "epoch": 0.8263922913604442, "grad_norm": Infinity, "learning_rate": 1.5408344750555383e-05, "loss": 2.0143, "step": 1265 }, { "epoch": 0.8270455658990691, "grad_norm": Infinity, "learning_rate": 1.529532870040492e-05, "loss": 2.0743, "step": 1266 }, { "epoch": 0.8276988404376939, "grad_norm": Infinity, "learning_rate": 1.5182694316289314e-05, "loss": 2.1506, "step": 1267 }, { "epoch": 0.8283521149763188, "grad_norm": Infinity, "learning_rate": 1.5070442105721495e-05, "loss": 2.1783, "step": 1268 }, { "epoch": 0.8290053895149436, "grad_norm": Infinity, "learning_rate": 1.4958572574492501e-05, "loss": 2.5343, "step": 1269 }, { "epoch": 0.8296586640535685, "grad_norm": Infinity, "learning_rate": 1.4847086226668872e-05, "loss": 2.6854, "step": 1270 }, { "epoch": 0.8303119385921933, "grad_norm": Infinity, "learning_rate": 1.4735983564590783e-05, "loss": 2.8695, "step": 1271 }, { "epoch": 0.8309652131308183, "grad_norm": Infinity, "learning_rate": 1.4625265088869477e-05, "loss": 2.959, "step": 1272 }, { "epoch": 0.8316184876694431, "grad_norm": Infinity, "learning_rate": 1.4514931298385093e-05, "loss": 3.3266, "step": 1273 }, { "epoch": 0.832271762208068, "grad_norm": Infinity, "learning_rate": 1.4404982690284575e-05, "loss": 3.8966, "step": 1274 }, { "epoch": 0.8329250367466928, "grad_norm": Infinity, "learning_rate": 1.429541975997908e-05, "loss": 4.5467, "step": 1275 }, { "epoch": 0.8335783112853177, "grad_norm": Infinity, "learning_rate": 1.4186243001142164e-05, "loss": 1.2431, "step": 1276 }, { "epoch": 0.8342315858239425, "grad_norm": Infinity, "learning_rate": 1.4077452905707234e-05, "loss": 1.424, "step": 1277 }, { "epoch": 0.8348848603625674, "grad_norm": Infinity, "learning_rate": 1.396904996386551e-05, "loss": 1.4275, "step": 1278 }, { "epoch": 0.8355381349011922, "grad_norm": Infinity, "learning_rate": 1.38610346640637e-05, "loss": 1.3774, "step": 1279 }, { "epoch": 0.8361914094398171, "grad_norm": Infinity, "learning_rate": 1.3753407493001968e-05, "loss": 1.4958, "step": 1280 }, { "epoch": 0.8368446839784419, "grad_norm": Infinity, "learning_rate": 1.3646168935631464e-05, "loss": 1.4944, "step": 1281 }, { "epoch": 0.8374979585170668, "grad_norm": Infinity, "learning_rate": 1.3539319475152457e-05, "loss": 1.5639, "step": 1282 }, { "epoch": 0.8381512330556916, "grad_norm": Infinity, "learning_rate": 1.3432859593011948e-05, "loss": 1.6105, "step": 1283 }, { "epoch": 0.8388045075943165, "grad_norm": Infinity, "learning_rate": 1.3326789768901504e-05, "loss": 1.7179, "step": 1284 }, { "epoch": 0.8394577821329414, "grad_norm": Infinity, "learning_rate": 1.3221110480755305e-05, "loss": 1.5552, "step": 1285 }, { "epoch": 0.8401110566715663, "grad_norm": Infinity, "learning_rate": 1.3115822204747619e-05, "loss": 1.6532, "step": 1286 }, { "epoch": 0.8407643312101911, "grad_norm": Infinity, "learning_rate": 1.3010925415291075e-05, "loss": 1.7276, "step": 1287 }, { "epoch": 0.841417605748816, "grad_norm": Infinity, "learning_rate": 1.2906420585034229e-05, "loss": 1.731, "step": 1288 }, { "epoch": 0.8420708802874408, "grad_norm": Infinity, "learning_rate": 1.2802308184859502e-05, "loss": 1.8079, "step": 1289 }, { "epoch": 0.8427241548260657, "grad_norm": Infinity, "learning_rate": 1.2698588683881186e-05, "loss": 1.8644, "step": 1290 }, { "epoch": 0.8433774293646905, "grad_norm": Infinity, "learning_rate": 1.2595262549443133e-05, "loss": 2.2312, "step": 1291 }, { "epoch": 0.8440307039033154, "grad_norm": Infinity, "learning_rate": 1.2492330247116802e-05, "loss": 2.2468, "step": 1292 }, { "epoch": 0.8446839784419402, "grad_norm": Infinity, "learning_rate": 1.2389792240699084e-05, "loss": 2.4955, "step": 1293 }, { "epoch": 0.8453372529805651, "grad_norm": Infinity, "learning_rate": 1.2287648992210243e-05, "loss": 2.4229, "step": 1294 }, { "epoch": 0.8459905275191899, "grad_norm": Infinity, "learning_rate": 1.2185900961891794e-05, "loss": 2.4597, "step": 1295 }, { "epoch": 0.8466438020578148, "grad_norm": Infinity, "learning_rate": 1.208454860820456e-05, "loss": 3.0822, "step": 1296 }, { "epoch": 0.8472970765964396, "grad_norm": Infinity, "learning_rate": 1.1983592387826347e-05, "loss": 3.045, "step": 1297 }, { "epoch": 0.8479503511350646, "grad_norm": Infinity, "learning_rate": 1.1883032755650204e-05, "loss": 3.0758, "step": 1298 }, { "epoch": 0.8486036256736894, "grad_norm": Infinity, "learning_rate": 1.1782870164782111e-05, "loss": 3.4634, "step": 1299 }, { "epoch": 0.8492569002123143, "grad_norm": Infinity, "learning_rate": 1.1683105066539068e-05, "loss": 4.4563, "step": 1300 }, { "epoch": 0.8499101747509391, "grad_norm": Infinity, "learning_rate": 1.15837379104471e-05, "loss": 1.1769, "step": 1301 }, { "epoch": 0.850563449289564, "grad_norm": Infinity, "learning_rate": 1.1484769144239037e-05, "loss": 1.3331, "step": 1302 }, { "epoch": 0.8512167238281888, "grad_norm": Infinity, "learning_rate": 1.1386199213852755e-05, "loss": 1.361, "step": 1303 }, { "epoch": 0.8518699983668137, "grad_norm": Infinity, "learning_rate": 1.1288028563428965e-05, "loss": 1.4695, "step": 1304 }, { "epoch": 0.8525232729054385, "grad_norm": Infinity, "learning_rate": 1.1190257635309275e-05, "loss": 1.5785, "step": 1305 }, { "epoch": 0.8531765474440633, "grad_norm": Infinity, "learning_rate": 1.1092886870034291e-05, "loss": 1.5306, "step": 1306 }, { "epoch": 0.8538298219826882, "grad_norm": Infinity, "learning_rate": 1.0995916706341459e-05, "loss": 1.4277, "step": 1307 }, { "epoch": 0.854483096521313, "grad_norm": Infinity, "learning_rate": 1.0899347581163221e-05, "loss": 1.6138, "step": 1308 }, { "epoch": 0.8551363710599379, "grad_norm": Infinity, "learning_rate": 1.0803179929624973e-05, "loss": 1.6152, "step": 1309 }, { "epoch": 0.8557896455985627, "grad_norm": Infinity, "learning_rate": 1.0707414185043163e-05, "loss": 1.6077, "step": 1310 }, { "epoch": 0.8564429201371877, "grad_norm": Infinity, "learning_rate": 1.0612050778923276e-05, "loss": 1.6382, "step": 1311 }, { "epoch": 0.8570961946758126, "grad_norm": Infinity, "learning_rate": 1.0517090140957998e-05, "loss": 1.7534, "step": 1312 }, { "epoch": 0.8577494692144374, "grad_norm": Infinity, "learning_rate": 1.0422532699025068e-05, "loss": 1.8991, "step": 1313 }, { "epoch": 0.8584027437530622, "grad_norm": Infinity, "learning_rate": 1.0328378879185641e-05, "loss": 1.9086, "step": 1314 }, { "epoch": 0.8590560182916871, "grad_norm": Infinity, "learning_rate": 1.0234629105682103e-05, "loss": 2.1443, "step": 1315 }, { "epoch": 0.8597092928303119, "grad_norm": Infinity, "learning_rate": 1.0141283800936297e-05, "loss": 2.0489, "step": 1316 }, { "epoch": 0.8603625673689368, "grad_norm": Infinity, "learning_rate": 1.0048343385547676e-05, "loss": 2.0992, "step": 1317 }, { "epoch": 0.8610158419075616, "grad_norm": Infinity, "learning_rate": 9.955808278291156e-06, "loss": 2.5535, "step": 1318 }, { "epoch": 0.8616691164461865, "grad_norm": Infinity, "learning_rate": 9.863678896115559e-06, "loss": 2.5644, "step": 1319 }, { "epoch": 0.8623223909848113, "grad_norm": Infinity, "learning_rate": 9.771955654141496e-06, "loss": 2.6898, "step": 1320 }, { "epoch": 0.8629756655234362, "grad_norm": Infinity, "learning_rate": 9.68063896565955e-06, "loss": 2.8441, "step": 1321 }, { "epoch": 0.863628940062061, "grad_norm": Infinity, "learning_rate": 9.589729242128532e-06, "loss": 2.7607, "step": 1322 }, { "epoch": 0.8642822146006859, "grad_norm": Infinity, "learning_rate": 9.499226893173453e-06, "loss": 3.2205, "step": 1323 }, { "epoch": 0.8649354891393108, "grad_norm": Infinity, "learning_rate": 9.409132326583758e-06, "loss": 3.5218, "step": 1324 }, { "epoch": 0.8655887636779357, "grad_norm": Infinity, "learning_rate": 9.319445948311534e-06, "loss": 3.9074, "step": 1325 }, { "epoch": 0.8662420382165605, "grad_norm": Infinity, "learning_rate": 9.230168162469599e-06, "loss": 1.3831, "step": 1326 }, { "epoch": 0.8668953127551854, "grad_norm": Infinity, "learning_rate": 9.141299371329704e-06, "loss": 1.446, "step": 1327 }, { "epoch": 0.8675485872938102, "grad_norm": Infinity, "learning_rate": 9.052839975320836e-06, "loss": 1.3385, "step": 1328 }, { "epoch": 0.8682018618324351, "grad_norm": Infinity, "learning_rate": 8.964790373027132e-06, "loss": 1.4632, "step": 1329 }, { "epoch": 0.8688551363710599, "grad_norm": Infinity, "learning_rate": 8.87715096118642e-06, "loss": 1.5529, "step": 1330 }, { "epoch": 0.8695084109096848, "grad_norm": Infinity, "learning_rate": 8.789922134688244e-06, "loss": 1.572, "step": 1331 }, { "epoch": 0.8701616854483096, "grad_norm": Infinity, "learning_rate": 8.703104286572028e-06, "loss": 1.5595, "step": 1332 }, { "epoch": 0.8708149599869345, "grad_norm": Infinity, "learning_rate": 8.616697808025486e-06, "loss": 1.5692, "step": 1333 }, { "epoch": 0.8714682345255593, "grad_norm": Infinity, "learning_rate": 8.53070308838273e-06, "loss": 1.535, "step": 1334 }, { "epoch": 0.8721215090641842, "grad_norm": Infinity, "learning_rate": 8.445120515122551e-06, "loss": 1.5121, "step": 1335 }, { "epoch": 0.8727747836028091, "grad_norm": Infinity, "learning_rate": 8.359950473866663e-06, "loss": 1.6005, "step": 1336 }, { "epoch": 0.873428058141434, "grad_norm": Infinity, "learning_rate": 8.275193348377974e-06, "loss": 1.7782, "step": 1337 }, { "epoch": 0.8740813326800588, "grad_norm": Infinity, "learning_rate": 8.190849520558908e-06, "loss": 2.003, "step": 1338 }, { "epoch": 0.8747346072186837, "grad_norm": Infinity, "learning_rate": 8.106919370449572e-06, "loss": 1.9703, "step": 1339 }, { "epoch": 0.8753878817573085, "grad_norm": Infinity, "learning_rate": 8.023403276226126e-06, "loss": 2.0316, "step": 1340 }, { "epoch": 0.8760411562959334, "grad_norm": Infinity, "learning_rate": 7.940301614199074e-06, "loss": 1.9369, "step": 1341 }, { "epoch": 0.8766944308345582, "grad_norm": Infinity, "learning_rate": 7.857614758811527e-06, "loss": 2.2225, "step": 1342 }, { "epoch": 0.8773477053731831, "grad_norm": Infinity, "learning_rate": 7.775343082637553e-06, "loss": 2.26, "step": 1343 }, { "epoch": 0.8780009799118079, "grad_norm": Infinity, "learning_rate": 7.693486956380525e-06, "loss": 2.3674, "step": 1344 }, { "epoch": 0.8786542544504328, "grad_norm": Infinity, "learning_rate": 7.612046748871327e-06, "loss": 2.9854, "step": 1345 }, { "epoch": 0.8793075289890576, "grad_norm": Infinity, "learning_rate": 7.531022827066858e-06, "loss": 2.6, "step": 1346 }, { "epoch": 0.8799608035276825, "grad_norm": Infinity, "learning_rate": 7.450415556048296e-06, "loss": 3.0325, "step": 1347 }, { "epoch": 0.8806140780663073, "grad_norm": Infinity, "learning_rate": 7.370225299019362e-06, "loss": 3.0668, "step": 1348 }, { "epoch": 0.8812673526049323, "grad_norm": Infinity, "learning_rate": 7.290452417304916e-06, "loss": 3.2571, "step": 1349 }, { "epoch": 0.8819206271435571, "grad_norm": Infinity, "learning_rate": 7.211097270349066e-06, "loss": 3.7934, "step": 1350 }, { "epoch": 0.882573901682182, "grad_norm": Infinity, "learning_rate": 7.132160215713757e-06, "loss": 1.2629, "step": 1351 }, { "epoch": 0.8832271762208068, "grad_norm": Infinity, "learning_rate": 7.053641609077033e-06, "loss": 1.4117, "step": 1352 }, { "epoch": 0.8838804507594317, "grad_norm": Infinity, "learning_rate": 6.975541804231478e-06, "loss": 1.3355, "step": 1353 }, { "epoch": 0.8845337252980565, "grad_norm": Infinity, "learning_rate": 6.897861153082618e-06, "loss": 1.4311, "step": 1354 }, { "epoch": 0.8851869998366814, "grad_norm": Infinity, "learning_rate": 6.820600005647382e-06, "loss": 1.5243, "step": 1355 }, { "epoch": 0.8858402743753062, "grad_norm": Infinity, "learning_rate": 6.743758710052439e-06, "loss": 1.4378, "step": 1356 }, { "epoch": 0.8864935489139311, "grad_norm": Infinity, "learning_rate": 6.6673376125326645e-06, "loss": 1.4713, "step": 1357 }, { "epoch": 0.8871468234525559, "grad_norm": Infinity, "learning_rate": 6.591337057429603e-06, "loss": 1.5855, "step": 1358 }, { "epoch": 0.8878000979911808, "grad_norm": Infinity, "learning_rate": 6.515757387189902e-06, "loss": 1.5555, "step": 1359 }, { "epoch": 0.8884533725298056, "grad_norm": Infinity, "learning_rate": 6.440598942363796e-06, "loss": 1.573, "step": 1360 }, { "epoch": 0.8891066470684305, "grad_norm": Infinity, "learning_rate": 6.365862061603456e-06, "loss": 1.7148, "step": 1361 }, { "epoch": 0.8897599216070554, "grad_norm": Infinity, "learning_rate": 6.291547081661631e-06, "loss": 1.6704, "step": 1362 }, { "epoch": 0.8904131961456803, "grad_norm": Infinity, "learning_rate": 6.217654337390078e-06, "loss": 1.8345, "step": 1363 }, { "epoch": 0.8910664706843051, "grad_norm": Infinity, "learning_rate": 6.144184161737887e-06, "loss": 1.8895, "step": 1364 }, { "epoch": 0.89171974522293, "grad_norm": Infinity, "learning_rate": 6.071136885750272e-06, "loss": 1.9469, "step": 1365 }, { "epoch": 0.8923730197615548, "grad_norm": Infinity, "learning_rate": 5.99851283856685e-06, "loss": 2.2034, "step": 1366 }, { "epoch": 0.8930262943001797, "grad_norm": Infinity, "learning_rate": 5.926312347420238e-06, "loss": 2.2942, "step": 1367 }, { "epoch": 0.8936795688388045, "grad_norm": Infinity, "learning_rate": 5.854535737634581e-06, "loss": 2.514, "step": 1368 }, { "epoch": 0.8943328433774294, "grad_norm": Infinity, "learning_rate": 5.783183332624098e-06, "loss": 2.5924, "step": 1369 }, { "epoch": 0.8949861179160542, "grad_norm": Infinity, "learning_rate": 5.71225545389158e-06, "loss": 2.6971, "step": 1370 }, { "epoch": 0.895639392454679, "grad_norm": Infinity, "learning_rate": 5.641752421027014e-06, "loss": 2.4167, "step": 1371 }, { "epoch": 0.8962926669933039, "grad_norm": Infinity, "learning_rate": 5.571674551706041e-06, "loss": 3.0307, "step": 1372 }, { "epoch": 0.8969459415319287, "grad_norm": Infinity, "learning_rate": 5.5020221616886025e-06, "loss": 3.0822, "step": 1373 }, { "epoch": 0.8975992160705536, "grad_norm": Infinity, "learning_rate": 5.432795564817539e-06, "loss": 3.6224, "step": 1374 }, { "epoch": 0.8982524906091786, "grad_norm": Infinity, "learning_rate": 5.363995073017047e-06, "loss": 3.9485, "step": 1375 }, { "epoch": 0.8989057651478034, "grad_norm": Infinity, "learning_rate": 5.295620996291451e-06, "loss": 1.369, "step": 1376 }, { "epoch": 0.8995590396864283, "grad_norm": Infinity, "learning_rate": 5.227673642723651e-06, "loss": 1.3912, "step": 1377 }, { "epoch": 0.9002123142250531, "grad_norm": Infinity, "learning_rate": 5.160153318473815e-06, "loss": 1.4591, "step": 1378 }, { "epoch": 0.900865588763678, "grad_norm": Infinity, "learning_rate": 5.093060327778043e-06, "loss": 1.491, "step": 1379 }, { "epoch": 0.9015188633023028, "grad_norm": Infinity, "learning_rate": 5.026394972946813e-06, "loss": 1.472, "step": 1380 }, { "epoch": 0.9021721378409276, "grad_norm": Infinity, "learning_rate": 4.9601575543638535e-06, "loss": 1.4614, "step": 1381 }, { "epoch": 0.9028254123795525, "grad_norm": Infinity, "learning_rate": 4.8943483704846475e-06, "loss": 1.5487, "step": 1382 }, { "epoch": 0.9034786869181773, "grad_norm": Infinity, "learning_rate": 4.828967717835087e-06, "loss": 1.546, "step": 1383 }, { "epoch": 0.9041319614568022, "grad_norm": Infinity, "learning_rate": 4.76401589101021e-06, "loss": 1.4852, "step": 1384 }, { "epoch": 0.904785235995427, "grad_norm": Infinity, "learning_rate": 4.69949318267281e-06, "loss": 1.742, "step": 1385 }, { "epoch": 0.9054385105340519, "grad_norm": Infinity, "learning_rate": 4.635399883552128e-06, "loss": 1.6791, "step": 1386 }, { "epoch": 0.9060917850726768, "grad_norm": Infinity, "learning_rate": 4.571736282442607e-06, "loss": 1.7485, "step": 1387 }, { "epoch": 0.9067450596113017, "grad_norm": Infinity, "learning_rate": 4.508502666202474e-06, "loss": 1.9477, "step": 1388 }, { "epoch": 0.9073983341499265, "grad_norm": Infinity, "learning_rate": 4.445699319752539e-06, "loss": 1.8895, "step": 1389 }, { "epoch": 0.9080516086885514, "grad_norm": Infinity, "learning_rate": 4.383326526074916e-06, "loss": 1.9285, "step": 1390 }, { "epoch": 0.9087048832271762, "grad_norm": Infinity, "learning_rate": 4.32138456621164e-06, "loss": 2.2509, "step": 1391 }, { "epoch": 0.9093581577658011, "grad_norm": Infinity, "learning_rate": 4.2598737192635405e-06, "loss": 2.3064, "step": 1392 }, { "epoch": 0.9100114323044259, "grad_norm": Infinity, "learning_rate": 4.198794262388905e-06, "loss": 2.6755, "step": 1393 }, { "epoch": 0.9106647068430508, "grad_norm": Infinity, "learning_rate": 4.138146470802218e-06, "loss": 2.3408, "step": 1394 }, { "epoch": 0.9113179813816756, "grad_norm": Infinity, "learning_rate": 4.077930617773007e-06, "loss": 2.8286, "step": 1395 }, { "epoch": 0.9119712559203005, "grad_norm": Infinity, "learning_rate": 4.018146974624448e-06, "loss": 2.7035, "step": 1396 }, { "epoch": 0.9126245304589253, "grad_norm": Infinity, "learning_rate": 3.958795810732363e-06, "loss": 2.9971, "step": 1397 }, { "epoch": 0.9132778049975502, "grad_norm": Infinity, "learning_rate": 3.899877393523821e-06, "loss": 3.2009, "step": 1398 }, { "epoch": 0.913931079536175, "grad_norm": Infinity, "learning_rate": 3.841391988476018e-06, "loss": 3.1459, "step": 1399 }, { "epoch": 0.9145843540748, "grad_norm": Infinity, "learning_rate": 3.783339859115065e-06, "loss": 3.91, "step": 1400 }, { "epoch": 0.9152376286134248, "grad_norm": Infinity, "learning_rate": 3.7257212670148035e-06, "loss": 1.3089, "step": 1401 }, { "epoch": 0.9158909031520497, "grad_norm": Infinity, "learning_rate": 3.6685364717956138e-06, "loss": 1.3872, "step": 1402 }, { "epoch": 0.9165441776906745, "grad_norm": Infinity, "learning_rate": 3.611785731123274e-06, "loss": 1.3304, "step": 1403 }, { "epoch": 0.9171974522292994, "grad_norm": Infinity, "learning_rate": 3.555469300707759e-06, "loss": 1.3995, "step": 1404 }, { "epoch": 0.9178507267679242, "grad_norm": Infinity, "learning_rate": 3.4995874343021094e-06, "loss": 1.4048, "step": 1405 }, { "epoch": 0.9185040013065491, "grad_norm": Infinity, "learning_rate": 3.444140383701333e-06, "loss": 1.4064, "step": 1406 }, { "epoch": 0.9191572758451739, "grad_norm": Infinity, "learning_rate": 3.389128398741148e-06, "loss": 1.596, "step": 1407 }, { "epoch": 0.9198105503837988, "grad_norm": Infinity, "learning_rate": 3.3345517272970084e-06, "loss": 1.5013, "step": 1408 }, { "epoch": 0.9204638249224236, "grad_norm": Infinity, "learning_rate": 3.2804106152828582e-06, "loss": 1.7069, "step": 1409 }, { "epoch": 0.9211170994610485, "grad_norm": Infinity, "learning_rate": 3.226705306650113e-06, "loss": 1.6444, "step": 1410 }, { "epoch": 0.9217703739996733, "grad_norm": Infinity, "learning_rate": 3.173436043386535e-06, "loss": 1.5149, "step": 1411 }, { "epoch": 0.9224236485382982, "grad_norm": Infinity, "learning_rate": 3.120603065515071e-06, "loss": 1.7224, "step": 1412 }, { "epoch": 0.9230769230769231, "grad_norm": Infinity, "learning_rate": 3.068206611092905e-06, "loss": 1.7857, "step": 1413 }, { "epoch": 0.923730197615548, "grad_norm": Infinity, "learning_rate": 3.016246916210297e-06, "loss": 1.8834, "step": 1414 }, { "epoch": 0.9243834721541728, "grad_norm": Infinity, "learning_rate": 2.9647242149895006e-06, "loss": 1.9377, "step": 1415 }, { "epoch": 0.9250367466927977, "grad_norm": Infinity, "learning_rate": 2.9136387395837683e-06, "loss": 2.1515, "step": 1416 }, { "epoch": 0.9256900212314225, "grad_norm": Infinity, "learning_rate": 2.8629907201763283e-06, "loss": 2.3141, "step": 1417 }, { "epoch": 0.9263432957700474, "grad_norm": Infinity, "learning_rate": 2.8127803849791968e-06, "loss": 2.3314, "step": 1418 }, { "epoch": 0.9269965703086722, "grad_norm": Infinity, "learning_rate": 2.7630079602323442e-06, "loss": 2.3136, "step": 1419 }, { "epoch": 0.9276498448472971, "grad_norm": Infinity, "learning_rate": 2.7136736702025433e-06, "loss": 2.6563, "step": 1420 }, { "epoch": 0.9283031193859219, "grad_norm": Infinity, "learning_rate": 2.6647777371824e-06, "loss": 2.7181, "step": 1421 }, { "epoch": 0.9289563939245468, "grad_norm": Infinity, "learning_rate": 2.616320381489401e-06, "loss": 3.1091, "step": 1422 }, { "epoch": 0.9296096684631716, "grad_norm": Infinity, "learning_rate": 2.5683018214647693e-06, "loss": 3.1406, "step": 1423 }, { "epoch": 0.9302629430017965, "grad_norm": Infinity, "learning_rate": 2.520722273472698e-06, "loss": 3.4426, "step": 1424 }, { "epoch": 0.9309162175404213, "grad_norm": Infinity, "learning_rate": 2.473581951899184e-06, "loss": 4.6078, "step": 1425 }, { "epoch": 0.9315694920790463, "grad_norm": Infinity, "learning_rate": 2.4268810691511546e-06, "loss": 1.3089, "step": 1426 }, { "epoch": 0.9322227666176711, "grad_norm": Infinity, "learning_rate": 2.3806198356555287e-06, "loss": 1.4224, "step": 1427 }, { "epoch": 0.932876041156296, "grad_norm": Infinity, "learning_rate": 2.3347984598581783e-06, "loss": 1.4428, "step": 1428 }, { "epoch": 0.9335293156949208, "grad_norm": Infinity, "learning_rate": 2.289417148223094e-06, "loss": 1.4401, "step": 1429 }, { "epoch": 0.9341825902335457, "grad_norm": Infinity, "learning_rate": 2.2444761052313856e-06, "loss": 1.4837, "step": 1430 }, { "epoch": 0.9348358647721705, "grad_norm": Infinity, "learning_rate": 2.1999755333803718e-06, "loss": 1.5233, "step": 1431 }, { "epoch": 0.9354891393107954, "grad_norm": Infinity, "learning_rate": 2.1559156331826926e-06, "loss": 1.5427, "step": 1432 }, { "epoch": 0.9361424138494202, "grad_norm": Infinity, "learning_rate": 2.1122966031653977e-06, "loss": 1.5739, "step": 1433 }, { "epoch": 0.9367956883880451, "grad_norm": Infinity, "learning_rate": 2.069118639868994e-06, "loss": 1.525, "step": 1434 }, { "epoch": 0.9374489629266699, "grad_norm": Infinity, "learning_rate": 2.0263819378466884e-06, "loss": 1.7342, "step": 1435 }, { "epoch": 0.9381022374652948, "grad_norm": Infinity, "learning_rate": 1.984086689663378e-06, "loss": 1.9, "step": 1436 }, { "epoch": 0.9387555120039196, "grad_norm": Infinity, "learning_rate": 1.942233085894851e-06, "loss": 1.7909, "step": 1437 }, { "epoch": 0.9394087865425444, "grad_norm": Infinity, "learning_rate": 1.9008213151269328e-06, "loss": 1.8013, "step": 1438 }, { "epoch": 0.9400620610811694, "grad_norm": Infinity, "learning_rate": 1.8598515639545622e-06, "loss": 1.8483, "step": 1439 }, { "epoch": 0.9407153356197943, "grad_norm": Infinity, "learning_rate": 1.8193240169810943e-06, "loss": 2.0898, "step": 1440 }, { "epoch": 0.9413686101584191, "grad_norm": Infinity, "learning_rate": 1.7792388568173002e-06, "loss": 2.1185, "step": 1441 }, { "epoch": 0.942021884697044, "grad_norm": Infinity, "learning_rate": 1.7395962640806674e-06, "loss": 2.164, "step": 1442 }, { "epoch": 0.9426751592356688, "grad_norm": Infinity, "learning_rate": 1.7003964173945563e-06, "loss": 2.3025, "step": 1443 }, { "epoch": 0.9433284337742937, "grad_norm": Infinity, "learning_rate": 1.6616394933873235e-06, "loss": 2.4831, "step": 1444 }, { "epoch": 0.9439817083129185, "grad_norm": Infinity, "learning_rate": 1.623325666691644e-06, "loss": 2.6088, "step": 1445 }, { "epoch": 0.9446349828515433, "grad_norm": Infinity, "learning_rate": 1.5854551099436455e-06, "loss": 2.8619, "step": 1446 }, { "epoch": 0.9452882573901682, "grad_norm": Infinity, "learning_rate": 1.5480279937821418e-06, "loss": 2.9166, "step": 1447 }, { "epoch": 0.945941531928793, "grad_norm": Infinity, "learning_rate": 1.511044486847879e-06, "loss": 3.3526, "step": 1448 }, { "epoch": 0.9465948064674179, "grad_norm": Infinity, "learning_rate": 1.4745047557827796e-06, "loss": 3.6262, "step": 1449 }, { "epoch": 0.9472480810060427, "grad_norm": Infinity, "learning_rate": 1.4384089652291543e-06, "loss": 4.5926, "step": 1450 }, { "epoch": 0.9479013555446677, "grad_norm": Infinity, "learning_rate": 1.4027572778290255e-06, "loss": 1.2628, "step": 1451 }, { "epoch": 0.9485546300832925, "grad_norm": Infinity, "learning_rate": 1.3675498542233156e-06, "loss": 1.3955, "step": 1452 }, { "epoch": 0.9492079046219174, "grad_norm": Infinity, "learning_rate": 1.3327868530511934e-06, "loss": 1.3785, "step": 1453 }, { "epoch": 0.9498611791605422, "grad_norm": Infinity, "learning_rate": 1.2984684309493179e-06, "loss": 1.4708, "step": 1454 }, { "epoch": 0.9505144536991671, "grad_norm": Infinity, "learning_rate": 1.2645947425511395e-06, "loss": 1.4621, "step": 1455 }, { "epoch": 0.9511677282377919, "grad_norm": Infinity, "learning_rate": 1.231165940486234e-06, "loss": 1.567, "step": 1456 }, { "epoch": 0.9518210027764168, "grad_norm": Infinity, "learning_rate": 1.1981821753795365e-06, "loss": 1.6199, "step": 1457 }, { "epoch": 0.9524742773150416, "grad_norm": Infinity, "learning_rate": 1.1656435958507738e-06, "loss": 1.5783, "step": 1458 }, { "epoch": 0.9531275518536665, "grad_norm": Infinity, "learning_rate": 1.133550348513701e-06, "loss": 1.5525, "step": 1459 }, { "epoch": 0.9537808263922913, "grad_norm": Infinity, "learning_rate": 1.1019025779754666e-06, "loss": 1.6696, "step": 1460 }, { "epoch": 0.9544341009309162, "grad_norm": Infinity, "learning_rate": 1.0707004268360243e-06, "loss": 1.7598, "step": 1461 }, { "epoch": 0.955087375469541, "grad_norm": Infinity, "learning_rate": 1.039944035687368e-06, "loss": 1.8395, "step": 1462 }, { "epoch": 0.9557406500081659, "grad_norm": Infinity, "learning_rate": 1.0096335431130089e-06, "loss": 2.0216, "step": 1463 }, { "epoch": 0.9563939245467908, "grad_norm": Infinity, "learning_rate": 9.797690856872987e-07, "loss": 1.8501, "step": 1464 }, { "epoch": 0.9570471990854157, "grad_norm": Infinity, "learning_rate": 9.503507979748305e-07, "loss": 1.9238, "step": 1465 }, { "epoch": 0.9577004736240405, "grad_norm": Infinity, "learning_rate": 9.21378812529794e-07, "loss": 2.2574, "step": 1466 }, { "epoch": 0.9583537481626654, "grad_norm": Infinity, "learning_rate": 8.928532598954654e-07, "loss": 2.3334, "step": 1467 }, { "epoch": 0.9590070227012902, "grad_norm": Infinity, "learning_rate": 8.647742686035298e-07, "loss": 2.5487, "step": 1468 }, { "epoch": 0.9596602972399151, "grad_norm": Infinity, "learning_rate": 8.371419651735268e-07, "loss": 2.7241, "step": 1469 }, { "epoch": 0.9603135717785399, "grad_norm": Infinity, "learning_rate": 8.099564741123166e-07, "loss": 2.6671, "step": 1470 }, { "epoch": 0.9609668463171648, "grad_norm": Infinity, "learning_rate": 7.832179179134703e-07, "loss": 2.8966, "step": 1471 }, { "epoch": 0.9616201208557896, "grad_norm": Infinity, "learning_rate": 7.569264170567691e-07, "loss": 2.9527, "step": 1472 }, { "epoch": 0.9622733953944145, "grad_norm": Infinity, "learning_rate": 7.310820900075955e-07, "loss": 3.0179, "step": 1473 }, { "epoch": 0.9629266699330393, "grad_norm": Infinity, "learning_rate": 7.05685053216465e-07, "loss": 3.6077, "step": 1474 }, { "epoch": 0.9635799444716642, "grad_norm": Infinity, "learning_rate": 6.807354211184613e-07, "loss": 4.2041, "step": 1475 }, { "epoch": 0.964233219010289, "grad_norm": Infinity, "learning_rate": 6.562333061327364e-07, "loss": 1.3957, "step": 1476 }, { "epoch": 0.964886493548914, "grad_norm": Infinity, "learning_rate": 6.321788186619992e-07, "loss": 1.4075, "step": 1477 }, { "epoch": 0.9655397680875388, "grad_norm": Infinity, "learning_rate": 6.08572067092017e-07, "loss": 1.4448, "step": 1478 }, { "epoch": 0.9661930426261637, "grad_norm": Infinity, "learning_rate": 5.854131577911259e-07, "loss": 1.4893, "step": 1479 }, { "epoch": 0.9668463171647885, "grad_norm": Infinity, "learning_rate": 5.627021951097545e-07, "loss": 1.4216, "step": 1480 }, { "epoch": 0.9674995917034134, "grad_norm": Infinity, "learning_rate": 5.404392813799675e-07, "loss": 1.5241, "step": 1481 }, { "epoch": 0.9681528662420382, "grad_norm": Infinity, "learning_rate": 5.186245169149784e-07, "loss": 1.4522, "step": 1482 }, { "epoch": 0.9688061407806631, "grad_norm": Infinity, "learning_rate": 4.972580000087046e-07, "loss": 1.5146, "step": 1483 }, { "epoch": 0.9694594153192879, "grad_norm": Infinity, "learning_rate": 4.7633982693535693e-07, "loss": 1.6367, "step": 1484 }, { "epoch": 0.9701126898579128, "grad_norm": Infinity, "learning_rate": 4.5587009194894004e-07, "loss": 1.6609, "step": 1485 }, { "epoch": 0.9707659643965376, "grad_norm": Infinity, "learning_rate": 4.3584888728289695e-07, "loss": 1.6756, "step": 1486 }, { "epoch": 0.9714192389351625, "grad_norm": Infinity, "learning_rate": 4.1627630314965415e-07, "loss": 1.6661, "step": 1487 }, { "epoch": 0.9720725134737873, "grad_norm": Infinity, "learning_rate": 3.9715242774021055e-07, "loss": 1.8335, "step": 1488 }, { "epoch": 0.9727257880124122, "grad_norm": Infinity, "learning_rate": 3.7847734722378234e-07, "loss": 1.9859, "step": 1489 }, { "epoch": 0.9733790625510371, "grad_norm": Infinity, "learning_rate": 3.6025114574734785e-07, "loss": 2.034, "step": 1490 }, { "epoch": 0.974032337089662, "grad_norm": Infinity, "learning_rate": 3.424739054353476e-07, "loss": 1.9654, "step": 1491 }, { "epoch": 0.9746856116282868, "grad_norm": Infinity, "learning_rate": 3.2514570638925157e-07, "loss": 2.3266, "step": 1492 }, { "epoch": 0.9753388861669117, "grad_norm": Infinity, "learning_rate": 3.0826662668720364e-07, "loss": 2.358, "step": 1493 }, { "epoch": 0.9759921607055365, "grad_norm": Infinity, "learning_rate": 2.9183674238372206e-07, "loss": 2.384, "step": 1494 }, { "epoch": 0.9766454352441614, "grad_norm": Infinity, "learning_rate": 2.758561275092886e-07, "loss": 2.6566, "step": 1495 }, { "epoch": 0.9772987097827862, "grad_norm": Infinity, "learning_rate": 2.603248540700709e-07, "loss": 3.0257, "step": 1496 }, { "epoch": 0.9779519843214111, "grad_norm": Infinity, "learning_rate": 2.452429920475674e-07, "loss": 2.866, "step": 1497 }, { "epoch": 0.9786052588600359, "grad_norm": Infinity, "learning_rate": 2.3061060939828517e-07, "loss": 2.8559, "step": 1498 }, { "epoch": 0.9792585333986608, "grad_norm": Infinity, "learning_rate": 2.1642777205346242e-07, "loss": 3.8677, "step": 1499 }, { "epoch": 0.9799118079372856, "grad_norm": Infinity, "learning_rate": 2.0269454391874666e-07, "loss": 3.9609, "step": 1500 }, { "epoch": 0.9805650824759105, "grad_norm": Infinity, "learning_rate": 1.89410986873928e-07, "loss": 1.2609, "step": 1501 }, { "epoch": 0.9812183570145354, "grad_norm": Infinity, "learning_rate": 1.7657716077265073e-07, "loss": 1.307, "step": 1502 }, { "epoch": 0.9818716315531603, "grad_norm": Infinity, "learning_rate": 1.6419312344211347e-07, "loss": 1.3644, "step": 1503 }, { "epoch": 0.9825249060917851, "grad_norm": Infinity, "learning_rate": 1.5225893068286922e-07, "loss": 1.4293, "step": 1504 }, { "epoch": 0.98317818063041, "grad_norm": Infinity, "learning_rate": 1.4077463626852582e-07, "loss": 1.3799, "step": 1505 }, { "epoch": 0.9838314551690348, "grad_norm": Infinity, "learning_rate": 1.2974029194551262e-07, "loss": 1.4381, "step": 1506 }, { "epoch": 0.9844847297076597, "grad_norm": Infinity, "learning_rate": 1.1915594743288072e-07, "loss": 1.5463, "step": 1507 }, { "epoch": 0.9851380042462845, "grad_norm": Infinity, "learning_rate": 1.0902165042202539e-07, "loss": 1.5154, "step": 1508 }, { "epoch": 0.9857912787849094, "grad_norm": Infinity, "learning_rate": 9.933744657651956e-08, "loss": 1.6106, "step": 1509 }, { "epoch": 0.9864445533235342, "grad_norm": Infinity, "learning_rate": 9.010337953185843e-08, "loss": 1.6065, "step": 1510 }, { "epoch": 0.987097827862159, "grad_norm": Infinity, "learning_rate": 8.131949089531521e-08, "loss": 1.7721, "step": 1511 }, { "epoch": 0.9877511024007839, "grad_norm": Infinity, "learning_rate": 7.298582024571899e-08, "loss": 1.8281, "step": 1512 }, { "epoch": 0.9884043769394087, "grad_norm": Infinity, "learning_rate": 6.510240513328824e-08, "loss": 1.8281, "step": 1513 }, { "epoch": 0.9890576514780336, "grad_norm": Infinity, "learning_rate": 5.7669281079475446e-08, "loss": 1.8849, "step": 1514 }, { "epoch": 0.9897109260166586, "grad_norm": Infinity, "learning_rate": 5.068648157675604e-08, "loss": 1.9238, "step": 1515 }, { "epoch": 0.9903642005552834, "grad_norm": Infinity, "learning_rate": 4.4154038088561886e-08, "loss": 2.0281, "step": 1516 }, { "epoch": 0.9910174750939083, "grad_norm": Infinity, "learning_rate": 3.8071980049037006e-08, "loss": 2.3601, "step": 1517 }, { "epoch": 0.9916707496325331, "grad_norm": Infinity, "learning_rate": 3.244033486300424e-08, "loss": 2.4496, "step": 1518 }, { "epoch": 0.992324024171158, "grad_norm": Infinity, "learning_rate": 2.7259127905776562e-08, "loss": 2.399, "step": 1519 }, { "epoch": 0.9929772987097828, "grad_norm": Infinity, "learning_rate": 2.2528382523057113e-08, "loss": 2.5236, "step": 1520 }, { "epoch": 0.9936305732484076, "grad_norm": Infinity, "learning_rate": 1.8248120030850413e-08, "loss": 2.7336, "step": 1521 }, { "epoch": 0.9942838477870325, "grad_norm": Infinity, "learning_rate": 1.4418359715351327e-08, "loss": 2.898, "step": 1522 }, { "epoch": 0.9949371223256573, "grad_norm": Infinity, "learning_rate": 1.1039118832867345e-08, "loss": 3.4187, "step": 1523 }, { "epoch": 0.9955903968642822, "grad_norm": Infinity, "learning_rate": 8.110412609718676e-09, "loss": 3.3366, "step": 1524 }, { "epoch": 0.996243671402907, "grad_norm": Infinity, "learning_rate": 5.6322542422049266e-09, "loss": 4.1065, "step": 1525 }, { "epoch": 0.9968969459415319, "grad_norm": Infinity, "learning_rate": 3.604654896527393e-09, "loss": 1.3718, "step": 1526 }, { "epoch": 0.9975502204801567, "grad_norm": Infinity, "learning_rate": 2.027623708722448e-09, "loss": 1.5273, "step": 1527 }, { "epoch": 0.9982034950187817, "grad_norm": Infinity, "learning_rate": 9.011677846504363e-10, "loss": 1.8061, "step": 1528 }, { "epoch": 0.9988567695574065, "grad_norm": Infinity, "learning_rate": 2.252921999401636e-10, "loss": 2.0641, "step": 1529 }, { "epoch": 0.9995100440960314, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 2.8842, "step": 1530 } ], "logging_steps": 1, "max_steps": 1530, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 383, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.299736884851507e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }