{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008, "grad_norm": 5.9686970710754395, "learning_rate": 4.9999921043206356e-06, "loss": 6.1536, "step": 1 }, { "epoch": 0.0016, "grad_norm": 5.313859462738037, "learning_rate": 4.999968417332415e-06, "loss": 1.8192, "step": 2 }, { "epoch": 0.0024, "grad_norm": 3.8611130714416504, "learning_rate": 4.999928939184958e-06, "loss": 5.7147, "step": 3 }, { "epoch": 0.0032, "grad_norm": 8.215139389038086, "learning_rate": 4.99987367012763e-06, "loss": 1.9633, "step": 4 }, { "epoch": 0.004, "grad_norm": 2.859307050704956, "learning_rate": 4.999802610509541e-06, "loss": 5.4413, "step": 5 }, { "epoch": 0.0048, "grad_norm": 10.999748229980469, "learning_rate": 4.999715760779541e-06, "loss": 1.9931, "step": 6 }, { "epoch": 0.0056, "grad_norm": 2.5857369899749756, "learning_rate": 4.999613121486222e-06, "loss": 5.2138, "step": 7 }, { "epoch": 0.0064, "grad_norm": 4.739017009735107, "learning_rate": 4.9994946932779076e-06, "loss": 1.5203, "step": 8 }, { "epoch": 0.0072, "grad_norm": 2.03410267829895, "learning_rate": 4.999360476902656e-06, "loss": 5.1349, "step": 9 }, { "epoch": 0.008, "grad_norm": 4.154623508453369, "learning_rate": 4.99921047320825e-06, "loss": 1.6121, "step": 10 }, { "epoch": 0.0088, "grad_norm": 1.8263472318649292, "learning_rate": 4.999044683142196e-06, "loss": 4.9737, "step": 11 }, { "epoch": 0.0096, "grad_norm": 4.39143705368042, "learning_rate": 4.998863107751711e-06, "loss": 1.4866, "step": 12 }, { "epoch": 0.0104, "grad_norm": 1.6841758489608765, "learning_rate": 4.998665748183727e-06, "loss": 5.0078, "step": 13 }, { "epoch": 0.0112, "grad_norm": 4.099013805389404, "learning_rate": 4.998452605684874e-06, "loss": 1.6304, "step": 14 }, { "epoch": 0.012, "grad_norm": 1.6769129037857056, "learning_rate": 4.9982236816014735e-06, "loss": 4.8359, "step": 15 }, { "epoch": 0.0128, "grad_norm": 3.6601269245147705, "learning_rate": 4.9979789773795365e-06, "loss": 1.6408, "step": 16 }, { "epoch": 0.0136, "grad_norm": 1.6234138011932373, "learning_rate": 4.997718494564747e-06, "loss": 4.9268, "step": 17 }, { "epoch": 0.0144, "grad_norm": 4.540709018707275, "learning_rate": 4.9974422348024565e-06, "loss": 1.4653, "step": 18 }, { "epoch": 0.0152, "grad_norm": 2.201477527618408, "learning_rate": 4.997150199837671e-06, "loss": 4.8255, "step": 19 }, { "epoch": 0.016, "grad_norm": 3.3589704036712646, "learning_rate": 4.996842391515045e-06, "loss": 1.3599, "step": 20 }, { "epoch": 0.0168, "grad_norm": 1.7828714847564697, "learning_rate": 4.996518811778858e-06, "loss": 4.7924, "step": 21 }, { "epoch": 0.0176, "grad_norm": 4.722200870513916, "learning_rate": 4.99617946267302e-06, "loss": 1.8165, "step": 22 }, { "epoch": 0.0184, "grad_norm": 1.5609182119369507, "learning_rate": 4.995824346341041e-06, "loss": 4.8322, "step": 23 }, { "epoch": 0.0192, "grad_norm": 3.8967134952545166, "learning_rate": 4.995453465026033e-06, "loss": 1.49, "step": 24 }, { "epoch": 0.02, "grad_norm": 2.199491024017334, "learning_rate": 4.9950668210706795e-06, "loss": 4.6516, "step": 25 }, { "epoch": 0.0208, "grad_norm": 4.164550304412842, "learning_rate": 4.994664416917236e-06, "loss": 1.3359, "step": 26 }, { "epoch": 0.0216, "grad_norm": 1.9813035726547241, "learning_rate": 4.994246255107506e-06, "loss": 4.6697, "step": 27 }, { "epoch": 0.0224, "grad_norm": 5.564512729644775, "learning_rate": 4.993812338282826e-06, "loss": 1.6348, "step": 28 }, { "epoch": 0.0232, "grad_norm": 2.7316086292266846, "learning_rate": 4.993362669184051e-06, "loss": 4.4999, "step": 29 }, { "epoch": 0.024, "grad_norm": 4.501605987548828, "learning_rate": 4.992897250651535e-06, "loss": 1.4643, "step": 30 }, { "epoch": 0.0248, "grad_norm": 1.8927552700042725, "learning_rate": 4.992416085625115e-06, "loss": 4.7085, "step": 31 }, { "epoch": 0.0256, "grad_norm": 4.785287380218506, "learning_rate": 4.9919191771440905e-06, "loss": 1.3398, "step": 32 }, { "epoch": 0.0264, "grad_norm": 2.4881515502929688, "learning_rate": 4.991406528347206e-06, "loss": 4.5912, "step": 33 }, { "epoch": 0.0272, "grad_norm": 4.189312934875488, "learning_rate": 4.990878142472628e-06, "loss": 1.4647, "step": 34 }, { "epoch": 0.028, "grad_norm": 2.654892921447754, "learning_rate": 4.990334022857932e-06, "loss": 4.4038, "step": 35 }, { "epoch": 0.0288, "grad_norm": 5.841195583343506, "learning_rate": 4.989774172940071e-06, "loss": 1.5347, "step": 36 }, { "epoch": 0.0296, "grad_norm": 3.269841432571411, "learning_rate": 4.989198596255361e-06, "loss": 4.4978, "step": 37 }, { "epoch": 0.0304, "grad_norm": 3.6912543773651123, "learning_rate": 4.988607296439459e-06, "loss": 1.3615, "step": 38 }, { "epoch": 0.0312, "grad_norm": 3.773468255996704, "learning_rate": 4.988000277227334e-06, "loss": 4.4462, "step": 39 }, { "epoch": 0.032, "grad_norm": 4.216678142547607, "learning_rate": 4.9873775424532515e-06, "loss": 1.3803, "step": 40 }, { "epoch": 0.0328, "grad_norm": 4.231056213378906, "learning_rate": 4.98673909605074e-06, "loss": 4.4349, "step": 41 }, { "epoch": 0.0336, "grad_norm": 4.05332088470459, "learning_rate": 4.986084942052577e-06, "loss": 1.3321, "step": 42 }, { "epoch": 0.0344, "grad_norm": 3.9502322673797607, "learning_rate": 4.985415084590752e-06, "loss": 4.2693, "step": 43 }, { "epoch": 0.0352, "grad_norm": 8.568007469177246, "learning_rate": 4.984729527896451e-06, "loss": 1.6135, "step": 44 }, { "epoch": 0.036, "grad_norm": 4.460508346557617, "learning_rate": 4.984028276300021e-06, "loss": 4.4412, "step": 45 }, { "epoch": 0.0368, "grad_norm": 7.591355323791504, "learning_rate": 4.9833113342309495e-06, "loss": 1.6569, "step": 46 }, { "epoch": 0.0376, "grad_norm": 5.810396194458008, "learning_rate": 4.9825787062178315e-06, "loss": 4.1632, "step": 47 }, { "epoch": 0.0384, "grad_norm": 10.894949913024902, "learning_rate": 4.9818303968883445e-06, "loss": 1.6721, "step": 48 }, { "epoch": 0.0392, "grad_norm": 4.217193126678467, "learning_rate": 4.981066410969215e-06, "loss": 4.1738, "step": 49 }, { "epoch": 0.04, "grad_norm": 8.75684642791748, "learning_rate": 4.980286753286196e-06, "loss": 1.3856, "step": 50 }, { "epoch": 0.0408, "grad_norm": 3.8983495235443115, "learning_rate": 4.9794914287640264e-06, "loss": 4.0982, "step": 51 }, { "epoch": 0.0416, "grad_norm": 6.7597527503967285, "learning_rate": 4.978680442426409e-06, "loss": 1.4406, "step": 52 }, { "epoch": 0.0424, "grad_norm": 5.493980407714844, "learning_rate": 4.977853799395976e-06, "loss": 4.3028, "step": 53 }, { "epoch": 0.0432, "grad_norm": 7.1781487464904785, "learning_rate": 4.977011504894253e-06, "loss": 1.4716, "step": 54 }, { "epoch": 0.044, "grad_norm": 4.196126937866211, "learning_rate": 4.9761535642416284e-06, "loss": 4.1292, "step": 55 }, { "epoch": 0.0448, "grad_norm": 7.720696926116943, "learning_rate": 4.975279982857324e-06, "loss": 1.5968, "step": 56 }, { "epoch": 0.0456, "grad_norm": 1.6588771343231201, "learning_rate": 4.974390766259353e-06, "loss": 4.2463, "step": 57 }, { "epoch": 0.0464, "grad_norm": 10.156800270080566, "learning_rate": 4.973485920064491e-06, "loss": 1.4834, "step": 58 }, { "epoch": 0.0472, "grad_norm": 1.59371018409729, "learning_rate": 4.972565449988238e-06, "loss": 4.0996, "step": 59 }, { "epoch": 0.048, "grad_norm": 8.833647727966309, "learning_rate": 4.971629361844785e-06, "loss": 1.6226, "step": 60 }, { "epoch": 0.0488, "grad_norm": 1.8904303312301636, "learning_rate": 4.970677661546972e-06, "loss": 4.1373, "step": 61 }, { "epoch": 0.0496, "grad_norm": 7.343002796173096, "learning_rate": 4.969710355106256e-06, "loss": 1.5989, "step": 62 }, { "epoch": 0.0504, "grad_norm": 1.5326752662658691, "learning_rate": 4.968727448632669e-06, "loss": 4.067, "step": 63 }, { "epoch": 0.0512, "grad_norm": 5.595536708831787, "learning_rate": 4.967728948334784e-06, "loss": 1.515, "step": 64 }, { "epoch": 0.052, "grad_norm": 2.240656852722168, "learning_rate": 4.96671486051967e-06, "loss": 3.9452, "step": 65 }, { "epoch": 0.0528, "grad_norm": 8.656717300415039, "learning_rate": 4.965685191592859e-06, "loss": 1.7592, "step": 66 }, { "epoch": 0.0536, "grad_norm": 1.6276272535324097, "learning_rate": 4.964639948058297e-06, "loss": 3.9894, "step": 67 }, { "epoch": 0.0544, "grad_norm": 5.7422075271606445, "learning_rate": 4.963579136518312e-06, "loss": 1.5689, "step": 68 }, { "epoch": 0.0552, "grad_norm": 1.9765911102294922, "learning_rate": 4.962502763673566e-06, "loss": 4.0761, "step": 69 }, { "epoch": 0.056, "grad_norm": 6.2184224128723145, "learning_rate": 4.961410836323014e-06, "loss": 1.5643, "step": 70 }, { "epoch": 0.0568, "grad_norm": 1.7013366222381592, "learning_rate": 4.960303361363863e-06, "loss": 3.9535, "step": 71 }, { "epoch": 0.0576, "grad_norm": 5.7151713371276855, "learning_rate": 4.959180345791528e-06, "loss": 1.3778, "step": 72 }, { "epoch": 0.0584, "grad_norm": 2.092637777328491, "learning_rate": 4.958041796699583e-06, "loss": 4.043, "step": 73 }, { "epoch": 0.0592, "grad_norm": 6.953094482421875, "learning_rate": 4.956887721279726e-06, "loss": 1.4149, "step": 74 }, { "epoch": 0.06, "grad_norm": 2.5431764125823975, "learning_rate": 4.9557181268217225e-06, "loss": 4.1433, "step": 75 }, { "epoch": 0.0608, "grad_norm": 5.6638665199279785, "learning_rate": 4.954533020713367e-06, "loss": 1.3123, "step": 76 }, { "epoch": 0.0616, "grad_norm": 2.033217668533325, "learning_rate": 4.953332410440434e-06, "loss": 4.12, "step": 77 }, { "epoch": 0.0624, "grad_norm": 5.832539081573486, "learning_rate": 4.952116303586631e-06, "loss": 1.4276, "step": 78 }, { "epoch": 0.0632, "grad_norm": 1.4119787216186523, "learning_rate": 4.95088470783355e-06, "loss": 3.9499, "step": 79 }, { "epoch": 0.064, "grad_norm": 5.931257247924805, "learning_rate": 4.949637630960618e-06, "loss": 1.5232, "step": 80 }, { "epoch": 0.0648, "grad_norm": 1.5532656908035278, "learning_rate": 4.94837508084505e-06, "loss": 3.9162, "step": 81 }, { "epoch": 0.0656, "grad_norm": 5.160223007202148, "learning_rate": 4.947097065461801e-06, "loss": 1.7749, "step": 82 }, { "epoch": 0.0664, "grad_norm": 1.274683952331543, "learning_rate": 4.945803592883509e-06, "loss": 3.9429, "step": 83 }, { "epoch": 0.0672, "grad_norm": 4.50646448135376, "learning_rate": 4.94449467128045e-06, "loss": 1.3428, "step": 84 }, { "epoch": 0.068, "grad_norm": 2.7638394832611084, "learning_rate": 4.943170308920484e-06, "loss": 4.0664, "step": 85 }, { "epoch": 0.0688, "grad_norm": 5.305659770965576, "learning_rate": 4.9418305141690045e-06, "loss": 1.6382, "step": 86 }, { "epoch": 0.0696, "grad_norm": 1.672782301902771, "learning_rate": 4.940475295488882e-06, "loss": 3.9736, "step": 87 }, { "epoch": 0.0704, "grad_norm": 4.357553482055664, "learning_rate": 4.939104661440415e-06, "loss": 1.2025, "step": 88 }, { "epoch": 0.0712, "grad_norm": 1.9459145069122314, "learning_rate": 4.937718620681273e-06, "loss": 3.8823, "step": 89 }, { "epoch": 0.072, "grad_norm": 4.6320085525512695, "learning_rate": 4.9363171819664434e-06, "loss": 1.4891, "step": 90 }, { "epoch": 0.0728, "grad_norm": 1.9804147481918335, "learning_rate": 4.934900354148173e-06, "loss": 3.673, "step": 91 }, { "epoch": 0.0736, "grad_norm": 5.650574684143066, "learning_rate": 4.933468146175918e-06, "loss": 1.6462, "step": 92 }, { "epoch": 0.0744, "grad_norm": 2.002102851867676, "learning_rate": 4.9320205670962815e-06, "loss": 3.9996, "step": 93 }, { "epoch": 0.0752, "grad_norm": 5.602189540863037, "learning_rate": 4.930557626052961e-06, "loss": 1.57, "step": 94 }, { "epoch": 0.076, "grad_norm": 1.618115782737732, "learning_rate": 4.929079332286685e-06, "loss": 3.9771, "step": 95 }, { "epoch": 0.0768, "grad_norm": 4.976815223693848, "learning_rate": 4.927585695135162e-06, "loss": 1.3109, "step": 96 }, { "epoch": 0.0776, "grad_norm": 1.5383416414260864, "learning_rate": 4.926076724033016e-06, "loss": 3.943, "step": 97 }, { "epoch": 0.0784, "grad_norm": 5.538623809814453, "learning_rate": 4.924552428511727e-06, "loss": 1.5928, "step": 98 }, { "epoch": 0.0792, "grad_norm": 1.1636689901351929, "learning_rate": 4.923012818199576e-06, "loss": 3.9089, "step": 99 }, { "epoch": 0.08, "grad_norm": 5.035048484802246, "learning_rate": 4.921457902821578e-06, "loss": 1.709, "step": 100 }, { "epoch": 0.0808, "grad_norm": 1.3163026571273804, "learning_rate": 4.919887692199423e-06, "loss": 3.9234, "step": 101 }, { "epoch": 0.0816, "grad_norm": 4.93280029296875, "learning_rate": 4.9183021962514145e-06, "loss": 1.4215, "step": 102 }, { "epoch": 0.0824, "grad_norm": 2.1531784534454346, "learning_rate": 4.9167014249924075e-06, "loss": 3.8196, "step": 103 }, { "epoch": 0.0832, "grad_norm": 4.800553798675537, "learning_rate": 4.915085388533743e-06, "loss": 1.573, "step": 104 }, { "epoch": 0.084, "grad_norm": 1.383305311203003, "learning_rate": 4.913454097083185e-06, "loss": 3.9708, "step": 105 }, { "epoch": 0.0848, "grad_norm": 4.389811038970947, "learning_rate": 4.911807560944858e-06, "loss": 1.3961, "step": 106 }, { "epoch": 0.0856, "grad_norm": 1.5299296379089355, "learning_rate": 4.910145790519177e-06, "loss": 3.8796, "step": 107 }, { "epoch": 0.0864, "grad_norm": 5.052987575531006, "learning_rate": 4.90846879630279e-06, "loss": 1.3103, "step": 108 }, { "epoch": 0.0872, "grad_norm": 1.417496919631958, "learning_rate": 4.906776588888502e-06, "loss": 3.9388, "step": 109 }, { "epoch": 0.088, "grad_norm": 4.012498378753662, "learning_rate": 4.905069178965215e-06, "loss": 1.1366, "step": 110 }, { "epoch": 0.0888, "grad_norm": 1.2801809310913086, "learning_rate": 4.903346577317859e-06, "loss": 3.872, "step": 111 }, { "epoch": 0.0896, "grad_norm": 5.76353120803833, "learning_rate": 4.901608794827321e-06, "loss": 1.5188, "step": 112 }, { "epoch": 0.0904, "grad_norm": 1.5510302782058716, "learning_rate": 4.89985584247038e-06, "loss": 3.807, "step": 113 }, { "epoch": 0.0912, "grad_norm": 4.934327125549316, "learning_rate": 4.898087731319637e-06, "loss": 1.6052, "step": 114 }, { "epoch": 0.092, "grad_norm": 1.849161982536316, "learning_rate": 4.89630447254344e-06, "loss": 3.8367, "step": 115 }, { "epoch": 0.0928, "grad_norm": 5.75076150894165, "learning_rate": 4.894506077405824e-06, "loss": 1.6729, "step": 116 }, { "epoch": 0.0936, "grad_norm": 1.3285000324249268, "learning_rate": 4.892692557266429e-06, "loss": 3.9178, "step": 117 }, { "epoch": 0.0944, "grad_norm": 5.176731586456299, "learning_rate": 4.8908639235804324e-06, "loss": 1.3498, "step": 118 }, { "epoch": 0.0952, "grad_norm": 2.258445978164673, "learning_rate": 4.88902018789848e-06, "loss": 3.9289, "step": 119 }, { "epoch": 0.096, "grad_norm": 4.080480575561523, "learning_rate": 4.887161361866608e-06, "loss": 1.2727, "step": 120 }, { "epoch": 0.0968, "grad_norm": 1.3605031967163086, "learning_rate": 4.8852874572261715e-06, "loss": 3.8425, "step": 121 }, { "epoch": 0.0976, "grad_norm": 4.4306135177612305, "learning_rate": 4.883398485813772e-06, "loss": 1.4429, "step": 122 }, { "epoch": 0.0984, "grad_norm": 1.9310946464538574, "learning_rate": 4.881494459561177e-06, "loss": 3.7989, "step": 123 }, { "epoch": 0.0992, "grad_norm": 5.516058444976807, "learning_rate": 4.879575390495254e-06, "loss": 1.6466, "step": 124 }, { "epoch": 0.1, "grad_norm": 1.665083646774292, "learning_rate": 4.8776412907378845e-06, "loss": 3.7725, "step": 125 }, { "epoch": 0.1008, "grad_norm": 5.122972011566162, "learning_rate": 4.8756921725058935e-06, "loss": 1.4164, "step": 126 }, { "epoch": 0.1016, "grad_norm": 1.7785176038742065, "learning_rate": 4.873728048110973e-06, "loss": 3.8428, "step": 127 }, { "epoch": 0.1024, "grad_norm": 4.19711446762085, "learning_rate": 4.871748929959598e-06, "loss": 1.4346, "step": 128 }, { "epoch": 0.1032, "grad_norm": 1.5167326927185059, "learning_rate": 4.869754830552956e-06, "loss": 3.7787, "step": 129 }, { "epoch": 0.104, "grad_norm": 4.343649387359619, "learning_rate": 4.867745762486862e-06, "loss": 1.4161, "step": 130 }, { "epoch": 0.1048, "grad_norm": 1.7682503461837769, "learning_rate": 4.86572173845168e-06, "loss": 3.7656, "step": 131 }, { "epoch": 0.1056, "grad_norm": 5.387735843658447, "learning_rate": 4.863682771232249e-06, "loss": 1.5529, "step": 132 }, { "epoch": 0.1064, "grad_norm": 1.6323776245117188, "learning_rate": 4.861628873707792e-06, "loss": 3.7581, "step": 133 }, { "epoch": 0.1072, "grad_norm": 4.973332405090332, "learning_rate": 4.859560058851844e-06, "loss": 1.3401, "step": 134 }, { "epoch": 0.108, "grad_norm": 2.288790464401245, "learning_rate": 4.857476339732162e-06, "loss": 3.5462, "step": 135 }, { "epoch": 0.1088, "grad_norm": 4.954509735107422, "learning_rate": 4.855377729510648e-06, "loss": 1.4214, "step": 136 }, { "epoch": 0.1096, "grad_norm": 1.466504693031311, "learning_rate": 4.8532642414432675e-06, "loss": 3.7383, "step": 137 }, { "epoch": 0.1104, "grad_norm": 4.507660865783691, "learning_rate": 4.851135888879958e-06, "loss": 1.429, "step": 138 }, { "epoch": 0.1112, "grad_norm": 1.4335397481918335, "learning_rate": 4.8489926852645505e-06, "loss": 3.8185, "step": 139 }, { "epoch": 0.112, "grad_norm": 5.188979148864746, "learning_rate": 4.846834644134686e-06, "loss": 1.288, "step": 140 }, { "epoch": 0.1128, "grad_norm": 1.4267185926437378, "learning_rate": 4.844661779121723e-06, "loss": 3.7755, "step": 141 }, { "epoch": 0.1136, "grad_norm": 4.5999555587768555, "learning_rate": 4.842474103950658e-06, "loss": 1.4337, "step": 142 }, { "epoch": 0.1144, "grad_norm": 1.5960358381271362, "learning_rate": 4.8402716324400375e-06, "loss": 3.8674, "step": 143 }, { "epoch": 0.1152, "grad_norm": 4.50584077835083, "learning_rate": 4.838054378501868e-06, "loss": 1.4054, "step": 144 }, { "epoch": 0.116, "grad_norm": 2.3714451789855957, "learning_rate": 4.8358223561415304e-06, "loss": 3.6878, "step": 145 }, { "epoch": 0.1168, "grad_norm": 4.409125328063965, "learning_rate": 4.833575579457691e-06, "loss": 1.4443, "step": 146 }, { "epoch": 0.1176, "grad_norm": 1.876566767692566, "learning_rate": 4.831314062642213e-06, "loss": 3.9204, "step": 147 }, { "epoch": 0.1184, "grad_norm": 4.678242206573486, "learning_rate": 4.829037819980065e-06, "loss": 1.3475, "step": 148 }, { "epoch": 0.1192, "grad_norm": 1.5604186058044434, "learning_rate": 4.8267468658492335e-06, "loss": 3.8065, "step": 149 }, { "epoch": 0.12, "grad_norm": 4.738994598388672, "learning_rate": 4.824441214720629e-06, "loss": 1.2868, "step": 150 }, { "epoch": 0.1208, "grad_norm": 1.2587168216705322, "learning_rate": 4.822120881157998e-06, "loss": 3.8178, "step": 151 }, { "epoch": 0.1216, "grad_norm": 4.9535298347473145, "learning_rate": 4.819785879817827e-06, "loss": 1.4865, "step": 152 }, { "epoch": 0.1224, "grad_norm": 1.3460506200790405, "learning_rate": 4.8174362254492555e-06, "loss": 3.7509, "step": 153 }, { "epoch": 0.1232, "grad_norm": 6.2948832511901855, "learning_rate": 4.815071932893976e-06, "loss": 1.6562, "step": 154 }, { "epoch": 0.124, "grad_norm": 1.2623156309127808, "learning_rate": 4.812693017086145e-06, "loss": 3.7352, "step": 155 }, { "epoch": 0.1248, "grad_norm": 4.746945858001709, "learning_rate": 4.810299493052289e-06, "loss": 1.4701, "step": 156 }, { "epoch": 0.1256, "grad_norm": 1.41659414768219, "learning_rate": 4.807891375911207e-06, "loss": 3.7158, "step": 157 }, { "epoch": 0.1264, "grad_norm": 5.151709079742432, "learning_rate": 4.805468680873874e-06, "loss": 1.5235, "step": 158 }, { "epoch": 0.1272, "grad_norm": 1.1390382051467896, "learning_rate": 4.803031423243349e-06, "loss": 3.7685, "step": 159 }, { "epoch": 0.128, "grad_norm": 4.6451802253723145, "learning_rate": 4.800579618414677e-06, "loss": 1.3374, "step": 160 }, { "epoch": 0.1288, "grad_norm": 2.0730605125427246, "learning_rate": 4.798113281874788e-06, "loss": 3.7551, "step": 161 }, { "epoch": 0.1296, "grad_norm": 4.244422435760498, "learning_rate": 4.7956324292024045e-06, "loss": 1.4507, "step": 162 }, { "epoch": 0.1304, "grad_norm": 1.437325119972229, "learning_rate": 4.7931370760679415e-06, "loss": 3.8459, "step": 163 }, { "epoch": 0.1312, "grad_norm": 4.308803558349609, "learning_rate": 4.790627238233405e-06, "loss": 1.4397, "step": 164 }, { "epoch": 0.132, "grad_norm": 1.3514691591262817, "learning_rate": 4.788102931552294e-06, "loss": 3.7826, "step": 165 }, { "epoch": 0.1328, "grad_norm": 4.431159973144531, "learning_rate": 4.785564171969503e-06, "loss": 1.3688, "step": 166 }, { "epoch": 0.1336, "grad_norm": 1.9444341659545898, "learning_rate": 4.783010975521216e-06, "loss": 3.786, "step": 167 }, { "epoch": 0.1344, "grad_norm": 4.421632289886475, "learning_rate": 4.78044335833481e-06, "loss": 1.3799, "step": 168 }, { "epoch": 0.1352, "grad_norm": 1.30320143699646, "learning_rate": 4.777861336628751e-06, "loss": 3.7414, "step": 169 }, { "epoch": 0.136, "grad_norm": 4.836937427520752, "learning_rate": 4.775264926712489e-06, "loss": 1.3762, "step": 170 }, { "epoch": 0.1368, "grad_norm": 1.720489501953125, "learning_rate": 4.772654144986364e-06, "loss": 3.7693, "step": 171 }, { "epoch": 0.1376, "grad_norm": 4.573201656341553, "learning_rate": 4.77002900794149e-06, "loss": 1.4831, "step": 172 }, { "epoch": 0.1384, "grad_norm": 1.4767590761184692, "learning_rate": 4.767389532159659e-06, "loss": 3.7936, "step": 173 }, { "epoch": 0.1392, "grad_norm": 4.3813090324401855, "learning_rate": 4.764735734313236e-06, "loss": 1.3468, "step": 174 }, { "epoch": 0.14, "grad_norm": 1.5614203214645386, "learning_rate": 4.762067631165049e-06, "loss": 3.8268, "step": 175 }, { "epoch": 0.1408, "grad_norm": 4.7881317138671875, "learning_rate": 4.75938523956829e-06, "loss": 1.6201, "step": 176 }, { "epoch": 0.1416, "grad_norm": 1.2957278490066528, "learning_rate": 4.756688576466398e-06, "loss": 3.7073, "step": 177 }, { "epoch": 0.1424, "grad_norm": 4.188969612121582, "learning_rate": 4.753977658892967e-06, "loss": 1.4572, "step": 178 }, { "epoch": 0.1432, "grad_norm": 2.046276330947876, "learning_rate": 4.751252503971624e-06, "loss": 3.6809, "step": 179 }, { "epoch": 0.144, "grad_norm": 4.05677604675293, "learning_rate": 4.748513128915928e-06, "loss": 1.3311, "step": 180 }, { "epoch": 0.1448, "grad_norm": 1.2244303226470947, "learning_rate": 4.7457595510292615e-06, "loss": 3.8316, "step": 181 }, { "epoch": 0.1456, "grad_norm": 4.775726795196533, "learning_rate": 4.74299178770472e-06, "loss": 1.5603, "step": 182 }, { "epoch": 0.1464, "grad_norm": 1.41436767578125, "learning_rate": 4.740209856424998e-06, "loss": 3.7105, "step": 183 }, { "epoch": 0.1472, "grad_norm": 5.448317527770996, "learning_rate": 4.737413774762287e-06, "loss": 1.2361, "step": 184 }, { "epoch": 0.148, "grad_norm": 1.222730040550232, "learning_rate": 4.73460356037816e-06, "loss": 3.8072, "step": 185 }, { "epoch": 0.1488, "grad_norm": 4.413971900939941, "learning_rate": 4.731779231023456e-06, "loss": 1.6303, "step": 186 }, { "epoch": 0.1496, "grad_norm": 1.4510987997055054, "learning_rate": 4.728940804538176e-06, "loss": 3.6988, "step": 187 }, { "epoch": 0.1504, "grad_norm": 4.780493259429932, "learning_rate": 4.726088298851362e-06, "loss": 1.1804, "step": 188 }, { "epoch": 0.1512, "grad_norm": 1.5533583164215088, "learning_rate": 4.723221731980993e-06, "loss": 3.6128, "step": 189 }, { "epoch": 0.152, "grad_norm": 4.775524616241455, "learning_rate": 4.720341122033862e-06, "loss": 1.5147, "step": 190 }, { "epoch": 0.1528, "grad_norm": 1.6876249313354492, "learning_rate": 4.717446487205466e-06, "loss": 3.7315, "step": 191 }, { "epoch": 0.1536, "grad_norm": 3.9606497287750244, "learning_rate": 4.714537845779894e-06, "loss": 1.3284, "step": 192 }, { "epoch": 0.1544, "grad_norm": 1.2425357103347778, "learning_rate": 4.7116152161297045e-06, "loss": 3.7983, "step": 193 }, { "epoch": 0.1552, "grad_norm": 3.9687187671661377, "learning_rate": 4.708678616715815e-06, "loss": 1.3479, "step": 194 }, { "epoch": 0.156, "grad_norm": 1.5664615631103516, "learning_rate": 4.705728066087384e-06, "loss": 3.7247, "step": 195 }, { "epoch": 0.1568, "grad_norm": 4.444562911987305, "learning_rate": 4.702763582881692e-06, "loss": 1.2835, "step": 196 }, { "epoch": 0.1576, "grad_norm": 1.8698633909225464, "learning_rate": 4.699785185824026e-06, "loss": 3.8091, "step": 197 }, { "epoch": 0.1584, "grad_norm": 4.637014389038086, "learning_rate": 4.696792893727562e-06, "loss": 1.3871, "step": 198 }, { "epoch": 0.1592, "grad_norm": 1.3571611642837524, "learning_rate": 4.693786725493242e-06, "loss": 3.7813, "step": 199 }, { "epoch": 0.16, "grad_norm": 4.458593368530273, "learning_rate": 4.690766700109659e-06, "loss": 1.4933, "step": 200 }, { "epoch": 0.1608, "grad_norm": 1.5887341499328613, "learning_rate": 4.687732836652935e-06, "loss": 3.6873, "step": 201 }, { "epoch": 0.1616, "grad_norm": 6.06688928604126, "learning_rate": 4.684685154286599e-06, "loss": 1.312, "step": 202 }, { "epoch": 0.1624, "grad_norm": 1.5234293937683105, "learning_rate": 4.6816236722614694e-06, "loss": 3.7146, "step": 203 }, { "epoch": 0.1632, "grad_norm": 4.001331806182861, "learning_rate": 4.6785484099155324e-06, "loss": 1.4507, "step": 204 }, { "epoch": 0.164, "grad_norm": 1.5702141523361206, "learning_rate": 4.675459386673815e-06, "loss": 3.6801, "step": 205 }, { "epoch": 0.1648, "grad_norm": 3.6314635276794434, "learning_rate": 4.672356622048266e-06, "loss": 1.2263, "step": 206 }, { "epoch": 0.1656, "grad_norm": 1.422735571861267, "learning_rate": 4.669240135637635e-06, "loss": 3.6963, "step": 207 }, { "epoch": 0.1664, "grad_norm": 4.454765796661377, "learning_rate": 4.666109947127343e-06, "loss": 1.1784, "step": 208 }, { "epoch": 0.1672, "grad_norm": 2.0289947986602783, "learning_rate": 4.662966076289363e-06, "loss": 3.8096, "step": 209 }, { "epoch": 0.168, "grad_norm": 4.10106086730957, "learning_rate": 4.659808542982089e-06, "loss": 1.3621, "step": 210 }, { "epoch": 0.1688, "grad_norm": 1.7755879163742065, "learning_rate": 4.65663736715022e-06, "loss": 3.6229, "step": 211 }, { "epoch": 0.1696, "grad_norm": 3.9878623485565186, "learning_rate": 4.653452568824625e-06, "loss": 1.3814, "step": 212 }, { "epoch": 0.1704, "grad_norm": 1.2768726348876953, "learning_rate": 4.650254168122222e-06, "loss": 3.7008, "step": 213 }, { "epoch": 0.1712, "grad_norm": 3.8291852474212646, "learning_rate": 4.647042185245848e-06, "loss": 1.3145, "step": 214 }, { "epoch": 0.172, "grad_norm": 1.5507771968841553, "learning_rate": 4.6438166404841316e-06, "loss": 3.6915, "step": 215 }, { "epoch": 0.1728, "grad_norm": 4.554000377655029, "learning_rate": 4.640577554211366e-06, "loss": 1.2244, "step": 216 }, { "epoch": 0.1736, "grad_norm": 1.2744420766830444, "learning_rate": 4.637324946887384e-06, "loss": 3.7756, "step": 217 }, { "epoch": 0.1744, "grad_norm": 5.061426162719727, "learning_rate": 4.634058839057417e-06, "loss": 1.479, "step": 218 }, { "epoch": 0.1752, "grad_norm": 1.7611600160598755, "learning_rate": 4.63077925135198e-06, "loss": 3.7824, "step": 219 }, { "epoch": 0.176, "grad_norm": 5.889009952545166, "learning_rate": 4.62748620448673e-06, "loss": 1.4081, "step": 220 }, { "epoch": 0.1768, "grad_norm": 1.560341238975525, "learning_rate": 4.624179719262342e-06, "loss": 3.7535, "step": 221 }, { "epoch": 0.1776, "grad_norm": 4.9289231300354, "learning_rate": 4.620859816564371e-06, "loss": 1.4075, "step": 222 }, { "epoch": 0.1784, "grad_norm": 1.3027839660644531, "learning_rate": 4.6175265173631304e-06, "loss": 3.7511, "step": 223 }, { "epoch": 0.1792, "grad_norm": 4.20517635345459, "learning_rate": 4.6141798427135475e-06, "loss": 1.2056, "step": 224 }, { "epoch": 0.18, "grad_norm": 1.9253166913986206, "learning_rate": 4.610819813755038e-06, "loss": 3.5762, "step": 225 }, { "epoch": 0.1808, "grad_norm": 4.654662609100342, "learning_rate": 4.607446451711372e-06, "loss": 1.4106, "step": 226 }, { "epoch": 0.1816, "grad_norm": 1.6170463562011719, "learning_rate": 4.604059777890537e-06, "loss": 3.5927, "step": 227 }, { "epoch": 0.1824, "grad_norm": 4.272345066070557, "learning_rate": 4.6006598136846056e-06, "loss": 1.3751, "step": 228 }, { "epoch": 0.1832, "grad_norm": 1.1468439102172852, "learning_rate": 4.5972465805696e-06, "loss": 3.7235, "step": 229 }, { "epoch": 0.184, "grad_norm": 4.337528705596924, "learning_rate": 4.593820100105355e-06, "loss": 1.212, "step": 230 }, { "epoch": 0.1848, "grad_norm": 1.6321645975112915, "learning_rate": 4.590380393935383e-06, "loss": 3.7544, "step": 231 }, { "epoch": 0.1856, "grad_norm": 4.132114410400391, "learning_rate": 4.586927483786739e-06, "loss": 1.4566, "step": 232 }, { "epoch": 0.1864, "grad_norm": 1.6077178716659546, "learning_rate": 4.583461391469879e-06, "loss": 3.6934, "step": 233 }, { "epoch": 0.1872, "grad_norm": 4.226905345916748, "learning_rate": 4.579982138878527e-06, "loss": 1.5507, "step": 234 }, { "epoch": 0.188, "grad_norm": 1.280689001083374, "learning_rate": 4.576489747989532e-06, "loss": 3.77, "step": 235 }, { "epoch": 0.1888, "grad_norm": 3.9274861812591553, "learning_rate": 4.572984240862733e-06, "loss": 1.5939, "step": 236 }, { "epoch": 0.1896, "grad_norm": 1.420904278755188, "learning_rate": 4.56946563964082e-06, "loss": 3.5977, "step": 237 }, { "epoch": 0.1904, "grad_norm": 4.135627746582031, "learning_rate": 4.5659339665491894e-06, "loss": 1.2989, "step": 238 }, { "epoch": 0.1912, "grad_norm": 1.301414966583252, "learning_rate": 4.562389243895807e-06, "loss": 3.6786, "step": 239 }, { "epoch": 0.192, "grad_norm": 4.637629508972168, "learning_rate": 4.558831494071069e-06, "loss": 1.4187, "step": 240 }, { "epoch": 0.1928, "grad_norm": 1.2166482210159302, "learning_rate": 4.555260739547657e-06, "loss": 3.6755, "step": 241 }, { "epoch": 0.1936, "grad_norm": 3.494554281234741, "learning_rate": 4.551677002880395e-06, "loss": 1.0023, "step": 242 }, { "epoch": 0.1944, "grad_norm": 1.2456482648849487, "learning_rate": 4.548080306706114e-06, "loss": 3.7268, "step": 243 }, { "epoch": 0.1952, "grad_norm": 3.789717674255371, "learning_rate": 4.544470673743502e-06, "loss": 1.1345, "step": 244 }, { "epoch": 0.196, "grad_norm": 1.615335464477539, "learning_rate": 4.54084812679296e-06, "loss": 3.5679, "step": 245 }, { "epoch": 0.1968, "grad_norm": 4.087082862854004, "learning_rate": 4.537212688736466e-06, "loss": 1.5294, "step": 246 }, { "epoch": 0.1976, "grad_norm": 1.3239346742630005, "learning_rate": 4.533564382537421e-06, "loss": 3.8232, "step": 247 }, { "epoch": 0.1984, "grad_norm": 3.6679818630218506, "learning_rate": 4.529903231240511e-06, "loss": 1.1619, "step": 248 }, { "epoch": 0.1992, "grad_norm": 1.6263890266418457, "learning_rate": 4.526229257971556e-06, "loss": 3.7185, "step": 249 }, { "epoch": 0.2, "grad_norm": 4.270927429199219, "learning_rate": 4.522542485937369e-06, "loss": 1.4918, "step": 250 }, { "epoch": 0.2008, "grad_norm": 1.6562573909759521, "learning_rate": 4.518842938425606e-06, "loss": 3.7609, "step": 251 }, { "epoch": 0.2016, "grad_norm": 4.229763031005859, "learning_rate": 4.5151306388046175e-06, "loss": 1.1358, "step": 252 }, { "epoch": 0.2024, "grad_norm": 1.3031507730484009, "learning_rate": 4.511405610523309e-06, "loss": 3.6721, "step": 253 }, { "epoch": 0.2032, "grad_norm": 4.729180335998535, "learning_rate": 4.507667877110982e-06, "loss": 1.5732, "step": 254 }, { "epoch": 0.204, "grad_norm": 1.4898425340652466, "learning_rate": 4.503917462177192e-06, "loss": 3.6121, "step": 255 }, { "epoch": 0.2048, "grad_norm": 4.497402667999268, "learning_rate": 4.500154389411598e-06, "loss": 1.3272, "step": 256 }, { "epoch": 0.2056, "grad_norm": 1.141797423362732, "learning_rate": 4.496378682583813e-06, "loss": 3.6704, "step": 257 }, { "epoch": 0.2064, "grad_norm": 4.572139739990234, "learning_rate": 4.492590365543253e-06, "loss": 1.4076, "step": 258 }, { "epoch": 0.2072, "grad_norm": 1.6577672958374023, "learning_rate": 4.488789462218988e-06, "loss": 3.6953, "step": 259 }, { "epoch": 0.208, "grad_norm": 4.384160041809082, "learning_rate": 4.4849759966195885e-06, "loss": 1.2979, "step": 260 }, { "epoch": 0.2088, "grad_norm": 1.2096525430679321, "learning_rate": 4.4811499928329775e-06, "loss": 3.7744, "step": 261 }, { "epoch": 0.2096, "grad_norm": 4.4223246574401855, "learning_rate": 4.477311475026271e-06, "loss": 1.3639, "step": 262 }, { "epoch": 0.2104, "grad_norm": 1.2359306812286377, "learning_rate": 4.473460467445637e-06, "loss": 3.6689, "step": 263 }, { "epoch": 0.2112, "grad_norm": 4.513794898986816, "learning_rate": 4.469596994416131e-06, "loss": 1.2571, "step": 264 }, { "epoch": 0.212, "grad_norm": 1.4100075960159302, "learning_rate": 4.465721080341547e-06, "loss": 3.669, "step": 265 }, { "epoch": 0.2128, "grad_norm": 4.375431537628174, "learning_rate": 4.4618327497042676e-06, "loss": 1.3244, "step": 266 }, { "epoch": 0.2136, "grad_norm": 1.1597020626068115, "learning_rate": 4.457932027065102e-06, "loss": 3.7463, "step": 267 }, { "epoch": 0.2144, "grad_norm": 4.304786682128906, "learning_rate": 4.4540189370631315e-06, "loss": 1.2498, "step": 268 }, { "epoch": 0.2152, "grad_norm": 1.5611578226089478, "learning_rate": 4.450093504415562e-06, "loss": 3.7, "step": 269 }, { "epoch": 0.216, "grad_norm": 4.710305213928223, "learning_rate": 4.446155753917559e-06, "loss": 1.4829, "step": 270 }, { "epoch": 0.2168, "grad_norm": 1.0595712661743164, "learning_rate": 4.442205710442095e-06, "loss": 3.7709, "step": 271 }, { "epoch": 0.2176, "grad_norm": 4.113396644592285, "learning_rate": 4.43824339893979e-06, "loss": 1.4732, "step": 272 }, { "epoch": 0.2184, "grad_norm": 1.346928358078003, "learning_rate": 4.434268844438758e-06, "loss": 3.6034, "step": 273 }, { "epoch": 0.2192, "grad_norm": 4.2482452392578125, "learning_rate": 4.4302820720444454e-06, "loss": 1.3669, "step": 274 }, { "epoch": 0.22, "grad_norm": 1.1629118919372559, "learning_rate": 4.426283106939474e-06, "loss": 3.7432, "step": 275 }, { "epoch": 0.2208, "grad_norm": 3.7786972522735596, "learning_rate": 4.422271974383479e-06, "loss": 1.3379, "step": 276 }, { "epoch": 0.2216, "grad_norm": 1.7842165231704712, "learning_rate": 4.418248699712955e-06, "loss": 3.6675, "step": 277 }, { "epoch": 0.2224, "grad_norm": 3.950294017791748, "learning_rate": 4.414213308341092e-06, "loss": 1.5301, "step": 278 }, { "epoch": 0.2232, "grad_norm": 1.4630101919174194, "learning_rate": 4.410165825757613e-06, "loss": 3.571, "step": 279 }, { "epoch": 0.224, "grad_norm": 4.155986309051514, "learning_rate": 4.40610627752862e-06, "loss": 1.3453, "step": 280 }, { "epoch": 0.2248, "grad_norm": 1.698153018951416, "learning_rate": 4.402034689296425e-06, "loss": 3.6699, "step": 281 }, { "epoch": 0.2256, "grad_norm": 4.893118858337402, "learning_rate": 4.397951086779392e-06, "loss": 1.6296, "step": 282 }, { "epoch": 0.2264, "grad_norm": 1.9244930744171143, "learning_rate": 4.393855495771774e-06, "loss": 3.728, "step": 283 }, { "epoch": 0.2272, "grad_norm": 4.7193827629089355, "learning_rate": 4.389747942143549e-06, "loss": 1.3797, "step": 284 }, { "epoch": 0.228, "grad_norm": 1.3077738285064697, "learning_rate": 4.38562845184026e-06, "loss": 3.7899, "step": 285 }, { "epoch": 0.2288, "grad_norm": 4.431347370147705, "learning_rate": 4.381497050882845e-06, "loss": 1.6555, "step": 286 }, { "epoch": 0.2296, "grad_norm": 1.5692718029022217, "learning_rate": 4.377353765367479e-06, "loss": 3.6771, "step": 287 }, { "epoch": 0.2304, "grad_norm": 3.9838104248046875, "learning_rate": 4.373198621465405e-06, "loss": 1.1383, "step": 288 }, { "epoch": 0.2312, "grad_norm": 1.101969838142395, "learning_rate": 4.369031645422768e-06, "loss": 3.6786, "step": 289 }, { "epoch": 0.232, "grad_norm": 4.563289165496826, "learning_rate": 4.364852863560456e-06, "loss": 1.2641, "step": 290 }, { "epoch": 0.2328, "grad_norm": 1.3112094402313232, "learning_rate": 4.360662302273926e-06, "loss": 3.7925, "step": 291 }, { "epoch": 0.2336, "grad_norm": 4.193509578704834, "learning_rate": 4.356459988033039e-06, "loss": 1.1937, "step": 292 }, { "epoch": 0.2344, "grad_norm": 1.167222499847412, "learning_rate": 4.352245947381897e-06, "loss": 3.6606, "step": 293 }, { "epoch": 0.2352, "grad_norm": 5.211182117462158, "learning_rate": 4.348020206938672e-06, "loss": 1.5236, "step": 294 }, { "epoch": 0.236, "grad_norm": 1.5906448364257812, "learning_rate": 4.343782793395435e-06, "loss": 3.6172, "step": 295 }, { "epoch": 0.2368, "grad_norm": 4.557344913482666, "learning_rate": 4.3395337335179945e-06, "loss": 1.2071, "step": 296 }, { "epoch": 0.2376, "grad_norm": 1.5080584287643433, "learning_rate": 4.3352730541457215e-06, "loss": 3.5182, "step": 297 }, { "epoch": 0.2384, "grad_norm": 4.691150665283203, "learning_rate": 4.331000782191384e-06, "loss": 1.4428, "step": 298 }, { "epoch": 0.2392, "grad_norm": 1.2369650602340698, "learning_rate": 4.32671694464097e-06, "loss": 3.6389, "step": 299 }, { "epoch": 0.24, "grad_norm": 5.130438327789307, "learning_rate": 4.322421568553529e-06, "loss": 1.4164, "step": 300 }, { "epoch": 0.2408, "grad_norm": 1.76595938205719, "learning_rate": 4.318114681060989e-06, "loss": 3.5655, "step": 301 }, { "epoch": 0.2416, "grad_norm": 4.4846954345703125, "learning_rate": 4.3137963093679945e-06, "loss": 1.4369, "step": 302 }, { "epoch": 0.2424, "grad_norm": 1.5124865770339966, "learning_rate": 4.309466480751726e-06, "loss": 3.5159, "step": 303 }, { "epoch": 0.2432, "grad_norm": 4.232130527496338, "learning_rate": 4.305125222561736e-06, "loss": 1.5252, "step": 304 }, { "epoch": 0.244, "grad_norm": 1.544097900390625, "learning_rate": 4.3007725622197675e-06, "loss": 3.7571, "step": 305 }, { "epoch": 0.2448, "grad_norm": 3.7335703372955322, "learning_rate": 4.296408527219592e-06, "loss": 1.2674, "step": 306 }, { "epoch": 0.2456, "grad_norm": 1.2222108840942383, "learning_rate": 4.2920331451268246e-06, "loss": 3.6799, "step": 307 }, { "epoch": 0.2464, "grad_norm": 4.682336807250977, "learning_rate": 4.2876464435787576e-06, "loss": 1.3907, "step": 308 }, { "epoch": 0.2472, "grad_norm": 1.7839024066925049, "learning_rate": 4.283248450284182e-06, "loss": 3.4632, "step": 309 }, { "epoch": 0.248, "grad_norm": 4.441279411315918, "learning_rate": 4.278839193023214e-06, "loss": 1.4755, "step": 310 }, { "epoch": 0.2488, "grad_norm": 1.5365478992462158, "learning_rate": 4.274418699647117e-06, "loss": 3.5074, "step": 311 }, { "epoch": 0.2496, "grad_norm": 4.5583062171936035, "learning_rate": 4.269986998078132e-06, "loss": 1.681, "step": 312 }, { "epoch": 0.2504, "grad_norm": 1.4559458494186401, "learning_rate": 4.265544116309294e-06, "loss": 3.5942, "step": 313 }, { "epoch": 0.2512, "grad_norm": 4.114186763763428, "learning_rate": 4.2610900824042575e-06, "loss": 1.6586, "step": 314 }, { "epoch": 0.252, "grad_norm": 1.3927795886993408, "learning_rate": 4.256624924497124e-06, "loss": 3.6604, "step": 315 }, { "epoch": 0.2528, "grad_norm": 3.7071781158447266, "learning_rate": 4.2521486707922545e-06, "loss": 1.3165, "step": 316 }, { "epoch": 0.2536, "grad_norm": 1.5977774858474731, "learning_rate": 4.247661349564103e-06, "loss": 3.71, "step": 317 }, { "epoch": 0.2544, "grad_norm": 4.849422931671143, "learning_rate": 4.243162989157027e-06, "loss": 1.4173, "step": 318 }, { "epoch": 0.2552, "grad_norm": 1.525455355644226, "learning_rate": 4.2386536179851175e-06, "loss": 3.5833, "step": 319 }, { "epoch": 0.256, "grad_norm": 4.420166969299316, "learning_rate": 4.234133264532012e-06, "loss": 1.2962, "step": 320 }, { "epoch": 0.2568, "grad_norm": 1.18903386592865, "learning_rate": 4.229601957350722e-06, "loss": 3.6984, "step": 321 }, { "epoch": 0.2576, "grad_norm": 3.8449833393096924, "learning_rate": 4.225059725063444e-06, "loss": 1.3112, "step": 322 }, { "epoch": 0.2584, "grad_norm": 1.7980787754058838, "learning_rate": 4.220506596361387e-06, "loss": 3.5587, "step": 323 }, { "epoch": 0.2592, "grad_norm": 3.5607681274414062, "learning_rate": 4.215942600004586e-06, "loss": 1.2554, "step": 324 }, { "epoch": 0.26, "grad_norm": 1.572067379951477, "learning_rate": 4.211367764821722e-06, "loss": 3.7133, "step": 325 }, { "epoch": 0.2608, "grad_norm": 10.11608600616455, "learning_rate": 4.206782119709942e-06, "loss": 1.5166, "step": 326 }, { "epoch": 0.2616, "grad_norm": 1.5986098051071167, "learning_rate": 4.202185693634671e-06, "loss": 3.6253, "step": 327 }, { "epoch": 0.2624, "grad_norm": 3.9274239540100098, "learning_rate": 4.197578515629435e-06, "loss": 1.311, "step": 328 }, { "epoch": 0.2632, "grad_norm": 1.2195369005203247, "learning_rate": 4.192960614795676e-06, "loss": 3.7322, "step": 329 }, { "epoch": 0.264, "grad_norm": 4.052531719207764, "learning_rate": 4.188332020302561e-06, "loss": 1.3612, "step": 330 }, { "epoch": 0.2648, "grad_norm": 1.4489315748214722, "learning_rate": 4.183692761386813e-06, "loss": 3.534, "step": 331 }, { "epoch": 0.2656, "grad_norm": 5.4260053634643555, "learning_rate": 4.1790428673525104e-06, "loss": 1.523, "step": 332 }, { "epoch": 0.2664, "grad_norm": 1.6070371866226196, "learning_rate": 4.1743823675709115e-06, "loss": 3.4917, "step": 333 }, { "epoch": 0.2672, "grad_norm": 4.363175392150879, "learning_rate": 4.1697112914802665e-06, "loss": 1.6258, "step": 334 }, { "epoch": 0.268, "grad_norm": 1.6007026433944702, "learning_rate": 4.16502966858563e-06, "loss": 3.575, "step": 335 }, { "epoch": 0.2688, "grad_norm": 4.8055419921875, "learning_rate": 4.160337528458676e-06, "loss": 1.7682, "step": 336 }, { "epoch": 0.2696, "grad_norm": 1.2397737503051758, "learning_rate": 4.155634900737513e-06, "loss": 3.6629, "step": 337 }, { "epoch": 0.2704, "grad_norm": 4.131043910980225, "learning_rate": 4.150921815126493e-06, "loss": 1.5988, "step": 338 }, { "epoch": 0.2712, "grad_norm": 1.2639617919921875, "learning_rate": 4.146198301396025e-06, "loss": 3.5698, "step": 339 }, { "epoch": 0.272, "grad_norm": 4.381173610687256, "learning_rate": 4.141464389382392e-06, "loss": 1.3198, "step": 340 }, { "epoch": 0.2728, "grad_norm": 1.440491795539856, "learning_rate": 4.136720108987552e-06, "loss": 3.6658, "step": 341 }, { "epoch": 0.2736, "grad_norm": 8.941045761108398, "learning_rate": 4.13196549017896e-06, "loss": 1.2674, "step": 342 }, { "epoch": 0.2744, "grad_norm": 1.5544283390045166, "learning_rate": 4.127200562989372e-06, "loss": 3.5196, "step": 343 }, { "epoch": 0.2752, "grad_norm": 4.094554424285889, "learning_rate": 4.122425357516658e-06, "loss": 1.2112, "step": 344 }, { "epoch": 0.276, "grad_norm": 1.1563968658447266, "learning_rate": 4.117639903923611e-06, "loss": 3.6399, "step": 345 }, { "epoch": 0.2768, "grad_norm": 4.3765482902526855, "learning_rate": 4.112844232437757e-06, "loss": 1.3016, "step": 346 }, { "epoch": 0.2776, "grad_norm": 1.073043704032898, "learning_rate": 4.108038373351163e-06, "loss": 3.6758, "step": 347 }, { "epoch": 0.2784, "grad_norm": 4.243771553039551, "learning_rate": 4.103222357020248e-06, "loss": 1.4512, "step": 348 }, { "epoch": 0.2792, "grad_norm": 1.4195610284805298, "learning_rate": 4.098396213865587e-06, "loss": 3.6391, "step": 349 }, { "epoch": 0.28, "grad_norm": 4.04062032699585, "learning_rate": 4.093559974371725e-06, "loss": 1.2876, "step": 350 }, { "epoch": 0.2808, "grad_norm": 1.384352207183838, "learning_rate": 4.0887136690869774e-06, "loss": 3.6527, "step": 351 }, { "epoch": 0.2816, "grad_norm": 4.134579181671143, "learning_rate": 4.083857328623243e-06, "loss": 1.3498, "step": 352 }, { "epoch": 0.2824, "grad_norm": 1.8394545316696167, "learning_rate": 4.078990983655807e-06, "loss": 3.5694, "step": 353 }, { "epoch": 0.2832, "grad_norm": 4.24132776260376, "learning_rate": 4.07411466492315e-06, "loss": 1.6123, "step": 354 }, { "epoch": 0.284, "grad_norm": 1.1497430801391602, "learning_rate": 4.069228403226751e-06, "loss": 3.6655, "step": 355 }, { "epoch": 0.2848, "grad_norm": 3.8187551498413086, "learning_rate": 4.064332229430895e-06, "loss": 1.4159, "step": 356 }, { "epoch": 0.2856, "grad_norm": 1.5703147649765015, "learning_rate": 4.059426174462476e-06, "loss": 3.5892, "step": 357 }, { "epoch": 0.2864, "grad_norm": 4.054878234863281, "learning_rate": 4.054510269310803e-06, "loss": 1.3898, "step": 358 }, { "epoch": 0.2872, "grad_norm": 1.7447679042816162, "learning_rate": 4.049584545027406e-06, "loss": 3.5291, "step": 359 }, { "epoch": 0.288, "grad_norm": 3.6220648288726807, "learning_rate": 4.044649032725836e-06, "loss": 1.1255, "step": 360 }, { "epoch": 0.2888, "grad_norm": 1.4866344928741455, "learning_rate": 4.039703763581472e-06, "loss": 3.647, "step": 361 }, { "epoch": 0.2896, "grad_norm": 4.575165271759033, "learning_rate": 4.034748768831319e-06, "loss": 1.3781, "step": 362 }, { "epoch": 0.2904, "grad_norm": 1.0558618307113647, "learning_rate": 4.02978407977382e-06, "loss": 3.6163, "step": 363 }, { "epoch": 0.2912, "grad_norm": 4.454329490661621, "learning_rate": 4.024809727768648e-06, "loss": 1.3233, "step": 364 }, { "epoch": 0.292, "grad_norm": 1.3956743478775024, "learning_rate": 4.019825744236514e-06, "loss": 3.5997, "step": 365 }, { "epoch": 0.2928, "grad_norm": 4.550688743591309, "learning_rate": 4.014832160658966e-06, "loss": 1.4364, "step": 366 }, { "epoch": 0.2936, "grad_norm": 1.2573503255844116, "learning_rate": 4.009829008578192e-06, "loss": 3.6729, "step": 367 }, { "epoch": 0.2944, "grad_norm": 4.038947582244873, "learning_rate": 4.004816319596822e-06, "loss": 1.2911, "step": 368 }, { "epoch": 0.2952, "grad_norm": 1.9488675594329834, "learning_rate": 3.999794125377721e-06, "loss": 3.5393, "step": 369 }, { "epoch": 0.296, "grad_norm": 4.447761535644531, "learning_rate": 3.9947624576437975e-06, "loss": 1.5997, "step": 370 }, { "epoch": 0.2968, "grad_norm": 1.2472996711730957, "learning_rate": 3.989721348177801e-06, "loss": 3.6067, "step": 371 }, { "epoch": 0.2976, "grad_norm": 4.081388473510742, "learning_rate": 3.984670828822118e-06, "loss": 1.4171, "step": 372 }, { "epoch": 0.2984, "grad_norm": 1.7100144624710083, "learning_rate": 3.979610931478574e-06, "loss": 3.7103, "step": 373 }, { "epoch": 0.2992, "grad_norm": 4.408793926239014, "learning_rate": 3.97454168810823e-06, "loss": 1.3243, "step": 374 }, { "epoch": 0.3, "grad_norm": 1.326974868774414, "learning_rate": 3.969463130731183e-06, "loss": 3.6149, "step": 375 }, { "epoch": 0.3008, "grad_norm": 4.624994277954102, "learning_rate": 3.964375291426361e-06, "loss": 1.5994, "step": 376 }, { "epoch": 0.3016, "grad_norm": 1.3679853677749634, "learning_rate": 3.959278202331323e-06, "loss": 3.5478, "step": 377 }, { "epoch": 0.3024, "grad_norm": 4.432180881500244, "learning_rate": 3.954171895642052e-06, "loss": 1.4198, "step": 378 }, { "epoch": 0.3032, "grad_norm": 1.0665056705474854, "learning_rate": 3.949056403612758e-06, "loss": 3.7173, "step": 379 }, { "epoch": 0.304, "grad_norm": 3.6534807682037354, "learning_rate": 3.943931758555669e-06, "loss": 1.2773, "step": 380 }, { "epoch": 0.3048, "grad_norm": 1.4018532037734985, "learning_rate": 3.938797992840828e-06, "loss": 3.5796, "step": 381 }, { "epoch": 0.3056, "grad_norm": 4.3174357414245605, "learning_rate": 3.933655138895889e-06, "loss": 1.0747, "step": 382 }, { "epoch": 0.3064, "grad_norm": 1.893721342086792, "learning_rate": 3.928503229205913e-06, "loss": 3.5452, "step": 383 }, { "epoch": 0.3072, "grad_norm": 4.509764194488525, "learning_rate": 3.923342296313162e-06, "loss": 1.4684, "step": 384 }, { "epoch": 0.308, "grad_norm": 1.2628504037857056, "learning_rate": 3.918172372816892e-06, "loss": 3.5872, "step": 385 }, { "epoch": 0.3088, "grad_norm": 3.868783712387085, "learning_rate": 3.91299349137315e-06, "loss": 1.316, "step": 386 }, { "epoch": 0.3096, "grad_norm": 1.3258881568908691, "learning_rate": 3.907805684694567e-06, "loss": 3.6877, "step": 387 }, { "epoch": 0.3104, "grad_norm": 3.9455106258392334, "learning_rate": 3.9026089855501475e-06, "loss": 1.2362, "step": 388 }, { "epoch": 0.3112, "grad_norm": 1.0947574377059937, "learning_rate": 3.8974034267650695e-06, "loss": 3.735, "step": 389 }, { "epoch": 0.312, "grad_norm": 4.135454177856445, "learning_rate": 3.89218904122047e-06, "loss": 1.3921, "step": 390 }, { "epoch": 0.3128, "grad_norm": 1.3168636560440063, "learning_rate": 3.886965861853243e-06, "loss": 3.5585, "step": 391 }, { "epoch": 0.3136, "grad_norm": 3.532658100128174, "learning_rate": 3.881733921655829e-06, "loss": 1.2495, "step": 392 }, { "epoch": 0.3144, "grad_norm": 1.3559529781341553, "learning_rate": 3.876493253676004e-06, "loss": 3.561, "step": 393 }, { "epoch": 0.3152, "grad_norm": 4.4542036056518555, "learning_rate": 3.871243891016676e-06, "loss": 1.3177, "step": 394 }, { "epoch": 0.316, "grad_norm": 1.6158586740493774, "learning_rate": 3.8659858668356735e-06, "loss": 3.623, "step": 395 }, { "epoch": 0.3168, "grad_norm": 4.352112293243408, "learning_rate": 3.8607192143455325e-06, "loss": 1.3388, "step": 396 }, { "epoch": 0.3176, "grad_norm": 1.2379918098449707, "learning_rate": 3.855443966813295e-06, "loss": 3.6086, "step": 397 }, { "epoch": 0.3184, "grad_norm": 4.482300758361816, "learning_rate": 3.85016015756029e-06, "loss": 1.3943, "step": 398 }, { "epoch": 0.3192, "grad_norm": 1.632942795753479, "learning_rate": 3.844867819961928e-06, "loss": 3.5682, "step": 399 }, { "epoch": 0.32, "grad_norm": 4.9489521980285645, "learning_rate": 3.839566987447492e-06, "loss": 1.1445, "step": 400 }, { "epoch": 0.3208, "grad_norm": 1.3084850311279297, "learning_rate": 3.8342576934999184e-06, "loss": 3.7127, "step": 401 }, { "epoch": 0.3216, "grad_norm": 3.8171467781066895, "learning_rate": 3.828939971655595e-06, "loss": 1.302, "step": 402 }, { "epoch": 0.3224, "grad_norm": 1.2390443086624146, "learning_rate": 3.823613855504144e-06, "loss": 3.5798, "step": 403 }, { "epoch": 0.3232, "grad_norm": 4.057291507720947, "learning_rate": 3.8182793786882065e-06, "loss": 1.3189, "step": 404 }, { "epoch": 0.324, "grad_norm": 1.3859179019927979, "learning_rate": 3.8129365749032398e-06, "loss": 3.6643, "step": 405 }, { "epoch": 0.3248, "grad_norm": 4.864846706390381, "learning_rate": 3.807585477897296e-06, "loss": 1.4575, "step": 406 }, { "epoch": 0.3256, "grad_norm": 1.0886560678482056, "learning_rate": 3.802226121470811e-06, "loss": 3.7321, "step": 407 }, { "epoch": 0.3264, "grad_norm": 3.940027952194214, "learning_rate": 3.796858539476394e-06, "loss": 1.2742, "step": 408 }, { "epoch": 0.3272, "grad_norm": 1.2309926748275757, "learning_rate": 3.7914827658186104e-06, "loss": 3.5766, "step": 409 }, { "epoch": 0.328, "grad_norm": 4.414444446563721, "learning_rate": 3.7860988344537664e-06, "loss": 1.2858, "step": 410 }, { "epoch": 0.3288, "grad_norm": 1.0498713254928589, "learning_rate": 3.7807067793897006e-06, "loss": 3.6743, "step": 411 }, { "epoch": 0.3296, "grad_norm": 4.1902313232421875, "learning_rate": 3.775306634685562e-06, "loss": 1.4446, "step": 412 }, { "epoch": 0.3304, "grad_norm": 1.1650660037994385, "learning_rate": 3.7698984344516e-06, "loss": 3.6178, "step": 413 }, { "epoch": 0.3312, "grad_norm": 4.5790910720825195, "learning_rate": 3.7644822128489476e-06, "loss": 1.5761, "step": 414 }, { "epoch": 0.332, "grad_norm": 1.0688635110855103, "learning_rate": 3.7590580040894025e-06, "loss": 3.689, "step": 415 }, { "epoch": 0.3328, "grad_norm": 4.05617094039917, "learning_rate": 3.7536258424352164e-06, "loss": 1.6174, "step": 416 }, { "epoch": 0.3336, "grad_norm": 1.236042857170105, "learning_rate": 3.7481857621988734e-06, "loss": 3.6902, "step": 417 }, { "epoch": 0.3344, "grad_norm": 4.205336093902588, "learning_rate": 3.742737797742878e-06, "loss": 1.3125, "step": 418 }, { "epoch": 0.3352, "grad_norm": 1.460862159729004, "learning_rate": 3.737281983479534e-06, "loss": 3.503, "step": 419 }, { "epoch": 0.336, "grad_norm": 4.190709114074707, "learning_rate": 3.731818353870729e-06, "loss": 1.2207, "step": 420 }, { "epoch": 0.3368, "grad_norm": 2.0372729301452637, "learning_rate": 3.726346943427719e-06, "loss": 3.5128, "step": 421 }, { "epoch": 0.3376, "grad_norm": 4.000549793243408, "learning_rate": 3.7208677867109042e-06, "loss": 1.244, "step": 422 }, { "epoch": 0.3384, "grad_norm": 1.509992003440857, "learning_rate": 3.7153809183296174e-06, "loss": 3.6028, "step": 423 }, { "epoch": 0.3392, "grad_norm": 3.7690091133117676, "learning_rate": 3.7098863729418997e-06, "loss": 1.1382, "step": 424 }, { "epoch": 0.34, "grad_norm": 1.0848690271377563, "learning_rate": 3.7043841852542884e-06, "loss": 3.7097, "step": 425 }, { "epoch": 0.3408, "grad_norm": 4.2273359298706055, "learning_rate": 3.6988743900215895e-06, "loss": 1.3459, "step": 426 }, { "epoch": 0.3416, "grad_norm": 1.30433189868927, "learning_rate": 3.6933570220466654e-06, "loss": 3.5762, "step": 427 }, { "epoch": 0.3424, "grad_norm": 3.894927740097046, "learning_rate": 3.6878321161802106e-06, "loss": 1.411, "step": 428 }, { "epoch": 0.3432, "grad_norm": 1.23166024684906, "learning_rate": 3.682299707320532e-06, "loss": 3.7625, "step": 429 }, { "epoch": 0.344, "grad_norm": 4.281452655792236, "learning_rate": 3.6767598304133325e-06, "loss": 1.2892, "step": 430 }, { "epoch": 0.3448, "grad_norm": 1.510961890220642, "learning_rate": 3.6712125204514836e-06, "loss": 3.5778, "step": 431 }, { "epoch": 0.3456, "grad_norm": 3.6072661876678467, "learning_rate": 3.665657812474812e-06, "loss": 1.2145, "step": 432 }, { "epoch": 0.3464, "grad_norm": 1.6257572174072266, "learning_rate": 3.660095741569871e-06, "loss": 3.7148, "step": 433 }, { "epoch": 0.3472, "grad_norm": 4.151918411254883, "learning_rate": 3.654526342869724e-06, "loss": 1.3151, "step": 434 }, { "epoch": 0.348, "grad_norm": 1.7173959016799927, "learning_rate": 3.6489496515537204e-06, "loss": 3.5563, "step": 435 }, { "epoch": 0.3488, "grad_norm": 3.5843987464904785, "learning_rate": 3.643365702847272e-06, "loss": 1.1541, "step": 436 }, { "epoch": 0.3496, "grad_norm": 1.2119823694229126, "learning_rate": 3.6377745320216346e-06, "loss": 3.6086, "step": 437 }, { "epoch": 0.3504, "grad_norm": 4.704022407531738, "learning_rate": 3.632176174393682e-06, "loss": 1.5989, "step": 438 }, { "epoch": 0.3512, "grad_norm": 1.3486601114273071, "learning_rate": 3.6265706653256837e-06, "loss": 3.6383, "step": 439 }, { "epoch": 0.352, "grad_norm": 4.133458614349365, "learning_rate": 3.6209580402250816e-06, "loss": 1.2559, "step": 440 }, { "epoch": 0.3528, "grad_norm": 1.3388392925262451, "learning_rate": 3.615338334544265e-06, "loss": 3.6902, "step": 441 }, { "epoch": 0.3536, "grad_norm": 4.311944961547852, "learning_rate": 3.6097115837803504e-06, "loss": 1.1318, "step": 442 }, { "epoch": 0.3544, "grad_norm": 1.4599226713180542, "learning_rate": 3.604077823474954e-06, "loss": 3.6407, "step": 443 }, { "epoch": 0.3552, "grad_norm": 4.284412384033203, "learning_rate": 3.5984370892139663e-06, "loss": 1.4261, "step": 444 }, { "epoch": 0.356, "grad_norm": 1.4893653392791748, "learning_rate": 3.5927894166273324e-06, "loss": 3.6037, "step": 445 }, { "epoch": 0.3568, "grad_norm": 3.953293800354004, "learning_rate": 3.5871348413888207e-06, "loss": 1.2646, "step": 446 }, { "epoch": 0.3576, "grad_norm": 1.2986643314361572, "learning_rate": 3.5814733992158025e-06, "loss": 3.5551, "step": 447 }, { "epoch": 0.3584, "grad_norm": 4.767986297607422, "learning_rate": 3.5758051258690223e-06, "loss": 1.6051, "step": 448 }, { "epoch": 0.3592, "grad_norm": 1.4707053899765015, "learning_rate": 3.5701300571523757e-06, "loss": 3.4898, "step": 449 }, { "epoch": 0.36, "grad_norm": 4.075262546539307, "learning_rate": 3.564448228912682e-06, "loss": 1.0939, "step": 450 }, { "epoch": 0.3608, "grad_norm": 1.6893370151519775, "learning_rate": 3.558759677039455e-06, "loss": 3.524, "step": 451 }, { "epoch": 0.3616, "grad_norm": 4.155539035797119, "learning_rate": 3.553064437464682e-06, "loss": 1.3009, "step": 452 }, { "epoch": 0.3624, "grad_norm": 1.3253870010375977, "learning_rate": 3.5473625461625884e-06, "loss": 3.5764, "step": 453 }, { "epoch": 0.3632, "grad_norm": 4.075945854187012, "learning_rate": 3.54165403914942e-06, "loss": 1.2607, "step": 454 }, { "epoch": 0.364, "grad_norm": 1.059866189956665, "learning_rate": 3.535938952483211e-06, "loss": 3.6742, "step": 455 }, { "epoch": 0.3648, "grad_norm": 4.110774993896484, "learning_rate": 3.5302173222635526e-06, "loss": 1.4106, "step": 456 }, { "epoch": 0.3656, "grad_norm": 1.3632076978683472, "learning_rate": 3.5244891846313733e-06, "loss": 3.6836, "step": 457 }, { "epoch": 0.3664, "grad_norm": 3.705369472503662, "learning_rate": 3.518754575768702e-06, "loss": 1.3081, "step": 458 }, { "epoch": 0.3672, "grad_norm": 1.1472023725509644, "learning_rate": 3.5130135318984454e-06, "loss": 3.6175, "step": 459 }, { "epoch": 0.368, "grad_norm": 3.85665225982666, "learning_rate": 3.507266089284157e-06, "loss": 1.3936, "step": 460 }, { "epoch": 0.3688, "grad_norm": 1.0957272052764893, "learning_rate": 3.501512284229807e-06, "loss": 3.6699, "step": 461 }, { "epoch": 0.3696, "grad_norm": 5.635092735290527, "learning_rate": 3.4957521530795576e-06, "loss": 1.5143, "step": 462 }, { "epoch": 0.3704, "grad_norm": 1.2065218687057495, "learning_rate": 3.4899857322175252e-06, "loss": 3.6554, "step": 463 }, { "epoch": 0.3712, "grad_norm": 3.687448263168335, "learning_rate": 3.484213058067559e-06, "loss": 1.3567, "step": 464 }, { "epoch": 0.372, "grad_norm": 1.4137887954711914, "learning_rate": 3.4784341670930067e-06, "loss": 3.5039, "step": 465 }, { "epoch": 0.3728, "grad_norm": 3.735736131668091, "learning_rate": 3.4726490957964836e-06, "loss": 1.1562, "step": 466 }, { "epoch": 0.3736, "grad_norm": 1.429471731185913, "learning_rate": 3.466857880719645e-06, "loss": 3.4816, "step": 467 }, { "epoch": 0.3744, "grad_norm": 3.8104074001312256, "learning_rate": 3.4610605584429526e-06, "loss": 1.2771, "step": 468 }, { "epoch": 0.3752, "grad_norm": 1.0887689590454102, "learning_rate": 3.455257165585444e-06, "loss": 3.6168, "step": 469 }, { "epoch": 0.376, "grad_norm": 4.246683120727539, "learning_rate": 3.4494477388045035e-06, "loss": 1.4563, "step": 470 }, { "epoch": 0.3768, "grad_norm": 1.181482195854187, "learning_rate": 3.443632314795627e-06, "loss": 3.5803, "step": 471 }, { "epoch": 0.3776, "grad_norm": 4.463985443115234, "learning_rate": 3.4378109302921946e-06, "loss": 1.3947, "step": 472 }, { "epoch": 0.3784, "grad_norm": 2.0847549438476562, "learning_rate": 3.4319836220652334e-06, "loss": 3.5447, "step": 473 }, { "epoch": 0.3792, "grad_norm": 3.957758903503418, "learning_rate": 3.4261504269231904e-06, "loss": 1.3876, "step": 474 }, { "epoch": 0.38, "grad_norm": 1.2002718448638916, "learning_rate": 3.4203113817116955e-06, "loss": 3.6171, "step": 475 }, { "epoch": 0.3808, "grad_norm": 3.7537636756896973, "learning_rate": 3.4144665233133318e-06, "loss": 1.3785, "step": 476 }, { "epoch": 0.3816, "grad_norm": 1.081315517425537, "learning_rate": 3.408615888647402e-06, "loss": 3.6535, "step": 477 }, { "epoch": 0.3824, "grad_norm": 4.511240005493164, "learning_rate": 3.402759514669694e-06, "loss": 1.5004, "step": 478 }, { "epoch": 0.3832, "grad_norm": 1.60770845413208, "learning_rate": 3.3968974383722497e-06, "loss": 3.6355, "step": 479 }, { "epoch": 0.384, "grad_norm": 4.516547679901123, "learning_rate": 3.391029696783127e-06, "loss": 1.2093, "step": 480 }, { "epoch": 0.3848, "grad_norm": 1.8860230445861816, "learning_rate": 3.385156326966173e-06, "loss": 3.5089, "step": 481 }, { "epoch": 0.3856, "grad_norm": 4.554468631744385, "learning_rate": 3.379277366020782e-06, "loss": 1.477, "step": 482 }, { "epoch": 0.3864, "grad_norm": 1.258987307548523, "learning_rate": 3.3733928510816677e-06, "loss": 3.583, "step": 483 }, { "epoch": 0.3872, "grad_norm": 4.783546447753906, "learning_rate": 3.3675028193186243e-06, "loss": 1.5192, "step": 484 }, { "epoch": 0.388, "grad_norm": 1.0193849802017212, "learning_rate": 3.3616073079362925e-06, "loss": 3.629, "step": 485 }, { "epoch": 0.3888, "grad_norm": 4.146661758422852, "learning_rate": 3.3557063541739283e-06, "loss": 1.2621, "step": 486 }, { "epoch": 0.3896, "grad_norm": 1.25571608543396, "learning_rate": 3.349799995305162e-06, "loss": 3.5985, "step": 487 }, { "epoch": 0.3904, "grad_norm": 4.230064868927002, "learning_rate": 3.343888268637765e-06, "loss": 1.232, "step": 488 }, { "epoch": 0.3912, "grad_norm": 1.292047142982483, "learning_rate": 3.337971211513417e-06, "loss": 3.587, "step": 489 }, { "epoch": 0.392, "grad_norm": 4.458502769470215, "learning_rate": 3.332048861307467e-06, "loss": 1.5272, "step": 490 }, { "epoch": 0.3928, "grad_norm": 1.4470558166503906, "learning_rate": 3.3261212554286977e-06, "loss": 3.617, "step": 491 }, { "epoch": 0.3936, "grad_norm": 3.8012030124664307, "learning_rate": 3.320188431319088e-06, "loss": 1.2316, "step": 492 }, { "epoch": 0.3944, "grad_norm": 1.446913242340088, "learning_rate": 3.3142504264535808e-06, "loss": 3.6562, "step": 493 }, { "epoch": 0.3952, "grad_norm": 4.147583961486816, "learning_rate": 3.308307278339842e-06, "loss": 1.3471, "step": 494 }, { "epoch": 0.396, "grad_norm": 1.4276149272918701, "learning_rate": 3.3023590245180237e-06, "loss": 3.5495, "step": 495 }, { "epoch": 0.3968, "grad_norm": 3.8174455165863037, "learning_rate": 3.296405702560532e-06, "loss": 1.0808, "step": 496 }, { "epoch": 0.3976, "grad_norm": 1.4224337339401245, "learning_rate": 3.2904473500717826e-06, "loss": 3.5136, "step": 497 }, { "epoch": 0.3984, "grad_norm": 4.157987117767334, "learning_rate": 3.284484004687969e-06, "loss": 1.3679, "step": 498 }, { "epoch": 0.3992, "grad_norm": 1.2928471565246582, "learning_rate": 3.278515704076821e-06, "loss": 3.6342, "step": 499 }, { "epoch": 0.4, "grad_norm": 4.097792625427246, "learning_rate": 3.272542485937369e-06, "loss": 1.3664, "step": 500 }, { "epoch": 0.4008, "grad_norm": 1.1602492332458496, "learning_rate": 3.2665643879997054e-06, "loss": 3.6839, "step": 501 }, { "epoch": 0.4016, "grad_norm": 3.862520456314087, "learning_rate": 3.2605814480247454e-06, "loss": 1.4261, "step": 502 }, { "epoch": 0.4024, "grad_norm": 1.335418462753296, "learning_rate": 3.2545937038039904e-06, "loss": 3.599, "step": 503 }, { "epoch": 0.4032, "grad_norm": 4.205375671386719, "learning_rate": 3.2486011931592863e-06, "loss": 1.5577, "step": 504 }, { "epoch": 0.404, "grad_norm": 1.6254982948303223, "learning_rate": 3.2426039539425875e-06, "loss": 3.4938, "step": 505 }, { "epoch": 0.4048, "grad_norm": 4.060510158538818, "learning_rate": 3.2366020240357166e-06, "loss": 1.3317, "step": 506 }, { "epoch": 0.4056, "grad_norm": 1.3750642538070679, "learning_rate": 3.2305954413501252e-06, "loss": 3.5692, "step": 507 }, { "epoch": 0.4064, "grad_norm": 4.146080017089844, "learning_rate": 3.2245842438266526e-06, "loss": 1.1754, "step": 508 }, { "epoch": 0.4072, "grad_norm": 1.4431229829788208, "learning_rate": 3.2185684694352913e-06, "loss": 3.4761, "step": 509 }, { "epoch": 0.408, "grad_norm": 3.423323392868042, "learning_rate": 3.2125481561749406e-06, "loss": 1.2221, "step": 510 }, { "epoch": 0.4088, "grad_norm": 1.5508882999420166, "learning_rate": 3.2065233420731717e-06, "loss": 3.6483, "step": 511 }, { "epoch": 0.4096, "grad_norm": 3.5361711978912354, "learning_rate": 3.2004940651859844e-06, "loss": 1.1119, "step": 512 }, { "epoch": 0.4104, "grad_norm": 1.326869010925293, "learning_rate": 3.194460363597569e-06, "loss": 3.5423, "step": 513 }, { "epoch": 0.4112, "grad_norm": 4.03769588470459, "learning_rate": 3.188422275420063e-06, "loss": 1.4117, "step": 514 }, { "epoch": 0.412, "grad_norm": 1.3623450994491577, "learning_rate": 3.1823798387933134e-06, "loss": 3.498, "step": 515 }, { "epoch": 0.4128, "grad_norm": 4.137259483337402, "learning_rate": 3.1763330918846347e-06, "loss": 1.2982, "step": 516 }, { "epoch": 0.4136, "grad_norm": 1.067256212234497, "learning_rate": 3.1702820728885657e-06, "loss": 3.7067, "step": 517 }, { "epoch": 0.4144, "grad_norm": 4.063728332519531, "learning_rate": 3.164226820026632e-06, "loss": 1.3187, "step": 518 }, { "epoch": 0.4152, "grad_norm": 1.2824773788452148, "learning_rate": 3.1581673715471007e-06, "loss": 3.5527, "step": 519 }, { "epoch": 0.416, "grad_norm": 3.7093420028686523, "learning_rate": 3.152103765724743e-06, "loss": 1.1281, "step": 520 }, { "epoch": 0.4168, "grad_norm": 1.288455843925476, "learning_rate": 3.1460360408605866e-06, "loss": 3.5115, "step": 521 }, { "epoch": 0.4176, "grad_norm": 4.3098063468933105, "learning_rate": 3.1399642352816825e-06, "loss": 1.3113, "step": 522 }, { "epoch": 0.4184, "grad_norm": 1.1683874130249023, "learning_rate": 3.1338883873408517e-06, "loss": 3.6437, "step": 523 }, { "epoch": 0.4192, "grad_norm": 4.025966167449951, "learning_rate": 3.127808535416454e-06, "loss": 1.2751, "step": 524 }, { "epoch": 0.42, "grad_norm": 1.7916266918182373, "learning_rate": 3.121724717912138e-06, "loss": 3.5067, "step": 525 }, { "epoch": 0.4208, "grad_norm": 4.328076362609863, "learning_rate": 3.1156369732566006e-06, "loss": 1.6473, "step": 526 }, { "epoch": 0.4216, "grad_norm": 1.400840163230896, "learning_rate": 3.109545339903347e-06, "loss": 3.5727, "step": 527 }, { "epoch": 0.4224, "grad_norm": 3.689484119415283, "learning_rate": 3.1034498563304435e-06, "loss": 1.3867, "step": 528 }, { "epoch": 0.4232, "grad_norm": 1.0594552755355835, "learning_rate": 3.0973505610402767e-06, "loss": 3.7167, "step": 529 }, { "epoch": 0.424, "grad_norm": 4.328317642211914, "learning_rate": 3.0912474925593124e-06, "loss": 1.5036, "step": 530 }, { "epoch": 0.4248, "grad_norm": 1.1060447692871094, "learning_rate": 3.085140689437846e-06, "loss": 3.6933, "step": 531 }, { "epoch": 0.4256, "grad_norm": 4.118087291717529, "learning_rate": 3.0790301902497664e-06, "loss": 1.3451, "step": 532 }, { "epoch": 0.4264, "grad_norm": 1.254740595817566, "learning_rate": 3.072916033592307e-06, "loss": 3.5871, "step": 533 }, { "epoch": 0.4272, "grad_norm": 4.144657611846924, "learning_rate": 3.0667982580858047e-06, "loss": 1.4215, "step": 534 }, { "epoch": 0.428, "grad_norm": 1.1598517894744873, "learning_rate": 3.0606769023734535e-06, "loss": 3.6583, "step": 535 }, { "epoch": 0.4288, "grad_norm": 4.24267578125, "learning_rate": 3.0545520051210637e-06, "loss": 1.2563, "step": 536 }, { "epoch": 0.4296, "grad_norm": 1.5326381921768188, "learning_rate": 3.048423605016815e-06, "loss": 3.5047, "step": 537 }, { "epoch": 0.4304, "grad_norm": 4.730625629425049, "learning_rate": 3.042291740771014e-06, "loss": 1.3603, "step": 538 }, { "epoch": 0.4312, "grad_norm": 1.132880687713623, "learning_rate": 3.036156451115846e-06, "loss": 3.6709, "step": 539 }, { "epoch": 0.432, "grad_norm": 3.7942590713500977, "learning_rate": 3.0300177748051375e-06, "loss": 1.3794, "step": 540 }, { "epoch": 0.4328, "grad_norm": 1.4315778017044067, "learning_rate": 3.0238757506141013e-06, "loss": 3.5769, "step": 541 }, { "epoch": 0.4336, "grad_norm": 3.5602166652679443, "learning_rate": 3.0177304173391038e-06, "loss": 1.2704, "step": 542 }, { "epoch": 0.4344, "grad_norm": 1.1675716638565063, "learning_rate": 3.0115818137974066e-06, "loss": 3.5886, "step": 543 }, { "epoch": 0.4352, "grad_norm": 4.345582962036133, "learning_rate": 3.0054299788269343e-06, "loss": 1.4216, "step": 544 }, { "epoch": 0.436, "grad_norm": 1.762725591659546, "learning_rate": 2.9992749512860177e-06, "loss": 3.4446, "step": 545 }, { "epoch": 0.4368, "grad_norm": 3.993100643157959, "learning_rate": 2.9931167700531575e-06, "loss": 1.343, "step": 546 }, { "epoch": 0.4376, "grad_norm": 1.2319386005401611, "learning_rate": 2.9869554740267726e-06, "loss": 3.603, "step": 547 }, { "epoch": 0.4384, "grad_norm": 4.317058086395264, "learning_rate": 2.9807911021249573e-06, "loss": 1.4564, "step": 548 }, { "epoch": 0.4392, "grad_norm": 1.6317486763000488, "learning_rate": 2.9746236932852355e-06, "loss": 3.5411, "step": 549 }, { "epoch": 0.44, "grad_norm": 3.8238189220428467, "learning_rate": 2.9684532864643123e-06, "loss": 1.1421, "step": 550 }, { "epoch": 0.4408, "grad_norm": 1.9044779539108276, "learning_rate": 2.9622799206378306e-06, "loss": 3.6848, "step": 551 }, { "epoch": 0.4416, "grad_norm": 3.827505588531494, "learning_rate": 2.956103634800126e-06, "loss": 1.3386, "step": 552 }, { "epoch": 0.4424, "grad_norm": 1.3661056756973267, "learning_rate": 2.949924467963975e-06, "loss": 3.4422, "step": 553 }, { "epoch": 0.4432, "grad_norm": 4.082735538482666, "learning_rate": 2.943742459160354e-06, "loss": 1.3541, "step": 554 }, { "epoch": 0.444, "grad_norm": 1.28450345993042, "learning_rate": 2.9375576474381907e-06, "loss": 3.5994, "step": 555 }, { "epoch": 0.4448, "grad_norm": 3.4685943126678467, "learning_rate": 2.9313700718641167e-06, "loss": 1.4483, "step": 556 }, { "epoch": 0.4456, "grad_norm": 1.7730368375778198, "learning_rate": 2.925179771522223e-06, "loss": 3.6276, "step": 557 }, { "epoch": 0.4464, "grad_norm": 3.9150004386901855, "learning_rate": 2.9189867855138103e-06, "loss": 1.3486, "step": 558 }, { "epoch": 0.4472, "grad_norm": 1.5707478523254395, "learning_rate": 2.912791152957145e-06, "loss": 3.5531, "step": 559 }, { "epoch": 0.448, "grad_norm": 4.4283766746521, "learning_rate": 2.9065929129872097e-06, "loss": 1.4254, "step": 560 }, { "epoch": 0.4488, "grad_norm": 1.5481115579605103, "learning_rate": 2.900392104755455e-06, "loss": 3.4633, "step": 561 }, { "epoch": 0.4496, "grad_norm": 3.5355985164642334, "learning_rate": 2.8941887674295573e-06, "loss": 1.3703, "step": 562 }, { "epoch": 0.4504, "grad_norm": 1.2419151067733765, "learning_rate": 2.887982940193165e-06, "loss": 3.6656, "step": 563 }, { "epoch": 0.4512, "grad_norm": 4.397960186004639, "learning_rate": 2.8817746622456585e-06, "loss": 1.338, "step": 564 }, { "epoch": 0.452, "grad_norm": 1.4676947593688965, "learning_rate": 2.875563972801893e-06, "loss": 3.6548, "step": 565 }, { "epoch": 0.4528, "grad_norm": 4.111155033111572, "learning_rate": 2.8693509110919597e-06, "loss": 1.3694, "step": 566 }, { "epoch": 0.4536, "grad_norm": 1.7541122436523438, "learning_rate": 2.863135516360932e-06, "loss": 3.4508, "step": 567 }, { "epoch": 0.4544, "grad_norm": 4.085772514343262, "learning_rate": 2.8569178278686222e-06, "loss": 1.3314, "step": 568 }, { "epoch": 0.4552, "grad_norm": 1.2001174688339233, "learning_rate": 2.85069788488933e-06, "loss": 3.5885, "step": 569 }, { "epoch": 0.456, "grad_norm": 4.38803768157959, "learning_rate": 2.844475726711595e-06, "loss": 1.1816, "step": 570 }, { "epoch": 0.4568, "grad_norm": 1.2394533157348633, "learning_rate": 2.8382513926379508e-06, "loss": 3.6019, "step": 571 }, { "epoch": 0.4576, "grad_norm": 4.420421600341797, "learning_rate": 2.832024921984674e-06, "loss": 1.4351, "step": 572 }, { "epoch": 0.4584, "grad_norm": 1.2522428035736084, "learning_rate": 2.825796354081537e-06, "loss": 3.6141, "step": 573 }, { "epoch": 0.4592, "grad_norm": 4.002085208892822, "learning_rate": 2.8195657282715595e-06, "loss": 1.1009, "step": 574 }, { "epoch": 0.46, "grad_norm": 1.433961272239685, "learning_rate": 2.813333083910761e-06, "loss": 3.6517, "step": 575 }, { "epoch": 0.4608, "grad_norm": 4.165874004364014, "learning_rate": 2.807098460367911e-06, "loss": 1.3473, "step": 576 }, { "epoch": 0.4616, "grad_norm": 1.468865990638733, "learning_rate": 2.800861897024279e-06, "loss": 3.6747, "step": 577 }, { "epoch": 0.4624, "grad_norm": 4.306812286376953, "learning_rate": 2.79462343327339e-06, "loss": 1.416, "step": 578 }, { "epoch": 0.4632, "grad_norm": 1.0383753776550293, "learning_rate": 2.7883831085207707e-06, "loss": 3.575, "step": 579 }, { "epoch": 0.464, "grad_norm": 4.186305999755859, "learning_rate": 2.7821409621837042e-06, "loss": 1.5874, "step": 580 }, { "epoch": 0.4648, "grad_norm": 1.3052856922149658, "learning_rate": 2.7758970336909795e-06, "loss": 3.6154, "step": 581 }, { "epoch": 0.4656, "grad_norm": 3.598694324493408, "learning_rate": 2.7696513624826422e-06, "loss": 1.2231, "step": 582 }, { "epoch": 0.4664, "grad_norm": 1.3978124856948853, "learning_rate": 2.763403988009746e-06, "loss": 3.5403, "step": 583 }, { "epoch": 0.4672, "grad_norm": 3.618967056274414, "learning_rate": 2.7571549497341044e-06, "loss": 1.29, "step": 584 }, { "epoch": 0.468, "grad_norm": 1.4016177654266357, "learning_rate": 2.7509042871280373e-06, "loss": 3.6256, "step": 585 }, { "epoch": 0.4688, "grad_norm": 3.9204423427581787, "learning_rate": 2.7446520396741293e-06, "loss": 1.4597, "step": 586 }, { "epoch": 0.4696, "grad_norm": 1.4617024660110474, "learning_rate": 2.7383982468649715e-06, "loss": 3.482, "step": 587 }, { "epoch": 0.4704, "grad_norm": 4.012588024139404, "learning_rate": 2.73214294820292e-06, "loss": 1.2928, "step": 588 }, { "epoch": 0.4712, "grad_norm": 1.4617540836334229, "learning_rate": 2.725886183199839e-06, "loss": 3.626, "step": 589 }, { "epoch": 0.472, "grad_norm": 3.5914876461029053, "learning_rate": 2.7196279913768587e-06, "loss": 1.3148, "step": 590 }, { "epoch": 0.4728, "grad_norm": 1.4136903285980225, "learning_rate": 2.713368412264118e-06, "loss": 3.5289, "step": 591 }, { "epoch": 0.4736, "grad_norm": 3.7139124870300293, "learning_rate": 2.7071074854005206e-06, "loss": 1.3292, "step": 592 }, { "epoch": 0.4744, "grad_norm": 1.2121789455413818, "learning_rate": 2.700845250333486e-06, "loss": 3.6458, "step": 593 }, { "epoch": 0.4752, "grad_norm": 4.53924036026001, "learning_rate": 2.694581746618691e-06, "loss": 1.3469, "step": 594 }, { "epoch": 0.476, "grad_norm": 1.2464954853057861, "learning_rate": 2.688317013819832e-06, "loss": 3.5712, "step": 595 }, { "epoch": 0.4768, "grad_norm": 3.8551762104034424, "learning_rate": 2.682051091508365e-06, "loss": 1.3476, "step": 596 }, { "epoch": 0.4776, "grad_norm": 1.2209997177124023, "learning_rate": 2.67578401926326e-06, "loss": 3.6444, "step": 597 }, { "epoch": 0.4784, "grad_norm": 4.334421634674072, "learning_rate": 2.6695158366707526e-06, "loss": 1.4771, "step": 598 }, { "epoch": 0.4792, "grad_norm": 1.5928137302398682, "learning_rate": 2.6632465833240895e-06, "loss": 3.4254, "step": 599 }, { "epoch": 0.48, "grad_norm": 3.963142156600952, "learning_rate": 2.6569762988232838e-06, "loss": 1.3901, "step": 600 }, { "epoch": 0.4808, "grad_norm": 1.2593353986740112, "learning_rate": 2.6507050227748595e-06, "loss": 3.5619, "step": 601 }, { "epoch": 0.4816, "grad_norm": 4.629072189331055, "learning_rate": 2.6444327947916037e-06, "loss": 1.5413, "step": 602 }, { "epoch": 0.4824, "grad_norm": 1.2204415798187256, "learning_rate": 2.6381596544923184e-06, "loss": 3.6041, "step": 603 }, { "epoch": 0.4832, "grad_norm": 4.39404821395874, "learning_rate": 2.6318856415015664e-06, "loss": 1.1507, "step": 604 }, { "epoch": 0.484, "grad_norm": 1.2167773246765137, "learning_rate": 2.625610795449424e-06, "loss": 3.5377, "step": 605 }, { "epoch": 0.4848, "grad_norm": 4.067314624786377, "learning_rate": 2.6193351559712294e-06, "loss": 1.3543, "step": 606 }, { "epoch": 0.4856, "grad_norm": 1.054069995880127, "learning_rate": 2.6130587627073315e-06, "loss": 3.678, "step": 607 }, { "epoch": 0.4864, "grad_norm": 4.561433792114258, "learning_rate": 2.606781655302843e-06, "loss": 1.5264, "step": 608 }, { "epoch": 0.4872, "grad_norm": 1.6582963466644287, "learning_rate": 2.6005038734073833e-06, "loss": 3.4737, "step": 609 }, { "epoch": 0.488, "grad_norm": 4.4807233810424805, "learning_rate": 2.594225456674837e-06, "loss": 1.5468, "step": 610 }, { "epoch": 0.4888, "grad_norm": 1.4780353307724, "learning_rate": 2.5879464447630947e-06, "loss": 3.6692, "step": 611 }, { "epoch": 0.4896, "grad_norm": 4.209949493408203, "learning_rate": 2.58166687733381e-06, "loss": 1.2275, "step": 612 }, { "epoch": 0.4904, "grad_norm": 1.4267958402633667, "learning_rate": 2.575386794052142e-06, "loss": 3.4531, "step": 613 }, { "epoch": 0.4912, "grad_norm": 3.8919217586517334, "learning_rate": 2.569106234586511e-06, "loss": 1.3178, "step": 614 }, { "epoch": 0.492, "grad_norm": 1.4168897867202759, "learning_rate": 2.5628252386083443e-06, "loss": 3.4955, "step": 615 }, { "epoch": 0.4928, "grad_norm": 3.9594831466674805, "learning_rate": 2.5565438457918247e-06, "loss": 1.3968, "step": 616 }, { "epoch": 0.4936, "grad_norm": 1.1420923471450806, "learning_rate": 2.5502620958136444e-06, "loss": 3.6264, "step": 617 }, { "epoch": 0.4944, "grad_norm": 4.060093402862549, "learning_rate": 2.5439800283527495e-06, "loss": 1.3898, "step": 618 }, { "epoch": 0.4952, "grad_norm": 1.4885039329528809, "learning_rate": 2.537697683090093e-06, "loss": 3.492, "step": 619 }, { "epoch": 0.496, "grad_norm": 4.163914203643799, "learning_rate": 2.531415099708382e-06, "loss": 1.1859, "step": 620 }, { "epoch": 0.4968, "grad_norm": 1.1269545555114746, "learning_rate": 2.525132317891827e-06, "loss": 3.5954, "step": 621 }, { "epoch": 0.4976, "grad_norm": 4.090238571166992, "learning_rate": 2.518849377325893e-06, "loss": 1.3966, "step": 622 }, { "epoch": 0.4984, "grad_norm": 1.5226904153823853, "learning_rate": 2.5125663176970475e-06, "loss": 3.6323, "step": 623 }, { "epoch": 0.4992, "grad_norm": 3.7972140312194824, "learning_rate": 2.5062831786925102e-06, "loss": 1.39, "step": 624 }, { "epoch": 0.5, "grad_norm": 1.4045028686523438, "learning_rate": 2.5e-06, "loss": 3.5625, "step": 625 }, { "epoch": 0.5008, "grad_norm": 3.8131749629974365, "learning_rate": 2.4937168213074906e-06, "loss": 1.2028, "step": 626 }, { "epoch": 0.5016, "grad_norm": 2.0082039833068848, "learning_rate": 2.487433682302953e-06, "loss": 3.3618, "step": 627 }, { "epoch": 0.5024, "grad_norm": 4.199687957763672, "learning_rate": 2.4811506226741077e-06, "loss": 1.2716, "step": 628 }, { "epoch": 0.5032, "grad_norm": 1.1121747493743896, "learning_rate": 2.474867682108174e-06, "loss": 3.5795, "step": 629 }, { "epoch": 0.504, "grad_norm": 3.8342783451080322, "learning_rate": 2.4685849002916184e-06, "loss": 1.2034, "step": 630 }, { "epoch": 0.5048, "grad_norm": 1.6797664165496826, "learning_rate": 2.4623023169099074e-06, "loss": 3.5073, "step": 631 }, { "epoch": 0.5056, "grad_norm": 5.292508125305176, "learning_rate": 2.456019971647251e-06, "loss": 1.3187, "step": 632 }, { "epoch": 0.5064, "grad_norm": 1.0428590774536133, "learning_rate": 2.449737904186357e-06, "loss": 3.6168, "step": 633 }, { "epoch": 0.5072, "grad_norm": 3.81816029548645, "learning_rate": 2.4434561542081765e-06, "loss": 1.3212, "step": 634 }, { "epoch": 0.508, "grad_norm": 1.0982403755187988, "learning_rate": 2.4371747613916566e-06, "loss": 3.6012, "step": 635 }, { "epoch": 0.5088, "grad_norm": 4.740167617797852, "learning_rate": 2.4308937654134893e-06, "loss": 1.3399, "step": 636 }, { "epoch": 0.5096, "grad_norm": 1.26600980758667, "learning_rate": 2.4246132059478582e-06, "loss": 3.5275, "step": 637 }, { "epoch": 0.5104, "grad_norm": 4.418180465698242, "learning_rate": 2.4183331226661913e-06, "loss": 1.4019, "step": 638 }, { "epoch": 0.5112, "grad_norm": 2.0348660945892334, "learning_rate": 2.4120535552369057e-06, "loss": 3.5616, "step": 639 }, { "epoch": 0.512, "grad_norm": 3.7417869567871094, "learning_rate": 2.4057745433251637e-06, "loss": 1.3269, "step": 640 }, { "epoch": 0.5128, "grad_norm": 1.818655252456665, "learning_rate": 2.3994961265926166e-06, "loss": 3.5734, "step": 641 }, { "epoch": 0.5136, "grad_norm": 3.8714828491210938, "learning_rate": 2.3932183446971584e-06, "loss": 1.3336, "step": 642 }, { "epoch": 0.5144, "grad_norm": 1.1985024213790894, "learning_rate": 2.386941237292669e-06, "loss": 3.5905, "step": 643 }, { "epoch": 0.5152, "grad_norm": 3.901711940765381, "learning_rate": 2.3806648440287715e-06, "loss": 1.1541, "step": 644 }, { "epoch": 0.516, "grad_norm": 1.3076053857803345, "learning_rate": 2.3743892045505764e-06, "loss": 3.6319, "step": 645 }, { "epoch": 0.5168, "grad_norm": 3.9768855571746826, "learning_rate": 2.368114358498434e-06, "loss": 1.5297, "step": 646 }, { "epoch": 0.5176, "grad_norm": 1.135161280632019, "learning_rate": 2.361840345507683e-06, "loss": 3.6021, "step": 647 }, { "epoch": 0.5184, "grad_norm": 3.6397156715393066, "learning_rate": 2.355567205208397e-06, "loss": 1.3282, "step": 648 }, { "epoch": 0.5192, "grad_norm": 1.3913445472717285, "learning_rate": 2.3492949772251418e-06, "loss": 3.4597, "step": 649 }, { "epoch": 0.52, "grad_norm": 3.9108190536499023, "learning_rate": 2.3430237011767166e-06, "loss": 1.0836, "step": 650 }, { "epoch": 0.5208, "grad_norm": 1.6176162958145142, "learning_rate": 2.3367534166759105e-06, "loss": 3.5934, "step": 651 }, { "epoch": 0.5216, "grad_norm": 3.639057159423828, "learning_rate": 2.3304841633292487e-06, "loss": 1.2418, "step": 652 }, { "epoch": 0.5224, "grad_norm": 1.5021276473999023, "learning_rate": 2.324215980736741e-06, "loss": 3.4284, "step": 653 }, { "epoch": 0.5232, "grad_norm": 5.434640407562256, "learning_rate": 2.317948908491636e-06, "loss": 1.3802, "step": 654 }, { "epoch": 0.524, "grad_norm": 1.7329832315444946, "learning_rate": 2.3116829861801687e-06, "loss": 3.4577, "step": 655 }, { "epoch": 0.5248, "grad_norm": 3.633262872695923, "learning_rate": 2.305418253381309e-06, "loss": 1.1311, "step": 656 }, { "epoch": 0.5256, "grad_norm": 1.2898222208023071, "learning_rate": 2.299154749666515e-06, "loss": 3.5833, "step": 657 }, { "epoch": 0.5264, "grad_norm": 3.3343076705932617, "learning_rate": 2.2928925145994798e-06, "loss": 1.2565, "step": 658 }, { "epoch": 0.5272, "grad_norm": 1.1492732763290405, "learning_rate": 2.286631587735883e-06, "loss": 3.6572, "step": 659 }, { "epoch": 0.528, "grad_norm": 4.284005165100098, "learning_rate": 2.280372008623142e-06, "loss": 1.4464, "step": 660 }, { "epoch": 0.5288, "grad_norm": 1.7030223608016968, "learning_rate": 2.274113816800161e-06, "loss": 3.4687, "step": 661 }, { "epoch": 0.5296, "grad_norm": 4.307010650634766, "learning_rate": 2.267857051797081e-06, "loss": 1.3294, "step": 662 }, { "epoch": 0.5304, "grad_norm": 1.5467772483825684, "learning_rate": 2.261601753135029e-06, "loss": 3.5568, "step": 663 }, { "epoch": 0.5312, "grad_norm": 3.650076150894165, "learning_rate": 2.255347960325871e-06, "loss": 1.3358, "step": 664 }, { "epoch": 0.532, "grad_norm": 1.5734375715255737, "learning_rate": 2.2490957128719627e-06, "loss": 3.4565, "step": 665 }, { "epoch": 0.5328, "grad_norm": 3.6878743171691895, "learning_rate": 2.2428450502658964e-06, "loss": 1.1379, "step": 666 }, { "epoch": 0.5336, "grad_norm": 1.115048885345459, "learning_rate": 2.2365960119902543e-06, "loss": 3.6159, "step": 667 }, { "epoch": 0.5344, "grad_norm": 4.451643943786621, "learning_rate": 2.2303486375173586e-06, "loss": 1.3798, "step": 668 }, { "epoch": 0.5352, "grad_norm": 1.2209587097167969, "learning_rate": 2.224102966309021e-06, "loss": 3.5913, "step": 669 }, { "epoch": 0.536, "grad_norm": 3.687743663787842, "learning_rate": 2.2178590378162957e-06, "loss": 1.2116, "step": 670 }, { "epoch": 0.5368, "grad_norm": 1.4728742837905884, "learning_rate": 2.2116168914792293e-06, "loss": 3.5415, "step": 671 }, { "epoch": 0.5376, "grad_norm": 3.96630859375, "learning_rate": 2.205376566726611e-06, "loss": 1.3889, "step": 672 }, { "epoch": 0.5384, "grad_norm": 1.215154767036438, "learning_rate": 2.1991381029757216e-06, "loss": 3.5867, "step": 673 }, { "epoch": 0.5392, "grad_norm": 3.8956687450408936, "learning_rate": 2.19290153963209e-06, "loss": 1.5616, "step": 674 }, { "epoch": 0.54, "grad_norm": 1.183532476425171, "learning_rate": 2.186666916089239e-06, "loss": 3.5136, "step": 675 }, { "epoch": 0.5408, "grad_norm": 3.5824153423309326, "learning_rate": 2.1804342717284414e-06, "loss": 1.2544, "step": 676 }, { "epoch": 0.5416, "grad_norm": 1.325810432434082, "learning_rate": 2.174203645918464e-06, "loss": 3.5406, "step": 677 }, { "epoch": 0.5424, "grad_norm": 3.4541144371032715, "learning_rate": 2.1679750780153265e-06, "loss": 1.3576, "step": 678 }, { "epoch": 0.5432, "grad_norm": 1.5813454389572144, "learning_rate": 2.1617486073620496e-06, "loss": 3.4813, "step": 679 }, { "epoch": 0.544, "grad_norm": 3.9602949619293213, "learning_rate": 2.155524273288405e-06, "loss": 1.426, "step": 680 }, { "epoch": 0.5448, "grad_norm": 1.4534196853637695, "learning_rate": 2.1493021151106704e-06, "loss": 3.5585, "step": 681 }, { "epoch": 0.5456, "grad_norm": 3.9135422706604004, "learning_rate": 2.143082172131378e-06, "loss": 1.3641, "step": 682 }, { "epoch": 0.5464, "grad_norm": 1.6020511388778687, "learning_rate": 2.1368644836390684e-06, "loss": 3.5024, "step": 683 }, { "epoch": 0.5472, "grad_norm": 4.677028179168701, "learning_rate": 2.130649088908041e-06, "loss": 1.366, "step": 684 }, { "epoch": 0.548, "grad_norm": 1.4928466081619263, "learning_rate": 2.1244360271981073e-06, "loss": 3.5495, "step": 685 }, { "epoch": 0.5488, "grad_norm": 4.278928279876709, "learning_rate": 2.1182253377543428e-06, "loss": 1.3534, "step": 686 }, { "epoch": 0.5496, "grad_norm": 1.3462296724319458, "learning_rate": 2.1120170598068353e-06, "loss": 3.6396, "step": 687 }, { "epoch": 0.5504, "grad_norm": 5.2212653160095215, "learning_rate": 2.1058112325704436e-06, "loss": 1.3357, "step": 688 }, { "epoch": 0.5512, "grad_norm": 1.1819498538970947, "learning_rate": 2.0996078952445453e-06, "loss": 3.6596, "step": 689 }, { "epoch": 0.552, "grad_norm": 3.7068729400634766, "learning_rate": 2.093407087012791e-06, "loss": 1.3518, "step": 690 }, { "epoch": 0.5528, "grad_norm": 1.0458273887634277, "learning_rate": 2.0872088470428553e-06, "loss": 3.607, "step": 691 }, { "epoch": 0.5536, "grad_norm": 4.25509786605835, "learning_rate": 2.08101321448619e-06, "loss": 1.4629, "step": 692 }, { "epoch": 0.5544, "grad_norm": 1.1481705904006958, "learning_rate": 2.0748202284777775e-06, "loss": 3.6161, "step": 693 }, { "epoch": 0.5552, "grad_norm": 3.934365749359131, "learning_rate": 2.0686299281358837e-06, "loss": 1.4318, "step": 694 }, { "epoch": 0.556, "grad_norm": 1.4977188110351562, "learning_rate": 2.0624423525618097e-06, "loss": 3.6224, "step": 695 }, { "epoch": 0.5568, "grad_norm": 3.6773321628570557, "learning_rate": 2.0562575408396475e-06, "loss": 1.1651, "step": 696 }, { "epoch": 0.5576, "grad_norm": 1.449863314628601, "learning_rate": 2.0500755320360263e-06, "loss": 3.6073, "step": 697 }, { "epoch": 0.5584, "grad_norm": 3.81058406829834, "learning_rate": 2.0438963651998747e-06, "loss": 1.2255, "step": 698 }, { "epoch": 0.5592, "grad_norm": 1.1542376279830933, "learning_rate": 2.0377200793621694e-06, "loss": 3.6066, "step": 699 }, { "epoch": 0.56, "grad_norm": 4.023213863372803, "learning_rate": 2.031546713535688e-06, "loss": 1.3477, "step": 700 }, { "epoch": 0.5608, "grad_norm": 1.3673769235610962, "learning_rate": 2.0253763067147657e-06, "loss": 3.5453, "step": 701 }, { "epoch": 0.5616, "grad_norm": 4.080592155456543, "learning_rate": 2.019208897875043e-06, "loss": 1.4669, "step": 702 }, { "epoch": 0.5624, "grad_norm": 1.4954679012298584, "learning_rate": 2.0130445259732282e-06, "loss": 3.4227, "step": 703 }, { "epoch": 0.5632, "grad_norm": 4.1900248527526855, "learning_rate": 2.006883229946843e-06, "loss": 1.4427, "step": 704 }, { "epoch": 0.564, "grad_norm": 1.4168885946273804, "learning_rate": 2.0007250487139827e-06, "loss": 3.6209, "step": 705 }, { "epoch": 0.5648, "grad_norm": 3.834075450897217, "learning_rate": 1.994570021173067e-06, "loss": 1.2146, "step": 706 }, { "epoch": 0.5656, "grad_norm": 1.18809974193573, "learning_rate": 1.9884181862025938e-06, "loss": 3.5612, "step": 707 }, { "epoch": 0.5664, "grad_norm": 3.8719165325164795, "learning_rate": 1.9822695826608975e-06, "loss": 1.4709, "step": 708 }, { "epoch": 0.5672, "grad_norm": 1.2471320629119873, "learning_rate": 1.9761242493858987e-06, "loss": 3.5347, "step": 709 }, { "epoch": 0.568, "grad_norm": 3.889285087585449, "learning_rate": 1.969982225194864e-06, "loss": 1.1893, "step": 710 }, { "epoch": 0.5688, "grad_norm": 1.6830719709396362, "learning_rate": 1.9638435488841543e-06, "loss": 3.3654, "step": 711 }, { "epoch": 0.5696, "grad_norm": 3.806553363800049, "learning_rate": 1.957708259228987e-06, "loss": 1.179, "step": 712 }, { "epoch": 0.5704, "grad_norm": 1.273412823677063, "learning_rate": 1.9515763949831852e-06, "loss": 3.5977, "step": 713 }, { "epoch": 0.5712, "grad_norm": 3.846447229385376, "learning_rate": 1.945447994878937e-06, "loss": 1.559, "step": 714 }, { "epoch": 0.572, "grad_norm": 1.3436466455459595, "learning_rate": 1.9393230976265478e-06, "loss": 3.6578, "step": 715 }, { "epoch": 0.5728, "grad_norm": 3.7785065174102783, "learning_rate": 1.933201741914196e-06, "loss": 1.4349, "step": 716 }, { "epoch": 0.5736, "grad_norm": 1.8797110319137573, "learning_rate": 1.9270839664076937e-06, "loss": 3.545, "step": 717 }, { "epoch": 0.5744, "grad_norm": 4.088225841522217, "learning_rate": 1.920969809750234e-06, "loss": 1.31, "step": 718 }, { "epoch": 0.5752, "grad_norm": 1.348626732826233, "learning_rate": 1.9148593105621542e-06, "loss": 3.5437, "step": 719 }, { "epoch": 0.576, "grad_norm": 3.5283923149108887, "learning_rate": 1.908752507440689e-06, "loss": 1.179, "step": 720 }, { "epoch": 0.5768, "grad_norm": 1.4678329229354858, "learning_rate": 1.9026494389597239e-06, "loss": 3.5683, "step": 721 }, { "epoch": 0.5776, "grad_norm": 4.486749172210693, "learning_rate": 1.8965501436695578e-06, "loss": 1.2648, "step": 722 }, { "epoch": 0.5784, "grad_norm": 1.4773081541061401, "learning_rate": 1.8904546600966539e-06, "loss": 3.5973, "step": 723 }, { "epoch": 0.5792, "grad_norm": 4.043974876403809, "learning_rate": 1.8843630267434e-06, "loss": 1.425, "step": 724 }, { "epoch": 0.58, "grad_norm": 1.2826696634292603, "learning_rate": 1.8782752820878636e-06, "loss": 3.5147, "step": 725 }, { "epoch": 0.5808, "grad_norm": 3.6155593395233154, "learning_rate": 1.872191464583547e-06, "loss": 1.4485, "step": 726 }, { "epoch": 0.5816, "grad_norm": 1.2381564378738403, "learning_rate": 1.8661116126591492e-06, "loss": 3.64, "step": 727 }, { "epoch": 0.5824, "grad_norm": 4.1232380867004395, "learning_rate": 1.8600357647183188e-06, "loss": 1.3699, "step": 728 }, { "epoch": 0.5832, "grad_norm": 1.070135474205017, "learning_rate": 1.8539639591394131e-06, "loss": 3.5735, "step": 729 }, { "epoch": 0.584, "grad_norm": 3.9993014335632324, "learning_rate": 1.8478962342752584e-06, "loss": 1.46, "step": 730 }, { "epoch": 0.5848, "grad_norm": 1.5479552745819092, "learning_rate": 1.8418326284528997e-06, "loss": 3.431, "step": 731 }, { "epoch": 0.5856, "grad_norm": 4.261895656585693, "learning_rate": 1.8357731799733686e-06, "loss": 1.5391, "step": 732 }, { "epoch": 0.5864, "grad_norm": 0.9864424467086792, "learning_rate": 1.8297179271114345e-06, "loss": 3.6108, "step": 733 }, { "epoch": 0.5872, "grad_norm": 4.133561134338379, "learning_rate": 1.8236669081153657e-06, "loss": 1.3051, "step": 734 }, { "epoch": 0.588, "grad_norm": 1.7257312536239624, "learning_rate": 1.8176201612066874e-06, "loss": 3.5698, "step": 735 }, { "epoch": 0.5888, "grad_norm": 3.8284997940063477, "learning_rate": 1.8115777245799383e-06, "loss": 1.1011, "step": 736 }, { "epoch": 0.5896, "grad_norm": 1.4894834756851196, "learning_rate": 1.8055396364024318e-06, "loss": 3.5975, "step": 737 }, { "epoch": 0.5904, "grad_norm": 4.291233539581299, "learning_rate": 1.7995059348140165e-06, "loss": 1.4558, "step": 738 }, { "epoch": 0.5912, "grad_norm": 1.2095164060592651, "learning_rate": 1.7934766579268292e-06, "loss": 3.5745, "step": 739 }, { "epoch": 0.592, "grad_norm": 4.15226936340332, "learning_rate": 1.7874518438250598e-06, "loss": 1.4725, "step": 740 }, { "epoch": 0.5928, "grad_norm": 1.2965120077133179, "learning_rate": 1.7814315305647095e-06, "loss": 3.5479, "step": 741 }, { "epoch": 0.5936, "grad_norm": 3.704596519470215, "learning_rate": 1.7754157561733476e-06, "loss": 1.2924, "step": 742 }, { "epoch": 0.5944, "grad_norm": 1.8090176582336426, "learning_rate": 1.7694045586498754e-06, "loss": 3.418, "step": 743 }, { "epoch": 0.5952, "grad_norm": 3.9790186882019043, "learning_rate": 1.7633979759642844e-06, "loss": 1.4173, "step": 744 }, { "epoch": 0.596, "grad_norm": 1.8232885599136353, "learning_rate": 1.7573960460574133e-06, "loss": 3.5081, "step": 745 }, { "epoch": 0.5968, "grad_norm": 3.6959445476531982, "learning_rate": 1.7513988068407145e-06, "loss": 1.2422, "step": 746 }, { "epoch": 0.5976, "grad_norm": 1.4322175979614258, "learning_rate": 1.7454062961960102e-06, "loss": 3.5851, "step": 747 }, { "epoch": 0.5984, "grad_norm": 3.444291591644287, "learning_rate": 1.7394185519752546e-06, "loss": 1.2407, "step": 748 }, { "epoch": 0.5992, "grad_norm": 1.024861454963684, "learning_rate": 1.7334356120002956e-06, "loss": 3.6587, "step": 749 }, { "epoch": 0.6, "grad_norm": 4.007371425628662, "learning_rate": 1.7274575140626318e-06, "loss": 1.3341, "step": 750 }, { "epoch": 0.6008, "grad_norm": 1.387477159500122, "learning_rate": 1.7214842959231796e-06, "loss": 3.5696, "step": 751 }, { "epoch": 0.6016, "grad_norm": 3.6198816299438477, "learning_rate": 1.7155159953120315e-06, "loss": 1.1709, "step": 752 }, { "epoch": 0.6024, "grad_norm": 1.5271052122116089, "learning_rate": 1.7095526499282172e-06, "loss": 3.5466, "step": 753 }, { "epoch": 0.6032, "grad_norm": 4.3780317306518555, "learning_rate": 1.703594297439469e-06, "loss": 1.4056, "step": 754 }, { "epoch": 0.604, "grad_norm": 1.0889999866485596, "learning_rate": 1.6976409754819767e-06, "loss": 3.6382, "step": 755 }, { "epoch": 0.6048, "grad_norm": 4.148120403289795, "learning_rate": 1.6916927216601593e-06, "loss": 1.3061, "step": 756 }, { "epoch": 0.6056, "grad_norm": 1.0028917789459229, "learning_rate": 1.6857495735464196e-06, "loss": 3.6111, "step": 757 }, { "epoch": 0.6064, "grad_norm": 3.956118583679199, "learning_rate": 1.6798115686809125e-06, "loss": 1.4431, "step": 758 }, { "epoch": 0.6072, "grad_norm": 1.1292115449905396, "learning_rate": 1.673878744571304e-06, "loss": 3.6654, "step": 759 }, { "epoch": 0.608, "grad_norm": 3.675584554672241, "learning_rate": 1.6679511386925337e-06, "loss": 1.2957, "step": 760 }, { "epoch": 0.6088, "grad_norm": 1.6884305477142334, "learning_rate": 1.6620287884865831e-06, "loss": 3.471, "step": 761 }, { "epoch": 0.6096, "grad_norm": 3.8323042392730713, "learning_rate": 1.656111731362236e-06, "loss": 1.1559, "step": 762 }, { "epoch": 0.6104, "grad_norm": 1.2776001691818237, "learning_rate": 1.650200004694839e-06, "loss": 3.5601, "step": 763 }, { "epoch": 0.6112, "grad_norm": 3.951807737350464, "learning_rate": 1.6442936458260723e-06, "loss": 1.2963, "step": 764 }, { "epoch": 0.612, "grad_norm": 1.0104762315750122, "learning_rate": 1.6383926920637077e-06, "loss": 3.6454, "step": 765 }, { "epoch": 0.6128, "grad_norm": 3.8364481925964355, "learning_rate": 1.6324971806813766e-06, "loss": 1.2477, "step": 766 }, { "epoch": 0.6136, "grad_norm": 1.404075264930725, "learning_rate": 1.6266071489183327e-06, "loss": 3.5319, "step": 767 }, { "epoch": 0.6144, "grad_norm": 3.647761583328247, "learning_rate": 1.620722633979219e-06, "loss": 1.3192, "step": 768 }, { "epoch": 0.6152, "grad_norm": 1.2602980136871338, "learning_rate": 1.6148436730338279e-06, "loss": 3.5468, "step": 769 }, { "epoch": 0.616, "grad_norm": 4.292653560638428, "learning_rate": 1.6089703032168736e-06, "loss": 1.1626, "step": 770 }, { "epoch": 0.6168, "grad_norm": 1.8109797239303589, "learning_rate": 1.6031025616277512e-06, "loss": 3.5154, "step": 771 }, { "epoch": 0.6176, "grad_norm": 4.427074909210205, "learning_rate": 1.5972404853303061e-06, "loss": 1.1841, "step": 772 }, { "epoch": 0.6184, "grad_norm": 1.114534854888916, "learning_rate": 1.591384111352599e-06, "loss": 3.5374, "step": 773 }, { "epoch": 0.6192, "grad_norm": 3.930265426635742, "learning_rate": 1.585533476686669e-06, "loss": 1.203, "step": 774 }, { "epoch": 0.62, "grad_norm": 1.7864525318145752, "learning_rate": 1.5796886182883053e-06, "loss": 3.4942, "step": 775 }, { "epoch": 0.6208, "grad_norm": 4.248049259185791, "learning_rate": 1.5738495730768104e-06, "loss": 1.5361, "step": 776 }, { "epoch": 0.6216, "grad_norm": 1.1578404903411865, "learning_rate": 1.5680163779347668e-06, "loss": 3.5659, "step": 777 }, { "epoch": 0.6224, "grad_norm": 4.111908435821533, "learning_rate": 1.5621890697078069e-06, "loss": 1.582, "step": 778 }, { "epoch": 0.6232, "grad_norm": 1.2350143194198608, "learning_rate": 1.5563676852043738e-06, "loss": 3.5397, "step": 779 }, { "epoch": 0.624, "grad_norm": 4.6647562980651855, "learning_rate": 1.5505522611954977e-06, "loss": 1.5677, "step": 780 }, { "epoch": 0.6248, "grad_norm": 1.5898746252059937, "learning_rate": 1.5447428344145565e-06, "loss": 3.4637, "step": 781 }, { "epoch": 0.6256, "grad_norm": 4.031108856201172, "learning_rate": 1.538939441557048e-06, "loss": 1.5085, "step": 782 }, { "epoch": 0.6264, "grad_norm": 1.1129035949707031, "learning_rate": 1.5331421192803565e-06, "loss": 3.7525, "step": 783 }, { "epoch": 0.6272, "grad_norm": 3.7480621337890625, "learning_rate": 1.5273509042035172e-06, "loss": 1.3526, "step": 784 }, { "epoch": 0.628, "grad_norm": 1.4506335258483887, "learning_rate": 1.521565832906994e-06, "loss": 3.4543, "step": 785 }, { "epoch": 0.6288, "grad_norm": 4.091665267944336, "learning_rate": 1.515786941932441e-06, "loss": 1.3925, "step": 786 }, { "epoch": 0.6296, "grad_norm": 1.7259176969528198, "learning_rate": 1.5100142677824752e-06, "loss": 3.5212, "step": 787 }, { "epoch": 0.6304, "grad_norm": 3.6364309787750244, "learning_rate": 1.5042478469204437e-06, "loss": 1.486, "step": 788 }, { "epoch": 0.6312, "grad_norm": 1.0510691404342651, "learning_rate": 1.4984877157701932e-06, "loss": 3.5759, "step": 789 }, { "epoch": 0.632, "grad_norm": 3.974539041519165, "learning_rate": 1.4927339107158437e-06, "loss": 1.3787, "step": 790 }, { "epoch": 0.6328, "grad_norm": 1.5087684392929077, "learning_rate": 1.486986468101555e-06, "loss": 3.547, "step": 791 }, { "epoch": 0.6336, "grad_norm": 3.6339049339294434, "learning_rate": 1.481245424231298e-06, "loss": 1.321, "step": 792 }, { "epoch": 0.6344, "grad_norm": 1.1450809240341187, "learning_rate": 1.4755108153686275e-06, "loss": 3.6239, "step": 793 }, { "epoch": 0.6352, "grad_norm": 3.5662426948547363, "learning_rate": 1.4697826777364478e-06, "loss": 1.2403, "step": 794 }, { "epoch": 0.636, "grad_norm": 1.2532669305801392, "learning_rate": 1.46406104751679e-06, "loss": 3.5814, "step": 795 }, { "epoch": 0.6368, "grad_norm": 3.5871071815490723, "learning_rate": 1.45834596085058e-06, "loss": 1.2413, "step": 796 }, { "epoch": 0.6376, "grad_norm": 1.7455424070358276, "learning_rate": 1.4526374538374133e-06, "loss": 3.5806, "step": 797 }, { "epoch": 0.6384, "grad_norm": 4.081576824188232, "learning_rate": 1.4469355625353199e-06, "loss": 1.314, "step": 798 }, { "epoch": 0.6392, "grad_norm": 1.2774088382720947, "learning_rate": 1.4412403229605453e-06, "loss": 3.5766, "step": 799 }, { "epoch": 0.64, "grad_norm": 4.024228572845459, "learning_rate": 1.4355517710873184e-06, "loss": 1.3179, "step": 800 }, { "epoch": 0.6408, "grad_norm": 1.5069676637649536, "learning_rate": 1.4298699428476236e-06, "loss": 3.4628, "step": 801 }, { "epoch": 0.6416, "grad_norm": 3.8722047805786133, "learning_rate": 1.4241948741309783e-06, "loss": 1.2991, "step": 802 }, { "epoch": 0.6424, "grad_norm": 1.4869807958602905, "learning_rate": 1.418526600784198e-06, "loss": 3.5303, "step": 803 }, { "epoch": 0.6432, "grad_norm": 4.096463680267334, "learning_rate": 1.412865158611179e-06, "loss": 1.4464, "step": 804 }, { "epoch": 0.644, "grad_norm": 1.3232511281967163, "learning_rate": 1.4072105833726685e-06, "loss": 3.5599, "step": 805 }, { "epoch": 0.6448, "grad_norm": 3.500465154647827, "learning_rate": 1.401562910786034e-06, "loss": 1.3568, "step": 806 }, { "epoch": 0.6456, "grad_norm": 1.6436785459518433, "learning_rate": 1.395922176525047e-06, "loss": 3.5835, "step": 807 }, { "epoch": 0.6464, "grad_norm": 3.5307986736297607, "learning_rate": 1.3902884162196509e-06, "loss": 1.3578, "step": 808 }, { "epoch": 0.6472, "grad_norm": 1.2310173511505127, "learning_rate": 1.384661665455736e-06, "loss": 3.626, "step": 809 }, { "epoch": 0.648, "grad_norm": 5.397148132324219, "learning_rate": 1.3790419597749198e-06, "loss": 1.3758, "step": 810 }, { "epoch": 0.6488, "grad_norm": 1.2223182916641235, "learning_rate": 1.373429334674317e-06, "loss": 3.5392, "step": 811 }, { "epoch": 0.6496, "grad_norm": 5.135192394256592, "learning_rate": 1.3678238256063193e-06, "loss": 1.27, "step": 812 }, { "epoch": 0.6504, "grad_norm": 1.457159161567688, "learning_rate": 1.3622254679783665e-06, "loss": 3.5182, "step": 813 }, { "epoch": 0.6512, "grad_norm": 3.729689359664917, "learning_rate": 1.356634297152729e-06, "loss": 1.219, "step": 814 }, { "epoch": 0.652, "grad_norm": 1.7926121950149536, "learning_rate": 1.3510503484462807e-06, "loss": 3.4169, "step": 815 }, { "epoch": 0.6528, "grad_norm": 3.46643328666687, "learning_rate": 1.3454736571302761e-06, "loss": 1.2486, "step": 816 }, { "epoch": 0.6536, "grad_norm": 1.3711421489715576, "learning_rate": 1.3399042584301298e-06, "loss": 3.5197, "step": 817 }, { "epoch": 0.6544, "grad_norm": 4.594119071960449, "learning_rate": 1.334342187525189e-06, "loss": 1.2484, "step": 818 }, { "epoch": 0.6552, "grad_norm": 1.1788302659988403, "learning_rate": 1.3287874795485168e-06, "loss": 3.574, "step": 819 }, { "epoch": 0.656, "grad_norm": 3.5496530532836914, "learning_rate": 1.3232401695866686e-06, "loss": 1.1791, "step": 820 }, { "epoch": 0.6568, "grad_norm": 1.140120267868042, "learning_rate": 1.3177002926794685e-06, "loss": 3.6431, "step": 821 }, { "epoch": 0.6576, "grad_norm": 4.5700554847717285, "learning_rate": 1.312167883819791e-06, "loss": 1.3331, "step": 822 }, { "epoch": 0.6584, "grad_norm": 1.6417975425720215, "learning_rate": 1.3066429779533352e-06, "loss": 3.4451, "step": 823 }, { "epoch": 0.6592, "grad_norm": 3.6675314903259277, "learning_rate": 1.3011256099784103e-06, "loss": 1.1985, "step": 824 }, { "epoch": 0.66, "grad_norm": 0.9253246784210205, "learning_rate": 1.2956158147457116e-06, "loss": 3.6082, "step": 825 }, { "epoch": 0.6608, "grad_norm": 4.173038482666016, "learning_rate": 1.2901136270580994e-06, "loss": 1.2908, "step": 826 }, { "epoch": 0.6616, "grad_norm": 1.7744218111038208, "learning_rate": 1.2846190816703836e-06, "loss": 3.4493, "step": 827 }, { "epoch": 0.6624, "grad_norm": 3.8822882175445557, "learning_rate": 1.279132213289096e-06, "loss": 1.5025, "step": 828 }, { "epoch": 0.6632, "grad_norm": 1.4533785581588745, "learning_rate": 1.273653056572282e-06, "loss": 3.5351, "step": 829 }, { "epoch": 0.664, "grad_norm": 3.9480459690093994, "learning_rate": 1.2681816461292715e-06, "loss": 1.3216, "step": 830 }, { "epoch": 0.6648, "grad_norm": 1.3655693531036377, "learning_rate": 1.2627180165204671e-06, "loss": 3.5135, "step": 831 }, { "epoch": 0.6656, "grad_norm": 3.7476413249969482, "learning_rate": 1.257262202257124e-06, "loss": 1.4918, "step": 832 }, { "epoch": 0.6664, "grad_norm": 1.7849209308624268, "learning_rate": 1.251814237801128e-06, "loss": 3.4437, "step": 833 }, { "epoch": 0.6672, "grad_norm": 4.042788982391357, "learning_rate": 1.246374157564785e-06, "loss": 1.1764, "step": 834 }, { "epoch": 0.668, "grad_norm": 1.2156387567520142, "learning_rate": 1.2409419959105981e-06, "loss": 3.565, "step": 835 }, { "epoch": 0.6688, "grad_norm": 3.900473117828369, "learning_rate": 1.2355177871510538e-06, "loss": 1.2951, "step": 836 }, { "epoch": 0.6696, "grad_norm": 1.0474777221679688, "learning_rate": 1.2301015655484006e-06, "loss": 3.6051, "step": 837 }, { "epoch": 0.6704, "grad_norm": 3.8230295181274414, "learning_rate": 1.2246933653144386e-06, "loss": 1.4542, "step": 838 }, { "epoch": 0.6712, "grad_norm": 1.6013360023498535, "learning_rate": 1.2192932206103e-06, "loss": 3.4223, "step": 839 }, { "epoch": 0.672, "grad_norm": 3.603398084640503, "learning_rate": 1.2139011655462338e-06, "loss": 1.1428, "step": 840 }, { "epoch": 0.6728, "grad_norm": 0.9630873203277588, "learning_rate": 1.208517234181391e-06, "loss": 3.63, "step": 841 }, { "epoch": 0.6736, "grad_norm": 3.746964931488037, "learning_rate": 1.2031414605236066e-06, "loss": 1.2324, "step": 842 }, { "epoch": 0.6744, "grad_norm": 1.1261411905288696, "learning_rate": 1.1977738785291894e-06, "loss": 3.5977, "step": 843 }, { "epoch": 0.6752, "grad_norm": 3.895467519760132, "learning_rate": 1.1924145221027048e-06, "loss": 1.1571, "step": 844 }, { "epoch": 0.676, "grad_norm": 1.2304555177688599, "learning_rate": 1.1870634250967606e-06, "loss": 3.613, "step": 845 }, { "epoch": 0.6768, "grad_norm": 3.7354040145874023, "learning_rate": 1.1817206213117943e-06, "loss": 1.4115, "step": 846 }, { "epoch": 0.6776, "grad_norm": 1.3557534217834473, "learning_rate": 1.1763861444958573e-06, "loss": 3.5227, "step": 847 }, { "epoch": 0.6784, "grad_norm": 8.678403854370117, "learning_rate": 1.1710600283444048e-06, "loss": 1.3812, "step": 848 }, { "epoch": 0.6792, "grad_norm": 1.2234259843826294, "learning_rate": 1.1657423065000811e-06, "loss": 3.5525, "step": 849 }, { "epoch": 0.68, "grad_norm": 4.474430084228516, "learning_rate": 1.160433012552508e-06, "loss": 1.5074, "step": 850 }, { "epoch": 0.6808, "grad_norm": 1.9095535278320312, "learning_rate": 1.1551321800380722e-06, "loss": 3.3455, "step": 851 }, { "epoch": 0.6816, "grad_norm": 4.141076564788818, "learning_rate": 1.1498398424397106e-06, "loss": 1.2947, "step": 852 }, { "epoch": 0.6824, "grad_norm": 1.9714593887329102, "learning_rate": 1.1445560331867054e-06, "loss": 3.455, "step": 853 }, { "epoch": 0.6832, "grad_norm": 4.287348747253418, "learning_rate": 1.1392807856544682e-06, "loss": 1.3707, "step": 854 }, { "epoch": 0.684, "grad_norm": 1.3626141548156738, "learning_rate": 1.1340141331643276e-06, "loss": 3.4847, "step": 855 }, { "epoch": 0.6848, "grad_norm": 4.172240734100342, "learning_rate": 1.128756108983325e-06, "loss": 1.1837, "step": 856 }, { "epoch": 0.6856, "grad_norm": 1.6149402856826782, "learning_rate": 1.123506746323997e-06, "loss": 3.3876, "step": 857 }, { "epoch": 0.6864, "grad_norm": 4.046041011810303, "learning_rate": 1.1182660783441719e-06, "loss": 1.199, "step": 858 }, { "epoch": 0.6872, "grad_norm": 1.2951021194458008, "learning_rate": 1.1130341381467569e-06, "loss": 3.546, "step": 859 }, { "epoch": 0.688, "grad_norm": 3.817901611328125, "learning_rate": 1.1078109587795311e-06, "loss": 1.2792, "step": 860 }, { "epoch": 0.6888, "grad_norm": 1.45967435836792, "learning_rate": 1.1025965732349318e-06, "loss": 3.5619, "step": 861 }, { "epoch": 0.6896, "grad_norm": 3.8560800552368164, "learning_rate": 1.0973910144498534e-06, "loss": 1.3367, "step": 862 }, { "epoch": 0.6904, "grad_norm": 1.186650037765503, "learning_rate": 1.0921943153054343e-06, "loss": 3.5638, "step": 863 }, { "epoch": 0.6912, "grad_norm": 3.8473381996154785, "learning_rate": 1.0870065086268506e-06, "loss": 1.3076, "step": 864 }, { "epoch": 0.692, "grad_norm": 1.6394022703170776, "learning_rate": 1.0818276271831094e-06, "loss": 3.5127, "step": 865 }, { "epoch": 0.6928, "grad_norm": 4.1624016761779785, "learning_rate": 1.0766577036868395e-06, "loss": 1.3827, "step": 866 }, { "epoch": 0.6936, "grad_norm": 1.134089469909668, "learning_rate": 1.0714967707940876e-06, "loss": 3.5572, "step": 867 }, { "epoch": 0.6944, "grad_norm": 4.057480335235596, "learning_rate": 1.0663448611041114e-06, "loss": 1.4129, "step": 868 }, { "epoch": 0.6952, "grad_norm": 1.2894881963729858, "learning_rate": 1.0612020071591722e-06, "loss": 3.5994, "step": 869 }, { "epoch": 0.696, "grad_norm": 3.5933890342712402, "learning_rate": 1.0560682414443315e-06, "loss": 1.1426, "step": 870 }, { "epoch": 0.6968, "grad_norm": 1.4715263843536377, "learning_rate": 1.0509435963872422e-06, "loss": 3.5776, "step": 871 }, { "epoch": 0.6976, "grad_norm": 3.6835391521453857, "learning_rate": 1.0458281043579482e-06, "loss": 1.3991, "step": 872 }, { "epoch": 0.6984, "grad_norm": 1.2193199396133423, "learning_rate": 1.0407217976686777e-06, "loss": 3.5754, "step": 873 }, { "epoch": 0.6992, "grad_norm": 3.6208441257476807, "learning_rate": 1.0356247085736388e-06, "loss": 1.2799, "step": 874 }, { "epoch": 0.7, "grad_norm": 1.3012170791625977, "learning_rate": 1.0305368692688175e-06, "loss": 3.5576, "step": 875 }, { "epoch": 0.7008, "grad_norm": 3.988499879837036, "learning_rate": 1.0254583118917699e-06, "loss": 1.4413, "step": 876 }, { "epoch": 0.7016, "grad_norm": 1.3237192630767822, "learning_rate": 1.020389068521426e-06, "loss": 3.5586, "step": 877 }, { "epoch": 0.7024, "grad_norm": 4.113298416137695, "learning_rate": 1.0153291711778825e-06, "loss": 1.4436, "step": 878 }, { "epoch": 0.7032, "grad_norm": 1.1641186475753784, "learning_rate": 1.0102786518221997e-06, "loss": 3.5658, "step": 879 }, { "epoch": 0.704, "grad_norm": 4.27529239654541, "learning_rate": 1.0052375423562038e-06, "loss": 1.3145, "step": 880 }, { "epoch": 0.7048, "grad_norm": 1.370846152305603, "learning_rate": 1.0002058746222807e-06, "loss": 3.5536, "step": 881 }, { "epoch": 0.7056, "grad_norm": 4.043067932128906, "learning_rate": 9.951836804031795e-07, "loss": 1.2685, "step": 882 }, { "epoch": 0.7064, "grad_norm": 1.643572211265564, "learning_rate": 9.90170991421808e-07, "loss": 3.5677, "step": 883 }, { "epoch": 0.7072, "grad_norm": 4.03674840927124, "learning_rate": 9.851678393410343e-07, "loss": 1.3122, "step": 884 }, { "epoch": 0.708, "grad_norm": 1.0866400003433228, "learning_rate": 9.801742557634872e-07, "loss": 3.5932, "step": 885 }, { "epoch": 0.7088, "grad_norm": 3.896414279937744, "learning_rate": 9.751902722313527e-07, "loss": 1.2974, "step": 886 }, { "epoch": 0.7096, "grad_norm": 1.1581923961639404, "learning_rate": 9.702159202261802e-07, "loss": 3.5641, "step": 887 }, { "epoch": 0.7104, "grad_norm": 3.8378193378448486, "learning_rate": 9.65251231168681e-07, "loss": 1.2477, "step": 888 }, { "epoch": 0.7112, "grad_norm": 1.1178447008132935, "learning_rate": 9.602962364185286e-07, "loss": 3.5832, "step": 889 }, { "epoch": 0.712, "grad_norm": 3.76153302192688, "learning_rate": 9.553509672741646e-07, "loss": 1.5284, "step": 890 }, { "epoch": 0.7128, "grad_norm": 1.6611312627792358, "learning_rate": 9.504154549725944e-07, "loss": 3.4278, "step": 891 }, { "epoch": 0.7136, "grad_norm": 3.821173906326294, "learning_rate": 9.454897306891972e-07, "loss": 1.3952, "step": 892 }, { "epoch": 0.7144, "grad_norm": 0.9451780915260315, "learning_rate": 9.405738255375243e-07, "loss": 3.5839, "step": 893 }, { "epoch": 0.7152, "grad_norm": 5.367844104766846, "learning_rate": 9.356677705691058e-07, "loss": 1.3163, "step": 894 }, { "epoch": 0.716, "grad_norm": 1.4917246103286743, "learning_rate": 9.307715967732492e-07, "loss": 3.3808, "step": 895 }, { "epoch": 0.7168, "grad_norm": 4.245250225067139, "learning_rate": 9.258853350768499e-07, "loss": 1.3849, "step": 896 }, { "epoch": 0.7176, "grad_norm": 1.8379777669906616, "learning_rate": 9.210090163441928e-07, "loss": 3.5479, "step": 897 }, { "epoch": 0.7184, "grad_norm": 3.840579032897949, "learning_rate": 9.161426713767574e-07, "loss": 1.3287, "step": 898 }, { "epoch": 0.7192, "grad_norm": 1.2158552408218384, "learning_rate": 9.112863309130235e-07, "loss": 3.5524, "step": 899 }, { "epoch": 0.72, "grad_norm": 4.019105434417725, "learning_rate": 9.064400256282757e-07, "loss": 1.2645, "step": 900 }, { "epoch": 0.7208, "grad_norm": 1.4201416969299316, "learning_rate": 9.01603786134413e-07, "loss": 3.5722, "step": 901 }, { "epoch": 0.7216, "grad_norm": 3.683457851409912, "learning_rate": 8.967776429797529e-07, "loss": 1.2652, "step": 902 }, { "epoch": 0.7224, "grad_norm": 1.3120098114013672, "learning_rate": 8.919616266488373e-07, "loss": 3.5835, "step": 903 }, { "epoch": 0.7232, "grad_norm": 3.85827898979187, "learning_rate": 8.871557675622442e-07, "loss": 1.407, "step": 904 }, { "epoch": 0.724, "grad_norm": 1.2667253017425537, "learning_rate": 8.823600960763901e-07, "loss": 3.5396, "step": 905 }, { "epoch": 0.7248, "grad_norm": 3.5598056316375732, "learning_rate": 8.775746424833428e-07, "loss": 1.1467, "step": 906 }, { "epoch": 0.7256, "grad_norm": 1.2805604934692383, "learning_rate": 8.727994370106288e-07, "loss": 3.5316, "step": 907 }, { "epoch": 0.7264, "grad_norm": 4.258754253387451, "learning_rate": 8.680345098210408e-07, "loss": 1.312, "step": 908 }, { "epoch": 0.7272, "grad_norm": 1.3038127422332764, "learning_rate": 8.632798910124493e-07, "loss": 3.5995, "step": 909 }, { "epoch": 0.728, "grad_norm": 3.3651838302612305, "learning_rate": 8.585356106176093e-07, "loss": 1.12, "step": 910 }, { "epoch": 0.7288, "grad_norm": 1.9212744235992432, "learning_rate": 8.538016986039751e-07, "loss": 3.5292, "step": 911 }, { "epoch": 0.7296, "grad_norm": 4.390267848968506, "learning_rate": 8.49078184873508e-07, "loss": 1.2082, "step": 912 }, { "epoch": 0.7304, "grad_norm": 1.133646845817566, "learning_rate": 8.443650992624877e-07, "loss": 3.6091, "step": 913 }, { "epoch": 0.7312, "grad_norm": 3.671508550643921, "learning_rate": 8.396624715413251e-07, "loss": 1.2595, "step": 914 }, { "epoch": 0.732, "grad_norm": 1.238884687423706, "learning_rate": 8.349703314143712e-07, "loss": 3.516, "step": 915 }, { "epoch": 0.7328, "grad_norm": 4.374630451202393, "learning_rate": 8.302887085197342e-07, "loss": 1.2724, "step": 916 }, { "epoch": 0.7336, "grad_norm": 1.0681443214416504, "learning_rate": 8.256176324290885e-07, "loss": 3.5777, "step": 917 }, { "epoch": 0.7344, "grad_norm": 4.399445056915283, "learning_rate": 8.209571326474897e-07, "loss": 1.5055, "step": 918 }, { "epoch": 0.7352, "grad_norm": 1.302098035812378, "learning_rate": 8.163072386131876e-07, "loss": 3.5391, "step": 919 }, { "epoch": 0.736, "grad_norm": 4.033039093017578, "learning_rate": 8.116679796974389e-07, "loss": 1.4171, "step": 920 }, { "epoch": 0.7368, "grad_norm": 1.2380177974700928, "learning_rate": 8.070393852043251e-07, "loss": 3.5787, "step": 921 }, { "epoch": 0.7376, "grad_norm": 4.127280235290527, "learning_rate": 8.024214843705647e-07, "loss": 1.4362, "step": 922 }, { "epoch": 0.7384, "grad_norm": 1.448819875717163, "learning_rate": 7.978143063653296e-07, "loss": 3.5109, "step": 923 }, { "epoch": 0.7392, "grad_norm": 4.252338886260986, "learning_rate": 7.93217880290059e-07, "loss": 1.2241, "step": 924 }, { "epoch": 0.74, "grad_norm": 1.3917127847671509, "learning_rate": 7.886322351782782e-07, "loss": 3.5236, "step": 925 }, { "epoch": 0.7408, "grad_norm": 3.9095723628997803, "learning_rate": 7.840573999954154e-07, "loss": 1.3039, "step": 926 }, { "epoch": 0.7416, "grad_norm": 1.6759053468704224, "learning_rate": 7.794934036386139e-07, "loss": 3.5408, "step": 927 }, { "epoch": 0.7424, "grad_norm": 3.9729490280151367, "learning_rate": 7.749402749365573e-07, "loss": 1.2951, "step": 928 }, { "epoch": 0.7432, "grad_norm": 1.7310004234313965, "learning_rate": 7.703980426492791e-07, "loss": 3.4605, "step": 929 }, { "epoch": 0.744, "grad_norm": 4.3605523109436035, "learning_rate": 7.65866735467988e-07, "loss": 1.2495, "step": 930 }, { "epoch": 0.7448, "grad_norm": 1.055009365081787, "learning_rate": 7.613463820148831e-07, "loss": 3.5749, "step": 931 }, { "epoch": 0.7456, "grad_norm": 4.379756450653076, "learning_rate": 7.568370108429732e-07, "loss": 1.3678, "step": 932 }, { "epoch": 0.7464, "grad_norm": 1.133419156074524, "learning_rate": 7.523386504358984e-07, "loss": 3.6624, "step": 933 }, { "epoch": 0.7472, "grad_norm": 3.2285141944885254, "learning_rate": 7.478513292077463e-07, "loss": 1.2785, "step": 934 }, { "epoch": 0.748, "grad_norm": 1.2085245847702026, "learning_rate": 7.433750755028774e-07, "loss": 3.6372, "step": 935 }, { "epoch": 0.7488, "grad_norm": 3.985098123550415, "learning_rate": 7.389099175957426e-07, "loss": 1.3853, "step": 936 }, { "epoch": 0.7496, "grad_norm": 1.3521220684051514, "learning_rate": 7.344558836907067e-07, "loss": 3.4587, "step": 937 }, { "epoch": 0.7504, "grad_norm": 3.7972023487091064, "learning_rate": 7.300130019218688e-07, "loss": 1.4041, "step": 938 }, { "epoch": 0.7512, "grad_norm": 1.1607991456985474, "learning_rate": 7.255813003528834e-07, "loss": 3.5921, "step": 939 }, { "epoch": 0.752, "grad_norm": 4.701716423034668, "learning_rate": 7.211608069767867e-07, "loss": 1.1838, "step": 940 }, { "epoch": 0.7528, "grad_norm": 1.6962052583694458, "learning_rate": 7.167515497158179e-07, "loss": 3.4455, "step": 941 }, { "epoch": 0.7536, "grad_norm": 3.769155502319336, "learning_rate": 7.123535564212419e-07, "loss": 1.417, "step": 942 }, { "epoch": 0.7544, "grad_norm": 1.5282889604568481, "learning_rate": 7.079668548731757e-07, "loss": 3.4607, "step": 943 }, { "epoch": 0.7552, "grad_norm": 4.213266372680664, "learning_rate": 7.035914727804085e-07, "loss": 1.1793, "step": 944 }, { "epoch": 0.756, "grad_norm": 1.5362334251403809, "learning_rate": 6.992274377802328e-07, "loss": 3.5102, "step": 945 }, { "epoch": 0.7568, "grad_norm": 3.7498528957366943, "learning_rate": 6.94874777438265e-07, "loss": 1.2506, "step": 946 }, { "epoch": 0.7576, "grad_norm": 1.2717052698135376, "learning_rate": 6.905335192482734e-07, "loss": 3.5799, "step": 947 }, { "epoch": 0.7584, "grad_norm": 4.157364368438721, "learning_rate": 6.862036906320055e-07, "loss": 1.3018, "step": 948 }, { "epoch": 0.7592, "grad_norm": 1.7433124780654907, "learning_rate": 6.818853189390104e-07, "loss": 3.4984, "step": 949 }, { "epoch": 0.76, "grad_norm": 4.441183567047119, "learning_rate": 6.775784314464717e-07, "loss": 1.4515, "step": 950 }, { "epoch": 0.7608, "grad_norm": 1.48224675655365, "learning_rate": 6.732830553590305e-07, "loss": 3.5688, "step": 951 }, { "epoch": 0.7616, "grad_norm": 3.9499704837799072, "learning_rate": 6.689992178086174e-07, "loss": 1.2271, "step": 952 }, { "epoch": 0.7624, "grad_norm": 1.458235263824463, "learning_rate": 6.647269458542793e-07, "loss": 3.5244, "step": 953 }, { "epoch": 0.7632, "grad_norm": 3.810727596282959, "learning_rate": 6.604662664820063e-07, "loss": 1.2276, "step": 954 }, { "epoch": 0.764, "grad_norm": 1.6759514808654785, "learning_rate": 6.562172066045655e-07, "loss": 3.4945, "step": 955 }, { "epoch": 0.7648, "grad_norm": 4.024814128875732, "learning_rate": 6.519797930613289e-07, "loss": 1.3065, "step": 956 }, { "epoch": 0.7656, "grad_norm": 1.238553524017334, "learning_rate": 6.477540526181036e-07, "loss": 3.5006, "step": 957 }, { "epoch": 0.7664, "grad_norm": 3.444575786590576, "learning_rate": 6.435400119669618e-07, "loss": 1.3996, "step": 958 }, { "epoch": 0.7672, "grad_norm": 1.3021897077560425, "learning_rate": 6.393376977260754e-07, "loss": 3.5961, "step": 959 }, { "epoch": 0.768, "grad_norm": 4.322812080383301, "learning_rate": 6.351471364395448e-07, "loss": 1.5874, "step": 960 }, { "epoch": 0.7688, "grad_norm": 1.3130619525909424, "learning_rate": 6.309683545772327e-07, "loss": 3.5893, "step": 961 }, { "epoch": 0.7696, "grad_norm": 4.154742240905762, "learning_rate": 6.268013785345969e-07, "loss": 1.5529, "step": 962 }, { "epoch": 0.7704, "grad_norm": 1.2372699975967407, "learning_rate": 6.226462346325221e-07, "loss": 3.5887, "step": 963 }, { "epoch": 0.7712, "grad_norm": 3.7366716861724854, "learning_rate": 6.185029491171554e-07, "loss": 1.3078, "step": 964 }, { "epoch": 0.772, "grad_norm": 1.2591793537139893, "learning_rate": 6.143715481597404e-07, "loss": 3.5405, "step": 965 }, { "epoch": 0.7728, "grad_norm": 3.966529369354248, "learning_rate": 6.102520578564508e-07, "loss": 1.2979, "step": 966 }, { "epoch": 0.7736, "grad_norm": 1.7405962944030762, "learning_rate": 6.061445042282271e-07, "loss": 3.4681, "step": 967 }, { "epoch": 0.7744, "grad_norm": 4.989678859710693, "learning_rate": 6.02048913220609e-07, "loss": 1.6273, "step": 968 }, { "epoch": 0.7752, "grad_norm": 1.1819043159484863, "learning_rate": 5.979653107035754e-07, "loss": 3.5553, "step": 969 }, { "epoch": 0.776, "grad_norm": 4.24968957901001, "learning_rate": 5.9389372247138e-07, "loss": 1.7848, "step": 970 }, { "epoch": 0.7768, "grad_norm": 1.146349549293518, "learning_rate": 5.898341742423866e-07, "loss": 3.5557, "step": 971 }, { "epoch": 0.7776, "grad_norm": 3.359968423843384, "learning_rate": 5.857866916589089e-07, "loss": 1.1097, "step": 972 }, { "epoch": 0.7784, "grad_norm": 1.3294552564620972, "learning_rate": 5.817513002870451e-07, "loss": 3.5291, "step": 973 }, { "epoch": 0.7792, "grad_norm": 3.7747585773468018, "learning_rate": 5.777280256165218e-07, "loss": 1.1422, "step": 974 }, { "epoch": 0.78, "grad_norm": 1.3020869493484497, "learning_rate": 5.737168930605272e-07, "loss": 3.5797, "step": 975 }, { "epoch": 0.7808, "grad_norm": 4.284913063049316, "learning_rate": 5.697179279555551e-07, "loss": 1.2182, "step": 976 }, { "epoch": 0.7816, "grad_norm": 1.17784583568573, "learning_rate": 5.657311555612433e-07, "loss": 3.5849, "step": 977 }, { "epoch": 0.7824, "grad_norm": 3.8503072261810303, "learning_rate": 5.617566010602113e-07, "loss": 1.1606, "step": 978 }, { "epoch": 0.7832, "grad_norm": 1.4357177019119263, "learning_rate": 5.577942895579064e-07, "loss": 3.4606, "step": 979 }, { "epoch": 0.784, "grad_norm": 4.020089626312256, "learning_rate": 5.538442460824417e-07, "loss": 1.2557, "step": 980 }, { "epoch": 0.7848, "grad_norm": 1.3439040184020996, "learning_rate": 5.499064955844383e-07, "loss": 3.5545, "step": 981 }, { "epoch": 0.7856, "grad_norm": 3.5121538639068604, "learning_rate": 5.459810629368692e-07, "loss": 1.1383, "step": 982 }, { "epoch": 0.7864, "grad_norm": 1.4466603994369507, "learning_rate": 5.420679729348993e-07, "loss": 3.4426, "step": 983 }, { "epoch": 0.7872, "grad_norm": 4.1092047691345215, "learning_rate": 5.381672502957324e-07, "loss": 1.3047, "step": 984 }, { "epoch": 0.788, "grad_norm": 1.4652632474899292, "learning_rate": 5.342789196584527e-07, "loss": 3.4522, "step": 985 }, { "epoch": 0.7888, "grad_norm": 4.341894626617432, "learning_rate": 5.304030055838704e-07, "loss": 1.5886, "step": 986 }, { "epoch": 0.7896, "grad_norm": 1.5312821865081787, "learning_rate": 5.26539532554364e-07, "loss": 3.4746, "step": 987 }, { "epoch": 0.7904, "grad_norm": 3.956395149230957, "learning_rate": 5.226885249737292e-07, "loss": 1.3278, "step": 988 }, { "epoch": 0.7912, "grad_norm": 1.5505242347717285, "learning_rate": 5.188500071670235e-07, "loss": 3.4367, "step": 989 }, { "epoch": 0.792, "grad_norm": 3.910429000854492, "learning_rate": 5.150240033804116e-07, "loss": 1.0932, "step": 990 }, { "epoch": 0.7928, "grad_norm": 1.518563985824585, "learning_rate": 5.112105377810128e-07, "loss": 3.412, "step": 991 }, { "epoch": 0.7936, "grad_norm": 3.3202965259552, "learning_rate": 5.074096344567475e-07, "loss": 1.1174, "step": 992 }, { "epoch": 0.7944, "grad_norm": 1.5806505680084229, "learning_rate": 5.036213174161877e-07, "loss": 3.47, "step": 993 }, { "epoch": 0.7952, "grad_norm": 6.9575324058532715, "learning_rate": 4.998456105884025e-07, "loss": 1.5321, "step": 994 }, { "epoch": 0.796, "grad_norm": 1.1276708841323853, "learning_rate": 4.960825378228082e-07, "loss": 3.6015, "step": 995 }, { "epoch": 0.7968, "grad_norm": 3.954547166824341, "learning_rate": 4.923321228890184e-07, "loss": 1.1861, "step": 996 }, { "epoch": 0.7976, "grad_norm": 1.08054780960083, "learning_rate": 4.885943894766909e-07, "loss": 3.5029, "step": 997 }, { "epoch": 0.7984, "grad_norm": 3.6978795528411865, "learning_rate": 4.848693611953825e-07, "loss": 1.3936, "step": 998 }, { "epoch": 0.7992, "grad_norm": 1.0338634252548218, "learning_rate": 4.811570615743952e-07, "loss": 3.6014, "step": 999 }, { "epoch": 0.8, "grad_norm": 4.1188507080078125, "learning_rate": 4.774575140626317e-07, "loss": 1.2529, "step": 1000 }, { "epoch": 0.8008, "grad_norm": 1.9042516946792603, "learning_rate": 4.7377074202844514e-07, "loss": 3.4267, "step": 1001 }, { "epoch": 0.8016, "grad_norm": 4.330513954162598, "learning_rate": 4.700967687594901e-07, "loss": 1.369, "step": 1002 }, { "epoch": 0.8024, "grad_norm": 1.0320863723754883, "learning_rate": 4.664356174625795e-07, "loss": 3.5636, "step": 1003 }, { "epoch": 0.8032, "grad_norm": 4.5047287940979, "learning_rate": 4.6278731126353447e-07, "loss": 1.3017, "step": 1004 }, { "epoch": 0.804, "grad_norm": 1.59553062915802, "learning_rate": 4.591518732070402e-07, "loss": 3.5466, "step": 1005 }, { "epoch": 0.8048, "grad_norm": 3.6305763721466064, "learning_rate": 4.555293262564994e-07, "loss": 1.3101, "step": 1006 }, { "epoch": 0.8056, "grad_norm": 1.155205488204956, "learning_rate": 4.5191969329388627e-07, "loss": 3.5494, "step": 1007 }, { "epoch": 0.8064, "grad_norm": 4.001699924468994, "learning_rate": 4.483229971196054e-07, "loss": 1.1268, "step": 1008 }, { "epoch": 0.8072, "grad_norm": 1.1981041431427002, "learning_rate": 4.447392604523443e-07, "loss": 3.5732, "step": 1009 }, { "epoch": 0.808, "grad_norm": 3.6024370193481445, "learning_rate": 4.411685059289314e-07, "loss": 1.1444, "step": 1010 }, { "epoch": 0.8088, "grad_norm": 1.3383228778839111, "learning_rate": 4.376107561041937e-07, "loss": 3.5367, "step": 1011 }, { "epoch": 0.8096, "grad_norm": 3.9421496391296387, "learning_rate": 4.340660334508115e-07, "loss": 1.3883, "step": 1012 }, { "epoch": 0.8104, "grad_norm": 1.0924482345581055, "learning_rate": 4.305343603591802e-07, "loss": 3.5681, "step": 1013 }, { "epoch": 0.8112, "grad_norm": 3.4752144813537598, "learning_rate": 4.2701575913726644e-07, "loss": 1.059, "step": 1014 }, { "epoch": 0.812, "grad_norm": 1.952444314956665, "learning_rate": 4.235102520104681e-07, "loss": 3.5588, "step": 1015 }, { "epoch": 0.8128, "grad_norm": 4.0423688888549805, "learning_rate": 4.200178611214736e-07, "loss": 1.1042, "step": 1016 }, { "epoch": 0.8136, "grad_norm": 1.218482494354248, "learning_rate": 4.165386085301212e-07, "loss": 3.5486, "step": 1017 }, { "epoch": 0.8144, "grad_norm": 4.175278663635254, "learning_rate": 4.1307251621326124e-07, "loss": 1.4889, "step": 1018 }, { "epoch": 0.8152, "grad_norm": 2.6647427082061768, "learning_rate": 4.096196060646168e-07, "loss": 3.5716, "step": 1019 }, { "epoch": 0.816, "grad_norm": 4.009509563446045, "learning_rate": 4.061798998946459e-07, "loss": 1.2765, "step": 1020 }, { "epoch": 0.8168, "grad_norm": 1.1483063697814941, "learning_rate": 4.0275341943040057e-07, "loss": 3.6826, "step": 1021 }, { "epoch": 0.8176, "grad_norm": 3.944807291030884, "learning_rate": 3.9934018631539506e-07, "loss": 1.2861, "step": 1022 }, { "epoch": 0.8184, "grad_norm": 1.6391054391860962, "learning_rate": 3.9594022210946355e-07, "loss": 3.3965, "step": 1023 }, { "epoch": 0.8192, "grad_norm": 3.9761102199554443, "learning_rate": 3.925535482886286e-07, "loss": 1.2771, "step": 1024 }, { "epoch": 0.82, "grad_norm": 1.8166158199310303, "learning_rate": 3.891801862449629e-07, "loss": 3.481, "step": 1025 }, { "epoch": 0.8208, "grad_norm": 3.909714460372925, "learning_rate": 3.8582015728645366e-07, "loss": 1.3296, "step": 1026 }, { "epoch": 0.8216, "grad_norm": 1.1448289155960083, "learning_rate": 3.8247348263687035e-07, "loss": 3.5438, "step": 1027 }, { "epoch": 0.8224, "grad_norm": 3.7021570205688477, "learning_rate": 3.7914018343562896e-07, "loss": 1.3568, "step": 1028 }, { "epoch": 0.8232, "grad_norm": 1.1746755838394165, "learning_rate": 3.75820280737659e-07, "loss": 3.631, "step": 1029 }, { "epoch": 0.824, "grad_norm": 4.372186660766602, "learning_rate": 3.725137955132707e-07, "loss": 1.5514, "step": 1030 }, { "epoch": 0.8248, "grad_norm": 1.2693135738372803, "learning_rate": 3.6922074864802095e-07, "loss": 3.6151, "step": 1031 }, { "epoch": 0.8256, "grad_norm": 4.060328483581543, "learning_rate": 3.659411609425834e-07, "loss": 1.2585, "step": 1032 }, { "epoch": 0.8264, "grad_norm": 1.1194394826889038, "learning_rate": 3.626750531126169e-07, "loss": 3.5576, "step": 1033 }, { "epoch": 0.8272, "grad_norm": 4.196378707885742, "learning_rate": 3.594224457886336e-07, "loss": 1.1795, "step": 1034 }, { "epoch": 0.828, "grad_norm": 1.4582164287567139, "learning_rate": 3.561833595158698e-07, "loss": 3.4901, "step": 1035 }, { "epoch": 0.8288, "grad_norm": 3.783414602279663, "learning_rate": 3.529578147541532e-07, "loss": 1.1758, "step": 1036 }, { "epoch": 0.8296, "grad_norm": 1.4051135778427124, "learning_rate": 3.4974583187777853e-07, "loss": 3.4493, "step": 1037 }, { "epoch": 0.8304, "grad_norm": 3.584596633911133, "learning_rate": 3.4654743117537525e-07, "loss": 1.2126, "step": 1038 }, { "epoch": 0.8312, "grad_norm": 1.3267326354980469, "learning_rate": 3.433626328497805e-07, "loss": 3.6435, "step": 1039 }, { "epoch": 0.832, "grad_norm": 4.257800579071045, "learning_rate": 3.4019145701791186e-07, "loss": 1.4825, "step": 1040 }, { "epoch": 0.8328, "grad_norm": 1.1711785793304443, "learning_rate": 3.370339237106385e-07, "loss": 3.5212, "step": 1041 }, { "epoch": 0.8336, "grad_norm": 4.394068717956543, "learning_rate": 3.3389005287265713e-07, "loss": 1.1283, "step": 1042 }, { "epoch": 0.8344, "grad_norm": 1.297494888305664, "learning_rate": 3.3075986436236494e-07, "loss": 3.5152, "step": 1043 }, { "epoch": 0.8352, "grad_norm": 3.9251017570495605, "learning_rate": 3.2764337795173433e-07, "loss": 1.2356, "step": 1044 }, { "epoch": 0.836, "grad_norm": 1.0191597938537598, "learning_rate": 3.245406133261858e-07, "loss": 3.6092, "step": 1045 }, { "epoch": 0.8368, "grad_norm": 4.02804708480835, "learning_rate": 3.214515900844681e-07, "loss": 1.2928, "step": 1046 }, { "epoch": 0.8376, "grad_norm": 1.1345746517181396, "learning_rate": 3.18376327738531e-07, "loss": 3.5869, "step": 1047 }, { "epoch": 0.8384, "grad_norm": 4.080638408660889, "learning_rate": 3.15314845713402e-07, "loss": 1.3423, "step": 1048 }, { "epoch": 0.8392, "grad_norm": 1.3001468181610107, "learning_rate": 3.122671633470664e-07, "loss": 3.4875, "step": 1049 }, { "epoch": 0.84, "grad_norm": 3.684081792831421, "learning_rate": 3.092332998903416e-07, "loss": 1.3089, "step": 1050 }, { "epoch": 0.8408, "grad_norm": 1.3111592531204224, "learning_rate": 3.0621327450675806e-07, "loss": 3.5502, "step": 1051 }, { "epoch": 0.8416, "grad_norm": 4.330699443817139, "learning_rate": 3.0320710627243815e-07, "loss": 1.4276, "step": 1052 }, { "epoch": 0.8424, "grad_norm": 1.4837126731872559, "learning_rate": 3.002148141759739e-07, "loss": 3.5433, "step": 1053 }, { "epoch": 0.8432, "grad_norm": 3.8255903720855713, "learning_rate": 2.9723641711830896e-07, "loss": 1.3503, "step": 1054 }, { "epoch": 0.844, "grad_norm": 1.0839451551437378, "learning_rate": 2.942719339126171e-07, "loss": 3.659, "step": 1055 }, { "epoch": 0.8448, "grad_norm": 4.035921573638916, "learning_rate": 2.913213832841857e-07, "loss": 1.3085, "step": 1056 }, { "epoch": 0.8456, "grad_norm": 1.2930865287780762, "learning_rate": 2.8838478387029605e-07, "loss": 3.4512, "step": 1057 }, { "epoch": 0.8464, "grad_norm": 3.7543997764587402, "learning_rate": 2.854621542201064e-07, "loss": 1.1318, "step": 1058 }, { "epoch": 0.8472, "grad_norm": 1.1573505401611328, "learning_rate": 2.8255351279453446e-07, "loss": 3.5605, "step": 1059 }, { "epoch": 0.848, "grad_norm": 3.8682708740234375, "learning_rate": 2.796588779661388e-07, "loss": 1.3628, "step": 1060 }, { "epoch": 0.8488, "grad_norm": 2.039510726928711, "learning_rate": 2.767782680190073e-07, "loss": 3.5517, "step": 1061 }, { "epoch": 0.8496, "grad_norm": 3.9016358852386475, "learning_rate": 2.739117011486378e-07, "loss": 1.1586, "step": 1062 }, { "epoch": 0.8504, "grad_norm": 1.1205612421035767, "learning_rate": 2.710591954618247e-07, "loss": 3.5143, "step": 1063 }, { "epoch": 0.8512, "grad_norm": 4.346203327178955, "learning_rate": 2.6822076897654453e-07, "loss": 1.3599, "step": 1064 }, { "epoch": 0.852, "grad_norm": 1.4595547914505005, "learning_rate": 2.653964396218406e-07, "loss": 3.5174, "step": 1065 }, { "epoch": 0.8528, "grad_norm": 3.893127918243408, "learning_rate": 2.625862252377129e-07, "loss": 1.1346, "step": 1066 }, { "epoch": 0.8536, "grad_norm": 1.3180551528930664, "learning_rate": 2.597901435750025e-07, "loss": 3.4543, "step": 1067 }, { "epoch": 0.8544, "grad_norm": 3.9734368324279785, "learning_rate": 2.5700821229528164e-07, "loss": 1.2548, "step": 1068 }, { "epoch": 0.8552, "grad_norm": 1.505300521850586, "learning_rate": 2.5424044897073895e-07, "loss": 3.5335, "step": 1069 }, { "epoch": 0.856, "grad_norm": 3.921257972717285, "learning_rate": 2.514868710840723e-07, "loss": 1.5256, "step": 1070 }, { "epoch": 0.8568, "grad_norm": 1.551336407661438, "learning_rate": 2.48747496028377e-07, "loss": 3.3823, "step": 1071 }, { "epoch": 0.8576, "grad_norm": 3.929121494293213, "learning_rate": 2.460223411070337e-07, "loss": 1.2628, "step": 1072 }, { "epoch": 0.8584, "grad_norm": 1.1952719688415527, "learning_rate": 2.4331142353360206e-07, "loss": 3.4138, "step": 1073 }, { "epoch": 0.8592, "grad_norm": 3.588552713394165, "learning_rate": 2.406147604317119e-07, "loss": 1.2508, "step": 1074 }, { "epoch": 0.86, "grad_norm": 1.0674008131027222, "learning_rate": 2.3793236883495164e-07, "loss": 3.5885, "step": 1075 }, { "epoch": 0.8608, "grad_norm": 3.9291443824768066, "learning_rate": 2.3526426568676485e-07, "loss": 1.5289, "step": 1076 }, { "epoch": 0.8616, "grad_norm": 1.1263163089752197, "learning_rate": 2.3261046784034154e-07, "loss": 3.5685, "step": 1077 }, { "epoch": 0.8624, "grad_norm": 3.7272915840148926, "learning_rate": 2.299709920585108e-07, "loss": 1.0725, "step": 1078 }, { "epoch": 0.8632, "grad_norm": 1.9841383695602417, "learning_rate": 2.2734585501363676e-07, "loss": 3.4305, "step": 1079 }, { "epoch": 0.864, "grad_norm": 3.725369691848755, "learning_rate": 2.2473507328751086e-07, "loss": 1.2885, "step": 1080 }, { "epoch": 0.8648, "grad_norm": 1.2514499425888062, "learning_rate": 2.2213866337125022e-07, "loss": 3.6041, "step": 1081 }, { "epoch": 0.8656, "grad_norm": 3.798311233520508, "learning_rate": 2.1955664166519036e-07, "loss": 1.3569, "step": 1082 }, { "epoch": 0.8664, "grad_norm": 1.05547034740448, "learning_rate": 2.1698902447878478e-07, "loss": 3.6443, "step": 1083 }, { "epoch": 0.8672, "grad_norm": 4.112440586090088, "learning_rate": 2.1443582803049757e-07, "loss": 1.3431, "step": 1084 }, { "epoch": 0.868, "grad_norm": 1.1724605560302734, "learning_rate": 2.118970684477062e-07, "loss": 3.5914, "step": 1085 }, { "epoch": 0.8688, "grad_norm": 3.977243423461914, "learning_rate": 2.0937276176659553e-07, "loss": 1.4519, "step": 1086 }, { "epoch": 0.8696, "grad_norm": 1.413366436958313, "learning_rate": 2.068629239320588e-07, "loss": 3.5239, "step": 1087 }, { "epoch": 0.8704, "grad_norm": 3.696100950241089, "learning_rate": 2.043675707975959e-07, "loss": 1.5434, "step": 1088 }, { "epoch": 0.8712, "grad_norm": 1.1970295906066895, "learning_rate": 2.0188671812521293e-07, "loss": 3.4977, "step": 1089 }, { "epoch": 0.872, "grad_norm": 4.029970169067383, "learning_rate": 1.9942038158532407e-07, "loss": 1.3306, "step": 1090 }, { "epoch": 0.8728, "grad_norm": 1.2960518598556519, "learning_rate": 1.9696857675665122e-07, "loss": 3.5162, "step": 1091 }, { "epoch": 0.8736, "grad_norm": 3.725883960723877, "learning_rate": 1.9453131912612694e-07, "loss": 1.4022, "step": 1092 }, { "epoch": 0.8744, "grad_norm": 1.3842031955718994, "learning_rate": 1.9210862408879373e-07, "loss": 3.5151, "step": 1093 }, { "epoch": 0.8752, "grad_norm": 3.8603460788726807, "learning_rate": 1.8970050694771064e-07, "loss": 1.2135, "step": 1094 }, { "epoch": 0.876, "grad_norm": 1.2414811849594116, "learning_rate": 1.8730698291385518e-07, "loss": 3.5374, "step": 1095 }, { "epoch": 0.8768, "grad_norm": 4.625464916229248, "learning_rate": 1.8492806710602495e-07, "loss": 1.3096, "step": 1096 }, { "epoch": 0.8776, "grad_norm": 1.5665608644485474, "learning_rate": 1.8256377455074526e-07, "loss": 3.4397, "step": 1097 }, { "epoch": 0.8784, "grad_norm": 3.919268846511841, "learning_rate": 1.802141201821736e-07, "loss": 1.376, "step": 1098 }, { "epoch": 0.8792, "grad_norm": 1.4185221195220947, "learning_rate": 1.7787911884200314e-07, "loss": 3.6158, "step": 1099 }, { "epoch": 0.88, "grad_norm": 4.121542930603027, "learning_rate": 1.7555878527937164e-07, "loss": 1.3549, "step": 1100 }, { "epoch": 0.8808, "grad_norm": 1.706099033355713, "learning_rate": 1.7325313415076705e-07, "loss": 3.5284, "step": 1101 }, { "epoch": 0.8816, "grad_norm": 4.369479656219482, "learning_rate": 1.7096218001993514e-07, "loss": 1.5352, "step": 1102 }, { "epoch": 0.8824, "grad_norm": 1.2528761625289917, "learning_rate": 1.686859373577876e-07, "loss": 3.6018, "step": 1103 }, { "epoch": 0.8832, "grad_norm": 3.7873117923736572, "learning_rate": 1.6642442054230935e-07, "loss": 1.1694, "step": 1104 }, { "epoch": 0.884, "grad_norm": 1.2879388332366943, "learning_rate": 1.6417764385846996e-07, "loss": 3.4757, "step": 1105 }, { "epoch": 0.8848, "grad_norm": 3.334120988845825, "learning_rate": 1.6194562149813241e-07, "loss": 0.8637, "step": 1106 }, { "epoch": 0.8856, "grad_norm": 1.3120352029800415, "learning_rate": 1.5972836755996286e-07, "loss": 3.4815, "step": 1107 }, { "epoch": 0.8864, "grad_norm": 3.6376547813415527, "learning_rate": 1.5752589604934255e-07, "loss": 1.2615, "step": 1108 }, { "epoch": 0.8872, "grad_norm": 1.1396851539611816, "learning_rate": 1.5533822087827805e-07, "loss": 3.5342, "step": 1109 }, { "epoch": 0.888, "grad_norm": 3.7635209560394287, "learning_rate": 1.5316535586531483e-07, "loss": 1.1877, "step": 1110 }, { "epoch": 0.8888, "grad_norm": 1.371699571609497, "learning_rate": 1.5100731473544932e-07, "loss": 3.5637, "step": 1111 }, { "epoch": 0.8896, "grad_norm": 3.8787107467651367, "learning_rate": 1.4886411112004258e-07, "loss": 1.3821, "step": 1112 }, { "epoch": 0.8904, "grad_norm": 1.8077179193496704, "learning_rate": 1.4673575855673278e-07, "loss": 3.4341, "step": 1113 }, { "epoch": 0.8912, "grad_norm": 4.23999547958374, "learning_rate": 1.4462227048935185e-07, "loss": 1.5234, "step": 1114 }, { "epoch": 0.892, "grad_norm": 1.4485225677490234, "learning_rate": 1.425236602678387e-07, "loss": 3.4551, "step": 1115 }, { "epoch": 0.8928, "grad_norm": 3.488999128341675, "learning_rate": 1.4043994114815663e-07, "loss": 1.1846, "step": 1116 }, { "epoch": 0.8936, "grad_norm": 1.237518072128296, "learning_rate": 1.38371126292208e-07, "loss": 3.5263, "step": 1117 }, { "epoch": 0.8944, "grad_norm": 3.7093005180358887, "learning_rate": 1.3631722876775137e-07, "loss": 1.3514, "step": 1118 }, { "epoch": 0.8952, "grad_norm": 1.2599142789840698, "learning_rate": 1.342782615483204e-07, "loss": 3.528, "step": 1119 }, { "epoch": 0.896, "grad_norm": 3.7309329509735107, "learning_rate": 1.3225423751313942e-07, "loss": 1.5911, "step": 1120 }, { "epoch": 0.8968, "grad_norm": 1.202618956565857, "learning_rate": 1.3024516944704495e-07, "loss": 3.4832, "step": 1121 }, { "epoch": 0.8976, "grad_norm": 4.492614269256592, "learning_rate": 1.2825107004040272e-07, "loss": 1.2915, "step": 1122 }, { "epoch": 0.8984, "grad_norm": 1.1479798555374146, "learning_rate": 1.262719518890279e-07, "loss": 3.5571, "step": 1123 }, { "epoch": 0.8992, "grad_norm": 4.050600528717041, "learning_rate": 1.2430782749410676e-07, "loss": 1.388, "step": 1124 }, { "epoch": 0.9, "grad_norm": 1.292321801185608, "learning_rate": 1.223587092621162e-07, "loss": 3.5855, "step": 1125 }, { "epoch": 0.9008, "grad_norm": 4.229612350463867, "learning_rate": 1.204246095047465e-07, "loss": 1.3577, "step": 1126 }, { "epoch": 0.9016, "grad_norm": 1.274814248085022, "learning_rate": 1.1850554043882329e-07, "loss": 3.5057, "step": 1127 }, { "epoch": 0.9024, "grad_norm": 3.170250654220581, "learning_rate": 1.1660151418622923e-07, "loss": 0.8845, "step": 1128 }, { "epoch": 0.9032, "grad_norm": 1.3429255485534668, "learning_rate": 1.1471254277382882e-07, "loss": 3.5239, "step": 1129 }, { "epoch": 0.904, "grad_norm": 3.8732850551605225, "learning_rate": 1.1283863813339263e-07, "loss": 1.4954, "step": 1130 }, { "epoch": 0.9048, "grad_norm": 1.0475130081176758, "learning_rate": 1.1097981210152042e-07, "loss": 3.5743, "step": 1131 }, { "epoch": 0.9056, "grad_norm": 4.163371562957764, "learning_rate": 1.0913607641956842e-07, "loss": 1.3211, "step": 1132 }, { "epoch": 0.9064, "grad_norm": 1.1388672590255737, "learning_rate": 1.0730744273357213e-07, "loss": 3.6136, "step": 1133 }, { "epoch": 0.9072, "grad_norm": 3.882986068725586, "learning_rate": 1.0549392259417646e-07, "loss": 1.1432, "step": 1134 }, { "epoch": 0.908, "grad_norm": 1.1615536212921143, "learning_rate": 1.0369552745656014e-07, "loss": 3.6521, "step": 1135 }, { "epoch": 0.9088, "grad_norm": 3.6023221015930176, "learning_rate": 1.0191226868036419e-07, "loss": 1.3323, "step": 1136 }, { "epoch": 0.9096, "grad_norm": 1.2144973278045654, "learning_rate": 1.0014415752962081e-07, "loss": 3.5626, "step": 1137 }, { "epoch": 0.9104, "grad_norm": 3.877840280532837, "learning_rate": 9.839120517267986e-08, "loss": 1.3083, "step": 1138 }, { "epoch": 0.9112, "grad_norm": 1.4756907224655151, "learning_rate": 9.665342268214167e-08, "loss": 3.4514, "step": 1139 }, { "epoch": 0.912, "grad_norm": 4.363102436065674, "learning_rate": 9.493082103478519e-08, "loss": 1.1601, "step": 1140 }, { "epoch": 0.9128, "grad_norm": 1.2879115343093872, "learning_rate": 9.322341111149852e-08, "loss": 3.4346, "step": 1141 }, { "epoch": 0.9136, "grad_norm": 4.510580539703369, "learning_rate": 9.153120369721047e-08, "loss": 1.3901, "step": 1142 }, { "epoch": 0.9144, "grad_norm": 1.3555859327316284, "learning_rate": 8.985420948082329e-08, "loss": 3.4953, "step": 1143 }, { "epoch": 0.9152, "grad_norm": 4.071751594543457, "learning_rate": 8.819243905514308e-08, "loss": 1.2933, "step": 1144 }, { "epoch": 0.916, "grad_norm": 1.0624727010726929, "learning_rate": 8.654590291681531e-08, "loss": 3.6109, "step": 1145 }, { "epoch": 0.9168, "grad_norm": 4.541050910949707, "learning_rate": 8.491461146625774e-08, "loss": 1.5013, "step": 1146 }, { "epoch": 0.9176, "grad_norm": 1.036971926689148, "learning_rate": 8.329857500759291e-08, "loss": 3.5826, "step": 1147 }, { "epoch": 0.9184, "grad_norm": 4.1964287757873535, "learning_rate": 8.169780374858577e-08, "loss": 1.4736, "step": 1148 }, { "epoch": 0.9192, "grad_norm": 1.3899742364883423, "learning_rate": 8.011230780057749e-08, "loss": 3.4604, "step": 1149 }, { "epoch": 0.92, "grad_norm": 3.7320985794067383, "learning_rate": 7.854209717842231e-08, "loss": 1.1507, "step": 1150 }, { "epoch": 0.9208, "grad_norm": 1.4710829257965088, "learning_rate": 7.698718180042392e-08, "loss": 3.5542, "step": 1151 }, { "epoch": 0.9216, "grad_norm": 3.88554048538208, "learning_rate": 7.544757148827297e-08, "loss": 1.0699, "step": 1152 }, { "epoch": 0.9224, "grad_norm": 1.352371096611023, "learning_rate": 7.392327596698474e-08, "loss": 3.5077, "step": 1153 }, { "epoch": 0.9232, "grad_norm": 3.7906062602996826, "learning_rate": 7.24143048648382e-08, "loss": 1.3162, "step": 1154 }, { "epoch": 0.924, "grad_norm": 1.3275525569915771, "learning_rate": 7.092066771331507e-08, "loss": 3.516, "step": 1155 }, { "epoch": 0.9248, "grad_norm": 3.684339761734009, "learning_rate": 6.944237394703985e-08, "loss": 1.0855, "step": 1156 }, { "epoch": 0.9256, "grad_norm": 1.6030592918395996, "learning_rate": 6.797943290371839e-08, "loss": 3.3999, "step": 1157 }, { "epoch": 0.9264, "grad_norm": 3.9943041801452637, "learning_rate": 6.653185382408195e-08, "loss": 1.3748, "step": 1158 }, { "epoch": 0.9272, "grad_norm": 2.058311939239502, "learning_rate": 6.509964585182688e-08, "loss": 3.4637, "step": 1159 }, { "epoch": 0.928, "grad_norm": 4.087345123291016, "learning_rate": 6.368281803355692e-08, "loss": 1.3247, "step": 1160 }, { "epoch": 0.9288, "grad_norm": 1.4231693744659424, "learning_rate": 6.228137931872713e-08, "loss": 3.5084, "step": 1161 }, { "epoch": 0.9296, "grad_norm": 3.276982545852661, "learning_rate": 6.089533855958508e-08, "loss": 1.0859, "step": 1162 }, { "epoch": 0.9304, "grad_norm": 0.9627519249916077, "learning_rate": 5.9524704511118305e-08, "loss": 3.6085, "step": 1163 }, { "epoch": 0.9312, "grad_norm": 4.000705242156982, "learning_rate": 5.8169485830996134e-08, "loss": 1.2021, "step": 1164 }, { "epoch": 0.932, "grad_norm": 1.0772417783737183, "learning_rate": 5.68296910795163e-08, "loss": 3.5649, "step": 1165 }, { "epoch": 0.9328, "grad_norm": 4.611580848693848, "learning_rate": 5.550532871955061e-08, "loss": 1.2716, "step": 1166 }, { "epoch": 0.9336, "grad_norm": 1.6169544458389282, "learning_rate": 5.419640711649188e-08, "loss": 3.4921, "step": 1167 }, { "epoch": 0.9344, "grad_norm": 3.6111767292022705, "learning_rate": 5.290293453819956e-08, "loss": 1.1447, "step": 1168 }, { "epoch": 0.9352, "grad_norm": 1.527208924293518, "learning_rate": 5.162491915495005e-08, "loss": 3.5345, "step": 1169 }, { "epoch": 0.936, "grad_norm": 3.3724429607391357, "learning_rate": 5.036236903938285e-08, "loss": 1.1051, "step": 1170 }, { "epoch": 0.9368, "grad_norm": 1.2857189178466797, "learning_rate": 4.911529216645089e-08, "loss": 3.5927, "step": 1171 }, { "epoch": 0.9376, "grad_norm": 3.823451519012451, "learning_rate": 4.788369641336943e-08, "loss": 1.2766, "step": 1172 }, { "epoch": 0.9384, "grad_norm": 1.3951259851455688, "learning_rate": 4.6667589559566405e-08, "loss": 3.5188, "step": 1173 }, { "epoch": 0.9392, "grad_norm": 4.200174331665039, "learning_rate": 4.546697928663357e-08, "loss": 1.4409, "step": 1174 }, { "epoch": 0.94, "grad_norm": 1.4412181377410889, "learning_rate": 4.428187317827848e-08, "loss": 3.536, "step": 1175 }, { "epoch": 0.9408, "grad_norm": 4.055942058563232, "learning_rate": 4.311227872027479e-08, "loss": 1.3862, "step": 1176 }, { "epoch": 0.9416, "grad_norm": 1.1776350736618042, "learning_rate": 4.1958203300417056e-08, "loss": 3.6454, "step": 1177 }, { "epoch": 0.9424, "grad_norm": 3.8492658138275146, "learning_rate": 4.0819654208472947e-08, "loss": 1.2609, "step": 1178 }, { "epoch": 0.9432, "grad_norm": 1.2920982837677002, "learning_rate": 3.969663863613721e-08, "loss": 3.4813, "step": 1179 }, { "epoch": 0.944, "grad_norm": 3.726270914077759, "learning_rate": 3.8589163676986674e-08, "loss": 1.3, "step": 1180 }, { "epoch": 0.9448, "grad_norm": 1.0104079246520996, "learning_rate": 3.749723632643476e-08, "loss": 3.6193, "step": 1181 }, { "epoch": 0.9456, "grad_norm": 3.768679618835449, "learning_rate": 3.642086348168844e-08, "loss": 1.2007, "step": 1182 }, { "epoch": 0.9464, "grad_norm": 1.5914446115493774, "learning_rate": 3.536005194170328e-08, "loss": 3.4693, "step": 1183 }, { "epoch": 0.9472, "grad_norm": 3.930814743041992, "learning_rate": 3.431480840714152e-08, "loss": 1.4124, "step": 1184 }, { "epoch": 0.948, "grad_norm": 1.1689213514328003, "learning_rate": 3.328513948032991e-08, "loss": 3.5226, "step": 1185 }, { "epoch": 0.9488, "grad_norm": 3.568666934967041, "learning_rate": 3.227105166521638e-08, "loss": 1.3847, "step": 1186 }, { "epoch": 0.9496, "grad_norm": 1.2137675285339355, "learning_rate": 3.127255136733093e-08, "loss": 3.5211, "step": 1187 }, { "epoch": 0.9504, "grad_norm": 4.159763336181641, "learning_rate": 3.028964489374453e-08, "loss": 1.3348, "step": 1188 }, { "epoch": 0.9512, "grad_norm": 0.9644594788551331, "learning_rate": 2.9322338453028066e-08, "loss": 3.5866, "step": 1189 }, { "epoch": 0.952, "grad_norm": 3.9226300716400146, "learning_rate": 2.8370638155215125e-08, "loss": 1.4359, "step": 1190 }, { "epoch": 0.9528, "grad_norm": 1.1887046098709106, "learning_rate": 2.7434550011761763e-08, "loss": 3.578, "step": 1191 }, { "epoch": 0.9536, "grad_norm": 3.7943222522735596, "learning_rate": 2.6514079935509586e-08, "loss": 1.2984, "step": 1192 }, { "epoch": 0.9544, "grad_norm": 1.480806589126587, "learning_rate": 2.560923374064772e-08, "loss": 3.4495, "step": 1193 }, { "epoch": 0.9552, "grad_norm": 3.667187213897705, "learning_rate": 2.4720017142676745e-08, "loss": 1.4821, "step": 1194 }, { "epoch": 0.956, "grad_norm": 1.1104971170425415, "learning_rate": 2.3846435758372034e-08, "loss": 3.6191, "step": 1195 }, { "epoch": 0.9568, "grad_norm": 3.9890453815460205, "learning_rate": 2.2988495105748245e-08, "loss": 1.2608, "step": 1196 }, { "epoch": 0.9576, "grad_norm": 1.3386608362197876, "learning_rate": 2.2146200604024614e-08, "loss": 3.5502, "step": 1197 }, { "epoch": 0.9584, "grad_norm": 3.8145041465759277, "learning_rate": 2.131955757359111e-08, "loss": 1.3914, "step": 1198 }, { "epoch": 0.9592, "grad_norm": 1.692157506942749, "learning_rate": 2.050857123597455e-08, "loss": 3.5147, "step": 1199 }, { "epoch": 0.96, "grad_norm": 3.8497886657714844, "learning_rate": 1.9713246713805588e-08, "loss": 1.2747, "step": 1200 }, { "epoch": 0.9608, "grad_norm": 1.7304649353027344, "learning_rate": 1.893358903078568e-08, "loss": 3.4559, "step": 1201 }, { "epoch": 0.9616, "grad_norm": 4.028602123260498, "learning_rate": 1.8169603111656554e-08, "loss": 1.2436, "step": 1202 }, { "epoch": 0.9624, "grad_norm": 1.0460162162780762, "learning_rate": 1.7421293782168837e-08, "loss": 3.6491, "step": 1203 }, { "epoch": 0.9632, "grad_norm": 4.187633514404297, "learning_rate": 1.6688665769050704e-08, "loss": 1.2076, "step": 1204 }, { "epoch": 0.964, "grad_norm": 1.656624674797058, "learning_rate": 1.5971723699979015e-08, "loss": 3.5022, "step": 1205 }, { "epoch": 0.9648, "grad_norm": 4.018679141998291, "learning_rate": 1.5270472103549317e-08, "loss": 1.4379, "step": 1206 }, { "epoch": 0.9656, "grad_norm": 1.5885015726089478, "learning_rate": 1.4584915409248113e-08, "loss": 3.4547, "step": 1207 }, { "epoch": 0.9664, "grad_norm": 3.9813663959503174, "learning_rate": 1.3915057947423705e-08, "loss": 1.3217, "step": 1208 }, { "epoch": 0.9672, "grad_norm": 1.4755148887634277, "learning_rate": 1.3260903949260107e-08, "loss": 3.4995, "step": 1209 }, { "epoch": 0.968, "grad_norm": 3.5924222469329834, "learning_rate": 1.2622457546749567e-08, "loss": 1.3469, "step": 1210 }, { "epoch": 0.9688, "grad_norm": 1.0457367897033691, "learning_rate": 1.1999722772666478e-08, "loss": 3.5185, "step": 1211 }, { "epoch": 0.9696, "grad_norm": 4.9514994621276855, "learning_rate": 1.1392703560542118e-08, "loss": 1.3577, "step": 1212 }, { "epoch": 0.9704, "grad_norm": 1.328444004058838, "learning_rate": 1.0801403744639672e-08, "loss": 3.4504, "step": 1213 }, { "epoch": 0.9712, "grad_norm": 3.700564384460449, "learning_rate": 1.0225827059930082e-08, "loss": 1.2764, "step": 1214 }, { "epoch": 0.972, "grad_norm": 1.7747372388839722, "learning_rate": 9.665977142068738e-09, "loss": 3.4396, "step": 1215 }, { "epoch": 0.9728, "grad_norm": 3.901719331741333, "learning_rate": 9.121857527372157e-09, "loss": 1.4179, "step": 1216 }, { "epoch": 0.9736, "grad_norm": 1.1439679861068726, "learning_rate": 8.59347165279495e-09, "loss": 3.5297, "step": 1217 }, { "epoch": 0.9744, "grad_norm": 4.542992115020752, "learning_rate": 8.080822855909832e-09, "loss": 1.4076, "step": 1218 }, { "epoch": 0.9752, "grad_norm": 1.05239737033844, "learning_rate": 7.583914374885426e-09, "loss": 3.6203, "step": 1219 }, { "epoch": 0.976, "grad_norm": 3.649535655975342, "learning_rate": 7.102749348465166e-09, "loss": 1.2697, "step": 1220 }, { "epoch": 0.9768, "grad_norm": 1.6955548524856567, "learning_rate": 6.6373308159495275e-09, "loss": 3.4582, "step": 1221 }, { "epoch": 0.9776, "grad_norm": 4.211562156677246, "learning_rate": 6.1876617171743865e-09, "loss": 1.3995, "step": 1222 }, { "epoch": 0.9784, "grad_norm": 1.1870956420898438, "learning_rate": 5.753744892494639e-09, "loss": 3.5536, "step": 1223 }, { "epoch": 0.9792, "grad_norm": 3.487827777862549, "learning_rate": 5.335583082764495e-09, "loss": 1.4411, "step": 1224 }, { "epoch": 0.98, "grad_norm": 1.736832857131958, "learning_rate": 4.933178929321103e-09, "loss": 3.5151, "step": 1225 }, { "epoch": 0.9808, "grad_norm": 3.914550304412842, "learning_rate": 4.546534973968175e-09, "loss": 1.2732, "step": 1226 }, { "epoch": 0.9816, "grad_norm": 1.4647449254989624, "learning_rate": 4.175653658958501e-09, "loss": 3.3779, "step": 1227 }, { "epoch": 0.9824, "grad_norm": 4.559305191040039, "learning_rate": 3.820537326980622e-09, "loss": 1.5739, "step": 1228 }, { "epoch": 0.9832, "grad_norm": 1.1620067358016968, "learning_rate": 3.481188221142184e-09, "loss": 3.5552, "step": 1229 }, { "epoch": 0.984, "grad_norm": 3.963010787963867, "learning_rate": 3.1576084849563315e-09, "loss": 1.3199, "step": 1230 }, { "epoch": 0.9848, "grad_norm": 1.101914644241333, "learning_rate": 2.849800162328664e-09, "loss": 3.5772, "step": 1231 }, { "epoch": 0.9856, "grad_norm": 3.9038467407226562, "learning_rate": 2.557765197543638e-09, "loss": 1.2684, "step": 1232 }, { "epoch": 0.9864, "grad_norm": 1.2498347759246826, "learning_rate": 2.2815054352531842e-09, "loss": 3.6124, "step": 1233 }, { "epoch": 0.9872, "grad_norm": 3.7474238872528076, "learning_rate": 2.0210226204639414e-09, "loss": 1.2981, "step": 1234 }, { "epoch": 0.988, "grad_norm": 1.3778389692306519, "learning_rate": 1.7763183985269882e-09, "loss": 3.5426, "step": 1235 }, { "epoch": 0.9888, "grad_norm": 3.6975715160369873, "learning_rate": 1.5473943151270155e-09, "loss": 1.3295, "step": 1236 }, { "epoch": 0.9896, "grad_norm": 1.4429659843444824, "learning_rate": 1.3342518162728913e-09, "loss": 3.6067, "step": 1237 }, { "epoch": 0.9904, "grad_norm": 3.43681263923645, "learning_rate": 1.1368922482887789e-09, "loss": 1.1235, "step": 1238 }, { "epoch": 0.9912, "grad_norm": 1.3926042318344116, "learning_rate": 9.553168578049776e-10, "loss": 3.4841, "step": 1239 }, { "epoch": 0.992, "grad_norm": 3.8875744342803955, "learning_rate": 7.895267917501503e-10, "loss": 1.3565, "step": 1240 }, { "epoch": 0.9928, "grad_norm": 1.6624120473861694, "learning_rate": 6.395230973443856e-10, "loss": 3.4427, "step": 1241 }, { "epoch": 0.9936, "grad_norm": 3.605576753616333, "learning_rate": 5.053067220925356e-10, "loss": 1.1553, "step": 1242 }, { "epoch": 0.9944, "grad_norm": 1.560855507850647, "learning_rate": 3.868785137786657e-10, "loss": 3.4811, "step": 1243 }, { "epoch": 0.9952, "grad_norm": 4.160490989685059, "learning_rate": 2.842392204591149e-10, "loss": 1.2979, "step": 1244 }, { "epoch": 0.996, "grad_norm": 1.5523591041564941, "learning_rate": 1.9738949045972068e-10, "loss": 3.4412, "step": 1245 }, { "epoch": 0.9968, "grad_norm": 4.556288719177246, "learning_rate": 1.2632987237054527e-10, "loss": 1.2008, "step": 1246 }, { "epoch": 0.9976, "grad_norm": 1.2331137657165527, "learning_rate": 7.106081504254514e-11, "loss": 3.4326, "step": 1247 }, { "epoch": 0.9984, "grad_norm": 4.683450222015381, "learning_rate": 3.158266758562789e-11, "loss": 1.5665, "step": 1248 }, { "epoch": 0.9992, "grad_norm": 1.4326642751693726, "learning_rate": 7.89567936476665e-12, "loss": 3.5399, "step": 1249 }, { "epoch": 1.0, "grad_norm": 3.5186572074890137, "learning_rate": 0.0, "loss": 1.4857, "step": 1250 } ], "logging_steps": 1, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.22349105912873e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }